From 2c4571a3cdc48786568971500f2c4fccf57a7469 Mon Sep 17 00:00:00 2001 From: espes Date: Sun, 25 May 2014 20:58:17 +1000 Subject: [PATCH] Initial integration of JayFoxRox's GLSL vertex program translator --- hw/xbox/Makefile.objs | 2 +- hw/xbox/nv2a.c | 497 +++++++++++++++++------------------------- hw/xbox/nv2a_vsh.c | 479 +++++++++++++++++++++++++--------------- hw/xbox/nv2a_vsh.h | 48 ++++ hw/xbox/swizzle.c | 109 +++++++++ hw/xbox/swizzle.h | 34 +++ 6 files changed, 691 insertions(+), 478 deletions(-) create mode 100644 hw/xbox/swizzle.c create mode 100644 hw/xbox/swizzle.h diff --git a/hw/xbox/Makefile.objs b/hw/xbox/Makefile.objs index 72d8b187f0..3478ce5d4b 100644 --- a/hw/xbox/Makefile.objs +++ b/hw/xbox/Makefile.objs @@ -2,7 +2,7 @@ obj-y += xbox.o chihiro.o obj-y += xbox_pci.o acpi_xbox.o obj-y += amd_smbus.o smbus_xbox_smc.o smbus_cx25871.o smbus_adm1032.o obj-y += nvnet.o -obj-y += nv2a.o nv2a_vsh.o nv2a_psh.o +obj-y += nv2a.o nv2a_vsh.o nv2a_psh.o swizzle.o obj-y += mcpx_apu.o mcpx_aci.o obj-y += lpc47m157.o obj-y += xid.o diff --git a/hw/xbox/nv2a.c b/hw/xbox/nv2a.c index eb9720774e..7f70ac5066 100644 --- a/hw/xbox/nv2a.c +++ b/hw/xbox/nv2a.c @@ -28,6 +28,7 @@ #include "qapi/qmp/qstring.h" #include "gl/gloffscreen.h" +#include "hw/xbox/swizzle.h" #include "hw/xbox/u_format_r11g11b10f.h" #include "hw/xbox/nv2a_vsh.h" #include "hw/xbox/nv2a_psh.h" @@ -692,8 +693,7 @@ static const ColorFormatInfo kelvin_color_format_map[66] = { #define NV2A_NUM_SUBCHANNELS 8 #define NV2A_MAX_BATCH_LENGTH 0xFFFF -#define NV2A_VERTEXSHADER_SLOTS 32 /*???*/ -#define NV2A_MAX_VERTEXSHADER_LENGTH 136 +#define NV2A_MAX_TRANSFORM_PROGRAM_LENGTH 136 #define NV2A_VERTEXSHADER_CONSTANTS 192 #define NV2A_VERTEXSHADER_ATTRIBUTES 16 #define NV2A_MAX_TEXTURES 4 @@ -771,14 +771,6 @@ typedef struct VertexShaderConstant { uint32 data[4]; } VertexShaderConstant; -typedef struct VertexShader { - bool dirty; - unsigned int program_length; - uint32_t 
program_data[NV2A_MAX_VERTEXSHADER_LENGTH]; - - GLuint gl_program; -} VertexShader; - typedef struct Texture { bool dirty; bool enabled; @@ -818,9 +810,12 @@ typedef struct ShaderState { bool rect_tex[4]; - /* vertex shader */ + bool fixed_function; + bool vertex_program; + uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH]; + int program_length; } ShaderState; typedef struct Surface { @@ -842,28 +837,6 @@ typedef struct KelvinState { hwaddr dma_vertex_a, dma_vertex_b; hwaddr dma_semaphore; unsigned int semaphore_offset; - - GLenum gl_primitive_mode; - - bool enable_vertex_program_write; - - unsigned int vertexshader_start_slot; - unsigned int vertexshader_load_slot; - VertexShader vertexshaders[NV2A_VERTEXSHADER_SLOTS]; - - unsigned int constant_load_slot; - VertexShaderConstant constants[NV2A_VERTEXSHADER_CONSTANTS]; - - VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; - - unsigned int inline_array_length; - uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; - - unsigned int inline_elements_length; - uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH]; - - unsigned int inline_buffer_length; - InlineVertexBufferEntry inline_buffer[NV2A_MAX_BATCH_LENGTH]; } KelvinState; typedef struct ContextSurfaces2DState { @@ -956,6 +929,29 @@ typedef struct PGRAPHState { GraphicsSubchannel subchannel_data[NV2A_NUM_SUBCHANNELS]; + GLenum gl_primitive_mode; + + bool enable_vertex_program_write; + + unsigned int program_start; + unsigned int program_load; + uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH]; + + unsigned int constant_load_slot; + VertexShaderConstant constants[NV2A_VERTEXSHADER_CONSTANTS]; + + VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; + + unsigned int inline_array_length; + uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_elements_length; + uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_buffer_length; + InlineVertexBufferEntry inline_buffer[NV2A_MAX_BATCH_LENGTH]; + + uint32_t 
regs[0x2000]; } PGRAPHState; @@ -1211,7 +1207,6 @@ static void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len) static void load_graphics_object(NV2AState *d, hwaddr instance_address, GraphicsObject *obj) { - int i; uint8_t *obj_ptr; uint32_t switch1, switch2, switch3; @@ -1226,21 +1221,9 @@ static void load_graphics_object(NV2AState *d, hwaddr instance_address, obj->graphics_class = switch1 & NV_PGRAPH_CTX_SWITCH1_GRCLASS; /* init graphics object */ - KelvinState *kelvin; switch (obj->graphics_class) { case NV_KELVIN_PRIMITIVE: - kelvin = &obj->data.kelvin; - - /* generate vertex programs */ - for (i = 0; i < NV2A_VERTEXSHADER_SLOTS; i++) { - VertexShader *shader = &kelvin->vertexshaders[i]; - glGenProgramsARB(1, &shader->gl_program); - } - assert(glGetError() == GL_NO_ERROR); - - /* temp hack? */ - kelvin->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value = 0xFFFFFFF; - + // kelvin->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value = 0xFFFFFFF; break; default: break; @@ -1260,19 +1243,21 @@ static GraphicsObject* lookup_graphics_object(PGRAPHState *s, } -static void kelvin_bind_converted_vertex_attributes(NV2AState *d, +static void pgraph_bind_converted_vertex_attributes(NV2AState *d, KelvinState *kelvin, bool inline_data, unsigned int num_elements) { int i, j; - for (i=0; ivertex_attributes[i]; - if (attribute->count && attribute->needs_conversion) { + PGRAPHState *pg = &d->pgraph; + for (i=0; ivertex_attributes[i]; + if (attribute->count && attribute->needs_conversion) { + NV2A_DPRINTF("converted %d\n", i); uint8_t *data; if (inline_data) { - data = (uint8_t*)kelvin->inline_array + data = (uint8_t*)pg->inline_array + attribute->inline_array_offset; } else { hwaddr dma_len; @@ -1322,12 +1307,13 @@ static void kelvin_bind_converted_vertex_attributes(NV2AState *d, } } -static unsigned int kelvin_bind_inline_array(KelvinState *kelvin) +static unsigned int pgraph_bind_inline_array(PGRAPHState *pg) { int i; + unsigned int offset = 0; for 
(i=0; ivertex_attributes[i]; + VertexAttribute *attribute = &pg->vertex_attributes[i]; if (attribute->count) { glEnableVertexAttribArray(i); @@ -1340,7 +1326,7 @@ static unsigned int kelvin_bind_inline_array(KelvinState *kelvin) attribute->gl_type, attribute->gl_normalize, attribute->stride, - (uint8_t*)kelvin->inline_array + offset); + (uint8_t*)pg->inline_array + offset); } offset += attribute->size * attribute->count; @@ -1349,13 +1335,13 @@ static unsigned int kelvin_bind_inline_array(KelvinState *kelvin) return offset; } -static void kelvin_bind_vertex_attributes(NV2AState *d, - KelvinState *kelvin) +static void pgraph_bind_vertex_attributes(NV2AState *d, KelvinState *kelvin) { int i; + PGRAPHState *pg = &d->pgraph; for (i=0; ivertex_attributes[i]; + VertexAttribute *attribute = &pg->vertex_attributes[i]; if (attribute->count) { glEnableVertexAttribArray(i); @@ -1387,157 +1373,6 @@ static void kelvin_bind_vertex_attributes(NV2AState *d, } } -static void kelvin_bind_vertex_program(KelvinState *kelvin) -{ - int i; - VertexShader *shader; - - shader = &kelvin->vertexshaders[kelvin->vertexshader_start_slot]; - - glBindProgramARB(GL_VERTEX_PROGRAM_ARB, shader->gl_program); - - if (shader->dirty) { - QString *program_code = vsh_translate(VSH_VERSION_XVS, - shader->program_data, - shader->program_length); - const char* program_code_str = qstring_get_str(program_code); - - NV2A_DPRINTF("bind vertex program %d, code:\n%s\n", - kelvin->vertexshader_start_slot, - program_code_str); - - glProgramStringARB(GL_VERTEX_PROGRAM_ARB, - GL_PROGRAM_FORMAT_ASCII_ARB, - strlen(program_code_str), - program_code_str); - - /* Check it compiled */ - GLint pos; - glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); - if (pos != -1) { - fprintf(stderr, "nv2a: vertex program compilation failed:\n" - " pos %d, %s\n", - pos, glGetString(GL_PROGRAM_ERROR_STRING_ARB)); - fprintf(stderr, "ucode:\n"); - for (i=0; iprogram_length; i++) { - fprintf(stderr, " 0x%08x,\n", shader->program_data[i]); - 
} - abort(); - } - - /* Check we're within resource limits */ - GLint native; - glGetProgramivARB(GL_VERTEX_PROGRAM_ARB, - GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, - &native); - assert(native); - - assert(glGetError() == GL_NO_ERROR); - - QDECREF(program_code); - shader->dirty = false; - } - - /* load constants */ - for (i=0; iconstants[i]; - if (!constant->dirty) continue; - - glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, - i, - (const GLfloat*)constant->data); - constant->dirty = false; - } - - assert(glGetError() == GL_NO_ERROR); -} - - -static void unswizzle_rect( - uint8_t *src_buf, - unsigned int width, - unsigned int height, - unsigned int depth, - uint8_t *dst_buf, - unsigned int pitch, - unsigned int bytes_per_pixel) -{ - unsigned int offset_u = 0, offset_v = 0, offset_w = 0; - uint32_t mask_u = 0, mask_v = 0, mask_w = 0; - - unsigned int i = 1, j = 1; - - while( (i <= width) || (i <= height) || (i <= depth) ) { - if(i < width) { - mask_u |= j; - j<<=1; - } - if(i < height) { - mask_v |= j; - j<<=1; - } - if(i < depth) { - mask_w |= j; - j<<=1; - } - i<<=1; - } - - uint32_t start_u = 0; - uint32_t start_v = 0; - uint32_t start_w = 0; - uint32_t mask_max = 0; - - // get the biggest mask - if(mask_u > mask_v) - mask_max = mask_u; - else - mask_max = mask_v; - if(mask_w > mask_max) - mask_max = mask_w; - - for(i = 1; i <= mask_max; i<<=1) { - if(i<=mask_u) { - if(mask_u & i) start_u |= (offset_u & i); - else offset_u <<= 1; - } - - if(i <= mask_v) { - if(mask_v & i) start_v |= (offset_v & i); - else offset_v<<=1; - } - - if(i <= mask_w) { - if(mask_w & i) start_w |= (offset_w & i); - else offset_w <<= 1; - } - } - - uint32_t w = start_w; - unsigned int z; - for(z=0; zregs[NV_PGRAPH_CSV0_D], + NV_PGRAPH_CSV0_D_MODE) == 2; + bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_MODE) == 0; @@ -1854,8 +1722,30 @@ static void pgraph_bind_shaders(PGRAPHState *pg) /* fixed function stuff */ .fixed_function = fixed_function, + + /* vertex 
program stuff */ + .vertex_program = vertex_program, }; + state.program_length = 0; + memset(state.program_data, 0, sizeof(state.program_data)); + + if (vertex_program) { + // copy in vertex program tokens + for (i = pg->program_start; + i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; + i += VSH_TOKEN_SIZE) { + uint32_t *cur_token = pg->program_data + i; + memcpy(state.program_data + state.program_length, + cur_token, + VSH_TOKEN_SIZE * sizeof(uint32_t)); + state.program_length += VSH_TOKEN_SIZE; + + if (vsh_get_field(cur_token, FLD_FINAL)) { + break; + } + } + } for (i = 0; i < 8; i++) { state.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4]; @@ -1921,9 +1811,11 @@ static void pgraph_bind_shaders(PGRAPHState *pg) } } - /* update fixed function composite matrix */ if (fixed_function) { + /* update fixed function composite matrix */ + GLint comLoc = glGetUniformLocation(pg->gl_program, "composite"); + assert(comLoc != -1); glUniformMatrix4fv(comLoc, 1, GL_FALSE, pg->composite_matrix); @@ -1951,8 +1843,23 @@ static void pgraph_bind_shaders(PGRAPHState *pg) }; GLint viewLoc = glGetUniformLocation(pg->gl_program, "invViewport"); + assert(viewLoc != -1); glUniformMatrix4fv(viewLoc, 1, GL_FALSE, &invViewport[0]); + } else if (vertex_program) { + /* update vertex program constants */ + + for (i=0; iconstants[i]; + + char tmp[8]; + snprintf(tmp, sizeof(tmp), "c[%d]", i); + GLint loc = glGetUniformLocation(pg->gl_program, tmp); + //assert(loc != -1); + if (loc != -1) { + glUniform4fv(loc, 1, (const GLfloat*)constant->data); + } + } } pg->shaders_dirty = false; @@ -2196,7 +2103,6 @@ static void pgraph_method(NV2AState *d, unsigned int slot; VertexAttribute *vertex_attribute; - VertexShader *vertexshader; VertexShaderConstant *constant; PGRAPHState *pg = &d->pgraph; @@ -2481,8 +2387,8 @@ static void pgraph_method(NV2AState *d, slot = (class_method - NV097_SET_VIEWPORT_OFFSET) / 4; /* populate magic viewport offset constant */ - kelvin->constants[59].data[slot] = parameter; - 
kelvin->constants[59].dirty = true; + pg->constants[59].data[slot] = parameter; + pg->constants[59].dirty = true; break; case NV097_SET_COMBINER_FACTOR0 ... @@ -2519,30 +2425,31 @@ static void pgraph_method(NV2AState *d, slot = (class_method - NV097_SET_VIEWPORT_SCALE) / 4; /* populate magic viewport scale constant */ - kelvin->constants[58].data[slot] = parameter; - kelvin->constants[58].dirty = true; + pg->constants[58].data[slot] = parameter; + pg->constants[58].dirty = true; break; case NV097_SET_TRANSFORM_PROGRAM ... NV097_SET_TRANSFORM_PROGRAM + 0x7c: - slot = (class_method - NV097_SET_TRANSFORM_PROGRAM) / 4; - /* TODO: It should still work using a non-increasing slot??? */ + // slot = (class_method - NV097_SET_TRANSFORM_PROGRAM) / 4; - vertexshader = &kelvin->vertexshaders[kelvin->vertexshader_load_slot]; - assert(vertexshader->program_length < NV2A_MAX_VERTEXSHADER_LENGTH); - vertexshader->program_data[ - vertexshader->program_length++] = parameter; + assert(pg->program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + pg->program_data[pg->program_load++] = parameter; + pg->shaders_dirty = true; break; case NV097_SET_TRANSFORM_CONSTANT ... NV097_SET_TRANSFORM_CONSTANT + 0x7c: - slot = (class_method - NV097_SET_TRANSFORM_CONSTANT) / 4; + // slot = (class_method - NV097_SET_TRANSFORM_CONSTANT) / 4; - constant = &kelvin->constants[kelvin->constant_load_slot+slot/4]; - constant->data[slot%4] = parameter; + assert((pg->constant_load_slot/4) < NV2A_VERTEXSHADER_CONSTANTS); + constant = &pg->constants[pg->constant_load_slot/4]; + constant->data[pg->constant_load_slot%4] = parameter; constant->dirty = true; + + pg->constant_load_slot++; break; case NV097_SET_VERTEX4F ... 
@@ -2550,16 +2457,16 @@ static void pgraph_method(NV2AState *d, slot = (class_method - NV097_SET_VERTEX4F) / 4; - assert(kelvin->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); + assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); InlineVertexBufferEntry *entry = - &kelvin->inline_buffer[kelvin->inline_buffer_length]; + &pg->inline_buffer[pg->inline_buffer_length]; entry->position[slot] = parameter; if (slot == 3) { entry->diffuse = - kelvin->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value; - kelvin->inline_buffer_length++; + pg->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value; + pg->inline_buffer_length++; } break; } @@ -2568,7 +2475,7 @@ static void pgraph_method(NV2AState *d, NV097_SET_VERTEX_DATA_ARRAY_FORMAT + 0x3c: slot = (class_method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4; - vertex_attribute = &kelvin->vertex_attributes[slot]; + vertex_attribute = &pg->vertex_attributes[slot]; vertex_attribute->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE); @@ -2633,26 +2540,27 @@ static void pgraph_method(NV2AState *d, slot = (class_method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4; - kelvin->vertex_attributes[slot].dma_select = + pg->vertex_attributes[slot].dma_select = parameter & 0x80000000; - kelvin->vertex_attributes[slot].offset = + pg->vertex_attributes[slot].offset = parameter & 0x7fffffff; - kelvin->vertex_attributes[slot].converted_elements = 0; + pg->vertex_attributes[slot].converted_elements = 0; break; case NV097_SET_BEGIN_END: if (parameter == NV097_SET_BEGIN_END_OP_END) { - if (kelvin->inline_buffer_length) { + if (pg->inline_buffer_length) { glEnableVertexAttribArray(NV2A_VERTEX_ATTR_POSITION); glVertexAttribPointer(NV2A_VERTEX_ATTR_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(InlineVertexBufferEntry), - kelvin->inline_buffer); + pg->inline_buffer); + glEnableVertexAttribArray(NV2A_VERTEX_ATTR_DIFFUSE); glVertexAttribPointer(NV2A_VERTEX_ATTR_DIFFUSE, @@ -2660,36 +2568,38 @@ static void pgraph_method(NV2AState *d, 
GL_UNSIGNED_BYTE, GL_TRUE, sizeof(InlineVertexBufferEntry), - &kelvin->inline_buffer[0].diffuse); + &pg->inline_buffer[0].diffuse); - glDrawArrays(kelvin->gl_primitive_mode, - 0, kelvin->inline_buffer_length); - } else if (kelvin->inline_array_length) { + glDrawArrays(pg->gl_primitive_mode, + 0, pg->inline_buffer_length); + } else if (pg->inline_array_length) { unsigned int vertex_size = - kelvin_bind_inline_array(kelvin); + pgraph_bind_inline_array(pg); unsigned int index_count = - kelvin->inline_array_length*4 / vertex_size; + pg->inline_array_length*4 / vertex_size; - kelvin_bind_converted_vertex_attributes(d, kelvin, - true, index_count); - glDrawArrays(kelvin->gl_primitive_mode, + NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); + + pgraph_bind_converted_vertex_attributes(d, + kelvin, true, index_count); + glDrawArrays(pg->gl_primitive_mode, 0, index_count); - } else if (kelvin->inline_elements_length) { + } else if (pg->inline_elements_length) { uint32_t max_element = 0; uint32_t min_element = (uint32_t)-1; - for (i=0; iinline_elements_length; i++) { - max_element = MAX(kelvin->inline_elements[i], max_element); - min_element = MIN(kelvin->inline_elements[i], min_element); + for (i=0; iinline_elements_length; i++) { + max_element = MAX(pg->inline_elements[i], max_element); + min_element = MIN(pg->inline_elements[i], min_element); } - kelvin_bind_converted_vertex_attributes(d, kelvin, - false, max_element+1); - glDrawElements(kelvin->gl_primitive_mode, - kelvin->inline_elements_length, + pgraph_bind_converted_vertex_attributes(d, + kelvin, false, max_element+1); + glDrawElements(pg->gl_primitive_mode, + pg->inline_elements_length, GL_UNSIGNED_INT, - kelvin->inline_elements); + pg->inline_elements); }/* else { assert(false); }*/ @@ -2699,26 +2609,18 @@ static void pgraph_method(NV2AState *d, pgraph_update_surface(d, true); - bool use_vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 2; - if 
(use_vertex_program) { - glEnable(GL_VERTEX_PROGRAM_ARB); - kelvin_bind_vertex_program(kelvin); - } else { - glDisable(GL_VERTEX_PROGRAM_ARB); - } - pgraph_bind_shaders(pg); pgraph_bind_textures(d); - kelvin_bind_vertex_attributes(d, kelvin); + pgraph_bind_vertex_attributes(d, kelvin); - kelvin->gl_primitive_mode = kelvin_primitive_map[parameter]; - kelvin->inline_elements_length = 0; - kelvin->inline_array_length = 0; - kelvin->inline_buffer_length = 0; + pg->gl_primitive_mode = kelvin_primitive_map[parameter]; + + pg->inline_elements_length = 0; + pg->inline_array_length = 0; + pg->inline_buffer_length = 0; } pg->surface_color.draw_dirty = true; break; @@ -2788,38 +2690,38 @@ static void pgraph_method(NV2AState *d, break; case NV097_ARRAY_ELEMENT16: - assert(kelvin->inline_elements_length < NV2A_MAX_BATCH_LENGTH); - kelvin->inline_elements[ - kelvin->inline_elements_length++] = parameter & 0xFFFF; - kelvin->inline_elements[ - kelvin->inline_elements_length++] = parameter >> 16; + assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_elements[ + pg->inline_elements_length++] = parameter & 0xFFFF; + pg->inline_elements[ + pg->inline_elements_length++] = parameter >> 16; break; case NV097_ARRAY_ELEMENT32: - assert(kelvin->inline_elements_length < NV2A_MAX_BATCH_LENGTH); - kelvin->inline_elements[ - kelvin->inline_elements_length++] = parameter; + assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_elements[ + pg->inline_elements_length++] = parameter; break; case NV097_DRAW_ARRAYS: { unsigned int start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX); unsigned int count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT)+1; - kelvin_bind_converted_vertex_attributes(d, kelvin, + pgraph_bind_converted_vertex_attributes(d, kelvin, false, start + count); - glDrawArrays(kelvin->gl_primitive_mode, start, count); + glDrawArrays(pg->gl_primitive_mode, start, count); break; } case NV097_INLINE_ARRAY: - assert(kelvin->inline_array_length 
< NV2A_MAX_BATCH_LENGTH); - kelvin->inline_array[ - kelvin->inline_array_length++] = parameter; + assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_array[ + pg->inline_array_length++] = parameter; break; case NV097_SET_VERTEX_DATA4UB ... NV097_SET_VERTEX_DATA4UB + 0x3c: slot = (class_method - NV097_SET_VERTEX_DATA4UB) / 4; - kelvin->vertex_attributes[slot].inline_value = parameter; + pg->vertex_attributes[slot].inline_value = parameter; break; case NV097_SET_SEMAPHORE_OFFSET: @@ -2946,27 +2848,20 @@ static void pgraph_method(NV2AState *d, GET_MASK(parameter, NV_097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE)); break; case NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN: - kelvin->enable_vertex_program_write = parameter; + pg->enable_vertex_program_write = parameter; break; case NV097_SET_TRANSFORM_PROGRAM_LOAD: - assert(parameter < NV2A_VERTEXSHADER_SLOTS); - kelvin->vertexshader_load_slot = parameter; - kelvin->vertexshaders[parameter].program_length = 0; /* ??? */ - kelvin->vertexshaders[parameter].dirty = true; + assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + pg->program_load = parameter * VSH_TOKEN_SIZE; break; case NV097_SET_TRANSFORM_PROGRAM_START: - assert(parameter < NV2A_VERTEXSHADER_SLOTS); - /* if the shader changed, dirty all the constants */ - if (parameter != kelvin->vertexshader_start_slot) { - for (i=0; iconstants[i].dirty = true; - } - } - kelvin->vertexshader_start_slot = parameter; + assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + pg->program_start = parameter * VSH_TOKEN_SIZE; + pg->shaders_dirty = true; break; case NV097_SET_TRANSFORM_CONSTANT_LOAD: assert(parameter < NV2A_VERTEXSHADER_CONSTANTS); - kelvin->constant_load_slot = parameter; + pg->constant_load_slot = parameter * 4; NV2A_DPRINTF("load to %d\n", parameter); break; diff --git a/hw/xbox/nv2a_vsh.c b/hw/xbox/nv2a_vsh.c index eb3aefe988..bd1a6b419e 100644 --- a/hw/xbox/nv2a_vsh.c +++ b/hw/xbox/nv2a_vsh.c @@ -1,6 +1,7 @@ /* * QEMU Geforce NV2A vertex shader 
translation * + * Copyright (c) 2014 Jannik Vogel * Copyright (c) 2012 espes * * Based on: @@ -33,52 +34,6 @@ #define VSH_D3DSCM_CORRECTION 96 -#define VSH_TOKEN_SIZE 4 - -typedef enum { - FLD_ILU = 0, - FLD_MAC, - FLD_CONST, - FLD_V, - // Input A - FLD_A_NEG, - FLD_A_SWZ_X, - FLD_A_SWZ_Y, - FLD_A_SWZ_Z, - FLD_A_SWZ_W, - FLD_A_R, - FLD_A_MUX, - // Input B - FLD_B_NEG, - FLD_B_SWZ_X, - FLD_B_SWZ_Y, - FLD_B_SWZ_Z, - FLD_B_SWZ_W, - FLD_B_R, - FLD_B_MUX, - // Input C - FLD_C_NEG, - FLD_C_SWZ_X, - FLD_C_SWZ_Y, - FLD_C_SWZ_Z, - FLD_C_SWZ_W, - FLD_C_R_HIGH, - FLD_C_R_LOW, - FLD_C_MUX, - // Output - FLD_OUT_MAC_MASK, - FLD_OUT_R, - FLD_OUT_ILU_MASK, - FLD_OUT_O_MASK, - FLD_OUT_ORB, - FLD_OUT_ADDRESS, - FLD_OUT_MUX, - // Relative addressing - FLD_A0X, - // Final instruction - FLD_FINAL -} VshFieldName; - typedef enum { PARAM_UNKNOWN = 0, @@ -222,7 +177,6 @@ static const VshOpcodeParams mac_opcode_params[] = { }; - static const char* mask_str[] = { // xyzw xyzw "", // 0000 ____ @@ -240,7 +194,7 @@ static const char* mask_str[] = { ".xy", // 1100 xy__ ".xyw", // 1101 xy_w ".xyz", // 1110 xyz_ - ""//.xyzw 1111 xyzw + ".xyzw" // 1111 xyzw }; /* Note: OpenGL seems to be case-sensitive, and requires upper-case opcodes! 
*/ @@ -265,7 +219,7 @@ static const char* ilu_opcode[] = { "NOP", "MOV", "RCP", - "RCP", // Was RCC + "RCC", "RSQ", "EXP", "LOG", @@ -284,7 +238,7 @@ static bool ilu_force_scalar[] = { }; static const char* out_reg_name[] = { - "R12", // "oPos", + "oPos", "???", "???", "oD0", @@ -312,7 +266,8 @@ static int vsh_get_from_token(uint32_t *shader_token, { return (shader_token[subtoken] >> start_bit) & ~(0xFFFFFFFF << bit_length); } -static uint8_t vsh_get_field(uint32_t *shader_token, VshFieldName field_name) + +uint8_t vsh_get_field(uint32_t *shader_token, VshFieldName field_name) { return (uint8_t)(vsh_get_from_token(shader_token, @@ -327,7 +282,7 @@ static int16_t convert_c_register(const int16_t c_reg) { int16_t r = ((((c_reg >> 5) & 7) - 3) * 32) + (c_reg & 31); r += VSH_D3DSCM_CORRECTION; /* to map -96..95 to 0..191 */ - return r; + return r; //FIXME: = c_reg?! } @@ -341,7 +296,7 @@ static QString* decode_swizzle(uint32_t *shader_token, /* some microcode instructions force a scalar value */ if (swizzle_field == FLD_C_SWZ_X && ilu_force_scalar[vsh_get_field(shader_token, FLD_ILU)]) { - x = y = z = w = x = vsh_get_field(shader_token, swizzle_field); + x = y = z = w = vsh_get_field(shader_token, swizzle_field); } else { x = vsh_get_field(shader_token, swizzle_field++); y = vsh_get_field(shader_token, swizzle_field++); @@ -352,21 +307,21 @@ static QString* decode_swizzle(uint32_t *shader_token, if (x == SWIZZLE_X && y == SWIZZLE_Y && z == SWIZZLE_Z && w == SWIZZLE_W) { /* Don't print the swizzle if it's .xyzw */ - return qstring_from_str(""); + return qstring_from_str(""); // Will turn ".xyzw" into "." 
/* Don't print duplicates */ } else if (x == y && y == z && z == w) { return qstring_from_str((char[]){'.', swizzle_str[x], '\0'}); - } else if (x == y && z == w) { + } else if (y == z && z == w) { return qstring_from_str((char[]){'.', swizzle_str[x], swizzle_str[y], '\0'}); - } /*else if (z == w) { + } else if (z == w) { return qstring_from_str((char[]){'.', swizzle_str[x], swizzle_str[y], swizzle_str[z], '\0'}); - }*/ else { + } else { return qstring_from_str((char[]){'.', swizzle_str[x], swizzle_str[y], swizzle_str[z], swizzle_str[w], - '\0'}); + '\0'}); // Normal swizzle mask } } @@ -400,12 +355,14 @@ static QString* decode_opcode_input(uint32_t *shader_token, case PARAM_C: reg_num = convert_c_register(vsh_get_field(shader_token, FLD_CONST)); if (vsh_get_field(shader_token, FLD_A0X) > 0) { + //FIXME: does this really require the "correction" doe in convert_c_register?! snprintf(tmp, sizeof(tmp), "c[A0+%d]", reg_num); } else { snprintf(tmp, sizeof(tmp), "c[%d]", reg_num); } break; default: + printf("Param: 0x%x\n", param); assert(false); } qstring_append(ret_str, tmp); @@ -444,16 +401,18 @@ static QString* decode_opcode(uint32_t *shader_token, if (mask > 0) { if (strcmp(opcode, mac_opcode[MAC_ARL]) == 0) { - qstring_append(ret, opcode); + qstring_append(ret, " ARL(a0"); qstring_append(ret, qstring_get_str(inputs)); qstring_append(ret, ";\n"); } else { + qstring_append(ret, " "); qstring_append(ret, opcode); - qstring_append(ret, " R"); + qstring_append(ret, "("); + qstring_append(ret, "R"); qstring_append_int(ret, reg_num); qstring_append(ret, mask_str[mask]); qstring_append(ret, qstring_get_str(inputs)); - qstring_append(ret, ";\n"); + qstring_append(ret, ");\n"); } } @@ -462,15 +421,17 @@ static QString* decode_opcode(uint32_t *shader_token, /* Only if it's not masked away: */ && vsh_get_field(shader_token, FLD_OUT_O_MASK) != 0) { + qstring_append(ret, " "); qstring_append(ret, opcode); + qstring_append(ret, "("); + if (vsh_get_field(shader_token, FLD_OUT_ORB) 
== OUTPUT_C) { /* TODO : Emulate writeable const registers */ - qstring_append(ret, " c"); + qstring_append(ret, "c"); qstring_append_int(ret, convert_c_register( vsh_get_field(shader_token, FLD_OUT_ADDRESS))); } else { - qstring_append_chr(ret, ' '); qstring_append(ret, out_reg_name[ vsh_get_field(shader_token, FLD_OUT_ADDRESS) & 0xF]); @@ -479,7 +440,7 @@ static QString* decode_opcode(uint32_t *shader_token, mask_str[ vsh_get_field(shader_token, FLD_OUT_O_MASK)]); qstring_append(ret, qstring_get_str(inputs)); - qstring_append(ret, ";\n"); + qstring_append(ret, ");\n"); } return ret; @@ -563,141 +524,285 @@ static QString* decode_token(uint32_t *shader_token) return ret; } -/* Vertex shader header, mapping Xbox1 registers to the ARB syntax (original - * version by KingOfC). Note about the use of 'conventional' attributes in here: - * Since we prefer to use only one shader for both immediate and deferred mode - * rendering, we alias all attributes to conventional inputs as much as possible. - * Only when there's no conventional attribute available, we use generic - * attributes. So in the following header, we use conventional attributes first, - * and generic attributes for the rest of the vertex attribute slots. This makes - * it possible to support immediate and deferred mode rendering with the same - * shader, and the use of the OpenGL fixed-function pipeline without a shader. 
- */ static const char* vsh_header = - "!!ARBvp1.0\n" - "TEMP R0,R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12;\n" - "ADDRESS A0;\n" + "#version 110\n" + "\n" + "attribute vec4 v0;\n" + "attribute vec4 v1;\n" + "attribute vec4 v2;\n" + "attribute vec4 v3;\n" + "attribute vec4 v4;\n" + "attribute vec4 v5;\n" + "attribute vec4 v6;\n" + "attribute vec4 v7;\n" + "attribute vec4 v8;\n" + "attribute vec4 v9;\n" + "attribute vec4 v10;\n" + "attribute vec4 v11;\n" + "attribute vec4 v12;\n" + "attribute vec4 v13;\n" + "attribute vec4 v14;\n" + "attribute vec4 v15;\n" + "\n" + //FIXME: What is a0 initialized as? + "int A0 = 0;\n" + "\n" + //FIXME: I just assumed this is true for all registers?! + "vec4 R0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R2 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R3 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R4 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R5 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R6 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R7 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R8 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R9 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R10 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R11 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 R12 = vec4(0.0,0.0,0.0,1.0);\n" + "\n" + "#define oPos R12\n" /* oPos is a mirror of R12 */ + "vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n" + "\n" + + /* All constants in 1 array declaration */ + "uniform vec4 c[192];\n" + "#define viewport_scale c[58]\n" + "#define viewport_offset c[59]\n" + "uniform vec2 cliprange;\n" + + /* See: + * http://msdn.microsoft.com/en-us/library/windows/desktop/bb174703%28v=vs.85%29.aspx + * 
https://www.opengl.org/registry/specs/NV/vertex_program1_1.txt + */ + "/* Converts number of components of rvalue to lvalue */\n" + "float _out(float l, vec4 r) { return r.x; }\n" + "vec2 _out(vec2 l, vec4 r) { return r.xy; }\n" + "vec3 _out(vec3 l, vec4 r) { return r.xyz; }\n" + "vec4 _out(vec4 l, vec4 r) { return r.xyzw; }\n" + "\n" +//QQQ #ifdef NICE_CODE + "/* Converts the input to vec4, pads with last component */\n" + "vec4 _in(float v) { return vec4(v); }\n" + "vec4 _in(vec2 v) { return v.xyyy; }\n" + "vec4 _in(vec3 v) { return v.xyzz; }\n" + "vec4 _in(vec4 v) { return v.xyzw; }\n" +//#else +// "/* Make sure input is always a vec4 */\n" +// "#define _in(v) vec4(v)\n" +//#endif + "\n" + "#define MOV(dest, src) dest = _out(dest,_MOV(_in(src)))\n" + "vec4 _MOV(vec4 src)\n" + "{\n" + " return src;\n" + "}\n" + "\n" + "#define MUL(dest, src0, src1) dest = _out(dest,_MUL(_in(src0), _in(src1)))\n" + "vec4 _MUL(vec4 src0, vec4 src1)\n" + "{\n" + " return src0 * src1;\n" + "}\n" + "\n" + "#define ADD(dest, src0, src1) dest = _out(dest,_ADD(_in(src0), _in(src1)))\n" + "vec4 _ADD(vec4 src0, vec4 src1)\n" + "{\n" + " return src0 + src1;\n" + "}\n" + "\n" + "#define MAD(dest, src0, src1, src2) dest = _out(dest,_MAD(_in(src0), _in(src1), _in(src2)))\n" + "vec4 _MAD(vec4 src0, vec4 src1, vec4 src2)\n" + "{\n" + " return src0 * src1 + src2;\n" + "}\n" + "\n" + "#define DP3(dest, src0, src1) dest = _out(dest,_DP3(_in(src0), _in(src1)))\n" + "vec4 _DP3(vec4 src0, vec4 src1)\n" + "{\n" + " return vec4(dot(src0.xyz, src1.xyz));\n" + "}\n" + "\n" + "#define DPH(dest, src0, src1) dest = _out(dest,_DPH(_in(src0), _in(src1)))\n" + "vec4 _DPH(vec4 src0, vec4 src1)\n" + "{\n" + " return vec4(dot(vec4(src0.xyz, 1.0), src1));\n" + "}\n" + "\n" + "#define DP4(dest, src0, src1) dest = _out(dest,_DP4(_in(src0), _in(src1)))\n" + "vec4 _DP4(vec4 src0, vec4 src1)\n" + "{\n" + " return vec4(dot(src0, src1));\n" + "}\n" + "\n" + "#define DST(dest, src0, src1) dest = _out(dest,_DST(_in(src0), 
_in(src1)))\n" + "vec4 _DST(vec4 src0, vec4 src1)\n" + "{\n" + " return vec4(1.0,\n" + " src0.y * src1.y,\n" + " src0.z,\n" + " src1.w);\n" + "}\n" + "\n" + "#define MIN(dest, src0, src1) dest = _out(dest,_MIN(_in(src0), _in(src1)))\n" + "vec4 _MIN(vec4 src0, vec4 src1)\n" + "{\n" + " return min(src0, src1);\n" + "}\n" + "\n" + "#define MAX(dest, src0, src1) dest = _out(dest,_MAX(_in(src0), _in(src1)))\n" + "vec4 _MAX(vec4 src0, vec4 src1)\n" + "{\n" + " return max(src0, src1);\n" + "}\n" + "\n" + "#define SLT(dest, src0, src1) dest = _out(dest,_SLT(_in(src0), _in(src1)))\n" + "vec4 _SLT(vec4 src0, vec4 src1)\n" + "{\n" + " return vec4(lessThan(src0, src1));\n" + "}\n" + "\n" + "#define ARL(dest, src) dest = _ARL(_in(src).x)\n" + "int _ARL(float src)\n" + "{\n" + " return int(src);\n" + "}\n" + "\n" + "#define SGE(dest, src0, src1) dest = _out(dest,_SGE(_in(src0), _in(src1)))\n" + "vec4 _SGE(vec4 src0, vec4 src1)\n" + "{\n" + " return vec4(greaterThanEqual(src0, src1));\n" + "}\n" + "\n" + "#define RCP(dest, src) dest = _out(dest,_RCP(_in(src).x))\n" + "vec4 _RCP(float src)\n" + "{\n" + " return vec4(1.0 / src);\n" + "}\n" + "\n" + "#define RCC(dest, src) dest = _out(dest,_RCC(_in(src).x))\n" + "vec4 _RCC(float src)\n" + "{\n" + " float t = 1.0 / src;\n" + " if (t > 0.0) {\n" + " t = clamp(t, 5.42101e-020, 1.884467e+019);\n" + " } else {\n" + " t = clamp(t, -1.884467e+019, -5.42101e-020);\n" + " }\n" + " return vec4(t);\n" + "}\n" + "\n" + "#define RSQ(dest, src) dest = _out(dest,_RSQ(_in(src).x))\n" + "vec4 _RSQ(float src)\n" + "{\n" + " return vec4(inversesqrt(src));\n" + "}\n" + "\n" + "#define EXP(dest, src) dest = _out(dest,_EXP(_in(src).x))\n" + "vec4 _EXP(float src)\n" + "{\n" + " return vec4(exp2(src));\n" + "}\n" + "\n" + "#define LOG(dest, src) dest = _out(dest,_LOG(_in(src).x))\n" + "vec4 _LOG(float src)\n" + "{\n" + " return vec4(log2(src));\n" + "}\n" + "\n" + "#define LIT(dest, src) dest = _out(dest,_LIT(_in(src)))\n" + "vec4 _LIT(vec4 src)\n" + "{\n" 
+ " vec4 t = vec4(1.0, 0.0, 0.0, 1.0);\n" + " float power = src.w;\n" #if 0 - "ATTRIB v0 = vertex.position;" // (See "conventional" note above) - "ATTRIB v1 = vertex.%s;" // Note : We replace this with "weight" or "attrib[1]" depending GL_ARB_vertex_blend - "ATTRIB v2 = vertex.normal;" - "ATTRIB v3 = vertex.color.primary;" - "ATTRIB v4 = vertex.color.secondary;" - "ATTRIB v5 = vertex.fogcoord;" - "ATTRIB v6 = vertex.attrib[6];" - "ATTRIB v7 = vertex.attrib[7];" - "ATTRIB v8 = vertex.texcoord[0];" - "ATTRIB v9 = vertex.texcoord[1];" - "ATTRIB v10 = vertex.texcoord[2];" - "ATTRIB v11 = vertex.texcoord[3];" -#else - "ATTRIB v0 = vertex.attrib[0];\n" - "ATTRIB v1 = vertex.attrib[1];\n" - "ATTRIB v2 = vertex.attrib[2];\n" - "ATTRIB v3 = vertex.attrib[3];\n" - "ATTRIB v4 = vertex.attrib[4];\n" - "ATTRIB v5 = vertex.attrib[5];\n" - "ATTRIB v6 = vertex.attrib[6];\n" - "ATTRIB v7 = vertex.attrib[7];\n" - "ATTRIB v8 = vertex.attrib[8];\n" - "ATTRIB v9 = vertex.attrib[9];\n" - "ATTRIB v10 = vertex.attrib[10];\n" - "ATTRIB v11 = vertex.attrib[11];\n" + //XXX: Limitation for 8.8 fixed point + " power = max(power, -127.9961);\n" + " power = min(power, 127.9961);\n" #endif - "ATTRIB v12 = vertex.attrib[12];\n" - "ATTRIB v13 = vertex.attrib[13];\n" - "ATTRIB v14 = vertex.attrib[14];\n" - "ATTRIB v15 = vertex.attrib[15];\n" - "OUTPUT oPos = result.position;\n" - "OUTPUT oD0 = result.color.front.primary;\n" - "OUTPUT oD1 = result.color.front.secondary;\n" - "OUTPUT oB0 = result.color.back.primary;\n" - "OUTPUT oB1 = result.color.back.secondary;\n" - "OUTPUT oPts = result.pointsize;\n" - "OUTPUT oFog = result.fogcoord;\n" - "OUTPUT oT0 = result.texcoord[0];\n" - "OUTPUT oT1 = result.texcoord[1];\n" - "OUTPUT oT2 = result.texcoord[2];\n" - "OUTPUT oT3 = result.texcoord[3];\n" - - /* All constants in 1 array declaration (requires NV_gpu_program4?) 
*/ - "PARAM c[] = { program.env[0..191] };\n" - - /* w component of outputs are expected to be initialised to 1 */ - "MOV R12, 0.0;\n" - "MOV R12.w, 1.0;\n" - "MOV oD0.w, 1.0;\n" - "MOV oD1.w, 1.0;\n" - "MOV oB0.w, 1.0;\n" - "MOV oB1.w, 1.0;\n" - "MOV oT0.w, 1.0;\n" - "MOV oT1.w, 1.0;\n" - "MOV oT2.w, 1.0;\n" - "MOV oT3.w, 1.0;\n"; - + " if (src.x > 0.0) {\n" + " t.y = src.x;\n" + " if (src.y > 0.0) {\n" + //XXX: Allowed approximation is EXP(power * LOG(src.y)) + " t.z = pow(src.y, power);\n" + " }\n" + " }\n" + " return t;\n" + "}\n"; QString* vsh_translate(uint16_t version, uint32_t *tokens, unsigned int tokens_length) { - QString *ret = qstring_from_str(vsh_header); - + QString *body = qstring_from_str("\n"); + QString *header = qstring_from_str(vsh_header); + + + bool has_final = false; uint32_t *cur_token = tokens; + unsigned int slot; while (cur_token-tokens < tokens_length) { + slot = (cur_token-tokens) / VSH_TOKEN_SIZE; QString *token_str = decode_token(cur_token); - qstring_append(ret, qstring_get_str(token_str)); + qstring_append_fmt(body, + " /* Slot %d: 0x%08X 0x%08X 0x%08X 0x%08X */", + slot, + cur_token[0],cur_token[1],cur_token[2],cur_token[3]); + qstring_append(body, "\n"); + qstring_append(body, qstring_get_str(token_str)); + qstring_append(body, "\n"); QDECREF(token_str); if (vsh_get_field(cur_token, FLD_FINAL)) { + has_final = true; break; } cur_token += VSH_TOKEN_SIZE; } - - /* Note : Since we replaced oPos with r12 in the above decoding, - * we have to assign oPos at the end; This can be done in two ways; - * 1) When the shader is complete (including transformations), - * we could just do a 'MOV oPos, R12;' and be done with it. - * 2) In case of D3DFVF_XYZRHW, it seems the NV2A applies the mvp - * (model/view/projection) matrix transformation AFTER executing - * the shader (but OpenGL expects *the*shader* to handle this - * transformation). 
- * Until we can discern these two situations, we apply the matrix - * transformation : - * TODO : What should we do about normals, eye-space lighting and all that? - */ - qstring_append(ret, -/* - '# Dxbx addition : Transform the vertex to clip coordinates :' - "DP4 R0.x, mvp[0], R12;" - "DP4 R0.y, mvp[1], R12;" - "DP4 R0.z, mvp[2], R12;" - "DP4 R0.w, mvp[3], R12;" - "MOV R12, R0;" -*/ + assert(has_final); + qstring_append(body, /* the shaders leave the result in screen space, while * opengl expects it in clip coordinates. * Use the magic viewport constants for now, - * but they're not necessarily present. - * Same idea as above I think, but dono what the mvp stuff is about... + * but they're not necessarily present... */ - "# un-screenspace transform\n" - "ADD R12, R12, -c[59];\n" - "RCP R1.x, c[58].x;\n" - "RCP R1.y, c[58].y;\n" - /* scale_z = view_z == 0 ? 1 : (1 / view_z) */ - "ABS R1.z, c[58].z;\n" - "SGE R1.z, -R1.z, 0;\n" - "ADD R1.z, R1.z, c[58].z;\n" - "RCP R1.z, R1.z;\n" + " /* Un-screenspace transform */\n" + " oPos.xyz = oPos.xyz - viewport_offset.xyz;\n" + " vec3 tmp = vec3(1.0);\n" - "MUL R12.xyz, R12, R1;\n" - "MOV R12.w, 1.0;\n" + /* FIXME: old comment was "scale_z = view_z == 0 ? 1 : (1 / view_z)" */ + " if (viewport_scale.x != 0.0) { tmp.x /= viewport_scale.x; }\n" + " if (viewport_scale.y != 0.0) { tmp.y /= viewport_scale.y; }\n" + " if (viewport_scale.z != 0.0) { tmp.z /= viewport_scale.z; }\n" - /* undo the perspective divide? */ - //"MUL R12.xyz, R12, R12.w;\n" + " oPos.xyz *= tmp.xyz;\n" + " oPos.w = 1.0;\n" //This breaks 2D? Maybe w is zero? + "\n" +#if 0 +//FIXME: Use surface width / height / zeta max + "R12.z /= 16777215.0;\n" // Z[0;1] + "R12.z *= (cliprange.y - cliprange.x) / 16777215.0;\n" // Scale so [0;zmax] -> [0;cliprange_size] + "R12.z -= cliprange.x / 16777215.0;\n" // Move down so [clipmin_min;clipmin_max] + // X = [0;surface_width]; Y = [surface_height;0]; Z = [0;1]; W = ??? 
+ "R12.xyz = R12.xyz / vec3(640.0,480.0,1.0);\n" + // X,Z = [0;1]; Y = [1;0]; W = ??? + "R12.xyz = R12.xyz * vec3(2.0) - vec3(1.0);\n" + "R12.y *= -1.0;\n" + "R12.w = 1.0;\n" + // X,Y,Z = [-1;+1]; W = 1 + "\n" +#endif /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection * in state.c @@ -711,9 +816,31 @@ QString* vsh_translate(uint16_t version, //"ADD R12.z, R12.z, R12.z;\n" //"ADD R12.z, R12.z, -R12.w;\n" - "# End of shader:\n" - "MOV oPos, R12;\n" - "END" + + " /* Set outputs */\n" + " gl_Position = oPos;\n" + " gl_FrontColor = oD0;\n" + " gl_FrontSecondaryColor = oD1;\n" + " gl_BackColor = oB0;\n" + " gl_BackSecondaryColor = oB1;\n" + " gl_PointSize = oPts.x;\n" + " gl_FogFragCoord = oFog.x;\n" + " gl_TexCoord[0] = oT0;\n" + " gl_TexCoord[1] = oT1;\n" + " gl_TexCoord[2] = oT2;\n" + " gl_TexCoord[3] = oT3;\n" + "\n" ); + + QString *ret = qstring_new(); + qstring_append(ret, qstring_get_str(header)); + qstring_append(ret,"\n" + "void main(void)\n" + "{\n"); + qstring_append(ret, qstring_get_str(body)); + qstring_append(ret,"}\n"); + QDECREF(header); + QDECREF(body); return ret; } + diff --git a/hw/xbox/nv2a_vsh.h b/hw/xbox/nv2a_vsh.h index a2f2abce05..5dd4b6394e 100644 --- a/hw/xbox/nv2a_vsh.h +++ b/hw/xbox/nv2a_vsh.h @@ -36,6 +36,54 @@ // Xbox vertex read/write shader #define VSH_VERSION_XVSW 0x7778 +#define VSH_TOKEN_SIZE 4 + +typedef enum { + FLD_ILU = 0, + FLD_MAC, + FLD_CONST, + FLD_V, + // Input A + FLD_A_NEG, + FLD_A_SWZ_X, + FLD_A_SWZ_Y, + FLD_A_SWZ_Z, + FLD_A_SWZ_W, + FLD_A_R, + FLD_A_MUX, + // Input B + FLD_B_NEG, + FLD_B_SWZ_X, + FLD_B_SWZ_Y, + FLD_B_SWZ_Z, + FLD_B_SWZ_W, + FLD_B_R, + FLD_B_MUX, + // Input C + FLD_C_NEG, + FLD_C_SWZ_X, + FLD_C_SWZ_Y, + FLD_C_SWZ_Z, + FLD_C_SWZ_W, + FLD_C_R_HIGH, + FLD_C_R_LOW, + FLD_C_MUX, + // Output + FLD_OUT_MAC_MASK, + FLD_OUT_R, + FLD_OUT_ILU_MASK, + FLD_OUT_O_MASK, + FLD_OUT_ORB, + FLD_OUT_ADDRESS, + FLD_OUT_MUX, + // Relative addressing + FLD_A0X, + // Final instruction + FLD_FINAL +} 
VshFieldName; + +uint8_t vsh_get_field(uint32_t *shader_token, VshFieldName field_name); + QString* vsh_translate(uint16_t version, uint32_t *tokens, unsigned int tokens_length); diff --git a/hw/xbox/swizzle.c b/hw/xbox/swizzle.c new file mode 100644 index 0000000000..0d80da9366 --- /dev/null +++ b/hw/xbox/swizzle.c @@ -0,0 +1,109 @@ +/* + * QEMU texture swizzling routines + * + * Copyright (c) 2013 espes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2 as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include +#include + +void unswizzle_rect( + uint8_t *src_buf, + unsigned int width, + unsigned int height, + unsigned int depth, + uint8_t *dst_buf, + unsigned int pitch, + unsigned int bytes_per_pixel) +{ + unsigned int offset_u = 0, offset_v = 0, offset_w = 0; + uint32_t mask_u = 0, mask_v = 0, mask_w = 0; + + unsigned int i = 1, j = 1; + + while( (i <= width) || (i <= height) || (i <= depth) ) { + if(i < width) { + mask_u |= j; + j<<=1; + } + if(i < height) { + mask_v |= j; + j<<=1; + } + if(i < depth) { + mask_w |= j; + j<<=1; + } + i<<=1; + } + + uint32_t start_u = 0; + uint32_t start_v = 0; + uint32_t start_w = 0; + uint32_t mask_max = 0; + + // get the biggest mask + if(mask_u > mask_v) + mask_max = mask_u; + else + mask_max = mask_v; + if(mask_w > mask_max) + mask_max = mask_w; + + for(i = 1; i <= mask_max; i<<=1) { + if(i<=mask_u) { + if(mask_u & i) start_u |= (offset_u & i); + else offset_u <<= 1; + } + + if(i <= mask_v) { + if(mask_v & i) start_v |= (offset_v & i); + else offset_v<<=1; + } + + if(i <= mask_w) { + if(mask_w & i) start_w |= (offset_w & i); + else offset_w <<= 1; + } + } + + uint32_t w = start_w; + unsigned int z; + for(z=0; z + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef HW_XBOX_SWIZZLE_H +#define HW_XBOX_SWIZZLE_H + + void unswizzle_rect( + uint8_t *src_buf, + unsigned int width, + unsigned int height, + unsigned int depth, + uint8_t *dst_buf, + unsigned int pitch, + unsigned int bytes_per_pixel); + +#endif \ No newline at end of file