Initial integration of JayFoxRox's GLSL vertex program translator

This commit is contained in:
espes 2014-05-25 20:58:17 +10:00
parent 7eb49195f0
commit 2c4571a3cd
6 changed files with 691 additions and 478 deletions

View File

@ -2,7 +2,7 @@ obj-y += xbox.o chihiro.o
obj-y += xbox_pci.o acpi_xbox.o
obj-y += amd_smbus.o smbus_xbox_smc.o smbus_cx25871.o smbus_adm1032.o
obj-y += nvnet.o
obj-y += nv2a.o nv2a_vsh.o nv2a_psh.o
obj-y += nv2a.o nv2a_vsh.o nv2a_psh.o swizzle.o
obj-y += mcpx_apu.o mcpx_aci.o
obj-y += lpc47m157.o
obj-y += xid.o

View File

@ -28,6 +28,7 @@
#include "qapi/qmp/qstring.h"
#include "gl/gloffscreen.h"
#include "hw/xbox/swizzle.h"
#include "hw/xbox/u_format_r11g11b10f.h"
#include "hw/xbox/nv2a_vsh.h"
#include "hw/xbox/nv2a_psh.h"
@ -692,8 +693,7 @@ static const ColorFormatInfo kelvin_color_format_map[66] = {
#define NV2A_NUM_SUBCHANNELS 8
#define NV2A_MAX_BATCH_LENGTH 0xFFFF
#define NV2A_VERTEXSHADER_SLOTS 32 /*???*/
#define NV2A_MAX_VERTEXSHADER_LENGTH 136
#define NV2A_MAX_TRANSFORM_PROGRAM_LENGTH 136
#define NV2A_VERTEXSHADER_CONSTANTS 192
#define NV2A_VERTEXSHADER_ATTRIBUTES 16
#define NV2A_MAX_TEXTURES 4
@ -771,14 +771,6 @@ typedef struct VertexShaderConstant {
uint32 data[4];
} VertexShaderConstant;
/* One vertex program slot: the raw program words uploaded by the guest plus
 * the GL program object they are compiled into. */
typedef struct VertexShader {
/* set when the slot is selected for loading; cleared once the program
 * has been translated and uploaded to GL */
bool dirty;
/* number of 32-bit words currently stored in program_data */
unsigned int program_length;
/* raw program words, appended one per SET_TRANSFORM_PROGRAM write */
uint32_t program_data[NV2A_MAX_VERTEXSHADER_LENGTH];
/* GL program object name (from glGenProgramsARB) */
GLuint gl_program;
} VertexShader;
typedef struct Texture {
bool dirty;
bool enabled;
@ -818,9 +810,12 @@ typedef struct ShaderState {
bool rect_tex[4];
/* vertex shader */
bool fixed_function;
bool vertex_program;
uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH];
int program_length;
} ShaderState;
typedef struct Surface {
@ -842,28 +837,6 @@ typedef struct KelvinState {
hwaddr dma_vertex_a, dma_vertex_b;
hwaddr dma_semaphore;
unsigned int semaphore_offset;
GLenum gl_primitive_mode;
bool enable_vertex_program_write;
unsigned int vertexshader_start_slot;
unsigned int vertexshader_load_slot;
VertexShader vertexshaders[NV2A_VERTEXSHADER_SLOTS];
unsigned int constant_load_slot;
VertexShaderConstant constants[NV2A_VERTEXSHADER_CONSTANTS];
VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
unsigned int inline_array_length;
uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
unsigned int inline_elements_length;
uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
unsigned int inline_buffer_length;
InlineVertexBufferEntry inline_buffer[NV2A_MAX_BATCH_LENGTH];
} KelvinState;
typedef struct ContextSurfaces2DState {
@ -956,6 +929,29 @@ typedef struct PGRAPHState {
GraphicsSubchannel subchannel_data[NV2A_NUM_SUBCHANNELS];
GLenum gl_primitive_mode;
bool enable_vertex_program_write;
unsigned int program_start;
unsigned int program_load;
uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH];
unsigned int constant_load_slot;
VertexShaderConstant constants[NV2A_VERTEXSHADER_CONSTANTS];
VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
unsigned int inline_array_length;
uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
unsigned int inline_elements_length;
uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
unsigned int inline_buffer_length;
InlineVertexBufferEntry inline_buffer[NV2A_MAX_BATCH_LENGTH];
uint32_t regs[0x2000];
} PGRAPHState;
@ -1211,7 +1207,6 @@ static void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len)
static void load_graphics_object(NV2AState *d, hwaddr instance_address,
GraphicsObject *obj)
{
int i;
uint8_t *obj_ptr;
uint32_t switch1, switch2, switch3;
@ -1226,21 +1221,9 @@ static void load_graphics_object(NV2AState *d, hwaddr instance_address,
obj->graphics_class = switch1 & NV_PGRAPH_CTX_SWITCH1_GRCLASS;
/* init graphics object */
KelvinState *kelvin;
switch (obj->graphics_class) {
case NV_KELVIN_PRIMITIVE:
kelvin = &obj->data.kelvin;
/* generate vertex programs */
for (i = 0; i < NV2A_VERTEXSHADER_SLOTS; i++) {
VertexShader *shader = &kelvin->vertexshaders[i];
glGenProgramsARB(1, &shader->gl_program);
}
assert(glGetError() == GL_NO_ERROR);
/* temp hack? */
kelvin->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value = 0xFFFFFFF;
// kelvin->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value = 0xFFFFFFF;
break;
default:
break;
@ -1260,19 +1243,21 @@ static GraphicsObject* lookup_graphics_object(PGRAPHState *s,
}
static void kelvin_bind_converted_vertex_attributes(NV2AState *d,
static void pgraph_bind_converted_vertex_attributes(NV2AState *d,
KelvinState *kelvin,
bool inline_data,
unsigned int num_elements)
{
int i, j;
for (i=0; i<NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attribute = &kelvin->vertex_attributes[i];
if (attribute->count && attribute->needs_conversion) {
PGRAPHState *pg = &d->pgraph;
for (i=0; i<NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attribute = &pg->vertex_attributes[i];
if (attribute->count && attribute->needs_conversion) {
NV2A_DPRINTF("converted %d\n", i);
uint8_t *data;
if (inline_data) {
data = (uint8_t*)kelvin->inline_array
data = (uint8_t*)pg->inline_array
+ attribute->inline_array_offset;
} else {
hwaddr dma_len;
@ -1322,12 +1307,13 @@ static void kelvin_bind_converted_vertex_attributes(NV2AState *d,
}
}
static unsigned int kelvin_bind_inline_array(KelvinState *kelvin)
static unsigned int pgraph_bind_inline_array(PGRAPHState *pg)
{
int i;
unsigned int offset = 0;
for (i=0; i<NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attribute = &kelvin->vertex_attributes[i];
VertexAttribute *attribute = &pg->vertex_attributes[i];
if (attribute->count) {
glEnableVertexAttribArray(i);
@ -1340,7 +1326,7 @@ static unsigned int kelvin_bind_inline_array(KelvinState *kelvin)
attribute->gl_type,
attribute->gl_normalize,
attribute->stride,
(uint8_t*)kelvin->inline_array + offset);
(uint8_t*)pg->inline_array + offset);
}
offset += attribute->size * attribute->count;
@ -1349,13 +1335,13 @@ static unsigned int kelvin_bind_inline_array(KelvinState *kelvin)
return offset;
}
static void kelvin_bind_vertex_attributes(NV2AState *d,
KelvinState *kelvin)
static void pgraph_bind_vertex_attributes(NV2AState *d, KelvinState *kelvin)
{
int i;
PGRAPHState *pg = &d->pgraph;
for (i=0; i<NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attribute = &kelvin->vertex_attributes[i];
VertexAttribute *attribute = &pg->vertex_attributes[i];
if (attribute->count) {
glEnableVertexAttribArray(i);
@ -1387,157 +1373,6 @@ static void kelvin_bind_vertex_attributes(NV2AState *d,
}
}
/*
 * Bind the ARB vertex program held in the slot selected by
 * kelvin->vertexshader_start_slot.
 *
 * If the slot is marked dirty, its program words are translated with
 * vsh_translate(), uploaded with glProgramStringARB() and checked; on a
 * compilation error the microcode is dumped to stderr and the process
 * aborts. Afterwards every constant flagged dirty is uploaded as a program
 * environment parameter and its dirty flag is cleared.
 */
static void kelvin_bind_vertex_program(KelvinState *kelvin)
{
int i;
VertexShader *shader;
shader = &kelvin->vertexshaders[kelvin->vertexshader_start_slot];
glBindProgramARB(GL_VERTEX_PROGRAM_ARB, shader->gl_program);
/* only retranslate/upload when the slot contents changed */
if (shader->dirty) {
QString *program_code = vsh_translate(VSH_VERSION_XVS,
shader->program_data,
shader->program_length);
const char* program_code_str = qstring_get_str(program_code);
NV2A_DPRINTF("bind vertex program %d, code:\n%s\n",
kelvin->vertexshader_start_slot,
program_code_str);
glProgramStringARB(GL_VERTEX_PROGRAM_ARB,
GL_PROGRAM_FORMAT_ASCII_ARB,
strlen(program_code_str),
program_code_str);
/* Check it compiled */
GLint pos;
glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
/* any position other than -1 is treated as a compile failure */
if (pos != -1) {
fprintf(stderr, "nv2a: vertex program compilation failed:\n"
" pos %d, %s\n",
pos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
/* dump the raw program words to help reproduce the failure */
fprintf(stderr, "ucode:\n");
for (i=0; i<shader->program_length; i++) {
fprintf(stderr, " 0x%08x,\n", shader->program_data[i]);
}
abort();
}
/* Check we're within resource limits */
GLint native;
glGetProgramivARB(GL_VERTEX_PROGRAM_ARB,
GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB,
&native);
assert(native);
assert(glGetError() == GL_NO_ERROR);
/* drop our reference to the translated source string */
QDECREF(program_code);
shader->dirty = false;
}
/* load constants */
for (i=0; i<NV2A_VERTEXSHADER_CONSTANTS; i++) {
VertexShaderConstant *constant = &kelvin->constants[i];
/* skip constants that have not been written since the last upload */
if (!constant->dirty) continue;
glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB,
i,
(const GLfloat*)constant->data);
constant->dirty = false;
}
assert(glGetError() == GL_NO_ERROR);
}
/*
 * Copy a swizzled texture into a linear buffer.
 *
 * In the swizzled source, the address of a texel is formed by interleaving
 * the bits of its x, y and z coordinates. A coordinate only contributes a
 * bit at a given level while its dimension still extends past that level,
 * so non-square / non-cubic sizes are handled.
 *
 * src_buf:         swizzled source texels
 * width, height,
 * depth:           size of the texture in texels (use depth = 1 for 2D)
 * dst_buf:         linear destination; rows are written `pitch` bytes apart
 *                  and slices follow each other without extra padding
 * pitch:           destination row stride in bytes
 * bytes_per_pixel: size of one texel in bytes
 */
static void unswizzle_rect(
    uint8_t *src_buf,
    unsigned int width,
    unsigned int height,
    unsigned int depth,
    uint8_t *dst_buf,
    unsigned int pitch,
    unsigned int bytes_per_pixel)
{
    /* Bits of the swizzled texel index that belong to x, y and z. */
    uint32_t mask_u = 0, mask_v = 0, mask_w = 0;
    uint32_t next_bit = 1;
    unsigned int level;

    for (level = 1;
         level <= width || level <= height || level <= depth;
         level <<= 1) {
        if (level < width) {
            mask_u |= next_bit;
            next_bit <<= 1;
        }
        if (level < height) {
            mask_v |= next_bit;
            next_bit <<= 1;
        }
        if (level < depth) {
            mask_w |= next_bit;
            next_bit <<= 1;
        }
    }

    /*
     * Walk the destination linearly. (c - mask) & mask steps c to the next
     * value using only the bits in mask, i.e. it increments the coordinate
     * directly in its swizzled form and wraps to 0 at the end.
     */
    const unsigned int row_padding = pitch - width * bytes_per_pixel;
    uint32_t w = 0;
    unsigned int x, y, z;

    for (z = 0; z < depth; z++) {
        uint32_t v = 0;
        for (y = 0; y < height; y++) {
            uint32_t u = 0;
            for (x = 0; x < width; x++) {
                /* widen before multiplying so the byte offset cannot wrap
                 * in 32-bit arithmetic */
                memcpy(dst_buf,
                       src_buf + (size_t)(u | v | w) * bytes_per_pixel,
                       bytes_per_pixel);
                dst_buf += bytes_per_pixel;
                u = (u - mask_u) & mask_u;
            }
            dst_buf += row_padding;
            v = (v - mask_v) & mask_v;
        }
        w = (w - mask_w) & mask_w;
    }
}
static void pgraph_bind_textures(NV2AState *d)
{
@ -1707,12 +1542,14 @@ static GLuint generate_shaders(ShaderState state)
GLuint program = glCreateProgram();
/* create the vertex shader */
QString *vertex_shader_code = NULL;
const char *vertex_shader_code_str = NULL;
if (state.fixed_function) {
/* generate vertex shader mimicking fixed function */
GLuint vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glAttachShader(program, vertex_shader);
const char *vertex_shader_code =
vertex_shader_code_str =
"attribute vec4 position;\n"
"attribute vec3 normal;\n"
"attribute vec4 diffuse;\n"
@ -1739,10 +1576,21 @@ static GLuint generate_shaders(ShaderState state)
" gl_TexCoord[3] = multiTexCoord3;\n"
"}\n";
glShaderSource(vertex_shader, 1, &vertex_shader_code, 0);
} else if (state.vertex_program) {
vertex_shader_code = vsh_translate(VSH_VERSION_XVS,
state.program_data,
state.program_length);
vertex_shader_code_str = qstring_get_str(vertex_shader_code);
}
if (vertex_shader_code_str) {
GLuint vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glAttachShader(program, vertex_shader);
glShaderSource(vertex_shader, 1, &vertex_shader_code_str, 0);
glCompileShader(vertex_shader);
NV2A_DPRINTF("bind new vertex shader, code:\n%s\n", vertex_shader_code);
NV2A_DPRINTF("bind new vertex shader, code:\n%s\n", vertex_shader_code_str);
/* Check it compiled */
GLint compiled = 0;
@ -1754,6 +1602,15 @@ static GLuint generate_shaders(ShaderState state)
abort();
}
if (vertex_shader_code) {
QDECREF(vertex_shader_code);
vertex_shader_code = NULL;
}
}
if (state.fixed_function) {
/* bind fixed function vertex attributes */
glBindAttribLocation(program, NV2A_VERTEX_ATTR_POSITION, "position");
glBindAttribLocation(program, NV2A_VERTEX_ATTR_DIFFUSE, "diffuse");
glBindAttribLocation(program, NV2A_VERTEX_ATTR_SPECULAR, "specular");
@ -1762,10 +1619,17 @@ static GLuint generate_shaders(ShaderState state)
glBindAttribLocation(program, NV2A_VERTEX_ATTR_TEXTURE1, "multiTexCoord1");
glBindAttribLocation(program, NV2A_VERTEX_ATTR_TEXTURE2, "multiTexCoord2");
glBindAttribLocation(program, NV2A_VERTEX_ATTR_TEXTURE3, "multiTexCoord3");
} else if (state.vertex_program) {
/* Bind attributes for transform program */
char tmp[8];
for(i = 0; i < 16; i++) {
snprintf(tmp, sizeof(tmp), "v%d", i);
glBindAttribLocation(program, i, tmp);
}
}
/* generate a fragment hader from register combiners */
/* generate a fragment shader from register combiners */
GLuint fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
glAttachShader(program, fragment_shader);
@ -1823,6 +1687,7 @@ static GLuint generate_shaders(ShaderState state)
}
}
/* validate the program */
glValidateProgram(program);
GLint valid = 0;
glGetProgramiv(program, GL_VALIDATE_STATUS, &valid);
@ -1840,6 +1705,9 @@ static void pgraph_bind_shaders(PGRAPHState *pg)
{
int i;
bool vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
NV_PGRAPH_CSV0_D_MODE) == 2;
bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
NV_PGRAPH_CSV0_D_MODE) == 0;
@ -1854,8 +1722,30 @@ static void pgraph_bind_shaders(PGRAPHState *pg)
/* fixed function stuff */
.fixed_function = fixed_function,
/* vertex program stuff */
.vertex_program = vertex_program,
};
state.program_length = 0;
memset(state.program_data, 0, sizeof(state.program_data));
if (vertex_program) {
// copy in vertex program tokens
for (i = pg->program_start;
i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH;
i += VSH_TOKEN_SIZE) {
uint32_t *cur_token = pg->program_data + i;
memcpy(state.program_data + state.program_length,
cur_token,
VSH_TOKEN_SIZE * sizeof(uint32_t));
state.program_length += VSH_TOKEN_SIZE;
if (vsh_get_field(cur_token, FLD_FINAL)) {
break;
}
}
}
for (i = 0; i < 8; i++) {
state.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4];
@ -1921,9 +1811,11 @@ static void pgraph_bind_shaders(PGRAPHState *pg)
}
}
/* update fixed function composite matrix */
if (fixed_function) {
/* update fixed function composite matrix */
GLint comLoc = glGetUniformLocation(pg->gl_program, "composite");
assert(comLoc != -1);
glUniformMatrix4fv(comLoc, 1, GL_FALSE, pg->composite_matrix);
@ -1951,8 +1843,23 @@ static void pgraph_bind_shaders(PGRAPHState *pg)
};
GLint viewLoc = glGetUniformLocation(pg->gl_program, "invViewport");
assert(viewLoc != -1);
glUniformMatrix4fv(viewLoc, 1, GL_FALSE, &invViewport[0]);
} else if (vertex_program) {
/* update vertex program constants */
for (i=0; i<NV2A_VERTEXSHADER_CONSTANTS; i++) {
VertexShaderConstant *constant = &pg->constants[i];
char tmp[8];
snprintf(tmp, sizeof(tmp), "c[%d]", i);
GLint loc = glGetUniformLocation(pg->gl_program, tmp);
//assert(loc != -1);
if (loc != -1) {
glUniform4fv(loc, 1, (const GLfloat*)constant->data);
}
}
}
pg->shaders_dirty = false;
@ -2196,7 +2103,6 @@ static void pgraph_method(NV2AState *d,
unsigned int slot;
VertexAttribute *vertex_attribute;
VertexShader *vertexshader;
VertexShaderConstant *constant;
PGRAPHState *pg = &d->pgraph;
@ -2481,8 +2387,8 @@ static void pgraph_method(NV2AState *d,
slot = (class_method - NV097_SET_VIEWPORT_OFFSET) / 4;
/* populate magic viewport offset constant */
kelvin->constants[59].data[slot] = parameter;
kelvin->constants[59].dirty = true;
pg->constants[59].data[slot] = parameter;
pg->constants[59].dirty = true;
break;
case NV097_SET_COMBINER_FACTOR0 ...
@ -2519,30 +2425,31 @@ static void pgraph_method(NV2AState *d,
slot = (class_method - NV097_SET_VIEWPORT_SCALE) / 4;
/* populate magic viewport scale constant */
kelvin->constants[58].data[slot] = parameter;
kelvin->constants[58].dirty = true;
pg->constants[58].data[slot] = parameter;
pg->constants[58].dirty = true;
break;
case NV097_SET_TRANSFORM_PROGRAM ...
NV097_SET_TRANSFORM_PROGRAM + 0x7c:
slot = (class_method - NV097_SET_TRANSFORM_PROGRAM) / 4;
/* TODO: It should still work using a non-increasing slot??? */
// slot = (class_method - NV097_SET_TRANSFORM_PROGRAM) / 4;
vertexshader = &kelvin->vertexshaders[kelvin->vertexshader_load_slot];
assert(vertexshader->program_length < NV2A_MAX_VERTEXSHADER_LENGTH);
vertexshader->program_data[
vertexshader->program_length++] = parameter;
assert(pg->program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
pg->program_data[pg->program_load++] = parameter;
pg->shaders_dirty = true;
break;
case NV097_SET_TRANSFORM_CONSTANT ...
NV097_SET_TRANSFORM_CONSTANT + 0x7c:
slot = (class_method - NV097_SET_TRANSFORM_CONSTANT) / 4;
// slot = (class_method - NV097_SET_TRANSFORM_CONSTANT) / 4;
constant = &kelvin->constants[kelvin->constant_load_slot+slot/4];
constant->data[slot%4] = parameter;
assert((pg->constant_load_slot/4) < NV2A_VERTEXSHADER_CONSTANTS);
constant = &pg->constants[pg->constant_load_slot/4];
constant->data[pg->constant_load_slot%4] = parameter;
constant->dirty = true;
pg->constant_load_slot++;
break;
case NV097_SET_VERTEX4F ...
@ -2550,16 +2457,16 @@ static void pgraph_method(NV2AState *d,
slot = (class_method - NV097_SET_VERTEX4F) / 4;
assert(kelvin->inline_buffer_length < NV2A_MAX_BATCH_LENGTH);
assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH);
InlineVertexBufferEntry *entry =
&kelvin->inline_buffer[kelvin->inline_buffer_length];
&pg->inline_buffer[pg->inline_buffer_length];
entry->position[slot] = parameter;
if (slot == 3) {
entry->diffuse =
kelvin->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value;
kelvin->inline_buffer_length++;
pg->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value;
pg->inline_buffer_length++;
}
break;
}
@ -2568,7 +2475,7 @@ static void pgraph_method(NV2AState *d,
NV097_SET_VERTEX_DATA_ARRAY_FORMAT + 0x3c:
slot = (class_method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4;
vertex_attribute = &kelvin->vertex_attributes[slot];
vertex_attribute = &pg->vertex_attributes[slot];
vertex_attribute->format =
GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE);
@ -2633,26 +2540,27 @@ static void pgraph_method(NV2AState *d,
slot = (class_method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4;
kelvin->vertex_attributes[slot].dma_select =
pg->vertex_attributes[slot].dma_select =
parameter & 0x80000000;
kelvin->vertex_attributes[slot].offset =
pg->vertex_attributes[slot].offset =
parameter & 0x7fffffff;
kelvin->vertex_attributes[slot].converted_elements = 0;
pg->vertex_attributes[slot].converted_elements = 0;
break;
case NV097_SET_BEGIN_END:
if (parameter == NV097_SET_BEGIN_END_OP_END) {
if (kelvin->inline_buffer_length) {
if (pg->inline_buffer_length) {
glEnableVertexAttribArray(NV2A_VERTEX_ATTR_POSITION);
glVertexAttribPointer(NV2A_VERTEX_ATTR_POSITION,
4,
GL_FLOAT,
GL_FALSE,
sizeof(InlineVertexBufferEntry),
kelvin->inline_buffer);
pg->inline_buffer);
glEnableVertexAttribArray(NV2A_VERTEX_ATTR_DIFFUSE);
glVertexAttribPointer(NV2A_VERTEX_ATTR_DIFFUSE,
@ -2660,36 +2568,38 @@ static void pgraph_method(NV2AState *d,
GL_UNSIGNED_BYTE,
GL_TRUE,
sizeof(InlineVertexBufferEntry),
&kelvin->inline_buffer[0].diffuse);
&pg->inline_buffer[0].diffuse);
glDrawArrays(kelvin->gl_primitive_mode,
0, kelvin->inline_buffer_length);
} else if (kelvin->inline_array_length) {
glDrawArrays(pg->gl_primitive_mode,
0, pg->inline_buffer_length);
} else if (pg->inline_array_length) {
unsigned int vertex_size =
kelvin_bind_inline_array(kelvin);
pgraph_bind_inline_array(pg);
unsigned int index_count =
kelvin->inline_array_length*4 / vertex_size;
pg->inline_array_length*4 / vertex_size;
kelvin_bind_converted_vertex_attributes(d, kelvin,
true, index_count);
glDrawArrays(kelvin->gl_primitive_mode,
NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
pgraph_bind_converted_vertex_attributes(d,
kelvin, true, index_count);
glDrawArrays(pg->gl_primitive_mode,
0, index_count);
} else if (kelvin->inline_elements_length) {
} else if (pg->inline_elements_length) {
uint32_t max_element = 0;
uint32_t min_element = (uint32_t)-1;
for (i=0; i<kelvin->inline_elements_length; i++) {
max_element = MAX(kelvin->inline_elements[i], max_element);
min_element = MIN(kelvin->inline_elements[i], min_element);
for (i=0; i<pg->inline_elements_length; i++) {
max_element = MAX(pg->inline_elements[i], max_element);
min_element = MIN(pg->inline_elements[i], min_element);
}
kelvin_bind_converted_vertex_attributes(d, kelvin,
false, max_element+1);
glDrawElements(kelvin->gl_primitive_mode,
kelvin->inline_elements_length,
pgraph_bind_converted_vertex_attributes(d,
kelvin, false, max_element+1);
glDrawElements(pg->gl_primitive_mode,
pg->inline_elements_length,
GL_UNSIGNED_INT,
kelvin->inline_elements);
pg->inline_elements);
}/* else {
assert(false);
}*/
@ -2699,26 +2609,18 @@ static void pgraph_method(NV2AState *d,
pgraph_update_surface(d, true);
bool use_vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
NV_PGRAPH_CSV0_D_MODE) == 2;
if (use_vertex_program) {
glEnable(GL_VERTEX_PROGRAM_ARB);
kelvin_bind_vertex_program(kelvin);
} else {
glDisable(GL_VERTEX_PROGRAM_ARB);
}
pgraph_bind_shaders(pg);
pgraph_bind_textures(d);
kelvin_bind_vertex_attributes(d, kelvin);
pgraph_bind_vertex_attributes(d, kelvin);
kelvin->gl_primitive_mode = kelvin_primitive_map[parameter];
kelvin->inline_elements_length = 0;
kelvin->inline_array_length = 0;
kelvin->inline_buffer_length = 0;
pg->gl_primitive_mode = kelvin_primitive_map[parameter];
pg->inline_elements_length = 0;
pg->inline_array_length = 0;
pg->inline_buffer_length = 0;
}
pg->surface_color.draw_dirty = true;
break;
@ -2788,38 +2690,38 @@ static void pgraph_method(NV2AState *d,
break;
case NV097_ARRAY_ELEMENT16:
assert(kelvin->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
kelvin->inline_elements[
kelvin->inline_elements_length++] = parameter & 0xFFFF;
kelvin->inline_elements[
kelvin->inline_elements_length++] = parameter >> 16;
assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
pg->inline_elements[
pg->inline_elements_length++] = parameter & 0xFFFF;
pg->inline_elements[
pg->inline_elements_length++] = parameter >> 16;
break;
case NV097_ARRAY_ELEMENT32:
assert(kelvin->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
kelvin->inline_elements[
kelvin->inline_elements_length++] = parameter;
assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
pg->inline_elements[
pg->inline_elements_length++] = parameter;
break;
case NV097_DRAW_ARRAYS: {
unsigned int start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX);
unsigned int count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT)+1;
kelvin_bind_converted_vertex_attributes(d, kelvin,
pgraph_bind_converted_vertex_attributes(d, kelvin,
false, start + count);
glDrawArrays(kelvin->gl_primitive_mode, start, count);
glDrawArrays(pg->gl_primitive_mode, start, count);
break;
}
case NV097_INLINE_ARRAY:
assert(kelvin->inline_array_length < NV2A_MAX_BATCH_LENGTH);
kelvin->inline_array[
kelvin->inline_array_length++] = parameter;
assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH);
pg->inline_array[
pg->inline_array_length++] = parameter;
break;
case NV097_SET_VERTEX_DATA4UB ...
NV097_SET_VERTEX_DATA4UB + 0x3c:
slot = (class_method - NV097_SET_VERTEX_DATA4UB) / 4;
kelvin->vertex_attributes[slot].inline_value = parameter;
pg->vertex_attributes[slot].inline_value = parameter;
break;
case NV097_SET_SEMAPHORE_OFFSET:
@ -2946,27 +2848,20 @@ static void pgraph_method(NV2AState *d,
GET_MASK(parameter, NV_097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE));
break;
case NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN:
kelvin->enable_vertex_program_write = parameter;
pg->enable_vertex_program_write = parameter;
break;
case NV097_SET_TRANSFORM_PROGRAM_LOAD:
assert(parameter < NV2A_VERTEXSHADER_SLOTS);
kelvin->vertexshader_load_slot = parameter;
kelvin->vertexshaders[parameter].program_length = 0; /* ??? */
kelvin->vertexshaders[parameter].dirty = true;
assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
pg->program_load = parameter * VSH_TOKEN_SIZE;
break;
case NV097_SET_TRANSFORM_PROGRAM_START:
assert(parameter < NV2A_VERTEXSHADER_SLOTS);
/* if the shader changed, dirty all the constants */
if (parameter != kelvin->vertexshader_start_slot) {
for (i=0; i<NV2A_VERTEXSHADER_CONSTANTS; i++) {
kelvin->constants[i].dirty = true;
}
}
kelvin->vertexshader_start_slot = parameter;
assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
pg->program_start = parameter * VSH_TOKEN_SIZE;
pg->shaders_dirty = true;
break;
case NV097_SET_TRANSFORM_CONSTANT_LOAD:
assert(parameter < NV2A_VERTEXSHADER_CONSTANTS);
kelvin->constant_load_slot = parameter;
pg->constant_load_slot = parameter * 4;
NV2A_DPRINTF("load to %d\n", parameter);
break;

View File

@ -1,6 +1,7 @@
/*
* QEMU Geforce NV2A vertex shader translation
*
* Copyright (c) 2014 Jannik Vogel
* Copyright (c) 2012 espes
*
* Based on:
@ -33,52 +34,6 @@
#define VSH_D3DSCM_CORRECTION 96
#define VSH_TOKEN_SIZE 4
/* Names of the bit fields of a vertex shader token; passed to
 * vsh_get_field() to select which field to extract. The groups below follow
 * the three instruction inputs (A, B, C) and the output description. */
typedef enum {
FLD_ILU = 0,
FLD_MAC,
FLD_CONST,
FLD_V,
// Input A
FLD_A_NEG,
FLD_A_SWZ_X,
FLD_A_SWZ_Y,
FLD_A_SWZ_Z,
FLD_A_SWZ_W,
FLD_A_R,
FLD_A_MUX,
// Input B
FLD_B_NEG,
FLD_B_SWZ_X,
FLD_B_SWZ_Y,
FLD_B_SWZ_Z,
FLD_B_SWZ_W,
FLD_B_R,
FLD_B_MUX,
// Input C
FLD_C_NEG,
FLD_C_SWZ_X,
FLD_C_SWZ_Y,
FLD_C_SWZ_Z,
FLD_C_SWZ_W,
FLD_C_R_HIGH,
FLD_C_R_LOW,
FLD_C_MUX,
// Output
FLD_OUT_MAC_MASK,
FLD_OUT_R,
FLD_OUT_ILU_MASK,
FLD_OUT_O_MASK,
FLD_OUT_ORB,
FLD_OUT_ADDRESS,
FLD_OUT_MUX,
// Relative addressing
FLD_A0X,
// Final instruction
FLD_FINAL
} VshFieldName;
typedef enum {
PARAM_UNKNOWN = 0,
@ -222,7 +177,6 @@ static const VshOpcodeParams mac_opcode_params[] = {
};
static const char* mask_str[] = {
// xyzw xyzw
"", // 0000 ____
@ -240,7 +194,7 @@ static const char* mask_str[] = {
".xy", // 1100 xy__
".xyw", // 1101 xy_w
".xyz", // 1110 xyz_
""//.xyzw 1111 xyzw
".xyzw" // 1111 xyzw
};
/* Note: OpenGL seems to be case-sensitive, and requires upper-case opcodes! */
@ -265,7 +219,7 @@ static const char* ilu_opcode[] = {
"NOP",
"MOV",
"RCP",
"RCP", // Was RCC
"RCC",
"RSQ",
"EXP",
"LOG",
@ -284,7 +238,7 @@ static bool ilu_force_scalar[] = {
};
static const char* out_reg_name[] = {
"R12", // "oPos",
"oPos",
"???",
"???",
"oD0",
@ -312,7 +266,8 @@ static int vsh_get_from_token(uint32_t *shader_token,
{
return (shader_token[subtoken] >> start_bit) & ~(0xFFFFFFFF << bit_length);
}
static uint8_t vsh_get_field(uint32_t *shader_token, VshFieldName field_name)
uint8_t vsh_get_field(uint32_t *shader_token, VshFieldName field_name)
{
return (uint8_t)(vsh_get_from_token(shader_token,
@ -327,7 +282,7 @@ static int16_t convert_c_register(const int16_t c_reg)
{
int16_t r = ((((c_reg >> 5) & 7) - 3) * 32) + (c_reg & 31);
r += VSH_D3DSCM_CORRECTION; /* to map -96..95 to 0..191 */
return r;
return r; //FIXME: = c_reg?!
}
@ -341,7 +296,7 @@ static QString* decode_swizzle(uint32_t *shader_token,
/* some microcode instructions force a scalar value */
if (swizzle_field == FLD_C_SWZ_X
&& ilu_force_scalar[vsh_get_field(shader_token, FLD_ILU)]) {
x = y = z = w = x = vsh_get_field(shader_token, swizzle_field);
x = y = z = w = vsh_get_field(shader_token, swizzle_field);
} else {
x = vsh_get_field(shader_token, swizzle_field++);
y = vsh_get_field(shader_token, swizzle_field++);
@ -352,21 +307,21 @@ static QString* decode_swizzle(uint32_t *shader_token,
if (x == SWIZZLE_X && y == SWIZZLE_Y
&& z == SWIZZLE_Z && w == SWIZZLE_W) {
/* Don't print the swizzle if it's .xyzw */
return qstring_from_str("");
return qstring_from_str(""); // Will turn ".xyzw" into "."
/* Don't print duplicates */
} else if (x == y && y == z && z == w) {
return qstring_from_str((char[]){'.', swizzle_str[x], '\0'});
} else if (x == y && z == w) {
} else if (y == z && z == w) {
return qstring_from_str((char[]){'.',
swizzle_str[x], swizzle_str[y], '\0'});
} /*else if (z == w) {
} else if (z == w) {
return qstring_from_str((char[]){'.',
swizzle_str[x], swizzle_str[y], swizzle_str[z], '\0'});
}*/ else {
} else {
return qstring_from_str((char[]){'.',
swizzle_str[x], swizzle_str[y],
swizzle_str[z], swizzle_str[w],
'\0'});
'\0'}); // Normal swizzle mask
}
}
@ -400,12 +355,14 @@ static QString* decode_opcode_input(uint32_t *shader_token,
case PARAM_C:
reg_num = convert_c_register(vsh_get_field(shader_token, FLD_CONST));
if (vsh_get_field(shader_token, FLD_A0X) > 0) {
//FIXME: does this really require the "correction" done in convert_c_register?!
snprintf(tmp, sizeof(tmp), "c[A0+%d]", reg_num);
} else {
snprintf(tmp, sizeof(tmp), "c[%d]", reg_num);
}
break;
default:
printf("Param: 0x%x\n", param);
assert(false);
}
qstring_append(ret_str, tmp);
@ -444,16 +401,18 @@ static QString* decode_opcode(uint32_t *shader_token,
if (mask > 0) {
if (strcmp(opcode, mac_opcode[MAC_ARL]) == 0) {
qstring_append(ret, opcode);
qstring_append(ret, " ARL(a0");
qstring_append(ret, qstring_get_str(inputs));
qstring_append(ret, ";\n");
} else {
qstring_append(ret, " ");
qstring_append(ret, opcode);
qstring_append(ret, " R");
qstring_append(ret, "(");
qstring_append(ret, "R");
qstring_append_int(ret, reg_num);
qstring_append(ret, mask_str[mask]);
qstring_append(ret, qstring_get_str(inputs));
qstring_append(ret, ";\n");
qstring_append(ret, ");\n");
}
}
@ -462,15 +421,17 @@ static QString* decode_opcode(uint32_t *shader_token,
/* Only if it's not masked away: */
&& vsh_get_field(shader_token, FLD_OUT_O_MASK) != 0) {
qstring_append(ret, " ");
qstring_append(ret, opcode);
qstring_append(ret, "(");
if (vsh_get_field(shader_token, FLD_OUT_ORB) == OUTPUT_C) {
/* TODO : Emulate writeable const registers */
qstring_append(ret, " c");
qstring_append(ret, "c");
qstring_append_int(ret,
convert_c_register(
vsh_get_field(shader_token, FLD_OUT_ADDRESS)));
} else {
qstring_append_chr(ret, ' ');
qstring_append(ret,
out_reg_name[
vsh_get_field(shader_token, FLD_OUT_ADDRESS) & 0xF]);
@ -479,7 +440,7 @@ static QString* decode_opcode(uint32_t *shader_token,
mask_str[
vsh_get_field(shader_token, FLD_OUT_O_MASK)]);
qstring_append(ret, qstring_get_str(inputs));
qstring_append(ret, ";\n");
qstring_append(ret, ");\n");
}
return ret;
@ -563,141 +524,285 @@ static QString* decode_token(uint32_t *shader_token)
return ret;
}
/* Vertex shader header, mapping Xbox1 registers to the ARB syntax (original
* version by KingOfC). Note about the use of 'conventional' attributes in here:
* Since we prefer to use only one shader for both immediate and deferred mode
* rendering, we alias all attributes to conventional inputs as much as possible.
* Only when there's no conventional attribute available, we use generic
* attributes. So in the following header, we use conventional attributes first,
* and generic attributes for the rest of the vertex attribute slots. This makes
* it possible to support immediate and deferred mode rendering with the same
* shader, and the use of the OpenGL fixed-function pipeline without a shader.
*/
static const char* vsh_header =
"!!ARBvp1.0\n"
"TEMP R0,R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12;\n"
"ADDRESS A0;\n"
"#version 110\n"
"\n"
"attribute vec4 v0;\n"
"attribute vec4 v1;\n"
"attribute vec4 v2;\n"
"attribute vec4 v3;\n"
"attribute vec4 v4;\n"
"attribute vec4 v5;\n"
"attribute vec4 v6;\n"
"attribute vec4 v7;\n"
"attribute vec4 v8;\n"
"attribute vec4 v9;\n"
"attribute vec4 v10;\n"
"attribute vec4 v11;\n"
"attribute vec4 v12;\n"
"attribute vec4 v13;\n"
"attribute vec4 v14;\n"
"attribute vec4 v15;\n"
"\n"
//FIXME: What is a0 initialized as?
"int A0 = 0;\n"
"\n"
//FIXME: I just assumed this is true for all registers?!
"vec4 R0 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R1 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R2 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R3 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R4 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R5 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R6 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R7 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R8 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R9 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R10 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R11 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 R12 = vec4(0.0,0.0,0.0,1.0);\n"
"\n"
"#define oPos R12\n" /* oPos is a mirror of R12 */
"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n"
"\n"
/* All constants in 1 array declaration */
"uniform vec4 c[192];\n"
"#define viewport_scale c[58]\n"
"#define viewport_offset c[59]\n"
"uniform vec2 cliprange;\n"
/* See:
* http://msdn.microsoft.com/en-us/library/windows/desktop/bb174703%28v=vs.85%29.aspx
* https://www.opengl.org/registry/specs/NV/vertex_program1_1.txt
*/
"/* Converts number of components of rvalue to lvalue */\n"
"float _out(float l, vec4 r) { return r.x; }\n"
"vec2 _out(vec2 l, vec4 r) { return r.xy; }\n"
"vec3 _out(vec3 l, vec4 r) { return r.xyz; }\n"
"vec4 _out(vec4 l, vec4 r) { return r.xyzw; }\n"
"\n"
//QQQ #ifdef NICE_CODE
"/* Converts the input to vec4, pads with last component */\n"
"vec4 _in(float v) { return vec4(v); }\n"
"vec4 _in(vec2 v) { return v.xyyy; }\n"
"vec4 _in(vec3 v) { return v.xyzz; }\n"
"vec4 _in(vec4 v) { return v.xyzw; }\n"
//#else
// "/* Make sure input is always a vec4 */\n"
// "#define _in(v) vec4(v)\n"
//#endif
"\n"
"#define MOV(dest, src) dest = _out(dest,_MOV(_in(src)))\n"
"vec4 _MOV(vec4 src)\n"
"{\n"
" return src;\n"
"}\n"
"\n"
"#define MUL(dest, src0, src1) dest = _out(dest,_MUL(_in(src0), _in(src1)))\n"
"vec4 _MUL(vec4 src0, vec4 src1)\n"
"{\n"
" return src0 * src1;\n"
"}\n"
"\n"
"#define ADD(dest, src0, src1) dest = _out(dest,_ADD(_in(src0), _in(src1)))\n"
"vec4 _ADD(vec4 src0, vec4 src1)\n"
"{\n"
" return src0 + src1;\n"
"}\n"
"\n"
"#define MAD(dest, src0, src1, src2) dest = _out(dest,_MAD(_in(src0), _in(src1), _in(src2)))\n"
"vec4 _MAD(vec4 src0, vec4 src1, vec4 src2)\n"
"{\n"
" return src0 * src1 + src2;\n"
"}\n"
"\n"
"#define DP3(dest, src0, src1) dest = _out(dest,_DP3(_in(src0), _in(src1)))\n"
"vec4 _DP3(vec4 src0, vec4 src1)\n"
"{\n"
" return vec4(dot(src0.xyz, src1.xyz));\n"
"}\n"
"\n"
"#define DPH(dest, src0, src1) dest = _out(dest,_DPH(_in(src0), _in(src1)))\n"
"vec4 _DPH(vec4 src0, vec4 src1)\n"
"{\n"
" return vec4(dot(vec4(src0.xyz, 1.0), src1));\n"
"}\n"
"\n"
"#define DP4(dest, src0, src1) dest = _out(dest,_DP4(_in(src0), _in(src1)))\n"
"vec4 _DP4(vec4 src0, vec4 src1)\n"
"{\n"
" return vec4(dot(src0, src1));\n"
"}\n"
"\n"
"#define DST(dest, src0, src1) dest = _out(dest,_DST(_in(src0), _in(src1)))\n"
"vec4 _DST(vec4 src0, vec4 src1)\n"
"{\n"
" return vec4(1.0,\n"
" src0.y * src1.y,\n"
" src0.z,\n"
" src1.w);\n"
"}\n"
"\n"
"#define MIN(dest, src0, src1) dest = _out(dest,_MIN(_in(src0), _in(src1)))\n"
"vec4 _MIN(vec4 src0, vec4 src1)\n"
"{\n"
" return min(src0, src1);\n"
"}\n"
"\n"
"#define MAX(dest, src0, src1) dest = _out(dest,_MAX(_in(src0), _in(src1)))\n"
"vec4 _MAX(vec4 src0, vec4 src1)\n"
"{\n"
" return max(src0, src1);\n"
"}\n"
"\n"
"#define SLT(dest, src0, src1) dest = _out(dest,_SLT(_in(src0), _in(src1)))\n"
"vec4 _SLT(vec4 src0, vec4 src1)\n"
"{\n"
" return vec4(lessThan(src0, src1));\n"
"}\n"
"\n"
"#define ARL(dest, src) dest = _ARL(_in(src).x)\n"
"int _ARL(float src)\n"
"{\n"
" return int(src);\n"
"}\n"
"\n"
"#define SGE(dest, src0, src1) dest = _out(dest,_SGE(_in(src0), _in(src1)))\n"
"vec4 _SGE(vec4 src0, vec4 src1)\n"
"{\n"
" return vec4(greaterThanEqual(src0, src1));\n"
"}\n"
"\n"
"#define RCP(dest, src) dest = _out(dest,_RCP(_in(src).x))\n"
"vec4 _RCP(float src)\n"
"{\n"
" return vec4(1.0 / src);\n"
"}\n"
"\n"
"#define RCC(dest, src) dest = _out(dest,_RCC(_in(src).x))\n"
"vec4 _RCC(float src)\n"
"{\n"
" float t = 1.0 / src;\n"
" if (t > 0.0) {\n"
" t = clamp(t, 5.42101e-020, 1.884467e+019);\n"
" } else {\n"
" t = clamp(t, -1.884467e+019, -5.42101e-020);\n"
" }\n"
" return vec4(t);\n"
"}\n"
"\n"
"#define RSQ(dest, src) dest = _out(dest,_RSQ(_in(src).x))\n"
"vec4 _RSQ(float src)\n"
"{\n"
" return vec4(inversesqrt(src));\n"
"}\n"
"\n"
"#define EXP(dest, src) dest = _out(dest,_EXP(_in(src).x))\n"
"vec4 _EXP(float src)\n"
"{\n"
" return vec4(exp2(src));\n"
"}\n"
"\n"
"#define LOG(dest, src) dest = _out(dest,_LOG(_in(src).x))\n"
"vec4 _LOG(float src)\n"
"{\n"
" return vec4(log2(src));\n"
"}\n"
"\n"
"#define LIT(dest, src) dest = _out(dest,_LIT(_in(src)))\n"
"vec4 _LIT(vec4 src)\n"
"{\n"
" vec4 t = vec4(1.0, 0.0, 0.0, 1.0);\n"
" float power = src.w;\n"
#if 0
"ATTRIB v0 = vertex.position;" // (See "conventional" note above)
"ATTRIB v1 = vertex.%s;" // Note : We replace this with "weight" or "attrib[1]" depending GL_ARB_vertex_blend
"ATTRIB v2 = vertex.normal;"
"ATTRIB v3 = vertex.color.primary;"
"ATTRIB v4 = vertex.color.secondary;"
"ATTRIB v5 = vertex.fogcoord;"
"ATTRIB v6 = vertex.attrib[6];"
"ATTRIB v7 = vertex.attrib[7];"
"ATTRIB v8 = vertex.texcoord[0];"
"ATTRIB v9 = vertex.texcoord[1];"
"ATTRIB v10 = vertex.texcoord[2];"
"ATTRIB v11 = vertex.texcoord[3];"
#else
"ATTRIB v0 = vertex.attrib[0];\n"
"ATTRIB v1 = vertex.attrib[1];\n"
"ATTRIB v2 = vertex.attrib[2];\n"
"ATTRIB v3 = vertex.attrib[3];\n"
"ATTRIB v4 = vertex.attrib[4];\n"
"ATTRIB v5 = vertex.attrib[5];\n"
"ATTRIB v6 = vertex.attrib[6];\n"
"ATTRIB v7 = vertex.attrib[7];\n"
"ATTRIB v8 = vertex.attrib[8];\n"
"ATTRIB v9 = vertex.attrib[9];\n"
"ATTRIB v10 = vertex.attrib[10];\n"
"ATTRIB v11 = vertex.attrib[11];\n"
//XXX: Limitation for 8.8 fixed point
" power = max(power, -127.9961);\n"
" power = min(power, 127.9961);\n"
#endif
"ATTRIB v12 = vertex.attrib[12];\n"
"ATTRIB v13 = vertex.attrib[13];\n"
"ATTRIB v14 = vertex.attrib[14];\n"
"ATTRIB v15 = vertex.attrib[15];\n"
"OUTPUT oPos = result.position;\n"
"OUTPUT oD0 = result.color.front.primary;\n"
"OUTPUT oD1 = result.color.front.secondary;\n"
"OUTPUT oB0 = result.color.back.primary;\n"
"OUTPUT oB1 = result.color.back.secondary;\n"
"OUTPUT oPts = result.pointsize;\n"
"OUTPUT oFog = result.fogcoord;\n"
"OUTPUT oT0 = result.texcoord[0];\n"
"OUTPUT oT1 = result.texcoord[1];\n"
"OUTPUT oT2 = result.texcoord[2];\n"
"OUTPUT oT3 = result.texcoord[3];\n"
/* All constants in 1 array declaration (requires NV_gpu_program4?) */
"PARAM c[] = { program.env[0..191] };\n"
/* w component of outputs are expected to be initialised to 1 */
"MOV R12, 0.0;\n"
"MOV R12.w, 1.0;\n"
"MOV oD0.w, 1.0;\n"
"MOV oD1.w, 1.0;\n"
"MOV oB0.w, 1.0;\n"
"MOV oB1.w, 1.0;\n"
"MOV oT0.w, 1.0;\n"
"MOV oT1.w, 1.0;\n"
"MOV oT2.w, 1.0;\n"
"MOV oT3.w, 1.0;\n";
" if (src.x > 0.0) {\n"
" t.y = src.x;\n"
" if (src.y > 0.0) {\n"
//XXX: Allowed approximation is EXP(power * LOG(src.y))
" t.z = pow(src.y, power);\n"
" }\n"
" }\n"
" return t;\n"
"}\n";
QString* vsh_translate(uint16_t version,
uint32_t *tokens, unsigned int tokens_length)
{
QString *ret = qstring_from_str(vsh_header);
QString *body = qstring_from_str("\n");
QString *header = qstring_from_str(vsh_header);
bool has_final = false;
uint32_t *cur_token = tokens;
unsigned int slot;
while (cur_token-tokens < tokens_length) {
slot = (cur_token-tokens) / VSH_TOKEN_SIZE;
QString *token_str = decode_token(cur_token);
qstring_append(ret, qstring_get_str(token_str));
qstring_append_fmt(body,
" /* Slot %d: 0x%08X 0x%08X 0x%08X 0x%08X */",
slot,
cur_token[0],cur_token[1],cur_token[2],cur_token[3]);
qstring_append(body, "\n");
qstring_append(body, qstring_get_str(token_str));
qstring_append(body, "\n");
QDECREF(token_str);
if (vsh_get_field(cur_token, FLD_FINAL)) {
has_final = true;
break;
}
cur_token += VSH_TOKEN_SIZE;
}
/* Note : Since we replaced oPos with r12 in the above decoding,
* we have to assign oPos at the end; This can be done in two ways;
* 1) When the shader is complete (including transformations),
* we could just do a 'MOV oPos, R12;' and be done with it.
* 2) In case of D3DFVF_XYZRHW, it seems the NV2A applies the mvp
* (model/view/projection) matrix transformation AFTER executing
* the shader (but OpenGL expects *the*shader* to handle this
* transformation).
* Until we can discern these two situations, we apply the matrix
* transformation :
* TODO : What should we do about normals, eye-space lighting and all that?
*/
qstring_append(ret,
/*
'# Dxbx addition : Transform the vertex to clip coordinates :'
"DP4 R0.x, mvp[0], R12;"
"DP4 R0.y, mvp[1], R12;"
"DP4 R0.z, mvp[2], R12;"
"DP4 R0.w, mvp[3], R12;"
"MOV R12, R0;"
*/
assert(has_final);
qstring_append(body,
/* the shaders leave the result in screen space, while
* opengl expects it in clip coordinates.
* Use the magic viewport constants for now,
* but they're not necessarily present.
* Same idea as above I think, but dono what the mvp stuff is about...
* but they're not necessarily present...
*/
"# un-screenspace transform\n"
"ADD R12, R12, -c[59];\n"
"RCP R1.x, c[58].x;\n"
"RCP R1.y, c[58].y;\n"
/* scale_z = view_z == 0 ? 1 : (1 / view_z) */
"ABS R1.z, c[58].z;\n"
"SGE R1.z, -R1.z, 0;\n"
"ADD R1.z, R1.z, c[58].z;\n"
"RCP R1.z, R1.z;\n"
" /* Un-screenspace transform */\n"
" oPos.xyz = oPos.xyz - viewport_offset.xyz;\n"
" vec3 tmp = vec3(1.0);\n"
"MUL R12.xyz, R12, R1;\n"
"MOV R12.w, 1.0;\n"
/* FIXME: old comment was "scale_z = view_z == 0 ? 1 : (1 / view_z)" */
" if (viewport_scale.x != 0.0) { tmp.x /= viewport_scale.x; }\n"
" if (viewport_scale.y != 0.0) { tmp.y /= viewport_scale.y; }\n"
" if (viewport_scale.z != 0.0) { tmp.z /= viewport_scale.z; }\n"
/* undo the perspective divide? */
//"MUL R12.xyz, R12, R12.w;\n"
" oPos.xyz *= tmp.xyz;\n"
" oPos.w = 1.0;\n" //This breaks 2D? Maybe w is zero?
"\n"
#if 0
//FIXME: Use surface width / height / zeta max
"R12.z /= 16777215.0;\n" // Z[0;1]
"R12.z *= (cliprange.y - cliprange.x) / 16777215.0;\n" // Scale so [0;zmax] -> [0;cliprange_size]
"R12.z -= cliprange.x / 16777215.0;\n" // Move down so [clipmin_min;clipmin_max]
// X = [0;surface_width]; Y = [surface_height;0]; Z = [0;1]; W = ???
"R12.xyz = R12.xyz / vec3(640.0,480.0,1.0);\n"
// X,Z = [0;1]; Y = [1;0]; W = ???
"R12.xyz = R12.xyz * vec3(2.0) - vec3(1.0);\n"
"R12.y *= -1.0;\n"
"R12.w = 1.0;\n"
// X,Y,Z = [-1;+1]; W = 1
"\n"
#endif
/* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection
* in state.c
@ -711,9 +816,31 @@ QString* vsh_translate(uint16_t version,
//"ADD R12.z, R12.z, R12.z;\n"
//"ADD R12.z, R12.z, -R12.w;\n"
"# End of shader:\n"
"MOV oPos, R12;\n"
"END"
" /* Set outputs */\n"
" gl_Position = oPos;\n"
" gl_FrontColor = oD0;\n"
" gl_FrontSecondaryColor = oD1;\n"
" gl_BackColor = oB0;\n"
" gl_BackSecondaryColor = oB1;\n"
" gl_PointSize = oPts.x;\n"
" gl_FogFragCoord = oFog.x;\n"
" gl_TexCoord[0] = oT0;\n"
" gl_TexCoord[1] = oT1;\n"
" gl_TexCoord[2] = oT2;\n"
" gl_TexCoord[3] = oT3;\n"
"\n"
);
QString *ret = qstring_new();
qstring_append(ret, qstring_get_str(header));
qstring_append(ret,"\n"
"void main(void)\n"
"{\n");
qstring_append(ret, qstring_get_str(body));
qstring_append(ret,"}\n");
QDECREF(header);
QDECREF(body);
return ret;
}

View File

@ -36,6 +36,54 @@
// Xbox vertex read/write shader
#define VSH_VERSION_XVSW 0x7778
// Number of 32-bit words that make up one vertex program token;
// vsh_translate() reads words [0..3] of a token and advances its
// cursor by this many words per token.
#define VSH_TOKEN_SIZE 4
/*
 * Names of the individual fields that can be read out of one vertex
 * program token with vsh_get_field().
 *
 * The numeric values are spelled out so that the position of every field
 * is visible at a glance.
 * NOTE(review): presumably these values index a per-field layout table in
 * the implementation -- keep the numbering dense and in this order unless
 * that is confirmed not to be the case.
 */
typedef enum {
    FLD_ILU          = 0,
    FLD_MAC          = 1,
    FLD_CONST        = 2,
    FLD_V            = 3,

    /* Input A */
    FLD_A_NEG        = 4,
    FLD_A_SWZ_X      = 5,
    FLD_A_SWZ_Y      = 6,
    FLD_A_SWZ_Z      = 7,
    FLD_A_SWZ_W      = 8,
    FLD_A_R          = 9,
    FLD_A_MUX        = 10,

    /* Input B */
    FLD_B_NEG        = 11,
    FLD_B_SWZ_X      = 12,
    FLD_B_SWZ_Y      = 13,
    FLD_B_SWZ_Z      = 14,
    FLD_B_SWZ_W      = 15,
    FLD_B_R          = 16,
    FLD_B_MUX        = 17,

    /* Input C */
    FLD_C_NEG        = 18,
    FLD_C_SWZ_X      = 19,
    FLD_C_SWZ_Y      = 20,
    FLD_C_SWZ_Z      = 21,
    FLD_C_SWZ_W      = 22,
    FLD_C_R_HIGH     = 23,
    FLD_C_R_LOW      = 24,
    FLD_C_MUX        = 25,

    /* Output */
    FLD_OUT_MAC_MASK = 26,
    FLD_OUT_R        = 27,
    FLD_OUT_ILU_MASK = 28,
    FLD_OUT_O_MASK   = 29,
    FLD_OUT_ORB      = 30,
    FLD_OUT_ADDRESS  = 31,
    FLD_OUT_MUX      = 32,

    /* Relative addressing */
    FLD_A0X          = 33,

    /* Final instruction */
    FLD_FINAL        = 34
} VshFieldName;
/*
 * Return the value of the field `field_name` of the vertex program token
 * that starts at `shader_token`.
 * NOTE(review): the bit layout of each field lives in the implementation,
 * not in this header; presumably `shader_token` must point at
 * VSH_TOKEN_SIZE readable words -- confirm against the definition.
 */
uint8_t vsh_get_field(uint32_t *shader_token, VshFieldName field_name);
/*
 * Translate a vertex program into shader source text and return it as a
 * QString.
 *
 * tokens        - program words, VSH_TOKEN_SIZE words per token
 * tokens_length - length of `tokens`, counted in 32-bit words
 * version       - NOTE(review): no use of this parameter is visible in the
 *                 definition as seen here; confirm whether it is consumed.
 *
 * NOTE(review): the definition appears to stop at the first token whose
 * FLD_FINAL field is set and to assert that one was found; the returned
 * string is presumably owned by the caller -- confirm both.
 */
QString* vsh_translate(uint16_t version,
uint32_t *tokens, unsigned int tokens_length);

109
hw/xbox/swizzle.c Normal file
View File

@ -0,0 +1,109 @@
/*
* QEMU texture swizzling routines
*
* Copyright (c) 2013 espes
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#include <stdint.h>
#include <string.h>
/*
 * Copy a swizzled texture into a linear buffer.
 *
 * In the swizzled source the texel at (x, y, z) is stored at a texel index
 * whose bits are the bits of x, y and z interleaved: walking up from the
 * lowest index bit, the axes take turns (x, then y, then z) claiming the
 * next free bit, and an axis stops claiming bits once it has enough to
 * address its extent. The destination is written in x-fastest order, one
 * row every `pitch` bytes, with each z slice following the previous one
 * after `height` rows.
 *
 * src_buf         - swizzled input texels
 * width/height/depth - extent of the texture in texels
 * dst_buf         - linear output; bytes between the end of a row and the
 *                   next row start (pitch - width * bytes_per_pixel) are
 *                   left untouched
 * pitch           - distance in bytes between destination rows.
 *                   NOTE(review): must be >= width * bytes_per_pixel; this
 *                   is not checked here.
 * bytes_per_pixel - size of one texel in bytes
 */
void unswizzle_rect(
    uint8_t *src_buf,
    unsigned int width,
    unsigned int height,
    unsigned int depth,
    uint8_t *dst_buf,
    unsigned int pitch,
    unsigned int bytes_per_pixel)
{
    uint32_t mask_u = 0, mask_v = 0, mask_w = 0;
    unsigned int i = 1, j = 1;

    /* Hand out index bits (j) to the axes round-robin; an axis receives a
     * bit for every power of two (i) that is still below its extent. */
    while ((i <= width) || (i <= height) || (i <= depth)) {
        if (i < width) {
            mask_u |= j;
            j <<= 1;
        }
        if (i < height) {
            mask_v |= j;
            j <<= 1;
        }
        if (i < depth) {
            mask_w |= j;
            j <<= 1;
        }
        i <<= 1;
    }

    /* Bytes to skip at the end of each destination row. */
    const unsigned int row_padding = pitch - width * bytes_per_pixel;

    /*
     * u, v and w hold the x, y and z coordinate already spread over the
     * bits of their mask, so (u | v | w) is the swizzled texel index.
     * "(c - mask) & mask" steps such a spread-out coordinate to its next
     * value (carrying across the gaps) and wraps to 0 after the last one.
     * All three always start at 0: the previous start-offset computation
     * could only ever produce 0 and has been dropped.
     */
    uint32_t w = 0;
    unsigned int z;
    for (z = 0; z < depth; z++) {
        uint32_t v = 0;
        unsigned int y;
        for (y = 0; y < height; y++) {
            uint32_t u = 0;
            unsigned int x;
            for (x = 0; x < width; x++) {
                /* Widen before multiplying so the byte offset cannot
                 * wrap in 32-bit arithmetic. */
                memcpy(dst_buf,
                       src_buf + (size_t)(u | v | w) * bytes_per_pixel,
                       bytes_per_pixel);
                dst_buf += bytes_per_pixel;

                u = (u - mask_u) & mask_u;
            }
            dst_buf += row_padding;

            v = (v - mask_v) & mask_v;
        }
        w = (w - mask_w) & mask_w;
    }
}

34
hw/xbox/swizzle.h Normal file
View File

@ -0,0 +1,34 @@
/*
* QEMU texture swizzling routines
*
* Copyright (c) 2013 espes
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#ifndef HW_XBOX_SWIZZLE_H
#define HW_XBOX_SWIZZLE_H

#include <stdint.h> /* uint8_t -- keeps this header usable on its own */

/*
 * Copy a swizzled texture of width x height x depth texels from src_buf
 * into dst_buf in linear, x-fastest order.
 *
 * Destination rows are written `pitch` bytes apart; each texel is
 * `bytes_per_pixel` bytes.
 * NOTE(review): pitch is expected to be >= width * bytes_per_pixel and
 * both buffers large enough for the given extent; neither is checked.
 */
void unswizzle_rect(
    uint8_t *src_buf,
    unsigned int width,
    unsigned int height,
    unsigned int depth,
    uint8_t *dst_buf,
    unsigned int pitch,
    unsigned int bytes_per_pixel);

#endif