GLSL shaders translating (modulo texture fetches).

This commit is contained in:
Ben Vanik 2014-12-27 16:30:54 -08:00
parent 58cff84550
commit 5b2672a1b8
7 changed files with 173 additions and 258 deletions

View File

@ -48,7 +48,7 @@ void StringBuffer::AppendVarargs(const char* format, va_list args) {
auto offset = buffer_.size();
Grow(length + 1);
buffer_.resize(buffer_.size() + length);
vsnprintf(buffer_.data() + offset, buffer_.capacity() - 1, format, args);
vsnprintf(buffer_.data() + offset, buffer_.capacity(), format, args);
buffer_[buffer_.size()] = 0;
}
@ -62,7 +62,7 @@ void StringBuffer::AppendBytes(const uint8_t* buffer, size_t length) {
const char* StringBuffer::GetString() const { return buffer_.data(); }
std::string StringBuffer::to_string() { return std::string(buffer_.data()); }
std::string StringBuffer::to_string() { return std::string(buffer_.data(), buffer_.size()); }
char* StringBuffer::ToString() { return strdup(buffer_.data()); }

View File

@ -1661,15 +1661,6 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
// TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
// TEST TEST TEST TEST TEST TEST TEST TEST TEST TEST
// Pretend we are drawing.
// glEnable(GL_SCISSOR_TEST);
// glScissor(100, 100, 100, 100);
// float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
// glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0,
// red);
// glDisable(GL_SCISSOR_TEST);
return true;
}
@ -1691,8 +1682,7 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
}
if (!active_pixel_shader_->has_prepared()) {
if (!active_pixel_shader_->PreparePixelShader(program_cntl,
active_vertex_shader_)) {
if (!active_pixel_shader_->PreparePixelShader(program_cntl)) {
XELOGE("Unable to prepare pixel shader");
return false;
}

View File

@ -92,22 +92,17 @@ bool GL4Shader::PrepareVertexShader(
" float gl_PointSize;\n"
" float gl_ClipDistance[];\n"
"};\n"
"layout(location = 0) in vec3 iF0;\n"
"layout(location = 1) in vec4 iF1;\n"
"layout(location = 0) out VertexData vtx;\n"
"void main() {\n"
//" vec4 oPos = vec4(iF0.xy, 0.0, 1.0);\n"
" vec4 oPos = iF0.xxxx * state->float_consts[0];\n"
" oPos = (iF0.yyyy * state->float_consts[1]) + oPos;\n"
" oPos = (iF0.zzzz * state->float_consts[2]) + oPos;\n"
" oPos = (vec4(1.0, 1.0, 1.0, 1.0) * state->float_consts[3]) + oPos;\n"
//" gl_PointSize = 1.0;\n"
"void processVertex();\n"
"void main() {\n" +
(alloc_counts().positions ? " gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
: "") +
(alloc_counts().point_size ? " gl_PointSize = 1.0;\n" : "") +
" for (int i = 0; i < vtx.o.length(); ++i) {\n"
" vtx.o[0] = vec4(0.0, 0.0, 0.0, 0.0);\n"
" vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
" }\n"
" vtx.o[0] = iF1;\n"
" gl_Position = applyViewport(oPos);\n"
//" gl_Position = oPos;\n"
" processVertex();\n"
" gl_Position = applyViewport(gl_Position);\n"
"}\n";
std::string translated_source =
@ -116,6 +111,7 @@ bool GL4Shader::PrepareVertexShader(
PLOGE("Vertex shader failed translation");
return false;
}
source += translated_source;
if (!CompileProgram(source)) {
return false;
@ -126,31 +122,34 @@ bool GL4Shader::PrepareVertexShader(
}
bool GL4Shader::PreparePixelShader(
const xenos::xe_gpu_program_cntl_t& program_cntl,
GL4Shader* vertex_shader) {
const xenos::xe_gpu_program_cntl_t& program_cntl) {
if (has_prepared_) {
return is_valid_;
}
has_prepared_ = true;
std::string source = header +
std::string source =
header +
"layout(location = 0) in VertexData vtx;\n"
"layout(location = 0) out vec4 oC[4];\n"
"void processFragment();\n"
"void main() {\n"
" for (int i = 0; i < oC.length(); ++i) {\n"
" oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n"
" }\n"
" oC[0] = vtx.o[0];\n"
//" gl_FragDepth = 0.0;\n"
" oC[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
" }\n" +
(program_cntl.ps_export_depth ? " gl_FragDepth = 0.0\n" : "") +
" processFragment();\n"
"}\n";
std::string translated_source = shader_translator_.TranslatePixelShader(
this, program_cntl, vertex_shader->alloc_counts());
std::string translated_source =
shader_translator_.TranslatePixelShader(this, program_cntl);
if (translated_source.empty()) {
PLOGE("Pixel shader failed translation");
return false;
}
source += translated_source;
if (!CompileProgram(source)) {
return false;
}
@ -166,12 +165,13 @@ bool GL4Shader::CompileProgram(std::string source) {
const char* source_str = translated_disassembly_.c_str();
// Save to disk, if we asked for it.
if (FLAGS_dump_shaders.size()) {
auto base_path = FLAGS_dump_shaders.c_str();
char file_name[poly::max_path];
snprintf(file_name, poly::countof(file_name), "%s/gl4_gen_%.16llX.%s",
base_path, data_hash_,
shader_type_ == ShaderType::kVertex ? "vert" : "frag");
if (FLAGS_dump_shaders.size()) {
// Note that we put the translated source first so we get good line numbers.
FILE* f = fopen(file_name, "w");
fprintf(f, translated_disassembly_.c_str());
fprintf(f, "\n\n");
@ -190,6 +190,7 @@ bool GL4Shader::CompileProgram(std::string source) {
return false;
}
// Get error log, if we failed to link.
GLint link_status = 0;
glGetProgramiv(program_, GL_LINK_STATUS, &link_status);
if (!link_status) {
@ -205,6 +206,50 @@ bool GL4Shader::CompileProgram(std::string source) {
return false;
}
// Get program binary, if it's available.
GLint binary_length = 0;
glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &binary_length);
if (binary_length) {
translated_binary_.resize(binary_length);
GLenum binary_format;
glGetProgramBinary(program_, binary_length, &binary_length, &binary_format,
translated_binary_.data());
// Append to shader dump.
if (FLAGS_dump_shaders.size()) {
// If we are on nvidia, we can find the disassembly string.
// I haven't been able to figure out from the format how to do this
// without a search like this.
const char* disasm_start = nullptr;
size_t search_offset = 0;
char* search_start = reinterpret_cast<char*>(translated_binary_.data());
while (true) {
auto p = reinterpret_cast<char*>(
memchr(translated_binary_.data() + search_offset, '!',
translated_binary_.size() - search_offset));
if (!p) {
break;
}
if (p[0] == '!' && p[1] == '!' && p[2] == 'N' && p[3] == 'V') {
disasm_start = p;
break;
}
search_offset = p - search_start;
++search_offset;
}
if (disasm_start) {
FILE* f = fopen(file_name, "a");
fprintf(f, "\n\n/*\n");
fprintf(f, disasm_start);
fprintf(f, "\n*/\n");
fclose(f);
} else {
PLOGW("Got program binary but unable to find disassembly");
}
}
}
return true;
}

View File

@ -27,8 +27,7 @@ class GL4Shader : public Shader {
GLuint program() const { return program_; }
bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl,
GL4Shader* vertex_shader);
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
protected:
bool CompileProgram(std::string source);

View File

@ -33,35 +33,27 @@ const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) {
return "float";
case VertexFormat::k_16_16:
case VertexFormat::k_32_32:
if (el.is_normalized) {
return el.is_signed ? "snorm float2" : "unorm float2";
} else {
return el.is_signed ? "int2" : "uint2";
}
return el.is_signed ? "ivec2" : "uvec2";
case VertexFormat::k_16_16_FLOAT:
case VertexFormat::k_32_32_FLOAT:
return "float2";
return "vec2";
case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10:
return "int3"; // ?
case VertexFormat::k_32_32_32_FLOAT:
return "float3";
return "vec3";
case VertexFormat::k_8_8_8_8:
case VertexFormat::k_2_10_10_10:
case VertexFormat::k_16_16_16_16:
case VertexFormat::k_32_32_32_32:
if (el.is_normalized) {
return el.is_signed ? "snorm float4" : "unorm float4";
} else {
return el.is_signed ? "int4" : "uint4";
}
return el.is_signed ? "ivec4" : "uvec4";
case VertexFormat::k_16_16_16_16_FLOAT:
case VertexFormat::k_32_32_32_32_FLOAT:
return "float4";
return "vec4";
default:
XELOGE("Unknown vertex format: %d", el.format);
assert_always();
return "float4";
return "vec4";
}
}
@ -81,45 +73,12 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
GL4Shader* vertex_shader, const xe_gpu_program_cntl_t& program_cntl) {
Reset(vertex_shader);
// Add constants buffers.
// We could optimize this by only including used buffers, but the compiler
// seems to do a good job of doing this for us.
// It also does read detection, so c[512] can end up c[4] in the asm -
// instead of doing this optimization ourselves we could maybe just query
// this from the compiler.
Append(
"cbuffer float_consts : register(b0) {\n"
" float4 c[512];\n"
"};\n");
// TODO(benvanik): add bool/loop constants.
// Normal shaders only, for now.
assert_true(program_cntl.vs_export_mode == 0);
AppendTextureHeader(vertex_shader->sampler_inputs());
// Transform utilities. We adjust the output position in various ways
// as we can't do this via D3D11 APIs.
Append(
"cbuffer vs_consts : register(b3) {\n"
" float4 window;\n" // x,y,w,h
" float4 viewport_z_enable;\n" // min,(max - min),?,enabled
" float4 viewport_size;\n" // x,y,w,h
"};"
"float4 applyViewport(float4 pos) {\n"
" if (viewport_z_enable.w) {\n"
//" pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n"
//" pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n"
//" pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n"
// w?
" } else {\n"
" pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + "
"float2(-1.0, 1.0);\n"
" pos.zw = float2(0.0, 1.0);\n"
" }\n"
" pos.xy += window.xy;\n"
" return pos;\n"
"}\n");
// Add vertex shader input.
Append("struct VS_INPUT {\n");
uint32_t el_index = 0;
const auto& buffer_inputs = vertex_shader->buffer_inputs();
for (uint32_t n = 0; n < buffer_inputs.count; n++) {
@ -129,55 +88,23 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
const char* type_name = GetVertexFormatTypeName(el);
const auto& fetch = el.vtx_fetch;
uint32_t fetch_slot = fetch.const_index * 3 + fetch.const_index_sel;
Append(" %s vf%u_%d : XE_VF%u;\n", type_name, fetch_slot, fetch.offset,
el_index);
Append("layout(location = %d) in %s vf%u_%d;\n", el_index, type_name,
fetch_slot, fetch.offset);
el_index++;
}
}
Append("};\n");
// Add vertex shader output (pixel shader input).
const auto& alloc_counts = vertex_shader->alloc_counts();
Append("struct VS_OUTPUT {\n");
if (alloc_counts.positions) {
assert_true(alloc_counts.positions == 1);
Append(" float4 oPos : SV_POSITION;\n");
}
if (alloc_counts.params) {
Append(" float4 o[%d] : XE_O;\n", kMaxInterpolators);
}
if (alloc_counts.point_size) {
Append(" float4 oPointSize : PSIZE;\n");
}
Append("};\n");
// Vertex shader main() header.
Append(
"VS_OUTPUT main(VS_INPUT i) {\n"
" VS_OUTPUT o;\n");
// Always write position, as some shaders seem to only write certain values.
if (alloc_counts.positions) {
Append(" o.oPos = float4(0.0, 0.0, 0.0, 1.0);\n");
}
if (alloc_counts.point_size) {
Append(" o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n");
}
// TODO(benvanik): remove this, if possible (though the compiler may be smart
// enough to do it for us).
if (alloc_counts.params) {
for (uint32_t n = 0; n < kMaxInterpolators; n++) {
Append(" o.o[%d] = float4(0.0, 0.0, 0.0, 0.0);\n", n);
}
}
Append("void processVertex() {\n");
// Add temporaries for any registers we may use.
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
for (uint32_t n = 0; n <= temp_regs; n++) {
Append(" float4 r%d = c[%d];\n", n, n);
Append(" vec4 r%d = state->float_consts[%d];\n", n, n);
}
Append(" float4 t;\n");
Append(" vec4 t;\n");
// Execute blocks.
const auto& execs = vertex_shader->execs();
@ -189,20 +116,12 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
}
}
// main footer.
if (alloc_counts.positions) {
Append(" o.oPos = applyViewport(o.oPos);\n");
}
Append(
" return o;\n"
"};\n");
Append("}\n");
return output_.to_string();
}
std::string GL4ShaderTranslator::TranslatePixelShader(
GL4Shader* pixel_shader, const xe_gpu_program_cntl_t& program_cntl,
const GL4Shader::AllocCounts& alloc_counts) {
GL4Shader* pixel_shader, const xe_gpu_program_cntl_t& program_cntl) {
Reset(pixel_shader);
// We need an input VS to make decisions here.
@ -210,63 +129,22 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
// If the same PS is used with different VS that output different amounts
// (and less than the number of required registers), things may die.
// Add constants buffers.
// We could optimize this by only including used buffers, but the compiler
// seems to do a good job of doing this for us.
// It also does read detection, so c[512] can end up c[4] in the asm -
// instead of doing this optimization ourselves we could maybe just query
// this from the compiler.
Append(
"cbuffer float_consts : register(b0) {\n"
" float4 c[512];\n"
"};\n");
// TODO(benvanik): add bool/loop constants.
AppendTextureHeader(pixel_shader->sampler_inputs());
// Add vertex shader output (pixel shader input).
Append("struct VS_OUTPUT {\n");
if (alloc_counts.positions) {
assert_true(alloc_counts.positions == 1);
Append(" float4 oPos : SV_POSITION;\n");
}
if (alloc_counts.params) {
Append(" float4 o[%d] : XE_O;\n", kMaxInterpolators);
}
Append("};\n");
// Add pixel shader output.
Append("struct PS_OUTPUT {\n");
for (uint32_t n = 0; n < alloc_counts.params; n++) {
Append(" float4 oC%d : SV_TARGET%d;\n", n, n);
if (program_cntl.ps_export_depth) {
// Is this per render-target?
Append(" float oD%d : SV_DEPTH%d;\n", n, n);
}
}
Append("};\n");
// Pixel shader main() header.
Append(
"PS_OUTPUT main(VS_OUTPUT i) {\n"
" PS_OUTPUT o;\n");
for (uint32_t n = 0; n < alloc_counts.params; n++) {
Append(" o.oC%d = float4(1.0, 0.0, 0.0, 1.0);\n", n);
}
Append("void processFragment() {\n");
// Add temporary registers.
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
for (uint32_t n = 0; n <= std::max(15u, temp_regs); n++) {
Append(" float4 r%d = c[%d];\n", n, n + 256);
Append(" vec4 r%d = state->float_consts[%d];\n", n, n + 256);
}
Append(" float4 t;\n");
Append(" vec4 t;\n");
Append(" float s;\n"); // scalar result (used for RETAIN_PREV)
// Bring registers local.
if (alloc_counts.params) {
for (uint32_t n = 0; n < kMaxInterpolators; n++) {
Append(" r%d = i.o[%d];\n", n, n);
}
Append(" r%d = vtx.o[%d];\n", n, n);
}
// Execute blocks.
@ -279,11 +157,7 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
}
}
// main footer.
Append(
" return o;\n"
"}\n");
Append("}\n");
return output_.to_string();
}
@ -343,7 +217,7 @@ void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
if (abs_constants) {
Append("abs(");
}
Append("c[%u]", is_pixel_shader() ? num + 256 : num);
Append("state->float_consts[%u]", is_pixel_shader() ? num + 256 : num);
if (abs_constants) {
Append(")");
}
@ -367,14 +241,14 @@ void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) {
case ShaderType::kVertex:
switch (num) {
case 62:
Append("o.oPos");
Append("gl_Position");
break;
case 63:
Append("o.oPointSize");
Append("gl_PointSize");
break;
default:
// Varying.
Append("o.o[%u]", num);
Append("vtx.o[%u]", num);
;
break;
}
@ -382,7 +256,7 @@ void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) {
case ShaderType::kPixel:
switch (num) {
case 0:
Append("o.oC0");
Append("oC[0]");
break;
default:
// TODO(benvanik): other render targets?
@ -412,7 +286,7 @@ void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask,
// Masking.
Append(" ");
AppendDestRegName(num, dst_exp);
Append(" = float4(");
Append(" = vec4(");
for (int i = 0; i < 4; i++) {
// TODO(benvanik): mask out values? mix in old value as temp?
// Append("%c", (mask & 0x1) ? chan_names[i] : 'w');
@ -487,6 +361,9 @@ void GL4ShaderTranslator::PrintExportComment(uint32_t num) {
case 63:
name = "gl_PointSize";
break;
default:
name = "??";
break;
}
break;
case ShaderType::kPixel:
@ -494,6 +371,9 @@ void GL4ShaderTranslator::PrintExportComment(uint32_t num) {
case 0:
name = "gl_FragColor";
break;
default:
name = "??";
break;
}
break;
}
@ -509,7 +389,7 @@ bool GL4ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
@ -530,7 +410,7 @@ bool GL4ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
@ -540,7 +420,7 @@ bool GL4ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) {
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -551,7 +431,7 @@ bool GL4ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel &&
alu.src1_swiz == alu.src2_swiz &&
@ -569,7 +449,7 @@ bool GL4ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) {
Append(")");
}
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -580,7 +460,7 @@ bool GL4ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("min(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
@ -590,7 +470,7 @@ bool GL4ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) {
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -602,9 +482,9 @@ bool GL4ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu,
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("float4((");
Append("vec4((");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants);
Append(").x %s (", op);
@ -630,7 +510,7 @@ bool GL4ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu,
alu.abs_constants);
Append(").w ? 1.0 : 0.0)");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -653,14 +533,14 @@ bool GL4ShaderTranslator::TranslateALU_FRACv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("frac(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -671,14 +551,14 @@ bool GL4ShaderTranslator::TranslateALU_TRUNCv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("trunc(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -689,14 +569,14 @@ bool GL4ShaderTranslator::TranslateALU_FLOORv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("floor(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -707,20 +587,19 @@ bool GL4ShaderTranslator::TranslateALU_MULADDv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("mad(");
Append("(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants);
Append(", ");
Append(" * ");
AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate,
alu.abs_constants);
Append(", ");
Append(") + ");
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -732,11 +611,11 @@ bool GL4ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu,
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
// TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as
// values.
Append("float4((");
Append("vec4((");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants);
Append(").x %s 0.0 ? (", op);
@ -774,7 +653,7 @@ bool GL4ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu,
alu.abs_constants);
Append(").w)");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -794,7 +673,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("dot(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
@ -804,7 +683,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) {
alu.abs_constants);
Append(")");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -815,17 +694,17 @@ bool GL4ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("dot(float4(");
Append("dot(vec4(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants);
Append(").xyz, float4(");
Append(").xyz, vec4(");
AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate,
alu.abs_constants);
Append(").xyz)");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -836,12 +715,12 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("dot(float4(");
Append("dot(vec4(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants);
Append(").xy, float4(");
Append(").xy, vec4(");
AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate,
alu.abs_constants);
Append(").xy) + ");
@ -849,7 +728,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
alu.abs_constants);
Append(".x");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -862,7 +741,7 @@ bool GL4ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = ");
if (alu.vector_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("max(");
Append("max(");
@ -880,7 +759,7 @@ bool GL4ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) {
alu.abs_constants);
Append(".w)");
if (alu.vector_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -894,7 +773,7 @@ bool GL4ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) {
alu.export_data);
Append(" = ");
if (alu.scalar_clamp) {
Append("saturate(");
Append("clamp(");
}
if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) {
// This is a mov.
@ -910,7 +789,7 @@ bool GL4ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) {
Append(".y).xxxx");
}
if (alu.scalar_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -923,7 +802,7 @@ bool GL4ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) {
alu.export_data);
Append(" = ");
if (alu.scalar_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("min(");
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
@ -933,7 +812,7 @@ bool GL4ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) {
alu.abs_constants);
Append(".y).xxxx");
if (alu.scalar_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -947,14 +826,14 @@ bool GL4ShaderTranslator::TranslateALU_SETXXs(const instr_alu_t& alu,
alu.export_data);
Append(" = ");
if (alu.scalar_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("((");
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
alu.abs_constants);
Append(".x %s 0.0) ? 1.0 : 0.0).xxxx", op);
if (alu.scalar_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -979,14 +858,14 @@ bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) {
alu.export_data);
Append(" = ");
if (alu.scalar_clamp) {
Append("saturate(");
Append("clamp(");
}
Append("(1.0 / ");
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
alu.abs_constants);
Append(")");
if (alu.scalar_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -999,7 +878,7 @@ bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) {
alu.export_data);
Append(" = ");
if (alu.scalar_clamp) {
Append("saturate(");
Append("clamp(");
}
uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
@ -1013,7 +892,7 @@ bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) {
Append(".%c", chan_names[swiz_b]);
Append(").xxxx");
if (alu.scalar_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -1029,7 +908,7 @@ bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) {
alu.export_data);
Append(" = ");
if (alu.scalar_clamp) {
Append("saturate(");
Append("clamp(");
}
uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
@ -1043,7 +922,7 @@ bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) {
Append(".%c", chan_names[swiz_b]);
Append(").xxxx");
if (alu.scalar_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -1059,7 +938,7 @@ bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) {
alu.export_data);
Append(" = ");
if (alu.scalar_clamp) {
Append("saturate(");
Append("clamp(");
}
uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
@ -1073,7 +952,7 @@ bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) {
Append(".%c", chan_names[swiz_b]);
Append(").xxxx");
if (alu.scalar_clamp) {
Append(")");
Append(", 0.0, 1.0)");
}
Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -1491,10 +1370,10 @@ bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
// Translate.
Append(" ");
Append("r%u.xyzw", vtx->dst_reg);
Append(" = float4(");
Append(" = vec4(");
uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
// TODO(benvanik): detect xyzw = xyzw, etc.
// TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc
// TODO(benvanik): detect and set as rN = vec4(samp.xyz, 1.0); / etc
uint32_t component_count =
GetVertexFormatComponentCount(static_cast<VertexFormat>(vtx->format));
uint32_t dst_swiz = vtx->dst_swiz;
@ -1509,8 +1388,7 @@ bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
} else if ((dst_swiz & 0x7) == 7) {
Append("r%u.%c", vtx->dst_reg, chan_names[i]);
} else {
Append("i.vf%u_%d.%c", fetch_slot, vtx->offset,
chan_names[dst_swiz & 0x3]);
Append("vf%u_%d.%c", fetch_slot, vtx->offset, chan_names[dst_swiz & 0x3]);
}
if (i < 3) {
Append(", ");
@ -1633,7 +1511,7 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
}
Append(");\n");
Append(" r%u.xyzw = float4(", tex->dst_reg);
Append(" r%u.xyzw = vec4(", tex->dst_reg);
uint32_t dst_swiz = tex->dst_swiz;
for (int i = 0; i < 4; i++) {
if (i) {

View File

@ -34,8 +34,8 @@ class GL4ShaderTranslator {
GL4Shader* vertex_shader,
const xenos::xe_gpu_program_cntl_t& program_cntl);
std::string TranslatePixelShader(
GL4Shader* pixel_shader, const xenos::xe_gpu_program_cntl_t& program_cntl,
const GL4Shader::AllocCounts& alloc_counts);
GL4Shader* pixel_shader,
const xenos::xe_gpu_program_cntl_t& program_cntl);
protected:
ShaderType shader_type_;

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_SHADER_H_
#include <string>
#include <vector>
#include <xenia/gpu/ucode.h>
#include <xenia/gpu/xenos.h>
@ -29,6 +30,7 @@ class Shader {
const std::string& translated_disassembly() const {
return translated_disassembly_;
}
const std::vector<uint8_t> translated_binary() { return translated_binary_; }
const uint32_t* data() const { return data_.data(); }
@ -93,6 +95,7 @@ class Shader {
std::string ucode_disassembly_;
std::string translated_disassembly_;
std::vector<uint8_t> translated_binary_;
std::string error_log_;
AllocCounts alloc_counts_;