GLSL shaders translating (modulo texture fetches).

This commit is contained in:
Ben Vanik 2014-12-27 16:30:54 -08:00
parent 58cff84550
commit 5b2672a1b8
7 changed files with 173 additions and 258 deletions

View File

@ -48,7 +48,7 @@ void StringBuffer::AppendVarargs(const char* format, va_list args) {
auto offset = buffer_.size(); auto offset = buffer_.size();
Grow(length + 1); Grow(length + 1);
buffer_.resize(buffer_.size() + length); buffer_.resize(buffer_.size() + length);
vsnprintf(buffer_.data() + offset, buffer_.capacity() - 1, format, args); vsnprintf(buffer_.data() + offset, buffer_.capacity(), format, args);
buffer_[buffer_.size()] = 0; buffer_[buffer_.size()] = 0;
} }
@ -62,7 +62,7 @@ void StringBuffer::AppendBytes(const uint8_t* buffer, size_t length) {
const char* StringBuffer::GetString() const { return buffer_.data(); } const char* StringBuffer::GetString() const { return buffer_.data(); }
std::string StringBuffer::to_string() { return std::string(buffer_.data()); } std::string StringBuffer::to_string() { return std::string(buffer_.data(), buffer_.size()); }
char* StringBuffer::ToString() { return strdup(buffer_.data()); } char* StringBuffer::ToString() { return strdup(buffer_.data()); }

View File

@ -1661,15 +1661,6 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
// TODO(benvanik): do we want this on READ too? // TODO(benvanik): do we want this on READ too?
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer);
// TEST TEST TEST TEST TEST TEST TEST TEST TEST TEST
// Pretend we are drawing.
// glEnable(GL_SCISSOR_TEST);
// glScissor(100, 100, 100, 100);
// float red[] = {rand() / (float)RAND_MAX, 0, 0, 1.0f};
// glClearNamedFramebufferfv(active_framebuffer_->framebuffer, GL_COLOR, 0,
// red);
// glDisable(GL_SCISSOR_TEST);
return true; return true;
} }
@ -1691,8 +1682,7 @@ bool CommandProcessor::UpdateShaders(DrawCommand* draw_command) {
} }
if (!active_pixel_shader_->has_prepared()) { if (!active_pixel_shader_->has_prepared()) {
if (!active_pixel_shader_->PreparePixelShader(program_cntl, if (!active_pixel_shader_->PreparePixelShader(program_cntl)) {
active_vertex_shader_)) {
XELOGE("Unable to prepare pixel shader"); XELOGE("Unable to prepare pixel shader");
return false; return false;
} }

View File

@ -92,22 +92,17 @@ bool GL4Shader::PrepareVertexShader(
" float gl_PointSize;\n" " float gl_PointSize;\n"
" float gl_ClipDistance[];\n" " float gl_ClipDistance[];\n"
"};\n" "};\n"
"layout(location = 0) in vec3 iF0;\n"
"layout(location = 1) in vec4 iF1;\n"
"layout(location = 0) out VertexData vtx;\n" "layout(location = 0) out VertexData vtx;\n"
"void main() {\n" "void processVertex();\n"
//" vec4 oPos = vec4(iF0.xy, 0.0, 1.0);\n" "void main() {\n" +
" vec4 oPos = iF0.xxxx * state->float_consts[0];\n" (alloc_counts().positions ? " gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
" oPos = (iF0.yyyy * state->float_consts[1]) + oPos;\n" : "") +
" oPos = (iF0.zzzz * state->float_consts[2]) + oPos;\n" (alloc_counts().point_size ? " gl_PointSize = 1.0;\n" : "") +
" oPos = (vec4(1.0, 1.0, 1.0, 1.0) * state->float_consts[3]) + oPos;\n"
//" gl_PointSize = 1.0;\n"
" for (int i = 0; i < vtx.o.length(); ++i) {\n" " for (int i = 0; i < vtx.o.length(); ++i) {\n"
" vtx.o[0] = vec4(0.0, 0.0, 0.0, 0.0);\n" " vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
" }\n" " }\n"
" vtx.o[0] = iF1;\n" " processVertex();\n"
" gl_Position = applyViewport(oPos);\n" " gl_Position = applyViewport(gl_Position);\n"
//" gl_Position = oPos;\n"
"}\n"; "}\n";
std::string translated_source = std::string translated_source =
@ -116,6 +111,7 @@ bool GL4Shader::PrepareVertexShader(
PLOGE("Vertex shader failed translation"); PLOGE("Vertex shader failed translation");
return false; return false;
} }
source += translated_source;
if (!CompileProgram(source)) { if (!CompileProgram(source)) {
return false; return false;
@ -126,31 +122,34 @@ bool GL4Shader::PrepareVertexShader(
} }
bool GL4Shader::PreparePixelShader( bool GL4Shader::PreparePixelShader(
const xenos::xe_gpu_program_cntl_t& program_cntl, const xenos::xe_gpu_program_cntl_t& program_cntl) {
GL4Shader* vertex_shader) {
if (has_prepared_) { if (has_prepared_) {
return is_valid_; return is_valid_;
} }
has_prepared_ = true; has_prepared_ = true;
std::string source = header + std::string source =
"layout(location = 0) in VertexData vtx;\n" header +
"layout(location = 0) out vec4 oC[4];\n" "layout(location = 0) in VertexData vtx;\n"
"void main() {\n" "layout(location = 0) out vec4 oC[4];\n"
" for (int i = 0; i < oC.length(); ++i) {\n" "void processFragment();\n"
" oC[i] = vec4(1.0, 0.0, 0.0, 1.0);\n" "void main() {\n"
" }\n" " for (int i = 0; i < oC.length(); ++i) {\n"
" oC[0] = vtx.o[0];\n" " oC[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
//" gl_FragDepth = 0.0;\n" " }\n" +
"}\n"; (program_cntl.ps_export_depth ? " gl_FragDepth = 0.0\n" : "") +
" processFragment();\n"
"}\n";
std::string translated_source = shader_translator_.TranslatePixelShader( std::string translated_source =
this, program_cntl, vertex_shader->alloc_counts()); shader_translator_.TranslatePixelShader(this, program_cntl);
if (translated_source.empty()) { if (translated_source.empty()) {
PLOGE("Pixel shader failed translation"); PLOGE("Pixel shader failed translation");
return false; return false;
} }
source += translated_source;
if (!CompileProgram(source)) { if (!CompileProgram(source)) {
return false; return false;
} }
@ -166,12 +165,13 @@ bool GL4Shader::CompileProgram(std::string source) {
const char* source_str = translated_disassembly_.c_str(); const char* source_str = translated_disassembly_.c_str();
// Save to disk, if we asked for it. // Save to disk, if we asked for it.
auto base_path = FLAGS_dump_shaders.c_str();
char file_name[poly::max_path];
snprintf(file_name, poly::countof(file_name), "%s/gl4_gen_%.16llX.%s",
base_path, data_hash_,
shader_type_ == ShaderType::kVertex ? "vert" : "frag");
if (FLAGS_dump_shaders.size()) { if (FLAGS_dump_shaders.size()) {
auto base_path = FLAGS_dump_shaders.c_str(); // Note that we put the translated source first so we get good line numbers.
char file_name[poly::max_path];
snprintf(file_name, poly::countof(file_name), "%s/gl4_gen_%.16llX.%s",
base_path, data_hash_,
shader_type_ == ShaderType::kVertex ? "vert" : "frag");
FILE* f = fopen(file_name, "w"); FILE* f = fopen(file_name, "w");
fprintf(f, translated_disassembly_.c_str()); fprintf(f, translated_disassembly_.c_str());
fprintf(f, "\n\n"); fprintf(f, "\n\n");
@ -190,6 +190,7 @@ bool GL4Shader::CompileProgram(std::string source) {
return false; return false;
} }
// Get error log, if we failed to link.
GLint link_status = 0; GLint link_status = 0;
glGetProgramiv(program_, GL_LINK_STATUS, &link_status); glGetProgramiv(program_, GL_LINK_STATUS, &link_status);
if (!link_status) { if (!link_status) {
@ -205,6 +206,50 @@ bool GL4Shader::CompileProgram(std::string source) {
return false; return false;
} }
// Get program binary, if it's available.
GLint binary_length = 0;
glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &binary_length);
if (binary_length) {
translated_binary_.resize(binary_length);
GLenum binary_format;
glGetProgramBinary(program_, binary_length, &binary_length, &binary_format,
translated_binary_.data());
// Append to shader dump.
if (FLAGS_dump_shaders.size()) {
// If we are on nvidia, we can find the disassembly string.
// I haven't been able to figure out from the format how to do this
// without a search like this.
const char* disasm_start = nullptr;
size_t search_offset = 0;
char* search_start = reinterpret_cast<char*>(translated_binary_.data());
while (true) {
auto p = reinterpret_cast<char*>(
memchr(translated_binary_.data() + search_offset, '!',
translated_binary_.size() - search_offset));
if (!p) {
break;
}
if (p[0] == '!' && p[1] == '!' && p[2] == 'N' && p[3] == 'V') {
disasm_start = p;
break;
}
search_offset = p - search_start;
++search_offset;
}
if (disasm_start) {
FILE* f = fopen(file_name, "a");
fprintf(f, "\n\n/*\n");
fprintf(f, disasm_start);
fprintf(f, "\n*/\n");
fclose(f);
} else {
PLOGW("Got program binary but unable to find disassembly");
}
}
}
return true; return true;
} }

View File

@ -27,8 +27,7 @@ class GL4Shader : public Shader {
GLuint program() const { return program_; } GLuint program() const { return program_; }
bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl); bool PrepareVertexShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl, bool PreparePixelShader(const xenos::xe_gpu_program_cntl_t& program_cntl);
GL4Shader* vertex_shader);
protected: protected:
bool CompileProgram(std::string source); bool CompileProgram(std::string source);

View File

@ -33,35 +33,27 @@ const char* GetVertexFormatTypeName(const GL4Shader::BufferDescElement& el) {
return "float"; return "float";
case VertexFormat::k_16_16: case VertexFormat::k_16_16:
case VertexFormat::k_32_32: case VertexFormat::k_32_32:
if (el.is_normalized) { return el.is_signed ? "ivec2" : "uvec2";
return el.is_signed ? "snorm float2" : "unorm float2";
} else {
return el.is_signed ? "int2" : "uint2";
}
case VertexFormat::k_16_16_FLOAT: case VertexFormat::k_16_16_FLOAT:
case VertexFormat::k_32_32_FLOAT: case VertexFormat::k_32_32_FLOAT:
return "float2"; return "vec2";
case VertexFormat::k_10_11_11: case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10: case VertexFormat::k_11_11_10:
return "int3"; // ? return "int3"; // ?
case VertexFormat::k_32_32_32_FLOAT: case VertexFormat::k_32_32_32_FLOAT:
return "float3"; return "vec3";
case VertexFormat::k_8_8_8_8: case VertexFormat::k_8_8_8_8:
case VertexFormat::k_2_10_10_10: case VertexFormat::k_2_10_10_10:
case VertexFormat::k_16_16_16_16: case VertexFormat::k_16_16_16_16:
case VertexFormat::k_32_32_32_32: case VertexFormat::k_32_32_32_32:
if (el.is_normalized) { return el.is_signed ? "ivec4" : "uvec4";
return el.is_signed ? "snorm float4" : "unorm float4";
} else {
return el.is_signed ? "int4" : "uint4";
}
case VertexFormat::k_16_16_16_16_FLOAT: case VertexFormat::k_16_16_16_16_FLOAT:
case VertexFormat::k_32_32_32_32_FLOAT: case VertexFormat::k_32_32_32_32_FLOAT:
return "float4"; return "vec4";
default: default:
XELOGE("Unknown vertex format: %d", el.format); XELOGE("Unknown vertex format: %d", el.format);
assert_always(); assert_always();
return "float4"; return "vec4";
} }
} }
@ -81,45 +73,12 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
GL4Shader* vertex_shader, const xe_gpu_program_cntl_t& program_cntl) { GL4Shader* vertex_shader, const xe_gpu_program_cntl_t& program_cntl) {
Reset(vertex_shader); Reset(vertex_shader);
// Add constants buffers. // Normal shaders only, for now.
// We could optimize this by only including used buffers, but the compiler assert_true(program_cntl.vs_export_mode == 0);
// seems to do a good job of doing this for us.
// It also does read detection, so c[512] can end up c[4] in the asm -
// instead of doing this optimization ourselves we could maybe just query
// this from the compiler.
Append(
"cbuffer float_consts : register(b0) {\n"
" float4 c[512];\n"
"};\n");
// TODO(benvanik): add bool/loop constants.
AppendTextureHeader(vertex_shader->sampler_inputs()); AppendTextureHeader(vertex_shader->sampler_inputs());
// Transform utilities. We adjust the output position in various ways
// as we can't do this via D3D11 APIs.
Append(
"cbuffer vs_consts : register(b3) {\n"
" float4 window;\n" // x,y,w,h
" float4 viewport_z_enable;\n" // min,(max - min),?,enabled
" float4 viewport_size;\n" // x,y,w,h
"};"
"float4 applyViewport(float4 pos) {\n"
" if (viewport_z_enable.w) {\n"
//" pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n"
//" pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n"
//" pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n"
// w?
" } else {\n"
" pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + "
"float2(-1.0, 1.0);\n"
" pos.zw = float2(0.0, 1.0);\n"
" }\n"
" pos.xy += window.xy;\n"
" return pos;\n"
"}\n");
// Add vertex shader input. // Add vertex shader input.
Append("struct VS_INPUT {\n");
uint32_t el_index = 0; uint32_t el_index = 0;
const auto& buffer_inputs = vertex_shader->buffer_inputs(); const auto& buffer_inputs = vertex_shader->buffer_inputs();
for (uint32_t n = 0; n < buffer_inputs.count; n++) { for (uint32_t n = 0; n < buffer_inputs.count; n++) {
@ -129,55 +88,23 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
const char* type_name = GetVertexFormatTypeName(el); const char* type_name = GetVertexFormatTypeName(el);
const auto& fetch = el.vtx_fetch; const auto& fetch = el.vtx_fetch;
uint32_t fetch_slot = fetch.const_index * 3 + fetch.const_index_sel; uint32_t fetch_slot = fetch.const_index * 3 + fetch.const_index_sel;
Append(" %s vf%u_%d : XE_VF%u;\n", type_name, fetch_slot, fetch.offset, Append("layout(location = %d) in %s vf%u_%d;\n", el_index, type_name,
el_index); fetch_slot, fetch.offset);
el_index++; el_index++;
} }
} }
Append("};\n");
// Add vertex shader output (pixel shader input).
const auto& alloc_counts = vertex_shader->alloc_counts(); const auto& alloc_counts = vertex_shader->alloc_counts();
Append("struct VS_OUTPUT {\n");
if (alloc_counts.positions) {
assert_true(alloc_counts.positions == 1);
Append(" float4 oPos : SV_POSITION;\n");
}
if (alloc_counts.params) {
Append(" float4 o[%d] : XE_O;\n", kMaxInterpolators);
}
if (alloc_counts.point_size) {
Append(" float4 oPointSize : PSIZE;\n");
}
Append("};\n");
// Vertex shader main() header. // Vertex shader main() header.
Append( Append("void processVertex() {\n");
"VS_OUTPUT main(VS_INPUT i) {\n"
" VS_OUTPUT o;\n");
// Always write position, as some shaders seem to only write certain values.
if (alloc_counts.positions) {
Append(" o.oPos = float4(0.0, 0.0, 0.0, 1.0);\n");
}
if (alloc_counts.point_size) {
Append(" o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n");
}
// TODO(benvanik): remove this, if possible (though the compiler may be smart
// enough to do it for us).
if (alloc_counts.params) {
for (uint32_t n = 0; n < kMaxInterpolators; n++) {
Append(" o.o[%d] = float4(0.0, 0.0, 0.0, 0.0);\n", n);
}
}
// Add temporaries for any registers we may use. // Add temporaries for any registers we may use.
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
for (uint32_t n = 0; n <= temp_regs; n++) { for (uint32_t n = 0; n <= temp_regs; n++) {
Append(" float4 r%d = c[%d];\n", n, n); Append(" vec4 r%d = state->float_consts[%d];\n", n, n);
} }
Append(" float4 t;\n"); Append(" vec4 t;\n");
// Execute blocks. // Execute blocks.
const auto& execs = vertex_shader->execs(); const auto& execs = vertex_shader->execs();
@ -189,20 +116,12 @@ std::string GL4ShaderTranslator::TranslateVertexShader(
} }
} }
// main footer. Append("}\n");
if (alloc_counts.positions) {
Append(" o.oPos = applyViewport(o.oPos);\n");
}
Append(
" return o;\n"
"};\n");
return output_.to_string(); return output_.to_string();
} }
std::string GL4ShaderTranslator::TranslatePixelShader( std::string GL4ShaderTranslator::TranslatePixelShader(
GL4Shader* pixel_shader, const xe_gpu_program_cntl_t& program_cntl, GL4Shader* pixel_shader, const xe_gpu_program_cntl_t& program_cntl) {
const GL4Shader::AllocCounts& alloc_counts) {
Reset(pixel_shader); Reset(pixel_shader);
// We need an input VS to make decisions here. // We need an input VS to make decisions here.
@ -210,63 +129,22 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
// If the same PS is used with different VS that output different amounts // If the same PS is used with different VS that output different amounts
// (and less than the number of required registers), things may die. // (and less than the number of required registers), things may die.
// Add constants buffers.
// We could optimize this by only including used buffers, but the compiler
// seems to do a good job of doing this for us.
// It also does read detection, so c[512] can end up c[4] in the asm -
// instead of doing this optimization ourselves we could maybe just query
// this from the compiler.
Append(
"cbuffer float_consts : register(b0) {\n"
" float4 c[512];\n"
"};\n");
// TODO(benvanik): add bool/loop constants.
AppendTextureHeader(pixel_shader->sampler_inputs()); AppendTextureHeader(pixel_shader->sampler_inputs());
// Add vertex shader output (pixel shader input).
Append("struct VS_OUTPUT {\n");
if (alloc_counts.positions) {
assert_true(alloc_counts.positions == 1);
Append(" float4 oPos : SV_POSITION;\n");
}
if (alloc_counts.params) {
Append(" float4 o[%d] : XE_O;\n", kMaxInterpolators);
}
Append("};\n");
// Add pixel shader output.
Append("struct PS_OUTPUT {\n");
for (uint32_t n = 0; n < alloc_counts.params; n++) {
Append(" float4 oC%d : SV_TARGET%d;\n", n, n);
if (program_cntl.ps_export_depth) {
// Is this per render-target?
Append(" float oD%d : SV_DEPTH%d;\n", n, n);
}
}
Append("};\n");
// Pixel shader main() header. // Pixel shader main() header.
Append( Append("void processFragment() {\n");
"PS_OUTPUT main(VS_OUTPUT i) {\n"
" PS_OUTPUT o;\n");
for (uint32_t n = 0; n < alloc_counts.params; n++) {
Append(" o.oC%d = float4(1.0, 0.0, 0.0, 1.0);\n", n);
}
// Add temporary registers. // Add temporary registers.
uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs;
for (uint32_t n = 0; n <= std::max(15u, temp_regs); n++) { for (uint32_t n = 0; n <= std::max(15u, temp_regs); n++) {
Append(" float4 r%d = c[%d];\n", n, n + 256); Append(" vec4 r%d = state->float_consts[%d];\n", n, n + 256);
} }
Append(" float4 t;\n"); Append(" vec4 t;\n");
Append(" float s;\n"); // scalar result (used for RETAIN_PREV) Append(" float s;\n"); // scalar result (used for RETAIN_PREV)
// Bring registers local. // Bring registers local.
if (alloc_counts.params) { for (uint32_t n = 0; n < kMaxInterpolators; n++) {
for (uint32_t n = 0; n < kMaxInterpolators; n++) { Append(" r%d = vtx.o[%d];\n", n, n);
Append(" r%d = i.o[%d];\n", n, n);
}
} }
// Execute blocks. // Execute blocks.
@ -279,11 +157,7 @@ std::string GL4ShaderTranslator::TranslatePixelShader(
} }
} }
// main footer. Append("}\n");
Append(
" return o;\n"
"}\n");
return output_.to_string(); return output_.to_string();
} }
@ -343,7 +217,7 @@ void GL4ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type,
if (abs_constants) { if (abs_constants) {
Append("abs("); Append("abs(");
} }
Append("c[%u]", is_pixel_shader() ? num + 256 : num); Append("state->float_consts[%u]", is_pixel_shader() ? num + 256 : num);
if (abs_constants) { if (abs_constants) {
Append(")"); Append(")");
} }
@ -367,14 +241,14 @@ void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) {
case ShaderType::kVertex: case ShaderType::kVertex:
switch (num) { switch (num) {
case 62: case 62:
Append("o.oPos"); Append("gl_Position");
break; break;
case 63: case 63:
Append("o.oPointSize"); Append("gl_PointSize");
break; break;
default: default:
// Varying. // Varying.
Append("o.o[%u]", num); Append("vtx.o[%u]", num);
; ;
break; break;
} }
@ -382,7 +256,7 @@ void GL4ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) {
case ShaderType::kPixel: case ShaderType::kPixel:
switch (num) { switch (num) {
case 0: case 0:
Append("o.oC0"); Append("oC[0]");
break; break;
default: default:
// TODO(benvanik): other render targets? // TODO(benvanik): other render targets?
@ -412,7 +286,7 @@ void GL4ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask,
// Masking. // Masking.
Append(" "); Append(" ");
AppendDestRegName(num, dst_exp); AppendDestRegName(num, dst_exp);
Append(" = float4("); Append(" = vec4(");
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
// TODO(benvanik): mask out values? mix in old value as temp? // TODO(benvanik): mask out values? mix in old value as temp?
// Append("%c", (mask & 0x1) ? chan_names[i] : 'w'); // Append("%c", (mask & 0x1) ? chan_names[i] : 'w');
@ -487,6 +361,9 @@ void GL4ShaderTranslator::PrintExportComment(uint32_t num) {
case 63: case 63:
name = "gl_PointSize"; name = "gl_PointSize";
break; break;
default:
name = "??";
break;
} }
break; break;
case ShaderType::kPixel: case ShaderType::kPixel:
@ -494,6 +371,9 @@ void GL4ShaderTranslator::PrintExportComment(uint32_t num) {
case 0: case 0:
name = "gl_FragColor"; name = "gl_FragColor";
break; break;
default:
name = "??";
break;
} }
break; break;
} }
@ -509,7 +389,7 @@ bool GL4ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("("); Append("(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
@ -530,7 +410,7 @@ bool GL4ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("("); Append("(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
@ -540,7 +420,7 @@ bool GL4ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) {
alu.abs_constants); alu.abs_constants);
Append(")"); Append(")");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -551,7 +431,7 @@ bool GL4ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel && if (alu.src1_reg == alu.src2_reg && alu.src1_sel == alu.src2_sel &&
alu.src1_swiz == alu.src2_swiz && alu.src1_swiz == alu.src2_swiz &&
@ -569,7 +449,7 @@ bool GL4ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) {
Append(")"); Append(")");
} }
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -580,7 +460,7 @@ bool GL4ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("min("); Append("min(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
@ -590,7 +470,7 @@ bool GL4ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) {
alu.abs_constants); alu.abs_constants);
Append(")"); Append(")");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -602,9 +482,9 @@ bool GL4ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu,
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("float4(("); Append("vec4((");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(").x %s (", op); Append(").x %s (", op);
@ -630,7 +510,7 @@ bool GL4ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu,
alu.abs_constants); alu.abs_constants);
Append(").w ? 1.0 : 0.0)"); Append(").w ? 1.0 : 0.0)");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -653,14 +533,14 @@ bool GL4ShaderTranslator::TranslateALU_FRACv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("frac("); Append("frac(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(")"); Append(")");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -671,14 +551,14 @@ bool GL4ShaderTranslator::TranslateALU_TRUNCv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("trunc("); Append("trunc(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(")"); Append(")");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -689,14 +569,14 @@ bool GL4ShaderTranslator::TranslateALU_FLOORv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("floor("); Append("floor(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(")"); Append(")");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -707,20 +587,19 @@ bool GL4ShaderTranslator::TranslateALU_MULADDv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("mad("); Append("(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(", "); Append(" * ");
AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(", "); Append(") + ");
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(")");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -732,11 +611,11 @@ bool GL4ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu,
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
// TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as
// values. // values.
Append("float4(("); Append("vec4((");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(").x %s 0.0 ? (", op); Append(").x %s 0.0 ? (", op);
@ -774,7 +653,7 @@ bool GL4ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu,
alu.abs_constants); alu.abs_constants);
Append(").w)"); Append(").w)");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -794,7 +673,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("dot("); Append("dot(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
@ -804,7 +683,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) {
alu.abs_constants); alu.abs_constants);
Append(")"); Append(")");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -815,17 +694,17 @@ bool GL4ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("dot(float4("); Append("dot(vec4(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(").xyz, float4("); Append(").xyz, vec4(");
AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(").xyz)"); Append(").xyz)");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -836,12 +715,12 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("dot(float4("); Append("dot(vec4(");
AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(").xy, float4("); Append(").xy, vec4(");
AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(").xy) + "); Append(").xy) + ");
@ -849,7 +728,7 @@ bool GL4ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) {
alu.abs_constants); alu.abs_constants);
Append(".x"); Append(".x");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -862,7 +741,7 @@ bool GL4ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) {
AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data);
Append(" = "); Append(" = ");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("max("); Append("max(");
Append("max("); Append("max(");
@ -880,7 +759,7 @@ bool GL4ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) {
alu.abs_constants); alu.abs_constants);
Append(".w)"); Append(".w)");
if (alu.vector_clamp) { if (alu.vector_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data);
@ -894,7 +773,7 @@ bool GL4ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) {
alu.export_data); alu.export_data);
Append(" = "); Append(" = ");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append("saturate("); Append("clamp(");
} }
if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) {
// This is a mov. // This is a mov.
@ -910,7 +789,7 @@ bool GL4ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) {
Append(".y).xxxx"); Append(".y).xxxx");
} }
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -923,7 +802,7 @@ bool GL4ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) {
alu.export_data); alu.export_data);
Append(" = "); Append(" = ");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("min("); Append("min(");
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
@ -933,7 +812,7 @@ bool GL4ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) {
alu.abs_constants); alu.abs_constants);
Append(".y).xxxx"); Append(".y).xxxx");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -947,14 +826,14 @@ bool GL4ShaderTranslator::TranslateALU_SETXXs(const instr_alu_t& alu,
alu.export_data); alu.export_data);
Append(" = "); Append(" = ");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("(("); Append("((");
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(".x %s 0.0) ? 1.0 : 0.0).xxxx", op); Append(".x %s 0.0) ? 1.0 : 0.0).xxxx", op);
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -979,14 +858,14 @@ bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) {
alu.export_data); alu.export_data);
Append(" = "); Append(" = ");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append("saturate("); Append("clamp(");
} }
Append("(1.0 / "); Append("(1.0 / ");
AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate,
alu.abs_constants); alu.abs_constants);
Append(")"); Append(")");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -999,7 +878,7 @@ bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) {
alu.export_data); alu.export_data);
Append(" = "); Append(" = ");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append("saturate("); Append("clamp(");
} }
uint32_t src3_swiz = alu.src3_swiz & ~0x3C; uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
@ -1013,7 +892,7 @@ bool GL4ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) {
Append(".%c", chan_names[swiz_b]); Append(".%c", chan_names[swiz_b]);
Append(").xxxx"); Append(").xxxx");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -1029,7 +908,7 @@ bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) {
alu.export_data); alu.export_data);
Append(" = "); Append(" = ");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append("saturate("); Append("clamp(");
} }
uint32_t src3_swiz = alu.src3_swiz & ~0x3C; uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
@ -1043,7 +922,7 @@ bool GL4ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) {
Append(".%c", chan_names[swiz_b]); Append(".%c", chan_names[swiz_b]);
Append(").xxxx"); Append(").xxxx");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -1059,7 +938,7 @@ bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) {
alu.export_data); alu.export_data);
Append(" = "); Append(" = ");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append("saturate("); Append("clamp(");
} }
uint32_t src3_swiz = alu.src3_swiz & ~0x3C; uint32_t src3_swiz = alu.src3_swiz & ~0x3C;
uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
@ -1073,7 +952,7 @@ bool GL4ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) {
Append(".%c", chan_names[swiz_b]); Append(".%c", chan_names[swiz_b]);
Append(").xxxx"); Append(").xxxx");
if (alu.scalar_clamp) { if (alu.scalar_clamp) {
Append(")"); Append(", 0.0, 1.0)");
} }
Append(";\n"); Append(";\n");
AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask, AppendDestRegPost(get_alu_scalar_dest(alu), alu.scalar_write_mask,
@ -1491,10 +1370,10 @@ bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
// Translate. // Translate.
Append(" "); Append(" ");
Append("r%u.xyzw", vtx->dst_reg); Append("r%u.xyzw", vtx->dst_reg);
Append(" = float4("); Append(" = vec4(");
uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
// TODO(benvanik): detect xyzw = xyzw, etc. // TODO(benvanik): detect xyzw = xyzw, etc.
// TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc // TODO(benvanik): detect and set as rN = vec4(samp.xyz, 1.0); / etc
uint32_t component_count = uint32_t component_count =
GetVertexFormatComponentCount(static_cast<VertexFormat>(vtx->format)); GetVertexFormatComponentCount(static_cast<VertexFormat>(vtx->format));
uint32_t dst_swiz = vtx->dst_swiz; uint32_t dst_swiz = vtx->dst_swiz;
@ -1509,8 +1388,7 @@ bool GL4ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx,
} else if ((dst_swiz & 0x7) == 7) { } else if ((dst_swiz & 0x7) == 7) {
Append("r%u.%c", vtx->dst_reg, chan_names[i]); Append("r%u.%c", vtx->dst_reg, chan_names[i]);
} else { } else {
Append("i.vf%u_%d.%c", fetch_slot, vtx->offset, Append("vf%u_%d.%c", fetch_slot, vtx->offset, chan_names[dst_swiz & 0x3]);
chan_names[dst_swiz & 0x3]);
} }
if (i < 3) { if (i < 3) {
Append(", "); Append(", ");
@ -1633,7 +1511,7 @@ bool GL4ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex,
} }
Append(");\n"); Append(");\n");
Append(" r%u.xyzw = float4(", tex->dst_reg); Append(" r%u.xyzw = vec4(", tex->dst_reg);
uint32_t dst_swiz = tex->dst_swiz; uint32_t dst_swiz = tex->dst_swiz;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
if (i) { if (i) {

View File

@ -34,8 +34,8 @@ class GL4ShaderTranslator {
GL4Shader* vertex_shader, GL4Shader* vertex_shader,
const xenos::xe_gpu_program_cntl_t& program_cntl); const xenos::xe_gpu_program_cntl_t& program_cntl);
std::string TranslatePixelShader( std::string TranslatePixelShader(
GL4Shader* pixel_shader, const xenos::xe_gpu_program_cntl_t& program_cntl, GL4Shader* pixel_shader,
const GL4Shader::AllocCounts& alloc_counts); const xenos::xe_gpu_program_cntl_t& program_cntl);
protected: protected:
ShaderType shader_type_; ShaderType shader_type_;

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_SHADER_H_ #define XENIA_GPU_SHADER_H_
#include <string> #include <string>
#include <vector>
#include <xenia/gpu/ucode.h> #include <xenia/gpu/ucode.h>
#include <xenia/gpu/xenos.h> #include <xenia/gpu/xenos.h>
@ -29,6 +30,7 @@ class Shader {
const std::string& translated_disassembly() const { const std::string& translated_disassembly() const {
return translated_disassembly_; return translated_disassembly_;
} }
const std::vector<uint8_t> translated_binary() { return translated_binary_; }
const uint32_t* data() const { return data_.data(); } const uint32_t* data() const { return data_.data(); }
@ -93,6 +95,7 @@ class Shader {
std::string ucode_disassembly_; std::string ucode_disassembly_;
std::string translated_disassembly_; std::string translated_disassembly_;
std::vector<uint8_t> translated_binary_;
std::string error_log_; std::string error_log_;
AllocCounts alloc_counts_; AllocCounts alloc_counts_;