Moving GL backend to new shader translator.
This seems to make a lot of things better, but may also break things. Cleanup to follow.
This commit is contained in:
parent
295c62c7a4
commit
51a8002629
|
@ -54,6 +54,7 @@ GL4CommandProcessor::CachedPipeline::~CachedPipeline() {
|
||||||
GL4CommandProcessor::GL4CommandProcessor(GL4GraphicsSystem* graphics_system,
|
GL4CommandProcessor::GL4CommandProcessor(GL4GraphicsSystem* graphics_system,
|
||||||
kernel::KernelState* kernel_state)
|
kernel::KernelState* kernel_state)
|
||||||
: CommandProcessor(graphics_system, kernel_state),
|
: CommandProcessor(graphics_system, kernel_state),
|
||||||
|
shader_translator_(GlslShaderTranslator::Dialect::kGL45),
|
||||||
draw_batcher_(graphics_system_->register_file()),
|
draw_batcher_(graphics_system_->register_file()),
|
||||||
scratch_buffer_(kScratchBufferCapacity, kScratchBufferAlignment) {}
|
scratch_buffer_(kScratchBufferCapacity, kScratchBufferAlignment) {}
|
||||||
|
|
||||||
|
@ -490,16 +491,12 @@ Shader* GL4CommandProcessor::LoadShader(ShaderType shader_type,
|
||||||
|
|
||||||
// Perform translation.
|
// Perform translation.
|
||||||
// If this fails the shader will be marked as invalid and ignored later.
|
// If this fails the shader will be marked as invalid and ignored later.
|
||||||
if (shader_type == ShaderType::kVertex) {
|
shader_ptr->Prepare(&shader_translator_);
|
||||||
shader_ptr->PrepareVertexShader(&shader_translator_);
|
|
||||||
} else {
|
|
||||||
shader_ptr->PreparePixelShader(&shader_translator_);
|
|
||||||
}
|
|
||||||
|
|
||||||
XELOGGPU("Set %s shader at %0.8X (%db):\n%s",
|
XELOGGPU("Set %s shader at %0.8X (%db):\n%s",
|
||||||
shader_type == ShaderType::kVertex ? "vertex" : "pixel",
|
shader_type == ShaderType::kVertex ? "vertex" : "pixel",
|
||||||
guest_address, dword_count * 4,
|
guest_address, dword_count * 4,
|
||||||
shader_ptr->ucode_disassembly().c_str());
|
shader_ptr->translated_shader()->ucode_disassembly().c_str());
|
||||||
}
|
}
|
||||||
return shader_ptr;
|
return shader_ptr;
|
||||||
}
|
}
|
||||||
|
@ -782,8 +779,7 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateRenderTargets() {
|
||||||
// Note that write mask may be more permissive than we want, so we mix that
|
// Note that write mask may be more permissive than we want, so we mix that
|
||||||
// with the actual targets the pixel shader writes to.
|
// with the actual targets the pixel shader writes to.
|
||||||
GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE};
|
GLenum draw_buffers[4] = {GL_NONE, GL_NONE, GL_NONE, GL_NONE};
|
||||||
const auto& shader_targets =
|
auto pixel_shader = active_pixel_shader_->translated_shader();
|
||||||
active_pixel_shader_->alloc_counts().color_targets;
|
|
||||||
GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
|
GLuint color_targets[4] = {kAnyTarget, kAnyTarget, kAnyTarget, kAnyTarget};
|
||||||
if (enable_mode == ModeControl::kColorDepth) {
|
if (enable_mode == ModeControl::kColorDepth) {
|
||||||
uint32_t color_info[4] = {
|
uint32_t color_info[4] = {
|
||||||
|
@ -793,7 +789,7 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateRenderTargets() {
|
||||||
// A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
|
// A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
|
||||||
for (int n = 0; n < xe::countof(color_info); n++) {
|
for (int n = 0; n < xe::countof(color_info); n++) {
|
||||||
uint32_t write_mask = (regs.rb_color_mask >> (n * 4)) & 0xF;
|
uint32_t write_mask = (regs.rb_color_mask >> (n * 4)) & 0xF;
|
||||||
if (!write_mask || !shader_targets[n]) {
|
if (!write_mask || !pixel_shader->writes_color_target(n)) {
|
||||||
// Unused, so keep disabled and set to wildcard so we'll take any
|
// Unused, so keep disabled and set to wildcard so we'll take any
|
||||||
// framebuffer that has it.
|
// framebuffer that has it.
|
||||||
continue;
|
continue;
|
||||||
|
@ -1366,14 +1362,14 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateVertexBuffers() {
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
assert_not_null(active_vertex_shader_);
|
assert_not_null(active_vertex_shader_);
|
||||||
|
|
||||||
const auto& buffer_inputs = active_vertex_shader_->buffer_inputs();
|
const auto& vertex_bindings =
|
||||||
for (uint32_t buffer_index = 0; buffer_index < buffer_inputs.count;
|
active_vertex_shader_->translated_shader()->vertex_bindings();
|
||||||
++buffer_index) {
|
for (const auto& vertex_binding : vertex_bindings) {
|
||||||
const auto& desc = buffer_inputs.descs[buffer_index];
|
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
|
(vertex_binding.fetch_constant / 3) * 6;
|
||||||
const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
|
const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
|
||||||
const xe_gpu_vertex_fetch_t* fetch = nullptr;
|
const xe_gpu_vertex_fetch_t* fetch = nullptr;
|
||||||
switch (desc.fetch_slot % 3) {
|
switch (vertex_binding.fetch_constant % 3) {
|
||||||
case 0:
|
case 0:
|
||||||
fetch = &group->vertex_fetch_0;
|
fetch = &group->vertex_fetch_0;
|
||||||
break;
|
break;
|
||||||
|
@ -1405,17 +1401,21 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateVertexBuffers() {
|
||||||
|
|
||||||
// TODO(benvanik): if we could find a way to avoid this, we could use
|
// TODO(benvanik): if we could find a way to avoid this, we could use
|
||||||
// multidraw without flushing.
|
// multidraw without flushing.
|
||||||
glVertexArrayVertexBuffer(vertex_shader->vao(), buffer_index,
|
glVertexArrayVertexBuffer(
|
||||||
scratch_buffer_.handle(), allocation.offset,
|
vertex_shader->vao(),
|
||||||
desc.stride_words * 4);
|
static_cast<GLuint>(vertex_binding.binding_index),
|
||||||
|
scratch_buffer_.handle(), allocation.offset,
|
||||||
|
vertex_binding.stride_words * 4);
|
||||||
|
|
||||||
scratch_buffer_.Commit(std::move(allocation));
|
scratch_buffer_.Commit(std::move(allocation));
|
||||||
} else {
|
} else {
|
||||||
// TODO(benvanik): if we could find a way to avoid this, we could use
|
// TODO(benvanik): if we could find a way to avoid this, we could use
|
||||||
// multidraw without flushing.
|
// multidraw without flushing.
|
||||||
glVertexArrayVertexBuffer(vertex_shader->vao(), buffer_index,
|
glVertexArrayVertexBuffer(
|
||||||
scratch_buffer_.handle(), allocation.offset,
|
vertex_shader->vao(),
|
||||||
desc.stride_words * 4);
|
static_cast<GLuint>(vertex_binding.binding_index),
|
||||||
|
scratch_buffer_.handle(), allocation.offset,
|
||||||
|
vertex_binding.stride_words * 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1434,14 +1434,14 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSamplers() {
|
||||||
bool has_setup_sampler[32] = {false};
|
bool has_setup_sampler[32] = {false};
|
||||||
|
|
||||||
// Vertex texture samplers.
|
// Vertex texture samplers.
|
||||||
const auto& vertex_sampler_inputs = active_vertex_shader_->sampler_inputs();
|
const auto& vertex_sampler_inputs =
|
||||||
for (size_t i = 0; i < vertex_sampler_inputs.count; ++i) {
|
active_vertex_shader_->translated_shader()->texture_bindings();
|
||||||
const auto& desc = vertex_sampler_inputs.descs[i];
|
for (auto& texture_binding : vertex_sampler_inputs) {
|
||||||
if (has_setup_sampler[desc.fetch_slot]) {
|
if (has_setup_sampler[texture_binding.fetch_constant]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
has_setup_sampler[desc.fetch_slot] = true;
|
has_setup_sampler[texture_binding.fetch_constant] = true;
|
||||||
auto status = PopulateSampler(desc);
|
auto status = PopulateSampler(texture_binding);
|
||||||
if (status == UpdateStatus::kError) {
|
if (status == UpdateStatus::kError) {
|
||||||
return status;
|
return status;
|
||||||
} else if (status == UpdateStatus::kMismatch) {
|
} else if (status == UpdateStatus::kMismatch) {
|
||||||
|
@ -1450,14 +1450,14 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSamplers() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pixel shader texture sampler.
|
// Pixel shader texture sampler.
|
||||||
const auto& pixel_sampler_inputs = active_pixel_shader_->sampler_inputs();
|
const auto& pixel_sampler_inputs =
|
||||||
for (size_t i = 0; i < pixel_sampler_inputs.count; ++i) {
|
active_pixel_shader_->translated_shader()->texture_bindings();
|
||||||
const auto& desc = pixel_sampler_inputs.descs[i];
|
for (auto& texture_binding : pixel_sampler_inputs) {
|
||||||
if (has_setup_sampler[desc.fetch_slot]) {
|
if (has_setup_sampler[texture_binding.fetch_constant]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
has_setup_sampler[desc.fetch_slot] = true;
|
has_setup_sampler[texture_binding.fetch_constant] = true;
|
||||||
auto status = PopulateSampler(desc);
|
auto status = PopulateSampler(texture_binding);
|
||||||
if (status == UpdateStatus::kError) {
|
if (status == UpdateStatus::kError) {
|
||||||
return UpdateStatus::kError;
|
return UpdateStatus::kError;
|
||||||
} else if (status == UpdateStatus::kMismatch) {
|
} else if (status == UpdateStatus::kMismatch) {
|
||||||
|
@ -1469,15 +1469,16 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSamplers() {
|
||||||
}
|
}
|
||||||
|
|
||||||
GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSampler(
|
GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSampler(
|
||||||
const Shader::SamplerDesc& desc) {
|
const TranslatedShader::TextureBinding& texture_binding) {
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6;
|
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||||
|
texture_binding.fetch_constant * 6;
|
||||||
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
|
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(&regs.values[r]);
|
||||||
auto& fetch = group->texture_fetch;
|
auto& fetch = group->texture_fetch;
|
||||||
|
|
||||||
// Reset slot.
|
// Reset slot.
|
||||||
// If we fail, we still draw but with an invalid texture.
|
// If we fail, we still draw but with an invalid texture.
|
||||||
draw_batcher_.set_texture_sampler(desc.fetch_slot, 0);
|
draw_batcher_.set_texture_sampler(texture_binding.fetch_constant, 0);
|
||||||
|
|
||||||
if (FLAGS_disable_textures) {
|
if (FLAGS_disable_textures) {
|
||||||
return UpdateStatus::kCompatible;
|
return UpdateStatus::kCompatible;
|
||||||
|
@ -1495,7 +1496,8 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSampler(
|
||||||
return UpdateStatus::kCompatible; // invalid texture used
|
return UpdateStatus::kCompatible; // invalid texture used
|
||||||
}
|
}
|
||||||
SamplerInfo sampler_info;
|
SamplerInfo sampler_info;
|
||||||
if (!SamplerInfo::Prepare(fetch, desc.tex_fetch, &sampler_info)) {
|
if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr,
|
||||||
|
&sampler_info)) {
|
||||||
XELOGE("Unable to parse sampler info");
|
XELOGE("Unable to parse sampler info");
|
||||||
return UpdateStatus::kCompatible; // invalid texture used
|
return UpdateStatus::kCompatible; // invalid texture used
|
||||||
}
|
}
|
||||||
|
@ -1511,7 +1513,7 @@ GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSampler(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shaders will use bindless to fetch right from it.
|
// Shaders will use bindless to fetch right from it.
|
||||||
draw_batcher_.set_texture_sampler(desc.fetch_slot,
|
draw_batcher_.set_texture_sampler(texture_binding.fetch_constant,
|
||||||
entry_view->texture_sampler_handle);
|
entry_view->texture_sampler_handle);
|
||||||
|
|
||||||
return UpdateStatus::kCompatible;
|
return UpdateStatus::kCompatible;
|
||||||
|
|
|
@ -24,8 +24,8 @@
|
||||||
#include "xenia/gpu/command_processor.h"
|
#include "xenia/gpu/command_processor.h"
|
||||||
#include "xenia/gpu/gl4/draw_batcher.h"
|
#include "xenia/gpu/gl4/draw_batcher.h"
|
||||||
#include "xenia/gpu/gl4/gl4_shader.h"
|
#include "xenia/gpu/gl4/gl4_shader.h"
|
||||||
#include "xenia/gpu/gl4/gl4_shader_translator.h"
|
|
||||||
#include "xenia/gpu/gl4/texture_cache.h"
|
#include "xenia/gpu/gl4/texture_cache.h"
|
||||||
|
#include "xenia/gpu/glsl_shader_translator.h"
|
||||||
#include "xenia/gpu/register_file.h"
|
#include "xenia/gpu/register_file.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/kernel/xthread.h"
|
#include "xenia/kernel/xthread.h"
|
||||||
|
@ -123,13 +123,14 @@ class GL4CommandProcessor : public CommandProcessor {
|
||||||
UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info);
|
UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info);
|
||||||
UpdateStatus PopulateVertexBuffers();
|
UpdateStatus PopulateVertexBuffers();
|
||||||
UpdateStatus PopulateSamplers();
|
UpdateStatus PopulateSamplers();
|
||||||
UpdateStatus PopulateSampler(const Shader::SamplerDesc& desc);
|
UpdateStatus PopulateSampler(
|
||||||
|
const TranslatedShader::TextureBinding& texture_binding);
|
||||||
bool IssueCopy() override;
|
bool IssueCopy() override;
|
||||||
|
|
||||||
CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],
|
CachedFramebuffer* GetFramebuffer(GLuint color_targets[4],
|
||||||
GLuint depth_target);
|
GLuint depth_target);
|
||||||
|
|
||||||
GL4ShaderTranslator shader_translator_;
|
GlslShaderTranslator shader_translator_;
|
||||||
std::vector<std::unique_ptr<GL4Shader>> all_shaders_;
|
std::vector<std::unique_ptr<GL4Shader>> all_shaders_;
|
||||||
std::unordered_map<uint64_t, GL4Shader*> shader_cache_;
|
std::unordered_map<uint64_t, GL4Shader*> shader_cache_;
|
||||||
CachedFramebuffer* active_framebuffer_ = nullptr;
|
CachedFramebuffer* active_framebuffer_ = nullptr;
|
||||||
|
|
|
@ -13,7 +13,6 @@
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
#include "xenia/gpu/gl4/gl4_gpu_flags.h"
|
#include "xenia/gpu/gl4/gl4_gpu_flags.h"
|
||||||
#include "xenia/gpu/gl4/gl4_shader_translator.h"
|
|
||||||
#include "xenia/gpu/gpu_flags.h"
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
|
@ -31,143 +30,68 @@ GL4Shader::~GL4Shader() {
|
||||||
glDeleteVertexArrays(1, &vao_);
|
glDeleteVertexArrays(1, &vao_);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GL4Shader::GetHeader() {
|
bool GL4Shader::Prepare(ShaderTranslator* shader_translator) {
|
||||||
static const std::string header =
|
if (!Shader::Prepare(shader_translator)) {
|
||||||
"#version 450\n"
|
return false;
|
||||||
"#extension all : warn\n"
|
}
|
||||||
"#extension GL_ARB_bindless_texture : require\n"
|
|
||||||
"#extension GL_ARB_explicit_uniform_location : require\n"
|
|
||||||
"#extension GL_ARB_shader_draw_parameters : require\n"
|
|
||||||
"#extension GL_ARB_shader_storage_buffer_object : require\n"
|
|
||||||
"#extension GL_ARB_shading_language_420pack : require\n"
|
|
||||||
"#extension GL_ARB_fragment_coord_conventions : require\n"
|
|
||||||
"#define FLT_MAX 3.402823466e+38\n"
|
|
||||||
"precision highp float;\n"
|
|
||||||
"precision highp int;\n"
|
|
||||||
"layout(std140, column_major) uniform;\n"
|
|
||||||
"layout(std430, column_major) buffer;\n"
|
|
||||||
"\n"
|
|
||||||
// This must match DrawBatcher::CommonHeader.
|
|
||||||
"struct StateData {\n"
|
|
||||||
" vec4 window_scale;\n"
|
|
||||||
" vec4 vtx_fmt;\n"
|
|
||||||
" vec4 alpha_test;\n"
|
|
||||||
// TODO(benvanik): variable length.
|
|
||||||
" uvec2 texture_samplers[32];\n"
|
|
||||||
" vec4 float_consts[512];\n"
|
|
||||||
" int bool_consts[8];\n"
|
|
||||||
" int loop_consts[32];\n"
|
|
||||||
"};\n"
|
|
||||||
"layout(binding = 0) buffer State {\n"
|
|
||||||
" StateData states[];\n"
|
|
||||||
"};\n"
|
|
||||||
"\n"
|
|
||||||
"struct VertexData {\n"
|
|
||||||
" vec4 o[16];\n"
|
|
||||||
"};\n";
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string GL4Shader::GetFooter() {
|
// Build static vertex array descriptor.
|
||||||
// http://www.nvidia.com/object/cube_map_ogl_tutorial.html
|
if (!PrepareVertexArrayObject()) {
|
||||||
// http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
|
XELOGE("Unable to prepare vertex shader array object");
|
||||||
// src0 = Rn.zzxy, src1 = Rn.yxzz
|
return false;
|
||||||
// dst.W = FaceId;
|
}
|
||||||
// dst.Z = 2.0f * MajorAxis;
|
|
||||||
// dst.Y = S cube coordinate;
|
if (!CompileProgram()) {
|
||||||
// dst.X = T cube coordinate;
|
return false;
|
||||||
/*
|
}
|
||||||
major axis
|
|
||||||
direction target sc tc ma
|
return true;
|
||||||
---------- ------------------------------------ --- --- ---
|
|
||||||
+rx GL_TEXTURE_CUBE_MAP_POSITIVE_X_EXT=0 -rz -ry rx
|
|
||||||
-rx GL_TEXTURE_CUBE_MAP_NEGATIVE_X_EXT=1 +rz -ry rx
|
|
||||||
+ry GL_TEXTURE_CUBE_MAP_POSITIVE_Y_EXT=2 +rx +rz ry
|
|
||||||
-ry GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT=3 +rx -rz ry
|
|
||||||
+rz GL_TEXTURE_CUBE_MAP_POSITIVE_Z_EXT=4 +rx -ry rz
|
|
||||||
-rz GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT=5 -rx -ry rz
|
|
||||||
*/
|
|
||||||
static const std::string footer =
|
|
||||||
"vec4 cube(vec4 src0, vec4 src1) {\n"
|
|
||||||
" vec3 src = vec3(src1.y, src1.x, src1.z);\n"
|
|
||||||
" vec3 abs_src = abs(src);\n"
|
|
||||||
" int face_id;\n"
|
|
||||||
" float sc;\n"
|
|
||||||
" float tc;\n"
|
|
||||||
" float ma;\n"
|
|
||||||
" if (abs_src.x > abs_src.y && abs_src.x > abs_src.z) {\n"
|
|
||||||
" if (src.x > 0.0) {\n"
|
|
||||||
" face_id = 0; sc = -abs_src.z; tc = -abs_src.y; ma = abs_src.x;\n"
|
|
||||||
" } else {\n"
|
|
||||||
" face_id = 1; sc = abs_src.z; tc = -abs_src.y; ma = abs_src.x;\n"
|
|
||||||
" }\n"
|
|
||||||
" } else if (abs_src.y > abs_src.x && abs_src.y > abs_src.z) {\n"
|
|
||||||
" if (src.y > 0.0) {\n"
|
|
||||||
" face_id = 2; sc = abs_src.x; tc = abs_src.z; ma = abs_src.y;\n"
|
|
||||||
" } else {\n"
|
|
||||||
" face_id = 3; sc = abs_src.x; tc = -abs_src.z; ma = abs_src.y;\n"
|
|
||||||
" }\n"
|
|
||||||
" } else {\n"
|
|
||||||
" if (src.z > 0.0) {\n"
|
|
||||||
" face_id = 4; sc = abs_src.x; tc = -abs_src.y; ma = abs_src.z;\n"
|
|
||||||
" } else {\n"
|
|
||||||
" face_id = 5; sc = -abs_src.x; tc = -abs_src.y; ma = abs_src.z;\n"
|
|
||||||
" }\n"
|
|
||||||
" }\n"
|
|
||||||
" float s = (sc / ma + 1.0) / 2.0;\n"
|
|
||||||
" float t = (tc / ma + 1.0) / 2.0;\n"
|
|
||||||
" return vec4(t, s, 2.0 * ma, float(face_id));\n"
|
|
||||||
"}\n";
|
|
||||||
return footer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GL4Shader::PrepareVertexArrayObject() {
|
bool GL4Shader::PrepareVertexArrayObject() {
|
||||||
glCreateVertexArrays(1, &vao_);
|
glCreateVertexArrays(1, &vao_);
|
||||||
|
|
||||||
uint32_t el_index = 0;
|
for (const auto& vertex_binding : translated_shader_->vertex_bindings()) {
|
||||||
for (uint32_t buffer_index = 0; buffer_index < buffer_inputs_.count;
|
for (const auto& attrib : vertex_binding.attributes) {
|
||||||
++buffer_index) {
|
auto comp_count = GetVertexFormatComponentCount(
|
||||||
const auto& desc = buffer_inputs_.descs[buffer_index];
|
attrib.fetch_instr.attributes.data_format);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < desc.element_count; ++i, ++el_index) {
|
|
||||||
const auto& el = desc.elements[i];
|
|
||||||
auto comp_count = xenos::GetVertexFormatComponentCount(el.format);
|
|
||||||
GLenum comp_type;
|
GLenum comp_type;
|
||||||
switch (el.format) {
|
bool is_signed = attrib.fetch_instr.attributes.is_signed;
|
||||||
|
switch (attrib.fetch_instr.attributes.data_format) {
|
||||||
case VertexFormat::k_8_8_8_8:
|
case VertexFormat::k_8_8_8_8:
|
||||||
comp_type = el.is_signed ? GL_BYTE : GL_UNSIGNED_BYTE;
|
comp_type = is_signed ? GL_BYTE : GL_UNSIGNED_BYTE;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_2_10_10_10:
|
case VertexFormat::k_2_10_10_10:
|
||||||
comp_type = el.is_signed ? GL_INT_2_10_10_10_REV
|
comp_type = is_signed ? GL_INT_2_10_10_10_REV
|
||||||
: GL_UNSIGNED_INT_2_10_10_10_REV;
|
: GL_UNSIGNED_INT_2_10_10_10_REV;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_10_11_11:
|
case VertexFormat::k_10_11_11:
|
||||||
// assert_false(el.is_signed);
|
// assert_false(is_signed);
|
||||||
XELOGW("Signed k_10_11_11 vertex format not supported by GL");
|
XELOGW("Signed k_10_11_11 vertex format not supported by GL");
|
||||||
comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV;
|
comp_type = GL_UNSIGNED_INT_10F_11F_11F_REV;
|
||||||
break;
|
break;
|
||||||
/*case VertexFormat::k_11_11_10:
|
/*case VertexFormat::k_11_11_10:
|
||||||
break;*/
|
break;*/
|
||||||
case VertexFormat::k_16_16:
|
case VertexFormat::k_16_16:
|
||||||
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
comp_type = is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_16_16_FLOAT:
|
case VertexFormat::k_16_16_FLOAT:
|
||||||
comp_type = GL_HALF_FLOAT;
|
comp_type = GL_HALF_FLOAT;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_16_16_16_16:
|
case VertexFormat::k_16_16_16_16:
|
||||||
comp_type = el.is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
comp_type = is_signed ? GL_SHORT : GL_UNSIGNED_SHORT;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_16_16_16_16_FLOAT:
|
case VertexFormat::k_16_16_16_16_FLOAT:
|
||||||
comp_type = GL_HALF_FLOAT;
|
comp_type = GL_HALF_FLOAT;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_32:
|
case VertexFormat::k_32:
|
||||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_32_32:
|
case VertexFormat::k_32_32:
|
||||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_32_32_32_32:
|
case VertexFormat::k_32_32_32_32:
|
||||||
comp_type = el.is_signed ? GL_INT : GL_UNSIGNED_INT;
|
comp_type = is_signed ? GL_INT : GL_UNSIGNED_INT;
|
||||||
break;
|
break;
|
||||||
case VertexFormat::k_32_FLOAT:
|
case VertexFormat::k_32_FLOAT:
|
||||||
comp_type = GL_FLOAT;
|
comp_type = GL_FLOAT;
|
||||||
|
@ -182,145 +106,27 @@ bool GL4Shader::PrepareVertexArrayObject() {
|
||||||
comp_type = GL_FLOAT;
|
comp_type = GL_FLOAT;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert_unhandled_case(el.format);
|
assert_unhandled_case(attrib.fetch_instr.attributes.data_format);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
glEnableVertexArrayAttrib(vao_, el_index);
|
glEnableVertexArrayAttrib(vao_, attrib.attrib_index);
|
||||||
glVertexArrayAttribBinding(vao_, el_index, buffer_index);
|
glVertexArrayAttribBinding(vao_, attrib.attrib_index,
|
||||||
glVertexArrayAttribFormat(vao_, el_index, comp_count, comp_type,
|
vertex_binding.binding_index);
|
||||||
el.is_normalized, el.offset_words * 4);
|
glVertexArrayAttribFormat(vao_, attrib.attrib_index, comp_count,
|
||||||
|
comp_type,
|
||||||
|
!attrib.fetch_instr.attributes.is_integer,
|
||||||
|
attrib.fetch_instr.attributes.offset * 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GL4Shader::PrepareVertexShader(GL4ShaderTranslator* shader_translator) {
|
bool GL4Shader::CompileProgram() {
|
||||||
if (is_valid_) {
|
|
||||||
return is_valid_;
|
|
||||||
}
|
|
||||||
is_valid_ = false;
|
|
||||||
|
|
||||||
// Build static vertex array descriptor.
|
|
||||||
if (!PrepareVertexArrayObject()) {
|
|
||||||
XELOGE("Unable to prepare vertex shader array object");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
std::string apply_transform =
|
|
||||||
"vec4 applyTransform(const in StateData state, vec4 pos) {\n"
|
|
||||||
" if (state.vtx_fmt.w == 0.0) {\n"
|
|
||||||
" // w is 1/W0, so fix it.\n"
|
|
||||||
" pos.w = 1.0 / pos.w;\n"
|
|
||||||
" }\n"
|
|
||||||
" if (state.vtx_fmt.x != 0.0) {\n"
|
|
||||||
" // Already multiplied by 1/W0, so pull it out.\n"
|
|
||||||
" pos.xy /= pos.w;\n"
|
|
||||||
" }\n"
|
|
||||||
" if (state.vtx_fmt.z != 0.0) {\n"
|
|
||||||
" // Already multiplied by 1/W0, so pull it out.\n"
|
|
||||||
" pos.z /= pos.w;\n"
|
|
||||||
" }\n"
|
|
||||||
" pos.xy *= state.window_scale.xy;\n"
|
|
||||||
" return pos;\n"
|
|
||||||
"}\n";
|
|
||||||
std::string source =
|
|
||||||
GetHeader() + apply_transform +
|
|
||||||
"out gl_PerVertex {\n"
|
|
||||||
" vec4 gl_Position;\n"
|
|
||||||
" float gl_PointSize;\n"
|
|
||||||
" float gl_ClipDistance[];\n"
|
|
||||||
"};\n"
|
|
||||||
"layout(location = 0) flat out uint draw_id;\n"
|
|
||||||
"layout(location = 1) out VertexData vtx;\n"
|
|
||||||
"void processVertex(const in StateData state);\n"
|
|
||||||
"void main() {\n" +
|
|
||||||
(alloc_counts().positions ? " gl_Position = vec4(0.0, 0.0, 0.0, 1.0);\n"
|
|
||||||
: "") +
|
|
||||||
(alloc_counts().point_size ? " gl_PointSize = 1.0;\n" : "") +
|
|
||||||
" for (int i = 0; i < vtx.o.length(); ++i) {\n"
|
|
||||||
" vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n"
|
|
||||||
" }\n"
|
|
||||||
" const StateData state = states[gl_DrawIDARB];\n"
|
|
||||||
" processVertex(state);\n"
|
|
||||||
" gl_Position = applyTransform(state, gl_Position);\n"
|
|
||||||
" draw_id = gl_DrawIDARB;\n"
|
|
||||||
"}\n" +
|
|
||||||
GetFooter();
|
|
||||||
|
|
||||||
std::string translated_source =
|
|
||||||
shader_translator->TranslateVertexShader(this);
|
|
||||||
if (translated_source.empty()) {
|
|
||||||
XELOGE("Vertex shader failed translation");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
source += translated_source;
|
|
||||||
|
|
||||||
if (!CompileProgram(source)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
is_valid_ = true;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool GL4Shader::PreparePixelShader(GL4ShaderTranslator* shader_translator) {
|
|
||||||
if (is_valid_) {
|
|
||||||
return is_valid_;
|
|
||||||
}
|
|
||||||
is_valid_ = false;
|
|
||||||
|
|
||||||
std::string source =
|
|
||||||
GetHeader() +
|
|
||||||
"layout(origin_upper_left, pixel_center_integer) in vec4 gl_FragCoord;\n"
|
|
||||||
"layout(location = 0) flat in uint draw_id;\n"
|
|
||||||
"layout(location = 1) in VertexData vtx;\n"
|
|
||||||
"layout(location = 0) out vec4 oC[4];\n"
|
|
||||||
"void processFragment(const in StateData state);\n"
|
|
||||||
"void applyAlphaTest(int alpha_func, float alpha_ref) {\n"
|
|
||||||
" bool passes = false;\n"
|
|
||||||
" switch (alpha_func) {\n"
|
|
||||||
" case 0: break;\n"
|
|
||||||
" case 1: if (oC[0].a < alpha_ref) passes = true; break;\n"
|
|
||||||
" case 2: if (oC[0].a == alpha_ref) passes = true; break;\n"
|
|
||||||
" case 3: if (oC[0].a <= alpha_ref) passes = true; break;\n"
|
|
||||||
" case 4: if (oC[0].a > alpha_ref) passes = true; break;\n"
|
|
||||||
" case 5: if (oC[0].a != alpha_ref) passes = true; break;\n"
|
|
||||||
" case 6: if (oC[0].a >= alpha_ref) passes = true; break;\n"
|
|
||||||
" case 7: passes = true; break;\n"
|
|
||||||
" };\n"
|
|
||||||
" if (!passes) discard;\n"
|
|
||||||
"}\n"
|
|
||||||
"void main() {\n" +
|
|
||||||
" const StateData state = states[draw_id];\n"
|
|
||||||
" processFragment(state);\n"
|
|
||||||
" if (state.alpha_test.x != 0.0) {\n"
|
|
||||||
" applyAlphaTest(int(state.alpha_test.y), state.alpha_test.z);\n"
|
|
||||||
" }\n"
|
|
||||||
"}\n" +
|
|
||||||
GetFooter();
|
|
||||||
|
|
||||||
std::string translated_source = shader_translator->TranslatePixelShader(this);
|
|
||||||
if (translated_source.empty()) {
|
|
||||||
XELOGE("Pixel shader failed translation");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
source += translated_source;
|
|
||||||
|
|
||||||
if (!CompileProgram(source)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
is_valid_ = true;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool GL4Shader::CompileProgram(std::string source) {
|
|
||||||
assert_zero(program_);
|
assert_zero(program_);
|
||||||
|
|
||||||
translated_disassembly_ = std::move(source);
|
auto source_str = translated_shader_->GetBinaryString();
|
||||||
const char* source_str = translated_disassembly_.c_str();
|
|
||||||
|
|
||||||
// Save to disk, if we asked for it.
|
// Save to disk, if we asked for it.
|
||||||
auto base_path = FLAGS_dump_shaders.c_str();
|
auto base_path = FLAGS_dump_shaders.c_str();
|
||||||
|
@ -349,18 +155,19 @@ bool GL4Shader::CompileProgram(std::string source) {
|
||||||
// Note that we put the translated source first so we get good line numbers.
|
// Note that we put the translated source first so we get good line numbers.
|
||||||
f = fopen(file_name, "w");
|
f = fopen(file_name, "w");
|
||||||
if (f) {
|
if (f) {
|
||||||
fprintf(f, "%s", translated_disassembly_.c_str());
|
fprintf(f, "%s", source_str.c_str());
|
||||||
fprintf(f, "/*\n");
|
fprintf(f, "/*\n");
|
||||||
fprintf(f, "%s", ucode_disassembly_.c_str());
|
fprintf(f, "%s", translated_shader_->ucode_disassembly().c_str());
|
||||||
fprintf(f, " */\n");
|
fprintf(f, " */\n");
|
||||||
fclose(f);
|
fclose(f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto source_str_ptr = source_str.c_str();
|
||||||
program_ = glCreateShaderProgramv(shader_type_ == ShaderType::kVertex
|
program_ = glCreateShaderProgramv(shader_type_ == ShaderType::kVertex
|
||||||
? GL_VERTEX_SHADER
|
? GL_VERTEX_SHADER
|
||||||
: GL_FRAGMENT_SHADER,
|
: GL_FRAGMENT_SHADER,
|
||||||
1, &source_str);
|
1, &source_str_ptr);
|
||||||
if (!program_) {
|
if (!program_) {
|
||||||
XELOGE("Unable to create shader program");
|
XELOGE("Unable to create shader program");
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -13,14 +13,13 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "xenia/gpu/shader.h"
|
#include "xenia/gpu/shader.h"
|
||||||
|
#include "xenia/gpu/shader_translator.h"
|
||||||
#include "xenia/ui/gl/gl_context.h"
|
#include "xenia/ui/gl/gl_context.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace gl4 {
|
namespace gl4 {
|
||||||
|
|
||||||
class GL4ShaderTranslator;
|
|
||||||
|
|
||||||
class GL4Shader : public Shader {
|
class GL4Shader : public Shader {
|
||||||
public:
|
public:
|
||||||
GL4Shader(ShaderType shader_type, uint64_t data_hash,
|
GL4Shader(ShaderType shader_type, uint64_t data_hash,
|
||||||
|
@ -30,14 +29,11 @@ class GL4Shader : public Shader {
|
||||||
GLuint program() const { return program_; }
|
GLuint program() const { return program_; }
|
||||||
GLuint vao() const { return vao_; }
|
GLuint vao() const { return vao_; }
|
||||||
|
|
||||||
bool PrepareVertexShader(GL4ShaderTranslator* shader_translator);
|
bool Prepare(ShaderTranslator* shader_translator);
|
||||||
bool PreparePixelShader(GL4ShaderTranslator* shader_translator);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::string GetHeader();
|
|
||||||
std::string GetFooter();
|
|
||||||
bool PrepareVertexArrayObject();
|
bool PrepareVertexArrayObject();
|
||||||
bool CompileProgram(std::string source);
|
bool CompileProgram();
|
||||||
|
|
||||||
GLuint program_;
|
GLuint program_;
|
||||||
GLuint vao_;
|
GLuint vao_;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,165 +0,0 @@
|
||||||
/**
|
|
||||||
******************************************************************************
|
|
||||||
* Xenia : Xbox 360 Emulator Research Project *
|
|
||||||
******************************************************************************
|
|
||||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
|
||||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
|
||||||
******************************************************************************
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_
|
|
||||||
#define XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "xenia/base/string_buffer.h"
|
|
||||||
#include "xenia/gpu/gl4/gl4_shader.h"
|
|
||||||
#include "xenia/gpu/ucode.h"
|
|
||||||
#include "xenia/gpu/xenos.h"
|
|
||||||
#include "xenia/ui/gl/gl_context.h"
|
|
||||||
|
|
||||||
namespace xe {
|
|
||||||
namespace gpu {
|
|
||||||
namespace gl4 {
|
|
||||||
|
|
||||||
class GL4ShaderTranslator {
|
|
||||||
public:
|
|
||||||
static const uint32_t kMaxInterpolators = 16;
|
|
||||||
|
|
||||||
GL4ShaderTranslator();
|
|
||||||
~GL4ShaderTranslator();
|
|
||||||
|
|
||||||
std::string TranslateVertexShader(GL4Shader* vertex_shader);
|
|
||||||
std::string TranslatePixelShader(GL4Shader* pixel_shader);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
ShaderType shader_type_;
|
|
||||||
const uint32_t* dwords_ = nullptr;
|
|
||||||
|
|
||||||
static const int kOutputCapacity = 64 * 1024;
|
|
||||||
StringBuffer output_;
|
|
||||||
|
|
||||||
bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; }
|
|
||||||
bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; }
|
|
||||||
|
|
||||||
void Reset(GL4Shader* shader);
|
|
||||||
|
|
||||||
void AppendSrcReg(const ucode::instr_alu_t& op, int i);
|
|
||||||
void AppendSrcReg(const ucode::instr_alu_t& op, uint32_t num, uint32_t type,
|
|
||||||
uint32_t swiz, uint32_t negate, int const_slot);
|
|
||||||
void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
|
|
||||||
uint32_t abs);
|
|
||||||
void PrintVectorDstReg(const ucode::instr_alu_t& alu);
|
|
||||||
void PrintScalarDstReg(const ucode::instr_alu_t& alu);
|
|
||||||
void PrintExportComment(uint32_t num);
|
|
||||||
|
|
||||||
bool TranslateALU(const ucode::instr_alu_t* alu, int sync);
|
|
||||||
bool TranslateALU_ADDv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MULv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MAXv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MINv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SETXXv(const ucode::instr_alu_t& alu, const char* op);
|
|
||||||
bool TranslateALU_SETEv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SETNEv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SETGTv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SETGTEv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_FRACv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_TRUNCv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_FLOORv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MULADDv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_CNDXXv(const ucode::instr_alu_t& alu, const char* op);
|
|
||||||
bool TranslateALU_CNDEv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_CNDGTEv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_CNDGTv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_DOT4v(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_DOT3v(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_DOT2ADDv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_CUBEv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MAX4v(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SETXX_PUSHv(const ucode::instr_alu_t& alu,
|
|
||||||
const char* op);
|
|
||||||
bool TranslateALU_PRED_SETE_PUSHv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SETNE_PUSHv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SETGT_PUSHv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SETGTE_PUSHv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_DSTv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MOVAv(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_ADDs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_ADD_PREVs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MULs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MUL_PREVs(const ucode::instr_alu_t& alu);
|
|
||||||
// ...
|
|
||||||
bool TranslateALU_MAXs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MINs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SETXXs(const ucode::instr_alu_t& alu, const char* op);
|
|
||||||
bool TranslateALU_SETEs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SETGTs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SETGTEs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SETNEs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_FRACs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_TRUNCs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_FLOORs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_LOG_CLAMP(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_RECIP_CLAMP(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_RECIP_FF(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_RECIPSQ_CLAMP(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_RECIPSQ_FF(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_RECIPSQ_IEEE(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MOVAs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MOVA_FLOORs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SUBs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SUB_PREVs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SETXXs(const ucode::instr_alu_t& alu, const char* op);
|
|
||||||
bool TranslateALU_PRED_SETEs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SETNEs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SETGTs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SETGTEs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SET_INVs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_PRED_SET_POPs(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SQRT_IEEE(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MUL_CONST_0(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_MUL_CONST_1(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_ADD_CONST_0(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_ADD_CONST_1(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SUB_CONST_0(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SUB_CONST_1(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_SIN(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_COS(const ucode::instr_alu_t& alu);
|
|
||||||
bool TranslateALU_RETAIN_PREV(const ucode::instr_alu_t& alu);
|
|
||||||
|
|
||||||
struct AppendFlag {};
|
|
||||||
void BeginAppendVectorOp(const ucode::instr_alu_t& op);
|
|
||||||
void AppendVectorOpSrcReg(const ucode::instr_alu_t& op, int i);
|
|
||||||
void EndAppendVectorOp(const ucode::instr_alu_t& op,
|
|
||||||
uint32_t append_flags = 0);
|
|
||||||
void BeginAppendScalarOp(const ucode::instr_alu_t& op);
|
|
||||||
void AppendScalarOpSrcReg(const ucode::instr_alu_t& op, int i);
|
|
||||||
void EndAppendScalarOp(const ucode::instr_alu_t& op,
|
|
||||||
uint32_t append_flags = 0);
|
|
||||||
void AppendOpDestRegName(const ucode::instr_alu_t& op, uint32_t dest_num);
|
|
||||||
|
|
||||||
void PrintDestFetch(uint32_t dst_reg, uint32_t dst_swiz);
|
|
||||||
void AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz);
|
|
||||||
|
|
||||||
void AppendPredPre(bool is_cond_cf, uint32_t cf_condition,
|
|
||||||
uint32_t pred_select, uint32_t condition);
|
|
||||||
void AppendPredPost(bool is_cond_cf, uint32_t cf_condition,
|
|
||||||
uint32_t pred_select, uint32_t condition);
|
|
||||||
|
|
||||||
bool TranslateBlocks(GL4Shader* shader);
|
|
||||||
bool TranslateExec(const ucode::instr_cf_exec_t& cf);
|
|
||||||
bool TranslateJmp(const ucode::instr_cf_jmp_call_t& cf);
|
|
||||||
bool TranslateLoopStart(const ucode::instr_cf_loop_t& cf);
|
|
||||||
bool TranslateLoopEnd(const ucode::instr_cf_loop_t& cf);
|
|
||||||
bool TranslateVertexFetch(const ucode::instr_fetch_vtx_t* vtx, int sync);
|
|
||||||
bool TranslateTextureFetch(const ucode::instr_fetch_tex_t* tex, int sync);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace gl4
|
|
||||||
} // namespace gpu
|
|
||||||
} // namespace xe
|
|
||||||
|
|
||||||
#endif // XENIA_GPU_GL4_GL4_SHADER_TRANSLATOR_H_
|
|
|
@ -1,549 +0,0 @@
|
||||||
/**
|
|
||||||
******************************************************************************
|
|
||||||
* Xenia : Xbox 360 Emulator Research Project *
|
|
||||||
******************************************************************************
|
|
||||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
|
||||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
|
||||||
******************************************************************************
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef XENIA_GPU_GL4_UCODE_H_
|
|
||||||
#define XENIA_GPU_GL4_UCODE_H_
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
|
|
||||||
#include "xenia/base/assert.h"
|
|
||||||
#include "xenia/base/platform.h"
|
|
||||||
#include "xenia/gpu/xenos.h"
|
|
||||||
|
|
||||||
// Closest AMD doc:
|
|
||||||
// http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf
|
|
||||||
// Microcode format differs, but most fields/enums are the same.
|
|
||||||
|
|
||||||
// This code comes from the freedreno project:
|
|
||||||
// https://github.com/freedreno/freedreno/blob/master/includes/instr-a2xx.h
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice (including the next
|
|
||||||
* paragraph) shall be included in all copies or substantial portions of the
|
|
||||||
* Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
* SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
namespace xe {
|
|
||||||
namespace gpu {
|
|
||||||
namespace gl4 {
|
|
||||||
namespace ucode {
|
|
||||||
|
|
||||||
// Surface/vertex data formats as encoded in fetch instructions.
// Numbering is explicit because it is not contiguous (there is no value 21).
enum a2xx_sq_surfaceformat {
  FMT_1_REVERSE = 0,
  FMT_1 = 1,
  FMT_8 = 2,
  FMT_1_5_5_5 = 3,
  FMT_5_6_5 = 4,
  FMT_6_5_5 = 5,
  FMT_8_8_8_8 = 6,
  FMT_2_10_10_10 = 7,
  FMT_8_A = 8,
  FMT_8_B = 9,
  FMT_8_8 = 10,
  FMT_Cr_Y1_Cb_Y0 = 11,
  FMT_Y1_Cr_Y0_Cb = 12,
  FMT_5_5_5_1 = 13,
  FMT_8_8_8_8_A = 14,
  FMT_4_4_4_4 = 15,
  FMT_10_11_11 = 16,
  FMT_11_11_10 = 17,
  FMT_DXT1 = 18,
  FMT_DXT2_3 = 19,
  FMT_DXT4_5 = 20,
  FMT_24_8 = 22,
  FMT_24_8_FLOAT = 23,
  FMT_16 = 24,
  FMT_16_16 = 25,
  FMT_16_16_16_16 = 26,
  FMT_16_EXPAND = 27,
  FMT_16_16_EXPAND = 28,
  FMT_16_16_16_16_EXPAND = 29,
  FMT_16_FLOAT = 30,
  FMT_16_16_FLOAT = 31,
  FMT_16_16_16_16_FLOAT = 32,
  FMT_32 = 33,
  FMT_32_32 = 34,
  FMT_32_32_32_32 = 35,
  FMT_32_FLOAT = 36,
  FMT_32_32_FLOAT = 37,
  FMT_32_32_32_32_FLOAT = 38,
  FMT_32_AS_8 = 39,
  FMT_32_AS_8_8 = 40,
  FMT_16_MPEG = 41,
  FMT_16_16_MPEG = 42,
  FMT_8_INTERLACED = 43,
  FMT_32_AS_8_INTERLACED = 44,
  FMT_32_AS_8_8_INTERLACED = 45,
  FMT_16_INTERLACED = 46,
  FMT_16_MPEG_INTERLACED = 47,
  FMT_16_16_MPEG_INTERLACED = 48,
  FMT_DXN = 49,
  FMT_8_8_8_8_AS_16_16_16_16 = 50,
  FMT_DXT1_AS_16_16_16_16 = 51,
  FMT_DXT2_3_AS_16_16_16_16 = 52,
  FMT_DXT4_5_AS_16_16_16_16 = 53,
  FMT_2_10_10_10_AS_16_16_16_16 = 54,
  FMT_10_11_11_AS_16_16_16_16 = 55,
  FMT_11_11_10_AS_16_16_16_16 = 56,
  FMT_32_32_32_FLOAT = 57,
  FMT_DXT3A = 58,
  FMT_DXT5A = 59,
  FMT_CTX1 = 60,
  FMT_DXT3A_AS_1_1_1_1 = 61,
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ALU instructions:
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Scalar ALU opcodes; stored in the 6-bit instr_alu_t::scalar_opc field.
// Numbering is explicit because it is not contiguous (there is no value 41).
typedef enum {
  ADDs = 0,
  ADD_PREVs = 1,
  MULs = 2,
  MUL_PREVs = 3,
  MUL_PREV2s = 4,
  MAXs = 5,
  MINs = 6,
  SETEs = 7,
  SETGTs = 8,
  SETGTEs = 9,
  SETNEs = 10,
  FRACs = 11,
  TRUNCs = 12,
  FLOORs = 13,
  EXP_IEEE = 14,
  LOG_CLAMP = 15,
  LOG_IEEE = 16,
  RECIP_CLAMP = 17,
  RECIP_FF = 18,
  RECIP_IEEE = 19,
  RECIPSQ_CLAMP = 20,
  RECIPSQ_FF = 21,
  RECIPSQ_IEEE = 22,
  MOVAs = 23,
  MOVA_FLOORs = 24,
  SUBs = 25,
  SUB_PREVs = 26,
  PRED_SETEs = 27,
  PRED_SETNEs = 28,
  PRED_SETGTs = 29,
  PRED_SETGTEs = 30,
  PRED_SET_INVs = 31,
  PRED_SET_POPs = 32,
  PRED_SET_CLRs = 33,
  PRED_SET_RESTOREs = 34,
  KILLEs = 35,
  KILLGTs = 36,
  KILLGTEs = 37,
  KILLNEs = 38,
  KILLONEs = 39,
  SQRT_IEEE = 40,
  MUL_CONST_0 = 42,
  MUL_CONST_1 = 43,
  ADD_CONST_0 = 44,
  ADD_CONST_1 = 45,
  SUB_CONST_0 = 46,
  SUB_CONST_1 = 47,
  SIN = 48,
  COS = 49,
  RETAIN_PREV = 50,
} instr_scalar_opc_t;
|
|
||||||
|
|
||||||
// Vector ALU opcodes; stored in the 5-bit instr_alu_t::vector_opc field.
typedef enum {
  ADDv = 0,
  MULv = 1,
  MAXv = 2,
  MINv = 3,
  SETEv = 4,
  SETGTv = 5,
  SETGTEv = 6,
  SETNEv = 7,
  FRACv = 8,
  TRUNCv = 9,
  FLOORv = 10,
  MULADDv = 11,
  CNDEv = 12,
  CNDGTEv = 13,
  CNDGTv = 14,
  DOT4v = 15,
  DOT3v = 16,
  DOT2ADDv = 17,
  CUBEv = 18,
  MAX4v = 19,
  PRED_SETE_PUSHv = 20,
  PRED_SETNE_PUSHv = 21,
  PRED_SETGT_PUSHv = 22,
  PRED_SETGTE_PUSHv = 23,
  KILLEv = 24,
  KILLGTv = 25,
  KILLGTEv = 26,
  KILLNEv = 27,
  DSTv = 28,
  MOVAv = 29,
} instr_vector_opc_t;
|
|
||||||
|
|
||||||
// Bit layout of one ALU instruction: three anonymous groups whose bit-field
// widths each total 32 bits (three dwords).
XEPACKEDSTRUCT(instr_alu_t, {
  /* dword0: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t vector_dest : 6;
    uint32_t vector_dest_rel : 1;
    uint32_t abs_constants : 1;
    uint32_t scalar_dest : 6;
    uint32_t scalar_dest_rel : 1;
    uint32_t export_data : 1;
    uint32_t vector_write_mask : 4;
    uint32_t scalar_write_mask : 4;
    uint32_t vector_clamp : 1;
    uint32_t scalar_clamp : 1;
    uint32_t scalar_opc : 6;  // instr_scalar_opc_t
  });
  /* dword1: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t src3_swiz : 8;
    uint32_t src2_swiz : 8;
    uint32_t src1_swiz : 8;
    uint32_t src3_reg_negate : 1;
    uint32_t src2_reg_negate : 1;
    uint32_t src1_reg_negate : 1;
    uint32_t pred_condition : 1;
    uint32_t pred_select : 1;
    uint32_t relative_addr : 1;
    uint32_t const_1_rel_abs : 1;
    uint32_t const_0_rel_abs : 1;
  });
  /* dword2: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t src3_reg : 8;
    uint32_t src2_reg : 8;
    uint32_t src1_reg : 8;
    uint32_t vector_opc : 5;  // instr_vector_opc_t
    uint32_t src3_sel : 1;
    uint32_t src2_sel : 1;
    uint32_t src1_sel : 1;
  });
});
|
|
||||||
|
|
||||||
/*
|
|
||||||
* CF instructions:
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Control-flow opcodes; stored in the 4-bit `opc` field of the instr_cf_*
// structures.
typedef enum {
  NOP = 0,
  EXEC = 1,
  EXEC_END = 2,
  COND_EXEC = 3,
  COND_EXEC_END = 4,
  COND_PRED_EXEC = 5,
  COND_PRED_EXEC_END = 6,
  LOOP_START = 7,
  LOOP_END = 8,
  COND_CALL = 9,
  RETURN = 10,
  COND_JMP = 11,
  ALLOC = 12,
  COND_EXEC_PRED_CLEAN = 13,
  COND_EXEC_PRED_CLEAN_END = 14,
  MARK_VS_FETCH_DONE = 15,
} instr_cf_opc_t;
|
|
||||||
|
|
||||||
// Addressing mode; stored in the 1-bit `address_mode` field of CF instructions.
typedef enum {
  RELATIVE_ADDR = 0,
  ABSOLUTE_ADDR = 1,
} instr_addr_mode_t;
|
|
||||||
|
|
||||||
// Allocation target; stored in the 2-bit instr_cf_alloc_t::buffer_select field.
typedef enum {
  SQ_NO_ALLOC = 0,
  SQ_POSITION = 1,
  SQ_PARAMETER_PIXEL = 2,
  SQ_MEMORY = 3,
} instr_alloc_type_t;
|
|
||||||
|
|
||||||
// Bit layout of an EXEC-family control-flow instruction: a 32-bit group
// followed by a 16-bit group (48 bits of fields in total).
XEPACKEDSTRUCT(instr_cf_exec_t, {
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t address : 12;
    uint32_t count : 3;
    // NOTE(review): field name is misspelled ("yield"); kept as-is because
    // renaming it would break existing users of this struct.
    uint32_t yeild : 1;
    uint32_t serialize : 12;
    uint32_t vc_hi : 4;
  });
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t vc_lo : 2; /* vertex cache? */
    uint32_t bool_addr : 8;
    uint32_t pred_condition : 1;
    uint32_t address_mode : 1;  // instr_addr_mode_t
    uint32_t opc : 4;           // instr_cf_opc_t
  });
  // True when `opc` is one of the six conditional exec opcodes.
  bool is_cond_exec() const {
    return (this->opc == COND_EXEC) || (this->opc == COND_EXEC_END) ||
           (this->opc == COND_PRED_EXEC) || (this->opc == COND_PRED_EXEC_END) ||
           (this->opc == COND_EXEC_PRED_CLEAN) ||
           (this->opc == COND_EXEC_PRED_CLEAN_END);
  }
});
|
|
||||||
|
|
||||||
// Bit layout of a loop control-flow instruction: a 32-bit group followed by a
// 16-bit group. The reserved1 field is split across the two groups.
XEPACKEDSTRUCT(instr_cf_loop_t, {
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t address : 13;
    uint32_t repeat : 1;
    uint32_t reserved0 : 2;
    uint32_t loop_id : 5;
    uint32_t pred_break : 1;
    uint32_t reserved1_hi : 10;
  });
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t reserved1_lo : 10;
    uint32_t condition : 1;
    uint32_t address_mode : 1;  // instr_addr_mode_t
    uint32_t opc : 4;           // instr_cf_opc_t
  });
});
|
|
||||||
|
|
||||||
// Bit layout of a jump/call control-flow instruction: a 32-bit group followed
// by a 16-bit group. The reserved1 field is split across the two groups.
XEPACKEDSTRUCT(instr_cf_jmp_call_t, {
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t address : 13;
    uint32_t force_call : 1;
    uint32_t predicated_jmp : 1;
    uint32_t reserved1_hi : 17;
  });
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t reserved1_lo : 1;
    uint32_t direction : 1;
    uint32_t bool_addr : 8;
    uint32_t condition : 1;
    uint32_t address_mode : 1;  // instr_addr_mode_t
    uint32_t opc : 4;           // instr_cf_opc_t
  });
});
|
|
||||||
|
|
||||||
// Bit layout of an ALLOC control-flow instruction: a 32-bit group followed by
// a 16-bit group. The reserved0 field is split across the two groups.
XEPACKEDSTRUCT(instr_cf_alloc_t, {
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t size : 3;
    uint32_t reserved0_hi : 29;
  });
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t reserved0_lo : 8;
    uint32_t no_serial : 1;
    uint32_t buffer_select : 2;  // instr_alloc_type_t
    uint32_t alloc_mode : 1;
    uint32_t opc : 4;  // instr_cf_opc_t
  });
});
|
|
||||||
|
|
||||||
// Union over the control-flow instruction encodings. The first anonymous
// struct exposes only the `opc` field (after 32 + 12 unnamed bits); the second
// exposes the storage as two dwords.
XEPACKEDUNION(instr_cf_t, {
  instr_cf_exec_t exec;
  instr_cf_loop_t loop;
  instr_cf_jmp_call_t jmp_call;
  instr_cf_alloc_t alloc;
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t : 32;
    uint32_t : 12;
    uint32_t opc : 4;  // instr_cf_opc_t
  });
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t dword_0;
    uint32_t dword_1;
  });

  // True for every exec opcode, conditional or not.
  bool is_exec() const {
    return (this->opc == EXEC) || (this->opc == EXEC_END) ||
           (this->opc == COND_EXEC) || (this->opc == COND_EXEC_END) ||
           (this->opc == COND_PRED_EXEC) || (this->opc == COND_PRED_EXEC_END) ||
           (this->opc == COND_EXEC_PRED_CLEAN) ||
           (this->opc == COND_EXEC_PRED_CLEAN_END);
  }
  // True only for the conditional exec opcodes (excludes EXEC / EXEC_END).
  bool is_cond_exec() const {
    return (this->opc == COND_EXEC) || (this->opc == COND_EXEC_END) ||
           (this->opc == COND_PRED_EXEC) || (this->opc == COND_PRED_EXEC_END) ||
           (this->opc == COND_EXEC_PRED_CLEAN) ||
           (this->opc == COND_EXEC_PRED_CLEAN_END);
  }
});
|
|
||||||
|
|
||||||
/*
|
|
||||||
* FETCH instructions:
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Fetch opcodes; stored in the 5-bit `opc` field of fetch instructions.
// Numbering is explicit because it is sparse.
typedef enum {
  VTX_FETCH = 0,
  TEX_FETCH = 1,
  TEX_GET_BORDER_COLOR_FRAC = 16,
  TEX_GET_COMP_TEX_LOD = 17,
  TEX_GET_GRADIENTS = 18,
  TEX_GET_WEIGHTS = 19,
  TEX_SET_TEX_LOD = 24,
  TEX_SET_GRADIENTS_H = 25,
  TEX_SET_GRADIENTS_V = 26,
  TEX_RESERVED_4 = 27,
} instr_fetch_opc_t;
|
|
||||||
|
|
||||||
// Texture filter selector; used by the 2-bit *_filter fields of
// instr_fetch_tex_t.
typedef enum {
  TEX_FILTER_POINT = 0,
  TEX_FILTER_LINEAR = 1,
  TEX_FILTER_BASEMAP = 2, /* only applicable for mip-filter */
  TEX_FILTER_USE_FETCH_CONST = 3,
} instr_tex_filter_t;
|
|
||||||
|
|
||||||
// Anisotropic filter selector; used by the 3-bit
// instr_fetch_tex_t::aniso_filter field. There is no value 6.
typedef enum {
  ANISO_FILTER_DISABLED = 0,
  ANISO_FILTER_MAX_1_1 = 1,
  ANISO_FILTER_MAX_2_1 = 2,
  ANISO_FILTER_MAX_4_1 = 3,
  ANISO_FILTER_MAX_8_1 = 4,
  ANISO_FILTER_MAX_16_1 = 5,
  ANISO_FILTER_USE_FETCH_CONST = 7,
} instr_aniso_filter_t;
|
|
||||||
|
|
||||||
// Arbitrary filter selector; used by the 3-bit
// instr_fetch_tex_t::arbitrary_filter field. There is no value 6.
typedef enum {
  ARBITRARY_FILTER_2X4_SYM = 0,
  ARBITRARY_FILTER_2X4_ASYM = 1,
  ARBITRARY_FILTER_4X2_SYM = 2,
  ARBITRARY_FILTER_4X2_ASYM = 3,
  ARBITRARY_FILTER_4X4_SYM = 4,
  ARBITRARY_FILTER_4X4_ASYM = 5,
  ARBITRARY_FILTER_USE_FETCH_CONST = 7,
} instr_arbitrary_filter_t;
|
|
||||||
|
|
||||||
// Sample location; used by the 1-bit instr_fetch_tex_t::sample_location field.
typedef enum {
  SAMPLE_CENTROID = 0,
  SAMPLE_CENTER = 1,
} instr_sample_loc_t;
|
|
||||||
|
|
||||||
// Texture dimensionality; used by the 2-bit instr_fetch_tex_t::dimension field.
typedef enum {
  DIMENSION_1D = 0,
  DIMENSION_2D = 1,
  DIMENSION_3D = 2,
  DIMENSION_CUBE = 3,
} instr_dimension_t;
|
|
||||||
|
|
||||||
typedef enum a2xx_sq_surfaceformat instr_surf_fmt_t;
|
|
||||||
|
|
||||||
// Bit layout of a texture fetch instruction: three anonymous groups whose
// bit-field widths each total 32 bits (three dwords).
XEPACKEDSTRUCT(instr_fetch_tex_t, {
  /* dword0: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t opc : 5;  // instr_fetch_opc_t
    uint32_t src_reg : 6;
    uint32_t src_reg_am : 1;
    uint32_t dst_reg : 6;
    uint32_t dst_reg_am : 1;
    uint32_t fetch_valid_only : 1;
    uint32_t const_idx : 5;
    uint32_t tx_coord_denorm : 1;
    uint32_t src_swiz : 6;  // xyz
  });
  /* dword1: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t dst_swiz : 12;         // xyzw
    uint32_t mag_filter : 2;        // instr_tex_filter_t
    uint32_t min_filter : 2;        // instr_tex_filter_t
    uint32_t mip_filter : 2;        // instr_tex_filter_t
    uint32_t aniso_filter : 3;      // instr_aniso_filter_t
    uint32_t arbitrary_filter : 3;  // instr_arbitrary_filter_t
    uint32_t vol_mag_filter : 2;    // instr_tex_filter_t
    uint32_t vol_min_filter : 2;    // instr_tex_filter_t
    uint32_t use_comp_lod : 1;
    uint32_t use_reg_lod : 1;
    uint32_t unk : 1;
    uint32_t pred_select : 1;
  });
  /* dword2: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t use_reg_gradients : 1;
    uint32_t sample_location : 1;  // instr_sample_loc_t
    uint32_t lod_bias : 7;
    uint32_t unused : 5;
    uint32_t dimension : 2;  // instr_dimension_t
    uint32_t offset_x : 5;
    uint32_t offset_y : 5;
    uint32_t offset_z : 5;
    uint32_t pred_condition : 1;
  });
});
|
|
||||||
|
|
||||||
// Bit layout of a vertex fetch instruction: three anonymous groups whose
// bit-field widths each total 32 bits (three dwords).
XEPACKEDSTRUCT(instr_fetch_vtx_t, {
  /* dword0: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t opc : 5;  // instr_fetch_opc_t
    uint32_t src_reg : 6;
    uint32_t src_reg_am : 1;
    uint32_t dst_reg : 6;
    uint32_t dst_reg_am : 1;
    uint32_t must_be_one : 1;
    uint32_t const_index : 5;
    uint32_t const_index_sel : 2;
    uint32_t reserved0 : 3;
    uint32_t src_swiz : 2;
  });
  /* dword1: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t dst_swiz : 12;
    uint32_t format_comp_all : 1; /* '1' for signed, '0' for unsigned? */
    uint32_t num_format_all : 1;  /* '0' for normalized, '1' for unnormalized */
    uint32_t signed_rf_mode_all : 1;
    uint32_t reserved1 : 1;
    uint32_t format : 6;  // instr_surf_fmt_t
    uint32_t reserved2 : 1;
    uint32_t exp_adjust_all : 7;
    uint32_t reserved3 : 1;
    uint32_t pred_select : 1;
  });
  /* dword2: */
  XEPACKEDSTRUCTANONYMOUS({
    uint32_t stride : 8;
    uint32_t offset : 23;
    uint32_t pred_condition : 1;
  });
});
|
|
||||||
|
|
||||||
// Union over the two fetch instruction encodings. The anonymous struct exposes
// only the shared 5-bit `opc` field; the remaining bits of the three dwords
// are unnamed.
XEPACKEDUNION(instr_fetch_t, {
  instr_fetch_tex_t tex;
  instr_fetch_vtx_t vtx;
  XEPACKEDSTRUCTANONYMOUS({
    /* dword0: */
    XEPACKEDSTRUCTANONYMOUS({
      uint32_t opc : 5;  // instr_fetch_opc_t
      uint32_t : 27;
    });
    /* dword1: */
    XEPACKEDSTRUCTANONYMOUS({
      uint32_t : 32;
    });
    /* dword2: */
    XEPACKEDSTRUCTANONYMOUS({
      uint32_t : 32;
    });
  });
});
|
|
||||||
|
|
||||||
} // namespace ucode
|
|
||||||
} // namespace gl4
|
|
||||||
} // namespace gpu
|
|
||||||
} // namespace xe
|
|
||||||
|
|
||||||
#endif // XENIA_GPU_GL4_UCODE_H_
|
|
|
@ -1,780 +0,0 @@
|
||||||
/**
|
|
||||||
******************************************************************************
|
|
||||||
* Xenia : Xbox 360 Emulator Research Project *
|
|
||||||
******************************************************************************
|
|
||||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
|
||||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
|
||||||
******************************************************************************
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
|
||||||
* to deal in the Software without restriction, including without limitation
|
|
||||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
||||||
* and/or sell copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice (including the next
|
|
||||||
* paragraph) shall be included in all copies or substantial portions of the
|
|
||||||
* Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
||||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
* SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "xenia/gpu/gl4/ucode_disassembler.h"
|
|
||||||
|
|
||||||
#include <cstdint>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <cstdlib>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "xenia/base/assert.h"
|
|
||||||
#include "xenia/base/string_buffer.h"
|
|
||||||
#include "xenia/gpu/gl4/ucode.h"
|
|
||||||
|
|
||||||
namespace xe {
|
|
||||||
namespace gpu {
|
|
||||||
namespace gl4 {
|
|
||||||
|
|
||||||
using namespace xe::gpu::gl4::ucode;
|
|
||||||
using namespace xe::gpu::xenos;
|
|
||||||
|
|
||||||
// Indentation prefixes indexed by nesting level: entry N (0..9) holds N tab
// characters. Entries 10..15 hold the placeholder "x".
static const char* levels[] = {
    "",
    "\t",
    "\t\t",
    "\t\t\t",
    "\t\t\t\t",
    "\t\t\t\t\t",
    "\t\t\t\t\t\t",
    "\t\t\t\t\t\t\t",
    "\t\t\t\t\t\t\t\t",
    "\t\t\t\t\t\t\t\t\t",
    "x",
    "x",
    "x",
    "x",
    "x",
    "x",
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ALU instructions:
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Component letters indexed by channel selector. The first four are the
// register components; the last four are extra selectors.
static const char chan_names[] = {
    'x', 'y', 'z', 'w',
    /* these only apply to FETCH dst's: */
    '0', '1', '?', '_',
};
|
|
||||||
|
|
||||||
// Appends the textual form of one ALU source operand to `output`.
//
//   num           - register/constant index; for registers bit 7 (0x80) selects
//                   an abs() wrapper and the low 7 bits are the index.
//   type          - non-zero for a register ("R<n>"), zero for a constant.
//   swiz          - packed 2-bit-per-component swizzle; zero prints no suffix.
//   negate        - non-zero prefixes the operand with '-'.
//   abs_constants - non-zero wraps a constant operand in '|' characters.
//   const_rel     - prints a constant as "C[<n> + a0]" instead of "C<n>".
//   shader_type   - pixel shaders have 256 added to the constant index.
void print_srcreg(StringBuffer* output, uint32_t num, uint32_t type,
                  uint32_t swiz, uint32_t negate, uint32_t abs_constants,
                  bool const_rel, ShaderType shader_type) {
  if (negate) {
    output->Append('-');
  }
  if (type) {
    if (num & 0x80) {
      output->Append("abs(");
    }
    output->AppendFormat("R%u", num & 0x7F);
    if (num & 0x80) {
      output->Append(')');
    }
  } else {
    if (abs_constants) {
      output->Append('|');
    }
    num += shader_type == ShaderType::kPixel ? 256 : 0;

    if (const_rel) {
      output->AppendFormat("C[%u + a0]", num);
    } else {
      output->AppendFormat("C%u", num);
    }

    if (abs_constants) {
      output->Append('|');
    }
  }
  if (swiz) {
    output->Append('.');
    // Each 2-bit swizzle entry is added to the component's own index
    // (mod 4) to pick the printed channel.
    for (int i = 0; i < 4; i++) {
      output->Append(chan_names[(swiz + i) & 0x3]);
      swiz >>= 2;
    }
  }
}
|
|
||||||
|
|
||||||
// Appends the textual form of a destination operand to `output`.
//
//   num     - register or export index.
//   mask    - 4-bit write mask, bit i enabling component i. A full mask (0xf)
//             prints no suffix; otherwise disabled components print as '_'.
//   dst_exp - non-zero prints "export<n>" instead of "R<n>".
void print_dstreg(StringBuffer* output, uint32_t num, uint32_t mask,
                  uint32_t dst_exp) {
  output->AppendFormat("%s%u", dst_exp ? "export" : "R", num);
  if (mask != 0xf) {
    output->Append('.');
    for (int i = 0; i < 4; i++) {
      output->Append((mask & 0x1) ? chan_names[i] : '_');
      mask >>= 1;
    }
  }
}
|
|
||||||
|
|
||||||
// Appends a trailing "\t; <name>" comment for the export slots that have a
// well-known GLSL name: vertex exports 62/63 and pixel export 0. Any other
// slot produces no output.
void print_export_comment(StringBuffer* output, uint32_t num, ShaderType type) {
  const char* name = nullptr;
  if (type == ShaderType::kVertex) {
    if (num == 62) {
      name = "gl_Position";
    } else if (num == 63) {
      name = "gl_PointSize";
    }
  } else if (type == ShaderType::kPixel) {
    if (num == 0) {
      name = "gl_FragColor";
    }
  }

  // With a symbol table available, the varying's name could be looked up
  // here as well.
  if (name != nullptr) {
    output->AppendFormat("\t; %s", name);
  }
}
|
|
||||||
|
|
||||||
// Opcode tables for the two halves of an ALU instruction. Each entry holds the
// number of source operands the opcode consumes and its mnemonic; the array
// index is the opcode value. Slots without an initializer (and the explicit
// gap at scalar opcode 41) have a null name and zero sources.
//
// The tables are read-only lookup data, so they are declared const; this also
// gives them internal linkage.
#define INSTR(opc, num_srcs) \
  { num_srcs, #opc }
const struct {
  uint32_t num_srcs;
  const char* name;
} vector_instructions[0x20] =
    {
        INSTR(ADDv, 2),               // 0
        INSTR(MULv, 2),               // 1
        INSTR(MAXv, 2),               // 2
        INSTR(MINv, 2),               // 3
        INSTR(SETEv, 2),              // 4
        INSTR(SETGTv, 2),             // 5
        INSTR(SETGTEv, 2),            // 6
        INSTR(SETNEv, 2),             // 7
        INSTR(FRACv, 1),              // 8
        INSTR(TRUNCv, 1),             // 9
        INSTR(FLOORv, 1),             // 10
        INSTR(MULADDv, 3),            // 11
        INSTR(CNDEv, 3),              // 12
        INSTR(CNDGTEv, 3),            // 13
        INSTR(CNDGTv, 3),             // 14
        INSTR(DOT4v, 2),              // 15
        INSTR(DOT3v, 2),              // 16
        INSTR(DOT2ADDv, 3),           // 17 -- ???
        INSTR(CUBEv, 2),              // 18
        INSTR(MAX4v, 1),              // 19
        INSTR(PRED_SETE_PUSHv, 2),    // 20
        INSTR(PRED_SETNE_PUSHv, 2),   // 21
        INSTR(PRED_SETGT_PUSHv, 2),   // 22
        INSTR(PRED_SETGTE_PUSHv, 2),  // 23
        INSTR(KILLEv, 2),             // 24
        INSTR(KILLGTv, 2),            // 25
        INSTR(KILLGTEv, 2),           // 26
        INSTR(KILLNEv, 2),            // 27
        INSTR(DSTv, 2),               // 28
        INSTR(MOVAv, 1),              // 29
},
  scalar_instructions[0x40] = {
        INSTR(ADDs, 1),               // 0
        INSTR(ADD_PREVs, 1),          // 1
        INSTR(MULs, 1),               // 2
        INSTR(MUL_PREVs, 1),          // 3
        INSTR(MUL_PREV2s, 1),         // 4
        INSTR(MAXs, 1),               // 5
        INSTR(MINs, 1),               // 6
        INSTR(SETEs, 1),              // 7
        INSTR(SETGTs, 1),             // 8
        INSTR(SETGTEs, 1),            // 9
        INSTR(SETNEs, 1),             // 10
        INSTR(FRACs, 1),              // 11
        INSTR(TRUNCs, 1),             // 12
        INSTR(FLOORs, 1),             // 13
        INSTR(EXP_IEEE, 1),           // 14
        INSTR(LOG_CLAMP, 1),          // 15
        INSTR(LOG_IEEE, 1),           // 16
        INSTR(RECIP_CLAMP, 1),        // 17
        INSTR(RECIP_FF, 1),           // 18
        INSTR(RECIP_IEEE, 1),         // 19
        INSTR(RECIPSQ_CLAMP, 1),      // 20
        INSTR(RECIPSQ_FF, 1),         // 21
        INSTR(RECIPSQ_IEEE, 1),       // 22
        INSTR(MOVAs, 1),              // 23
        INSTR(MOVA_FLOORs, 1),        // 24
        INSTR(SUBs, 1),               // 25
        INSTR(SUB_PREVs, 1),          // 26
        INSTR(PRED_SETEs, 1),         // 27
        INSTR(PRED_SETNEs, 1),        // 28
        INSTR(PRED_SETGTs, 1),        // 29
        INSTR(PRED_SETGTEs, 1),       // 30
        INSTR(PRED_SET_INVs, 1),      // 31
        INSTR(PRED_SET_POPs, 1),      // 32
        INSTR(PRED_SET_CLRs, 1),      // 33
        INSTR(PRED_SET_RESTOREs, 1),  // 34
        INSTR(KILLEs, 1),             // 35
        INSTR(KILLGTs, 1),            // 36
        INSTR(KILLGTEs, 1),           // 37
        INSTR(KILLNEs, 1),            // 38
        INSTR(KILLONEs, 1),           // 39
        INSTR(SQRT_IEEE, 1),          // 40
        {0, nullptr},                 // 41 (no mnemonic)
        INSTR(MUL_CONST_0, 2),        // 42
        INSTR(MUL_CONST_1, 2),        // 43
        INSTR(ADD_CONST_0, 2),        // 44
        INSTR(ADD_CONST_1, 2),        // 45
        INSTR(SUB_CONST_0, 2),        // 46
        INSTR(SUB_CONST_1, 2),        // 47
        INSTR(SIN, 1),                // 48
        INSTR(COS, 1),                // 49
        INSTR(RETAIN_PREV, 1),        // 50
};
#undef INSTR
|
|
||||||
|
|
||||||
// Disassembles one ALU instruction (three dwords starting at |dwords|) into
// |output|.
//
// The line starts with the indentation for |level|, the instruction offset
// |alu_off| and the raw dwords, followed by an "(S)" marker when |sync| is
// non-zero. An ALU instruction can carry a vector op and a scalar op; the
// vector op is printed when its write mask is non-zero, and the scalar op is
// printed when its write mask is non-zero or when there is no vector op.
// |type| is forwarded to the operand/export printers. Always returns 0.
int disasm_alu(StringBuffer* output, const uint32_t* dwords, uint32_t alu_off,
               int level, int sync, ShaderType type) {
  const instr_alu_t* alu = (const instr_alu_t*)dwords;

  output->Append(levels[level]);
  output->AppendFormat("%02x: %08x %08x %08x\t", alu_off, dwords[0], dwords[1],
                       dwords[2]);

  output->AppendFormat(" %sALU:\t", sync ? "(S)" : " ");

  if (!alu->scalar_write_mask && !alu->vector_write_mask) {
    output->Append(" <nop>\n");
  }

  if (alu->vector_write_mask) {
    // Not every opcode slot of the table has a mnemonic (the last entries are
    // zero-initialized), so fall back to the numeric form instead of handing a
    // null string to Append - the same fallback the scalar op uses below.
    const char* vector_name = vector_instructions[alu->vector_opc].name;
    if (vector_name) {
      output->Append(vector_name);
    } else {
      output->AppendFormat("OP(%u)", alu->vector_opc);
    }

    if (alu->pred_select & 0x2) {
      // seems to work similar to conditional execution in ARM instruction
      // set, so let's use a similar syntax for now:
      output->Append((alu->pred_select & 0x1) ? "EQ" : "NE");
    }

    output->Append("\t");

    print_dstreg(output, alu->vector_dest, alu->vector_write_mask,
                 alu->export_data);
    output->Append(" = ");
    // Three-operand ops print src3 first.
    if (vector_instructions[alu->vector_opc].num_srcs == 3) {
      print_srcreg(output, alu->src3_reg, alu->src3_sel, alu->src3_swiz,
                   alu->src3_reg_negate, alu->abs_constants, false, type);
      output->Append(", ");
    }
    bool const_rel = alu->const_0_rel_abs && alu->relative_addr;
    print_srcreg(output, alu->src1_reg, alu->src1_sel, alu->src1_swiz,
                 alu->src1_reg_negate, alu->abs_constants, const_rel, type);
    if (vector_instructions[alu->vector_opc].num_srcs > 1) {
      // When src1 is itself a constant, src2 takes its relative-addressing
      // flag from const_1 instead of const_0.
      if (alu->src1_sel == 0) {
        const_rel = alu->const_1_rel_abs && alu->relative_addr;
      }
      output->Append(", ");
      print_srcreg(output, alu->src2_reg, alu->src2_sel, alu->src2_swiz,
                   alu->src2_reg_negate, alu->abs_constants, const_rel, type);
    }

    if (alu->vector_clamp) {
      output->Append(" CLAMP");
    }
    if (alu->pred_select) {
      output->AppendFormat(" COND(%d)", alu->pred_condition);
    }
    if (alu->export_data) {
      print_export_comment(output, alu->vector_dest, type);
    }

    output->Append('\n');
  }

  if (alu->scalar_write_mask || !alu->vector_write_mask) {
    // 2nd optional scalar op:

    // When a vector op was already printed, the scalar op goes on its own
    // line, padded to line up under it.
    if (alu->vector_write_mask) {
      output->Append(levels[level]);
      output->AppendFormat(" \t\t\t\t\t\t \t");
    }

    if (scalar_instructions[alu->scalar_opc].name) {
      output->AppendFormat("%s\t", scalar_instructions[alu->scalar_opc].name);
    } else {
      output->AppendFormat("OP(%u)\t", alu->scalar_opc);
    }

    print_dstreg(output, alu->scalar_dest, alu->scalar_write_mask,
                 alu->export_data);
    output->Append(" = ");
    if (scalar_instructions[alu->scalar_opc].num_srcs == 2) {
      // MUL/ADD/etc
      // Clever, CONST_0 and CONST_1 are just an extra storage bit.
      // ADD_CONST_0 dest, [const], [reg]
      uint32_t src3_swiz = alu->src3_swiz & ~0x3C;
      uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3;
      uint32_t swiz_b = (src3_swiz & 0x3);
      print_srcreg(output, alu->src3_reg, 0, 0, alu->src3_reg_negate,
                   alu->abs_constants, false, type);
      output->AppendFormat(".%c", chan_names[swiz_a]);
      output->Append(", ");
      // The register operand's index is assembled from the low opcode bit,
      // the middle swizzle bits and the src3 select bit.
      uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) |
                      (alu->src3_sel << 1);
      print_srcreg(output, reg2, 1, 0, alu->src3_reg_negate, alu->abs_constants,
                   false, type);
      output->AppendFormat(".%c", chan_names[swiz_b]);
    } else {
      print_srcreg(output, alu->src3_reg, alu->src3_sel, alu->src3_swiz,
                   alu->src3_reg_negate, alu->abs_constants, false, type);
    }
    if (alu->scalar_clamp) {
      output->Append(" CLAMP");
    }
    if (alu->export_data) {
      print_export_comment(output, alu->scalar_dest, type);
    }
    output->Append('\n');
  }

  return 0;
}
|
|
||||||
|
|
||||||
// Names of the vertex fetch data formats, indexed by the format value taken
// from a vertex fetch instruction. Formats without a known name have a null
// entry. The table is read-only, so it is declared const (which also gives it
// internal linkage).
#define TYPE(id) \
  { #id }
const struct {
  const char* name;
} fetch_types[0xff] = {
    TYPE(FMT_1_REVERSE),  // 0
    {nullptr},            // 1
    TYPE(FMT_8),          // 2
    {nullptr},            // 3
    {nullptr},            // 4
    {nullptr},            // 5
    TYPE(FMT_8_8_8_8),     // 6
    TYPE(FMT_2_10_10_10),  // 7
    {nullptr},             // 8
    {nullptr},             // 9
    TYPE(FMT_8_8),  // 10
    {nullptr},      // 11
    {nullptr},      // 12
    {nullptr},      // 13
    {nullptr},      // 14
    {nullptr},      // 15
    {nullptr},      // 16
    {nullptr},      // 17
    {nullptr},      // 18
    {nullptr},      // 19
    {nullptr},      // 20
    {nullptr},      // 21
    {nullptr},      // 22
    {nullptr},      // 23
    TYPE(FMT_16),           // 24
    TYPE(FMT_16_16),        // 25
    TYPE(FMT_16_16_16_16),  // 26
    {nullptr},              // 27
    {nullptr},              // 28
    {nullptr},              // 29
    {nullptr},              // 30
    {nullptr},              // 31
    {nullptr},              // 32
    TYPE(FMT_32),                 // 33
    TYPE(FMT_32_32),              // 34
    TYPE(FMT_32_32_32_32),        // 35
    TYPE(FMT_32_FLOAT),           // 36
    TYPE(FMT_32_32_FLOAT),        // 37
    TYPE(FMT_32_32_32_32_FLOAT),  // 38
    {nullptr},                    // 39
    {nullptr},                    // 40
    {nullptr},                    // 41
    {nullptr},                    // 42
    {nullptr},                    // 43
    {nullptr},                    // 44
    {nullptr},                    // 45
    {nullptr},                    // 46
    {nullptr},                    // 47
    {nullptr},                    // 48
    {nullptr},                    // 49
    {nullptr},                    // 50
    {nullptr},                    // 51
    {nullptr},                    // 52
    {nullptr},                    // 53
    {nullptr},                    // 54
    {nullptr},                    // 55
    {nullptr},                    // 56
    TYPE(FMT_32_32_32_FLOAT),     // 57
};
#undef TYPE
|
|
||||||
|
|
||||||
// Appends a fetch destination as "\tR<dst_reg>." followed by four channel
// characters. |dst_swiz| holds three bits per channel, lowest channel first,
// so all eight chan_names entries are reachable here.
void print_fetch_dst(StringBuffer* output, uint32_t dst_reg,
                     uint32_t dst_swiz) {
  output->AppendFormat("\tR%u.", dst_reg);
  for (uint32_t shift = 0; shift < 4 * 3; shift += 3) {
    const uint32_t selector = (dst_swiz >> shift) & 0x7;
    output->Append(chan_names[selector]);
  }
}
|
|
||||||
|
|
||||||
// Appends the operands and attributes of a vertex fetch instruction:
// optional predicate suffix, destination, source register/channel, data
// format (by name when known, otherwise numerically), signedness,
// normalization, stride, offset, fetch constant and a dump of a few raw
// fields whose meaning is still being worked out.
void print_fetch_vtx(StringBuffer* output, const instr_fetch_t* fetch) {
  const instr_fetch_vtx_t& vtx = fetch->vtx;
  const bool predicated = vtx.pred_select != 0;

  if (predicated) {
    // Looks similar to conditional execution in the ARM instruction set, so
    // an ARM-like suffix is used for now.
    output->Append(vtx.pred_condition ? "EQ" : "NE");
  }

  print_fetch_dst(output, vtx.dst_reg, vtx.dst_swiz);
  output->AppendFormat(" = R%u.", vtx.src_reg);
  output->Append(chan_names[vtx.src_swiz & 0x3]);

  const char* format_name = fetch_types[vtx.format].name;
  if (format_name) {
    output->AppendFormat(" %s", format_name);
  } else {
    output->AppendFormat(" TYPE(0x%x)", vtx.format);
  }

  output->AppendFormat(" %s", vtx.format_comp_all ? "SIGNED" : "UNSIGNED");
  if (!vtx.num_format_all) {
    output->Append(" NORMALIZED");
  }
  output->AppendFormat(" STRIDE(%u)", vtx.stride);
  if (vtx.offset) {
    output->AppendFormat(" OFFSET(%u)", vtx.offset);
  }
  output->AppendFormat(" CONST(%u, %u)", vtx.const_index,
                       vtx.const_index_sel);
  if (predicated) {
    output->AppendFormat(" COND(%d)", vtx.pred_condition);
  }

  // XXX: raw field dump, always emitted.
  output->AppendFormat(" src_reg_am=%u", vtx.src_reg_am);
  output->AppendFormat(" dst_reg_am=%u", vtx.dst_reg_am);
  output->AppendFormat(" num_format_all=%u", vtx.num_format_all);
  output->AppendFormat(" signed_rf_mode_all=%u", vtx.signed_rf_mode_all);
  output->AppendFormat(" exp_adjust_all=%u", vtx.exp_adjust_all);
}
|
|
||||||
|
|
||||||
// Returns names[value], or "?" when |value| does not index into |names|.
// Keeps the name tables in print_fetch_tex from being read out of bounds.
template <size_t N>
static const char* fetch_enum_name(const char* const (&names)[N],
                                   uint32_t value) {
  return value < N ? names[value] : "?";
}

// Appends the operands and attributes of a texture fetch instruction:
// optional predicate suffix, destination, source register with a three
// channel swizzle, fetch constant index, and every sampler setting that is
// overridden by the instruction rather than taken from the fetch constant.
void print_fetch_tex(StringBuffer* output, const instr_fetch_t* fetch) {
  static const char* const filter[] = {
      "POINT",    // TEX_FILTER_POINT
      "LINEAR",   // TEX_FILTER_LINEAR
      "BASEMAP",  // TEX_FILTER_BASEMAP
  };
  static const char* const aniso_filter[] = {
      "DISABLED",  // ANISO_FILTER_DISABLED
      "MAX_1_1",   // ANISO_FILTER_MAX_1_1
      "MAX_2_1",   // ANISO_FILTER_MAX_2_1
      "MAX_4_1",   // ANISO_FILTER_MAX_4_1
      "MAX_8_1",   // ANISO_FILTER_MAX_8_1
      "MAX_16_1",  // ANISO_FILTER_MAX_16_1
  };
  static const char* const arbitrary_filter[] = {
      "2x4_SYM",   // ARBITRARY_FILTER_2X4_SYM
      "2x4_ASYM",  // ARBITRARY_FILTER_2X4_ASYM
      "4x2_SYM",   // ARBITRARY_FILTER_4X2_SYM
      "4x2_ASYM",  // ARBITRARY_FILTER_4X2_ASYM
      "4x4_SYM",   // ARBITRARY_FILTER_4X4_SYM
      "4x4_ASYM",  // ARBITRARY_FILTER_4X4_ASYM
  };
  static const char* const sample_loc[] = {
      "CENTROID",  // SAMPLE_CENTROID
      "CENTER",    // SAMPLE_CENTER
  };
  const instr_fetch_tex_t* tex = &fetch->tex;
  uint32_t src_swiz = tex->src_swiz;

  if (tex->pred_select) {
    // seems to work similar to conditional execution in ARM instruction
    // set, so let's use a similar syntax for now:
    output->Append(tex->pred_condition ? "EQ" : "NE");
  }

  print_fetch_dst(output, tex->dst_reg, tex->dst_swiz);
  output->AppendFormat(" = R%u.", tex->src_reg);
  for (int i = 0; i < 3; i++) {
    output->Append(chan_names[src_swiz & 0x3]);
    src_swiz >>= 2;
  }
  output->AppendFormat(" CONST(%u)", tex->const_idx);
  if (tex->fetch_valid_only) {
    output->Append(" VALID_ONLY");
  }
  if (tex->tx_coord_denorm) {
    output->Append(" DENORM");
  }
  // The checks below only exclude the USE_FETCH_CONST sentinel. The field
  // widths are declared outside this file (NOTE(review): confirm), so any
  // other value that does not fit its name table is printed as "?" rather
  // than read past the end of the table.
  if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) {
    output->AppendFormat(
        " MAG(%s)",
        fetch_enum_name(filter, static_cast<uint32_t>(tex->mag_filter)));
  }
  if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) {
    output->AppendFormat(
        " MIN(%s)",
        fetch_enum_name(filter, static_cast<uint32_t>(tex->min_filter)));
  }
  if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) {
    output->AppendFormat(
        " MIP(%s)",
        fetch_enum_name(filter, static_cast<uint32_t>(tex->mip_filter)));
  }
  if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) {
    output->AppendFormat(
        " ANISO(%s)",
        fetch_enum_name(aniso_filter,
                        static_cast<uint32_t>(tex->aniso_filter)));
  }
  if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) {
    output->AppendFormat(
        " ARBITRARY(%s)",
        fetch_enum_name(arbitrary_filter,
                        static_cast<uint32_t>(tex->arbitrary_filter)));
  }
  if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) {
    output->AppendFormat(
        " VOL_MAG(%s)",
        fetch_enum_name(filter, static_cast<uint32_t>(tex->vol_mag_filter)));
  }
  if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) {
    output->AppendFormat(
        " VOL_MIN(%s)",
        fetch_enum_name(filter, static_cast<uint32_t>(tex->vol_min_filter)));
  }
  if (!tex->use_comp_lod) {
    // NOTE(review): inside this branch use_comp_lod is always 0, so LOD()
    // always prints 0. Kept as-is to preserve the existing output.
    output->AppendFormat(" LOD(%u)", tex->use_comp_lod);
    output->AppendFormat(" LOD_BIAS(%u)", tex->lod_bias);
  }
  if (tex->use_reg_lod) {
    output->AppendFormat(" REG_LOD(%u)", tex->use_reg_lod);
  }
  if (tex->use_reg_gradients) {
    output->Append(" USE_REG_GRADIENTS");
  }
  output->AppendFormat(
      " LOCATION(%s)",
      fetch_enum_name(sample_loc, static_cast<uint32_t>(tex->sample_location)));
  if (tex->offset_x || tex->offset_y || tex->offset_z) {
    output->AppendFormat(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y,
                         tex->offset_z);
  }
  if (tex->pred_select) {
    output->AppendFormat(" COND(%d)", tex->pred_condition);
  }
}
|
|
||||||
|
|
||||||
struct {
|
|
||||||
const char* name;
|
|
||||||
void (*fxn)(StringBuffer* output, const instr_fetch_t* cf);
|
|
||||||
} fetch_instructions[] = {
|
|
||||||
#define INSTR(opc, name, fxn) \
|
|
||||||
{ name, fxn }
|
|
||||||
INSTR(VTX_FETCH, "VERTEX", print_fetch_vtx), // 0
|
|
||||||
INSTR(TEX_FETCH, "SAMPLE", print_fetch_tex), // 1
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
INSTR(TEX_GET_BORDER_COLOR_FRAC, "?", print_fetch_tex), // 16
|
|
||||||
INSTR(TEX_GET_COMP_TEX_LOD, "?", print_fetch_tex), // 17
|
|
||||||
INSTR(TEX_GET_GRADIENTS, "?", print_fetch_tex), // 18
|
|
||||||
INSTR(TEX_GET_WEIGHTS, "?", print_fetch_tex), // 19
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
{0, 0},
|
|
||||||
INSTR(TEX_SET_TEX_LOD, "SET_TEX_LOD", print_fetch_tex), // 24
|
|
||||||
INSTR(TEX_SET_GRADIENTS_H, "?", print_fetch_tex), // 25
|
|
||||||
INSTR(TEX_SET_GRADIENTS_V, "?", print_fetch_tex), // 26
|
|
||||||
INSTR(TEX_RESERVED_4, "?", print_fetch_tex), // 27
|
|
||||||
#undef INSTR
|
|
||||||
};
|
|
||||||
|
|
||||||
// Disassembles one fetch instruction (three dwords starting at |dwords|) into
// |output|.
//
// The line starts with the indentation for |level|, the instruction offset
// |alu_off| and the raw dwords, followed by an "(S)" marker when |sync| is
// non-zero. Opcodes that have no handler are printed as "???". Always
// returns 0.
int disasm_fetch(StringBuffer* output, const uint32_t* dwords, uint32_t alu_off,
                 int level, int sync) {
  const instr_fetch_t* fetch = (const instr_fetch_t*)dwords;

  output->Append(levels[level]);
  output->AppendFormat("%02x: %08x %08x %08x\t", alu_off, dwords[0], dwords[1],
                       dwords[2]);

  output->AppendFormat(" %sFETCH:\t", sync ? "(S)" : " ");

  // fetch_instructions only has entries up to its last named opcode, so the
  // opcode must be range-checked before it is used as an index; opcodes past
  // the end are treated like any other unknown opcode.
  const size_t opcode = static_cast<size_t>(fetch->opc);
  const size_t opcode_count =
      sizeof(fetch_instructions) / sizeof(fetch_instructions[0]);
  if (opcode < opcode_count && fetch_instructions[opcode].fxn) {
    output->Append(fetch_instructions[opcode].name);
    fetch_instructions[opcode].fxn(output, fetch);
  } else {
    output->Append("???");
  }
  output->Append('\n');

  return 0;
}
|
|
||||||
|
|
||||||
// Operand printer for control-flow opcodes that have nothing to show beyond
// their mnemonic; intentionally appends nothing. Both parameters are unused.
void print_cf_nop(StringBuffer* output, const instr_cf_t* cf) {}
|
|
||||||
|
|
||||||
// Appends the operands of an EXEC-family control-flow instruction: target
// address and instruction count, followed by whichever optional attributes
// are set (yield, VC bits, bool constant address, absolute addressing and,
// for conditional variants, the predicate condition).
void print_cf_exec(StringBuffer* output, const instr_cf_t* cf) {
  const auto& exec = cf->exec;

  output->AppendFormat(" ADDR(0x%x) CNT(0x%x)", exec.address, exec.count);

  if (exec.yeild) {
    output->Append(" YIELD");
  }

  // The VC value is stored split across two fields.
  const uint8_t vc = static_cast<uint8_t>(exec.vc_hi | (exec.vc_lo << 2));
  if (vc != 0) {
    output->AppendFormat(" VC(0x%x)", vc);
  }

  if (exec.bool_addr) {
    output->AppendFormat(" BOOL_ADDR(0x%x)", exec.bool_addr);
  }

  if (exec.address_mode == ABSOLUTE_ADDR) {
    output->Append(" ABSOLUTE_ADDR");
  }

  if (cf->is_cond_exec()) {
    output->AppendFormat(" COND(%d)", exec.pred_condition);
  }
}
|
|
||||||
|
|
||||||
// Appends the operands of a LOOP_START / LOOP_END control-flow instruction:
// the target address, the loop id and, when set, the absolute addressing
// marker.
void print_cf_loop(StringBuffer* output, const instr_cf_t* cf) {
  const auto& loop = cf->loop;
  output->AppendFormat(" ADDR(0x%x) LOOP_ID(%d)", loop.address, loop.loop_id);
  const bool absolute = loop.address_mode == ABSOLUTE_ADDR;
  if (absolute) {
    output->Append(" ABSOLUTE_ADDR");
  }
}
|
|
||||||
|
|
||||||
// Appends the operands of a jump/call/return control-flow instruction: the
// target address and direction, followed by the optional attributes that are
// set (forced call, predicate condition, bool constant address, absolute
// addressing).
void print_cf_jmp_call(StringBuffer* output, const instr_cf_t* cf) {
  const auto& jmp = cf->jmp_call;

  output->AppendFormat(" ADDR(0x%x) DIR(%d)", jmp.address, jmp.direction);

  if (jmp.force_call) {
    output->Append(" FORCE_CALL");
  }

  if (jmp.predicated_jmp) {
    output->AppendFormat(" COND(%d)", jmp.condition);
  }

  if (jmp.bool_addr) {
    output->AppendFormat(" BOOL_ADDR(0x%x)", jmp.bool_addr);
  }

  if (jmp.address_mode == ABSOLUTE_ADDR) {
    output->Append(" ABSOLUTE_ADDR");
  }
}
|
|
||||||
|
|
||||||
// Appends the operands of an ALLOC control-flow instruction: the name of the
// selected export buffer, the allocation size and the optional NO_SERIAL /
// ALLOC_MODE markers.
void print_cf_alloc(StringBuffer* output, const instr_cf_t* cf) {
  // Indexed by the instruction's buffer_select field.
  static const char* bufname[] = {
      "NO ALLOC",     // SQ_NO_ALLOC
      "POSITION",     // SQ_POSITION
      "PARAM/PIXEL",  // SQ_PARAMETER_PIXEL
      "MEMORY",       // SQ_MEMORY
  };

  const auto& alloc = cf->alloc;
  const char* buffer_name = bufname[alloc.buffer_select];
  output->AppendFormat(" %s SIZE(0x%x)", buffer_name, alloc.size);

  if (alloc.no_serial) {
    output->Append(" NO_SERIAL");
  }

  // Meaning of this bit is unknown.
  if (alloc.alloc_mode) {
    output->Append(" ALLOC_MODE");
  }
}
|
|
||||||
|
|
||||||
struct {
|
|
||||||
const char* name;
|
|
||||||
void (*fxn)(StringBuffer* output, const instr_cf_t* cf);
|
|
||||||
} cf_instructions[] = {
|
|
||||||
#define INSTR(opc, fxn) \
|
|
||||||
{ #opc, fxn }
|
|
||||||
INSTR(NOP, print_cf_nop), //
|
|
||||||
INSTR(EXEC, print_cf_exec), //
|
|
||||||
INSTR(EXEC_END, print_cf_exec), //
|
|
||||||
INSTR(COND_EXEC, print_cf_exec), //
|
|
||||||
INSTR(COND_EXEC_END, print_cf_exec), //
|
|
||||||
INSTR(COND_PRED_EXEC, print_cf_exec), //
|
|
||||||
INSTR(COND_PRED_EXEC_END, print_cf_exec), //
|
|
||||||
INSTR(LOOP_START, print_cf_loop), //
|
|
||||||
INSTR(LOOP_END, print_cf_loop), //
|
|
||||||
INSTR(COND_CALL, print_cf_jmp_call), //
|
|
||||||
INSTR(RETURN, print_cf_jmp_call), //
|
|
||||||
INSTR(COND_JMP, print_cf_jmp_call), //
|
|
||||||
INSTR(ALLOC, print_cf_alloc), //
|
|
||||||
INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), //
|
|
||||||
INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), //
|
|
||||||
INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ??
|
|
||||||
#undef INSTR
|
|
||||||
};
|
|
||||||
|
|
||||||
// Appends one control-flow instruction to |output|: the indentation for
// |level|, the instruction's first three 16-bit words in hex, its mnemonic and
// then its operands as printed by the opcode's handler, ending with a newline.
static void print_cf(StringBuffer* output, const instr_cf_t* cf, int level) {
  output->Append(levels[level]);

  auto words = reinterpret_cast<const uint16_t*>(cf);
  output->AppendFormat(" %04x %04x %04x \t", words[0], words[1],
                       words[2]);

  // The mnemonic is plain text, not a format string, so it is appended
  // verbatim rather than being interpreted by AppendFormat.
  output->Append(cf_instructions[cf->opc].name);
  cf_instructions[cf->opc].fxn(output, cf);
  output->Append('\n');
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The adreno shader microcode consists of two parts:
|
|
||||||
* 1) A CF (control-flow) program, at the header of the compiled shader,
|
|
||||||
* which refers to ALU/FETCH instructions that follow it by address.
|
|
||||||
* 2) ALU and FETCH instructions
|
|
||||||
*/
|
|
||||||
// Disassembles the ALU/FETCH instructions referenced by one EXEC control-flow
// instruction.
//
// |cf| supplies the address of the first instruction (in units of three
// dwords from the start of |dwords|), the instruction count, and a serialize
// word holding two bits per instruction: bit 0 selects FETCH over ALU and
// bit 1 is passed on as the sync flag. |dword_count| is the number of dwords
// available in |dwords|; |level| and |type| are forwarded to the instruction
// printers.
void disasm_exec(StringBuffer* output, const uint32_t* dwords,
                 size_t dword_count, int level, ShaderType type,
                 const instr_cf_t* cf) {
  uint32_t sequence = cf->exec.serialize;
  for (uint32_t i = 0; i < cf->exec.count; i++) {
    uint32_t alu_off = (cf->exec.address + i);
    // Each instruction occupies three dwords. Stop rather than read past the
    // end of the shader when the control-flow data points outside of it.
    if ((static_cast<size_t>(alu_off) + 1) * 3 > dword_count) {
      break;
    }
    if (sequence & 0x1) {
      disasm_fetch(output, dwords + alu_off * 3, alu_off, level,
                   sequence & 0x2);
    } else {
      disasm_alu(output, dwords + alu_off * 3, alu_off, level, sequence & 0x2,
                 type);
    }
    sequence >>= 2;
  }
}
|
|
||||||
|
|
||||||
// Disassembles a shader's microcode into human-readable text.
//
// The control-flow program at the start of |dwords| packs two 48-bit CF
// instructions into every three dwords. Each pair is unpacked and printed,
// and for EXEC-type instructions the ALU/FETCH instructions they reference
// are printed right after. Disassembly stops after the pair that contains an
// EXEC_END instruction, or when fewer than three dwords remain.
//
// |type| selects vertex or pixel shader conventions for operand printing and
// |dword_count| is the number of dwords available in |dwords|.
std::string DisassembleShader(ShaderType type, const uint32_t* dwords,
                              size_t dword_count) {
  StringBuffer string_buffer(256 * 1024);

  instr_cf_t cfa;
  instr_cf_t cfb;
  // An unsigned index avoids the signed/unsigned comparison, and the
  // `idx + 2` bound guarantees that all three dwords of the pair are inside
  // the buffer even when dword_count is not a multiple of three.
  for (size_t idx = 0; idx + 2 < dword_count; idx += 3) {
    uint32_t dword_0 = dwords[idx + 0];
    uint32_t dword_1 = dwords[idx + 1];
    uint32_t dword_2 = dwords[idx + 2];
    // First instruction: all of dword 0 plus the low half of dword 1.
    cfa.dword_0 = dword_0;
    cfa.dword_1 = dword_1 & 0xFFFF;
    // Second instruction: the high half of dword 1 plus all of dword 2.
    cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
    cfb.dword_1 = dword_2 >> 16;
    print_cf(&string_buffer, &cfa, 0);
    if (cfa.is_exec()) {
      disasm_exec(&string_buffer, dwords, dword_count, 0, type, &cfa);
    }
    print_cf(&string_buffer, &cfb, 0);
    if (cfb.is_exec()) {
      disasm_exec(&string_buffer, dwords, dword_count, 0, type, &cfb);
    }
    if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
      break;
    }
  }

  return string_buffer.to_string();
}
|
|
||||||
|
|
||||||
} // namespace gl4
|
|
||||||
} // namespace gpu
|
|
||||||
} // namespace xe
|
|
|
@ -1,28 +0,0 @@
|
||||||
/**
|
|
||||||
******************************************************************************
|
|
||||||
* Xenia : Xbox 360 Emulator Research Project *
|
|
||||||
******************************************************************************
|
|
||||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
|
||||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
|
||||||
******************************************************************************
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef XENIA_GPU_GL4_UCODE_DISASSEMBLER_H_
|
|
||||||
#define XENIA_GPU_GL4_UCODE_DISASSEMBLER_H_
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "xenia/gpu/xenos.h"
|
|
||||||
|
|
||||||
namespace xe {
|
|
||||||
namespace gpu {
|
|
||||||
namespace gl4 {
|
|
||||||
|
|
||||||
std::string DisassembleShader(ShaderType type, const uint32_t* dwords,
|
|
||||||
size_t dword_count);
|
|
||||||
|
|
||||||
} // namespace gl4
|
|
||||||
} // namespace gpu
|
|
||||||
} // namespace xe
|
|
||||||
|
|
||||||
#endif // XENIA_GPU_GL4_UCODE_DISASSEMBLER_H_
|
|
|
@ -55,6 +55,13 @@ GlslShaderTranslator::GlslShaderTranslator(Dialect dialect)
|
||||||
|
|
||||||
GlslShaderTranslator::~GlslShaderTranslator() = default;
|
GlslShaderTranslator::~GlslShaderTranslator() = default;
|
||||||
|
|
||||||
|
// Returns the translator to its initial state: the base class is reset
// first, then the depth counter, the depth prefix string and the source
// buffer owned by this class are cleared.
void GlslShaderTranslator::Reset() {
  ShaderTranslator::Reset();

  // GLSL-specific state.
  depth_ = 0;
  depth_prefix_[0] = 0;
  source_.Reset();
}
|
||||||
|
|
||||||
void GlslShaderTranslator::EmitTranslationError(const char* message) {
|
void GlslShaderTranslator::EmitTranslationError(const char* message) {
|
||||||
ShaderTranslator::EmitTranslationError(message);
|
ShaderTranslator::EmitTranslationError(message);
|
||||||
EmitSourceDepth("// TRANSLATION ERROR: %s\n", message);
|
EmitSourceDepth("// TRANSLATION ERROR: %s\n", message);
|
||||||
|
@ -84,7 +91,7 @@ void GlslShaderTranslator::StartTranslation() {
|
||||||
// We have a large amount of shared state defining uniforms and some common
|
// We have a large amount of shared state defining uniforms and some common
|
||||||
// utility functions used in both vertex and pixel shaders.
|
// utility functions used in both vertex and pixel shaders.
|
||||||
EmitSource(R"(
|
EmitSource(R"(
|
||||||
version 450
|
#version 450
|
||||||
#extension all : warn
|
#extension all : warn
|
||||||
#extension GL_ARB_bindless_texture : require
|
#extension GL_ARB_bindless_texture : require
|
||||||
#extension GL_ARB_explicit_uniform_location : require
|
#extension GL_ARB_explicit_uniform_location : require
|
||||||
|
@ -228,7 +235,7 @@ void applyAlphaTest(int alpha_func, float alpha_ref) {
|
||||||
if (!passes) discard;
|
if (!passes) discard;
|
||||||
}
|
}
|
||||||
void processFragment(const in StateData state);
|
void processFragment(const in StateData state);
|
||||||
void main() { +
|
void main() {
|
||||||
const StateData state = states[draw_id];
|
const StateData state = states[draw_id];
|
||||||
processFragment(state);
|
processFragment(state);
|
||||||
if (state.alpha_test.x != 0.0) {
|
if (state.alpha_test.x != 0.0) {
|
||||||
|
@ -241,11 +248,13 @@ void main() { +
|
||||||
// Add vertex shader input declarations.
|
// Add vertex shader input declarations.
|
||||||
if (is_vertex_shader()) {
|
if (is_vertex_shader()) {
|
||||||
for (auto& binding : vertex_bindings()) {
|
for (auto& binding : vertex_bindings()) {
|
||||||
const char* type_name =
|
for (auto& attrib : binding.attributes) {
|
||||||
GetVertexFormatTypeName(binding.fetch_instr.attributes.data_format);
|
const char* type_name =
|
||||||
EmitSource("layout(location = %d) in %s vf%u_%d;\n",
|
GetVertexFormatTypeName(attrib.fetch_instr.attributes.data_format);
|
||||||
binding.binding_index, type_name, binding.fetch_constant,
|
EmitSource("layout(location = %d) in %s vf%u_%d;\n",
|
||||||
binding.fetch_instr.attributes.offset);
|
attrib.attrib_index, type_name, binding.fetch_constant,
|
||||||
|
attrib.fetch_instr.attributes.offset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -273,6 +282,10 @@ void main() { +
|
||||||
EmitSource(" bool p0 = false;\n");
|
EmitSource(" bool p0 = false;\n");
|
||||||
// Address register when using absolute addressing.
|
// Address register when using absolute addressing.
|
||||||
EmitSource(" int a0 = 0;\n");
|
EmitSource(" int a0 = 0;\n");
|
||||||
|
// Temps for source register values.
|
||||||
|
EmitSource(" vec4 src0;\n");
|
||||||
|
EmitSource(" vec4 src1;\n");
|
||||||
|
EmitSource(" vec4 src2;\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> GlslShaderTranslator::CompleteTranslation() {
|
std::vector<uint8_t> GlslShaderTranslator::CompleteTranslation() {
|
||||||
|
@ -300,7 +313,7 @@ void GlslShaderTranslator::ProcessExecInstructionBegin(
|
||||||
EmitSourceDepth("{\n");
|
EmitSourceDepth("{\n");
|
||||||
break;
|
break;
|
||||||
case ParsedExecInstruction::Type::kConditional:
|
case ParsedExecInstruction::Type::kConditional:
|
||||||
EmitSourceDepth("if (state.bool_consts[%d] & (1 << %d) == %c) {\n",
|
EmitSourceDepth("if ((state.bool_consts[%d] & (1 << %d)) == %c) {\n",
|
||||||
instr.bool_constant_index / 32,
|
instr.bool_constant_index / 32,
|
||||||
instr.bool_constant_index % 32,
|
instr.bool_constant_index % 32,
|
||||||
instr.condition ? '1' : '0');
|
instr.condition ? '1' : '0');
|
||||||
|
@ -374,18 +387,26 @@ void GlslShaderTranslator::ProcessVertexFetchInstruction(
|
||||||
Indent();
|
Indent();
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < instr.operand_count; ++i) {
|
if (instr.result.stores_non_constants()) {
|
||||||
if (instr.operands[i].storage_source !=
|
for (size_t i = 0; i < instr.operand_count; ++i) {
|
||||||
InstructionStorageSource::kVertexFetchConstant) {
|
if (instr.operands[i].storage_source !=
|
||||||
EmitLoadOperand(i, instr.operands[i]);
|
InstructionStorageSource::kVertexFetchConstant) {
|
||||||
|
EmitLoadOperand(i, instr.operands[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
switch (instr.opcode) {
|
switch (instr.opcode) {
|
||||||
case FetchOpcode::kVertexFetch:
|
case FetchOpcode::kVertexFetch:
|
||||||
EmitSourceDepth("pv = vf%u_%d;\n", instr.operands[1].storage_index,
|
EmitSourceDepth("pv.");
|
||||||
instr.attributes.offset);
|
for (int i = 0;
|
||||||
break;
|
i < GetVertexFormatComponentCount(instr.attributes.data_format);
|
||||||
|
++i) {
|
||||||
|
EmitSource("%c", GetCharForComponentIndex(i));
|
||||||
|
}
|
||||||
|
EmitSource(" = vf%u_%d;\n", instr.operands[1].storage_index,
|
||||||
|
instr.attributes.offset);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EmitStoreVectorResult(instr.result);
|
EmitStoreVectorResult(instr.result);
|
||||||
|
@ -417,7 +438,7 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
case FetchOpcode::kTextureFetch:
|
case FetchOpcode::kTextureFetch:
|
||||||
switch (instr.dimension) {
|
switch (instr.dimension) {
|
||||||
case TextureDimension::k1D:
|
case TextureDimension::k1D:
|
||||||
EmitSourceDepth("if (state.texture_samplers[%d] != 0.0) {\n",
|
EmitSourceDepth("if (state.texture_samplers[%d] != 0) {\n",
|
||||||
instr.operands[1].storage_index);
|
instr.operands[1].storage_index);
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
" pv = texture(sampler1D(state.texture_samplers[%d]), "
|
" pv = texture(sampler1D(state.texture_samplers[%d]), "
|
||||||
|
@ -428,7 +449,7 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
EmitSourceDepth("}\n");
|
EmitSourceDepth("}\n");
|
||||||
break;
|
break;
|
||||||
case TextureDimension::k2D:
|
case TextureDimension::k2D:
|
||||||
EmitSourceDepth("if (state.texture_samplers[%d] != 0.0) {\n",
|
EmitSourceDepth("if (state.texture_samplers[%d] != 0) {\n",
|
||||||
instr.operands[1].storage_index);
|
instr.operands[1].storage_index);
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
" pv = texture(sampler2D(state.texture_samplers[%d]), "
|
" pv = texture(sampler2D(state.texture_samplers[%d]), "
|
||||||
|
@ -439,7 +460,7 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
EmitSourceDepth("}\n");
|
EmitSourceDepth("}\n");
|
||||||
break;
|
break;
|
||||||
case TextureDimension::k3D:
|
case TextureDimension::k3D:
|
||||||
EmitSourceDepth("if (state.texture_samplers[%d] != 0.0) {\n",
|
EmitSourceDepth("if (state.texture_samplers[%d] != 0) {\n",
|
||||||
instr.operands[1].storage_index);
|
instr.operands[1].storage_index);
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
" pv = texture(sampler3D(state.texture_samplers[%d]), "
|
" pv = texture(sampler3D(state.texture_samplers[%d]), "
|
||||||
|
@ -451,7 +472,7 @@ void GlslShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
break;
|
break;
|
||||||
case TextureDimension::kCube:
|
case TextureDimension::kCube:
|
||||||
// TODO(benvanik): undo CUBEv logic on t? (s,t,faceid)
|
// TODO(benvanik): undo CUBEv logic on t? (s,t,faceid)
|
||||||
EmitSourceDepth("if (state.texture_samplers[%d] != 0.0) {\n",
|
EmitSourceDepth("if (state.texture_samplers[%d] != 0) {\n",
|
||||||
instr.operands[1].storage_index);
|
instr.operands[1].storage_index);
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
" pv = texture(samplerCube(state.texture_samplers[%d]), "
|
" pv = texture(samplerCube(state.texture_samplers[%d]), "
|
||||||
|
@ -835,35 +856,36 @@ void GlslShaderTranslator::ProcessVectorAluInstruction(
|
||||||
|
|
||||||
// max4 dest, src0
|
// max4 dest, src0
|
||||||
case AluVectorOpcode::kMax4:
|
case AluVectorOpcode::kMax4:
|
||||||
EmitSourceDepth("pv = max(src0.x, src0.y, src0.z, src0.w).xxxx;\n");
|
EmitSourceDepth(
|
||||||
|
"pv = max(src0.x, max(src0.y, max(src0.z, src0.w))).xxxx;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// setp_eq_push dest, src0, src1
|
// setp_eq_push dest, src0, src1
|
||||||
case AluVectorOpcode::kSetpEqPush:
|
case AluVectorOpcode::kSetpEqPush:
|
||||||
EmitSourceDepth("p0 = src0.w == 0.0 && src1.w == 0.0 ? true : false;\n");
|
EmitSourceDepth("p0 = src0.w == 0.0 && src1.w == 0.0 ? true : false;\n");
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
"pv = src0.x == 0.0 && src1.x == 0.0 ? 0.0 : src0.x + 1.0;\n");
|
"pv = vec4(src0.x == 0.0 && src1.x == 0.0 ? 0.0 : src0.x + 1.0);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// setp_ne_push dest, src0, src1
|
// setp_ne_push dest, src0, src1
|
||||||
case AluVectorOpcode::kSetpNePush:
|
case AluVectorOpcode::kSetpNePush:
|
||||||
EmitSourceDepth("p0 = src0.w == 0.0 && src1.w != 0.0 ? true : false;\n");
|
EmitSourceDepth("p0 = src0.w == 0.0 && src1.w != 0.0 ? true : false;\n");
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
"pv = src0.x == 0.0 && src1.x != 0.0 ? 0.0 : src0.x + 1.0;\n");
|
"pv = vec4(src0.x == 0.0 && src1.x != 0.0 ? 0.0 : src0.x + 1.0);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// setp_gt_push dest, src0, src1
|
// setp_gt_push dest, src0, src1
|
||||||
case AluVectorOpcode::kSetpGtPush:
|
case AluVectorOpcode::kSetpGtPush:
|
||||||
EmitSourceDepth("p0 = src0.w == 0.0 && src1.w > 0.0 ? true : false;\n");
|
EmitSourceDepth("p0 = src0.w == 0.0 && src1.w > 0.0 ? true : false;\n");
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
"pv = src0.x == 0.0 && src1.x > 0.0 ? 0.0 : src0.x + 1.0;\n");
|
"pv = vec4(src0.x == 0.0 && src1.x > 0.0 ? 0.0 : src0.x + 1.0);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// setp_ge_push dest, src0, src1
|
// setp_ge_push dest, src0, src1
|
||||||
case AluVectorOpcode::kSetpGePush:
|
case AluVectorOpcode::kSetpGePush:
|
||||||
EmitSourceDepth("p0 = src0.w == 0.0 && src1.w >= 0.0 ? true : false;\n");
|
EmitSourceDepth("p0 = src0.w == 0.0 && src1.w >= 0.0 ? true : false;\n");
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
"pv = src0.x == 0.0 && src1.x >= 0.0 ? 0.0 : src0.x + 1.0;\n");
|
"pv = vec4(src0.x == 0.0 && src1.x >= 0.0 ? 0.0 : src0.x + 1.0);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// kill_eq dest, src0, src1
|
// kill_eq dest, src0, src1
|
||||||
|
@ -951,7 +973,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction(
|
||||||
switch (instr.scalar_opcode) {
|
switch (instr.scalar_opcode) {
|
||||||
// adds dest, src0.ab
|
// adds dest, src0.ab
|
||||||
case AluScalarOpcode::kAdds:
|
case AluScalarOpcode::kAdds:
|
||||||
EmitSourceDepth("ps = src0.x + src1.y;\n");
|
EmitSourceDepth("ps = src0.x + src0.y;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// adds_prev dest, src0.a
|
// adds_prev dest, src0.a
|
||||||
|
@ -961,7 +983,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction(
|
||||||
|
|
||||||
// muls dest, src0.ab
|
// muls dest, src0.ab
|
||||||
case AluScalarOpcode::kMuls:
|
case AluScalarOpcode::kMuls:
|
||||||
EmitSourceDepth("ps = src0.x * src1.y;\n");
|
EmitSourceDepth("ps = src0.x * src0.y;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// muls_prev dest, src0.a
|
// muls_prev dest, src0.a
|
||||||
|
@ -972,18 +994,18 @@ void GlslShaderTranslator::ProcessScalarAluInstruction(
|
||||||
// muls_prev2 dest, src0.ab
|
// muls_prev2 dest, src0.ab
|
||||||
case AluScalarOpcode::kMulsPrev2:
|
case AluScalarOpcode::kMulsPrev2:
|
||||||
EmitSourceDepth(
|
EmitSourceDepth(
|
||||||
"ps = ps == -FLT_MAX || isinf(ps) || isinf(src0.y) || src0.y <= 0.0 "
|
"ps = ps == -FLT_MAX || isinf(ps) || isnan(ps) || isnan(src0.y) || "
|
||||||
"? -FLT_MAX : src0.x * ps;\n");
|
"src0.y <= 0.0 ? -FLT_MAX : src0.x * ps;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// maxs dest, src0.ab
|
// maxs dest, src0.ab
|
||||||
case AluScalarOpcode::kMaxs:
|
case AluScalarOpcode::kMaxs:
|
||||||
EmitSourceDepth("ps = max(src0.x, src1.y);\n");
|
EmitSourceDepth("ps = max(src0.x, src0.y);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// mins dest, src0.ab
|
// mins dest, src0.ab
|
||||||
case AluScalarOpcode::kMins:
|
case AluScalarOpcode::kMins:
|
||||||
EmitSourceDepth("ps = min(src0.x, src1.y);\n");
|
EmitSourceDepth("ps = min(src0.x, src0.y);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// seqs dest, src0.a
|
// seqs dest, src0.a
|
||||||
|
@ -1023,52 +1045,52 @@ void GlslShaderTranslator::ProcessScalarAluInstruction(
|
||||||
|
|
||||||
// exp dest, src0.a
|
// exp dest, src0.a
|
||||||
case AluScalarOpcode::kExp:
|
case AluScalarOpcode::kExp:
|
||||||
EmitSourceDepth("ps = src0.x == 0.0 ? 1.0 : exp2(src0.x);\n");
|
EmitSourceDepth("ps = exp2(src0.x);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// logc dest, src0.a
|
// logc dest, src0.a
|
||||||
case AluScalarOpcode::kLogc:
|
case AluScalarOpcode::kLogc:
|
||||||
EmitSourceDepth("ps = src0.x == 1.0 ? 0.0 : log2(src0.x);\n");
|
EmitSourceDepth("ps = log2(src0.x);\n");
|
||||||
EmitSourceDepth("ps = isinf(ps) ? -FLT_MAX : ps;\n");
|
EmitSourceDepth("ps = isinf(ps) ? -FLT_MAX : ps;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// log dest, src0.a
|
// log dest, src0.a
|
||||||
case AluScalarOpcode::kLog:
|
case AluScalarOpcode::kLog:
|
||||||
EmitSourceDepth("ps = src0.x == 1.0 ? 0.0 : log2(src0.x);\n");
|
EmitSourceDepth("ps = log2(src0.x);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// rcpc dest, src0.a
|
// rcpc dest, src0.a
|
||||||
case AluScalarOpcode::kRcpc:
|
case AluScalarOpcode::kRcpc:
|
||||||
EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : 1.0 / src0.x;\n");
|
EmitSourceDepth("ps = 1.0 / src0.x;\n");
|
||||||
EmitSourceDepth("if (isinf(ps)) ps = FLT_MAX;\n");
|
EmitSourceDepth("if (isinf(ps)) ps = FLT_MAX;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// rcpf dest, src0.a
|
// rcpf dest, src0.a
|
||||||
case AluScalarOpcode::kRcpf:
|
case AluScalarOpcode::kRcpf:
|
||||||
EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : 1.0 / src0.x;\n");
|
EmitSourceDepth("ps = 1.0 / src0.x;\n");
|
||||||
EmitSourceDepth("if (isinf(ps)) ps = 0.0;\n");
|
EmitSourceDepth("if (isinf(ps)) ps = 0.0;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// rcp dest, src0.a
|
// rcp dest, src0.a
|
||||||
case AluScalarOpcode::kRcp:
|
case AluScalarOpcode::kRcp:
|
||||||
EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : 1.0 / src0.x;\n");
|
EmitSourceDepth("ps = 1.0 / src0.x;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// rsqc dest, src0.a
|
// rsqc dest, src0.a
|
||||||
case AluScalarOpcode::kRsqc:
|
case AluScalarOpcode::kRsqc:
|
||||||
EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : inversesqrt(src0.x);\n");
|
EmitSourceDepth("ps = inversesqrt(src0.x);\n");
|
||||||
EmitSourceDepth("if (isinf(ps)) ps = FLT_MAX;\n");
|
EmitSourceDepth("if (isinf(ps)) ps = FLT_MAX;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// rsqc dest, src0.a
|
// rsqc dest, src0.a
|
||||||
case AluScalarOpcode::kRsqf:
|
case AluScalarOpcode::kRsqf:
|
||||||
EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : inversesqrt(src0.x);\n");
|
EmitSourceDepth("ps = inversesqrt(src0.x);\n");
|
||||||
EmitSourceDepth("if (isinf(ps)) ps = 0.0;\n");
|
EmitSourceDepth("if (isinf(ps)) ps = 0.0;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// rsq dest, src0.a
|
// rsq dest, src0.a
|
||||||
case AluScalarOpcode::kRsq:
|
case AluScalarOpcode::kRsq:
|
||||||
EmitSourceDepth("ps = src0.x == 1.0 ? 1.0 : inversesqrt(src0.x);\n");
|
EmitSourceDepth("ps = inversesqrt(src0.x);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// maxas dest, src0.ab
|
// maxas dest, src0.ab
|
||||||
|
@ -1145,7 +1167,7 @@ void GlslShaderTranslator::ProcessScalarAluInstruction(
|
||||||
EmitSourceDepth(" ps = 0.0;\n");
|
EmitSourceDepth(" ps = 0.0;\n");
|
||||||
EmitSourceDepth(" p0 = true;\n");
|
EmitSourceDepth(" p0 = true;\n");
|
||||||
EmitSourceDepth("} else {\n");
|
EmitSourceDepth("} else {\n");
|
||||||
EmitSourceDepth(" ps = src0.x == 0.0 ? 1.0 : src1.x;\n");
|
EmitSourceDepth(" ps = src0.x == 0.0 ? 1.0 : src0.x;\n");
|
||||||
EmitSourceDepth(" p0 = false;\n");
|
EmitSourceDepth(" p0 = false;\n");
|
||||||
EmitSourceDepth("}\n");
|
EmitSourceDepth("}\n");
|
||||||
break;
|
break;
|
||||||
|
@ -1169,13 +1191,8 @@ void GlslShaderTranslator::ProcessScalarAluInstruction(
|
||||||
|
|
||||||
// setp_rstr dest, src0.a
|
// setp_rstr dest, src0.a
|
||||||
case AluScalarOpcode::kSetpRstr:
|
case AluScalarOpcode::kSetpRstr:
|
||||||
EmitSourceDepth("if (src0.x == 0.0) {\n");
|
EmitSourceDepth("ps = src0.x;\n");
|
||||||
EmitSourceDepth(" ps = 0.0;\n");
|
EmitSourceDepth("p0 = src0.x == 0.0 ? true : false;\n");
|
||||||
EmitSourceDepth(" p0 = true;\n");
|
|
||||||
EmitSourceDepth("} else {\n");
|
|
||||||
EmitSourceDepth(" ps = src0.x;\n");
|
|
||||||
EmitSourceDepth(" p0 = false;\n");
|
|
||||||
EmitSourceDepth("}\n");
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// kills_eq dest, src0.a
|
// kills_eq dest, src0.a
|
||||||
|
|
|
@ -30,6 +30,8 @@ class GlslShaderTranslator : public ShaderTranslator {
|
||||||
~GlslShaderTranslator() override;
|
~GlslShaderTranslator() override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
void Reset() override;
|
||||||
|
|
||||||
void EmitTranslationError(const char* message) override;
|
void EmitTranslationError(const char* message) override;
|
||||||
void EmitUnimplementedTranslationError() override;
|
void EmitUnimplementedTranslationError() override;
|
||||||
|
|
||||||
|
|
|
@ -18,22 +18,29 @@ namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
|
||||||
bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
||||||
const gl4::ucode::instr_fetch_tex_t& fetch_instr,
|
const ParsedTextureFetchInstruction& fetch_instr,
|
||||||
SamplerInfo* out_info) {
|
SamplerInfo* out_info) {
|
||||||
std::memset(out_info, 0, sizeof(SamplerInfo));
|
std::memset(out_info, 0, sizeof(SamplerInfo));
|
||||||
|
|
||||||
out_info->min_filter = static_cast<TextureFilter>(
|
out_info->min_filter =
|
||||||
fetch_instr.min_filter == 3 ? fetch.min_filter : fetch_instr.min_filter);
|
fetch_instr.attributes.min_filter == TextureFilter::kUseFetchConst
|
||||||
out_info->mag_filter = static_cast<TextureFilter>(
|
? static_cast<TextureFilter>(fetch.min_filter)
|
||||||
fetch_instr.mag_filter == 3 ? fetch.mag_filter : fetch_instr.mag_filter);
|
: fetch_instr.attributes.min_filter;
|
||||||
out_info->mip_filter = static_cast<TextureFilter>(
|
out_info->mag_filter =
|
||||||
fetch_instr.mip_filter == 3 ? fetch.mip_filter : fetch_instr.mip_filter);
|
fetch_instr.attributes.mag_filter == TextureFilter::kUseFetchConst
|
||||||
|
? static_cast<TextureFilter>(fetch.mag_filter)
|
||||||
|
: fetch_instr.attributes.mag_filter;
|
||||||
|
out_info->mip_filter =
|
||||||
|
fetch_instr.attributes.mip_filter == TextureFilter::kUseFetchConst
|
||||||
|
? static_cast<TextureFilter>(fetch.mip_filter)
|
||||||
|
: fetch_instr.attributes.mip_filter;
|
||||||
out_info->clamp_u = static_cast<ClampMode>(fetch.clamp_x);
|
out_info->clamp_u = static_cast<ClampMode>(fetch.clamp_x);
|
||||||
out_info->clamp_v = static_cast<ClampMode>(fetch.clamp_y);
|
out_info->clamp_v = static_cast<ClampMode>(fetch.clamp_y);
|
||||||
out_info->clamp_w = static_cast<ClampMode>(fetch.clamp_z);
|
out_info->clamp_w = static_cast<ClampMode>(fetch.clamp_z);
|
||||||
out_info->aniso_filter = static_cast<AnisoFilter>(
|
out_info->aniso_filter =
|
||||||
fetch_instr.aniso_filter == 7 ? fetch.aniso_filter
|
fetch_instr.attributes.aniso_filter == AnisoFilter::kUseFetchConst
|
||||||
: fetch_instr.aniso_filter);
|
? static_cast<AnisoFilter>(fetch.aniso_filter)
|
||||||
|
: fetch_instr.attributes.aniso_filter;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
#ifndef XENIA_GPU_SAMPLER_INFO_H_
|
#ifndef XENIA_GPU_SAMPLER_INFO_H_
|
||||||
#define XENIA_GPU_SAMPLER_INFO_H_
|
#define XENIA_GPU_SAMPLER_INFO_H_
|
||||||
|
|
||||||
#include "xenia/gpu/gl4/ucode.h"
|
#include "xenia/gpu/shader_translator.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
|
@ -26,7 +26,7 @@ struct SamplerInfo {
|
||||||
AnisoFilter aniso_filter;
|
AnisoFilter aniso_filter;
|
||||||
|
|
||||||
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
||||||
const gl4::ucode::instr_fetch_tex_t& fetch_instr,
|
const ParsedTextureFetchInstruction& fetch_instr,
|
||||||
SamplerInfo* out_info);
|
SamplerInfo* out_info);
|
||||||
|
|
||||||
uint64_t hash() const;
|
uint64_t hash() const;
|
||||||
|
|
|
@ -9,267 +9,31 @@
|
||||||
|
|
||||||
#include "xenia/gpu/shader.h"
|
#include "xenia/gpu/shader.h"
|
||||||
|
|
||||||
#include <cstring>
|
#include "xenia/base/logging.h"
|
||||||
|
|
||||||
#include "xenia/base/math.h"
|
|
||||||
#include "xenia/base/memory.h"
|
#include "xenia/base/memory.h"
|
||||||
#include "xenia/gpu/gl4/ucode_disassembler.h"
|
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
|
||||||
using namespace xe::gpu::gl4::ucode;
|
|
||||||
using namespace xe::gpu::xenos;
|
|
||||||
|
|
||||||
Shader::Shader(ShaderType shader_type, uint64_t data_hash,
|
Shader::Shader(ShaderType shader_type, uint64_t data_hash,
|
||||||
const uint32_t* dword_ptr, uint32_t dword_count)
|
const uint32_t* dword_ptr, uint32_t dword_count)
|
||||||
: shader_type_(shader_type), data_hash_(data_hash) {
|
: shader_type_(shader_type), data_hash_(data_hash) {
|
||||||
data_.resize(dword_count);
|
data_.resize(dword_count);
|
||||||
xe::copy_and_swap(data_.data(), dword_ptr, dword_count);
|
xe::copy_and_swap(data_.data(), dword_ptr, dword_count);
|
||||||
std::memset(&alloc_counts_, 0, sizeof(alloc_counts_));
|
|
||||||
std::memset(&buffer_inputs_, 0, sizeof(buffer_inputs_));
|
|
||||||
std::memset(&sampler_inputs_, 0, sizeof(sampler_inputs_));
|
|
||||||
|
|
||||||
// Disassemble ucode and stash.
|
|
||||||
// TODO(benvanik): debug only.
|
|
||||||
ucode_disassembly_ =
|
|
||||||
gl4::DisassembleShader(shader_type_, data_.data(), data_.size());
|
|
||||||
|
|
||||||
// Gather input/output registers/etc.
|
|
||||||
GatherIO();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader::~Shader() = default;
|
Shader::~Shader() = default;
|
||||||
|
|
||||||
void Shader::GatherIO() {
|
bool Shader::Prepare(ShaderTranslator* shader_translator) {
|
||||||
// Process all execution blocks.
|
// Perform translation.
|
||||||
instr_cf_t cfa;
|
translated_shader_ = shader_translator->Translate(shader_type_, data_hash_,
|
||||||
instr_cf_t cfb;
|
data_.data(), data_.size());
|
||||||
for (size_t idx = 0; idx < data_.size(); idx += 3) {
|
if (!translated_shader_) {
|
||||||
uint32_t dword_0 = data_[idx + 0];
|
XELOGE("Shader failed translation");
|
||||||
uint32_t dword_1 = data_[idx + 1];
|
return false;
|
||||||
uint32_t dword_2 = data_[idx + 2];
|
|
||||||
cfa.dword_0 = dword_0;
|
|
||||||
cfa.dword_1 = dword_1 & 0xFFFF;
|
|
||||||
cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
|
|
||||||
cfb.dword_1 = dword_2 >> 16;
|
|
||||||
if (cfa.opc == ALLOC) {
|
|
||||||
GatherAlloc(&cfa.alloc);
|
|
||||||
} else if (cfa.is_exec()) {
|
|
||||||
GatherExec(&cfa.exec);
|
|
||||||
}
|
|
||||||
if (cfb.opc == ALLOC) {
|
|
||||||
GatherAlloc(&cfb.alloc);
|
|
||||||
} else if (cfb.is_exec()) {
|
|
||||||
GatherExec(&cfb.exec);
|
|
||||||
}
|
|
||||||
if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Shader::GatherAlloc(const instr_cf_alloc_t* cf) {
|
|
||||||
allocs_.push_back(*cf);
|
|
||||||
|
|
||||||
switch (cf->buffer_select) {
|
|
||||||
case SQ_POSITION:
|
|
||||||
// Position (SV_POSITION).
|
|
||||||
alloc_counts_.positions += cf->size + 1;
|
|
||||||
break;
|
|
||||||
case SQ_PARAMETER_PIXEL:
|
|
||||||
// Output to PS (if VS), or frag output (if PS).
|
|
||||||
alloc_counts_.params += cf->size + 1;
|
|
||||||
break;
|
|
||||||
case SQ_MEMORY:
|
|
||||||
// MEMEXPORT?
|
|
||||||
alloc_counts_.memories += cf->size + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Shader::GatherExec(const instr_cf_exec_t* cf) {
|
|
||||||
uint32_t sequence = cf->serialize;
|
|
||||||
for (uint32_t i = 0; i < cf->count; i++) {
|
|
||||||
uint32_t alu_off = (cf->address + i);
|
|
||||||
// int sync = sequence & 0x2;
|
|
||||||
if (sequence & 0x1) {
|
|
||||||
auto fetch =
|
|
||||||
reinterpret_cast<const instr_fetch_t*>(data_.data() + alu_off * 3);
|
|
||||||
switch (fetch->opc) {
|
|
||||||
case VTX_FETCH:
|
|
||||||
GatherVertexFetch(&fetch->vtx);
|
|
||||||
break;
|
|
||||||
case TEX_FETCH:
|
|
||||||
GatherTextureFetch(&fetch->tex);
|
|
||||||
break;
|
|
||||||
case TEX_GET_BORDER_COLOR_FRAC:
|
|
||||||
case TEX_GET_COMP_TEX_LOD:
|
|
||||||
case TEX_GET_GRADIENTS:
|
|
||||||
case TEX_GET_WEIGHTS:
|
|
||||||
case TEX_SET_TEX_LOD:
|
|
||||||
case TEX_SET_GRADIENTS_H:
|
|
||||||
case TEX_SET_GRADIENTS_V:
|
|
||||||
default:
|
|
||||||
assert_always();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// TODO(benvanik): gather registers used, predicate bits used, etc.
|
|
||||||
auto alu =
|
|
||||||
reinterpret_cast<const instr_alu_t*>(data_.data() + alu_off * 3);
|
|
||||||
if (alu->export_data && alu->vector_write_mask) {
|
|
||||||
switch (alu->vector_dest) {
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
alloc_counts_.color_targets[alu->vector_dest] = true;
|
|
||||||
break;
|
|
||||||
case 63:
|
|
||||||
alloc_counts_.point_size = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (alu->export_data &&
|
|
||||||
(alu->scalar_write_mask || !alu->vector_write_mask)) {
|
|
||||||
switch (alu->scalar_dest) {
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
case 2:
|
|
||||||
case 3:
|
|
||||||
alloc_counts_.color_targets[alu->scalar_dest] = true;
|
|
||||||
break;
|
|
||||||
case 63:
|
|
||||||
alloc_counts_.point_size = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sequence >>= 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
|
|
||||||
// dst_reg/dst_swiz
|
|
||||||
// src_reg/src_swiz
|
|
||||||
// format = a2xx_sq_surfaceformat
|
|
||||||
// format_comp_all ? signed : unsigned
|
|
||||||
// num_format_all ? normalized
|
|
||||||
// stride
|
|
||||||
// offset
|
|
||||||
// const_index/const_index_sel -- fetch constant register
|
|
||||||
// num_format_all ? integer : fraction
|
|
||||||
// exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default
|
|
||||||
|
|
||||||
if (!vtx->must_be_one) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sometimes games have fetches that just produce constants. We can
|
|
||||||
// ignore those.
|
|
||||||
uint32_t dst_swiz = vtx->dst_swiz;
|
|
||||||
bool fetches_any_data = false;
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
if ((dst_swiz & 0x7) == 4) {
|
|
||||||
// 0.0
|
|
||||||
} else if ((dst_swiz & 0x7) == 5) {
|
|
||||||
// 1.0
|
|
||||||
} else if ((dst_swiz & 0x7) == 6) {
|
|
||||||
// ?
|
|
||||||
} else if ((dst_swiz & 0x7) == 7) {
|
|
||||||
// Previous register value.
|
|
||||||
} else {
|
|
||||||
fetches_any_data = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
dst_swiz >>= 3;
|
|
||||||
}
|
|
||||||
if (!fetches_any_data) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert_true(vtx->const_index <= 0x1F);
|
|
||||||
|
|
||||||
uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
|
|
||||||
auto& inputs = buffer_inputs_;
|
|
||||||
BufferDescElement* el = nullptr;
|
|
||||||
for (size_t n = 0; n < inputs.count; n++) {
|
|
||||||
auto& desc = inputs.descs[n];
|
|
||||||
if (desc.fetch_slot == fetch_slot) {
|
|
||||||
assert_true(desc.element_count <= xe::countof(desc.elements));
|
|
||||||
// It may not hold that all strides are equal, but I hope it does.
|
|
||||||
assert_true(!vtx->stride || desc.stride_words == vtx->stride);
|
|
||||||
el = &desc.elements[desc.element_count++];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!el) {
|
|
||||||
assert_not_zero(vtx->stride);
|
|
||||||
assert_true(inputs.count + 1 < xe::countof(inputs.descs));
|
|
||||||
auto& desc = inputs.descs[inputs.count++];
|
|
||||||
desc.input_index = inputs.count - 1;
|
|
||||||
desc.fetch_slot = fetch_slot;
|
|
||||||
desc.stride_words = vtx->stride;
|
|
||||||
el = &desc.elements[desc.element_count++];
|
|
||||||
}
|
|
||||||
++inputs.total_elements_count;
|
|
||||||
|
|
||||||
el->vtx_fetch = *vtx;
|
|
||||||
el->format = static_cast<VertexFormat>(vtx->format);
|
|
||||||
el->is_normalized = vtx->num_format_all == 0;
|
|
||||||
el->is_signed = vtx->format_comp_all == 1;
|
|
||||||
el->offset_words = vtx->offset;
|
|
||||||
el->size_words = 0;
|
|
||||||
switch (el->format) {
|
|
||||||
case VertexFormat::k_8_8_8_8:
|
|
||||||
case VertexFormat::k_2_10_10_10:
|
|
||||||
case VertexFormat::k_10_11_11:
|
|
||||||
case VertexFormat::k_11_11_10:
|
|
||||||
el->size_words = 1;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_16_16:
|
|
||||||
case VertexFormat::k_16_16_FLOAT:
|
|
||||||
el->size_words = 1;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_16_16_16_16:
|
|
||||||
case VertexFormat::k_16_16_16_16_FLOAT:
|
|
||||||
el->size_words = 2;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_32:
|
|
||||||
case VertexFormat::k_32_FLOAT:
|
|
||||||
el->size_words = 1;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_32_32:
|
|
||||||
case VertexFormat::k_32_32_FLOAT:
|
|
||||||
el->size_words = 2;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_32_32_32_FLOAT:
|
|
||||||
el->size_words = 3;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_32_32_32_32:
|
|
||||||
case VertexFormat::k_32_32_32_32_FLOAT:
|
|
||||||
el->size_words = 4;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert_unhandled_case(el->format);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Shader::GatherTextureFetch(const instr_fetch_tex_t* tex) {
|
|
||||||
// TODO(benvanik): check dest_swiz to see if we are writing anything.
|
|
||||||
|
|
||||||
assert_true(tex->const_idx < 0x1F);
|
|
||||||
|
|
||||||
assert_true(sampler_inputs_.count + 1 <= xe::countof(sampler_inputs_.descs));
|
|
||||||
auto& input = sampler_inputs_.descs[sampler_inputs_.count++];
|
|
||||||
input.input_index = sampler_inputs_.count - 1;
|
|
||||||
input.fetch_slot = tex->const_idx & 0xF; // ??????????????????????????????
|
|
||||||
input.tex_fetch = *tex;
|
|
||||||
|
|
||||||
// Format mangling, size estimation, etc.
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "xenia/gpu/gl4/ucode.h"
|
#include "xenia/gpu/shader_translator.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
|
@ -24,85 +24,31 @@ class Shader {
|
||||||
virtual ~Shader();
|
virtual ~Shader();
|
||||||
|
|
||||||
ShaderType type() const { return shader_type_; }
|
ShaderType type() const { return shader_type_; }
|
||||||
bool is_valid() const { return is_valid_; }
|
bool is_valid() const { return !!translated_shader_; }
|
||||||
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
|
|
||||||
const std::string& translated_disassembly() const {
|
|
||||||
return translated_disassembly_;
|
|
||||||
}
|
|
||||||
const std::vector<uint8_t> translated_binary() { return translated_binary_; }
|
|
||||||
const std::string& host_disassembly() const { return host_disassembly_; }
|
const std::string& host_disassembly() const { return host_disassembly_; }
|
||||||
|
TranslatedShader* translated_shader() const {
|
||||||
|
return translated_shader_.get();
|
||||||
|
}
|
||||||
|
|
||||||
const uint32_t* data() const { return data_.data(); }
|
const uint32_t* data() const { return data_.data(); }
|
||||||
uint32_t dword_count() const { return uint32_t(data_.size()); }
|
uint32_t dword_count() const { return uint32_t(data_.size()); }
|
||||||
|
|
||||||
struct BufferDescElement {
|
virtual bool Prepare(ShaderTranslator* shader_translator);
|
||||||
gl4::ucode::instr_fetch_vtx_t vtx_fetch;
|
|
||||||
VertexFormat format;
|
|
||||||
uint32_t offset_words;
|
|
||||||
uint32_t size_words;
|
|
||||||
bool is_signed;
|
|
||||||
bool is_normalized;
|
|
||||||
};
|
|
||||||
struct BufferDesc {
|
|
||||||
uint32_t input_index;
|
|
||||||
uint32_t fetch_slot;
|
|
||||||
uint32_t stride_words;
|
|
||||||
uint32_t element_count;
|
|
||||||
BufferDescElement elements[16];
|
|
||||||
};
|
|
||||||
struct BufferInputs {
|
|
||||||
uint32_t count;
|
|
||||||
uint32_t total_elements_count;
|
|
||||||
BufferDesc descs[32];
|
|
||||||
};
|
|
||||||
const BufferInputs& buffer_inputs() { return buffer_inputs_; }
|
|
||||||
|
|
||||||
struct SamplerDesc {
|
|
||||||
uint32_t input_index;
|
|
||||||
uint32_t fetch_slot;
|
|
||||||
uint32_t format;
|
|
||||||
gl4::ucode::instr_fetch_tex_t tex_fetch;
|
|
||||||
};
|
|
||||||
struct SamplerInputs {
|
|
||||||
uint32_t count;
|
|
||||||
SamplerDesc descs[32];
|
|
||||||
};
|
|
||||||
const SamplerInputs& sampler_inputs() { return sampler_inputs_; }
|
|
||||||
|
|
||||||
struct AllocCounts {
|
|
||||||
uint32_t positions;
|
|
||||||
uint32_t params;
|
|
||||||
uint32_t memories;
|
|
||||||
bool point_size;
|
|
||||||
bool color_targets[4];
|
|
||||||
};
|
|
||||||
const AllocCounts& alloc_counts() const { return alloc_counts_; }
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr,
|
Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr,
|
||||||
uint32_t dword_count);
|
uint32_t dword_count);
|
||||||
|
|
||||||
void GatherIO();
|
|
||||||
void GatherAlloc(const gl4::ucode::instr_cf_alloc_t* cf);
|
|
||||||
void GatherExec(const gl4::ucode::instr_cf_exec_t* cf);
|
|
||||||
void GatherVertexFetch(const gl4::ucode::instr_fetch_vtx_t* vtx);
|
|
||||||
void GatherTextureFetch(const gl4::ucode::instr_fetch_tex_t* tex);
|
|
||||||
|
|
||||||
ShaderType shader_type_;
|
ShaderType shader_type_;
|
||||||
uint64_t data_hash_;
|
uint64_t data_hash_;
|
||||||
std::vector<uint32_t> data_;
|
std::vector<uint32_t> data_;
|
||||||
bool is_valid_ = false;
|
|
||||||
|
|
||||||
std::string ucode_disassembly_;
|
|
||||||
std::string translated_disassembly_;
|
std::string translated_disassembly_;
|
||||||
std::vector<uint8_t> translated_binary_;
|
std::vector<uint8_t> translated_binary_;
|
||||||
std::string host_disassembly_;
|
std::string host_disassembly_;
|
||||||
std::string error_log_;
|
std::string error_log_;
|
||||||
|
|
||||||
AllocCounts alloc_counts_;
|
std::unique_ptr<TranslatedShader> translated_shader_;
|
||||||
std::vector<gl4::ucode::instr_cf_alloc_t> allocs_;
|
|
||||||
BufferInputs buffer_inputs_;
|
|
||||||
SamplerInputs sampler_inputs_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
|
|
@ -46,7 +46,7 @@ TranslatedShader::TranslatedShader(ShaderType shader_type,
|
||||||
size_t ucode_dword_count,
|
size_t ucode_dword_count,
|
||||||
std::vector<Error> errors)
|
std::vector<Error> errors)
|
||||||
: shader_type_(shader_type),
|
: shader_type_(shader_type),
|
||||||
ucode_data_hash_(ucode_data_hash_),
|
ucode_data_hash_(ucode_data_hash),
|
||||||
errors_(std::move(errors)) {
|
errors_(std::move(errors)) {
|
||||||
ucode_data_.resize(ucode_dword_count);
|
ucode_data_.resize(ucode_dword_count);
|
||||||
std::memcpy(ucode_data_.data(), ucode_dwords,
|
std::memcpy(ucode_data_.data(), ucode_dwords,
|
||||||
|
@ -63,13 +63,35 @@ TranslatedShader::TranslatedShader(ShaderType shader_type,
|
||||||
|
|
||||||
TranslatedShader::~TranslatedShader() = default;
|
TranslatedShader::~TranslatedShader() = default;
|
||||||
|
|
||||||
|
std::string TranslatedShader::GetBinaryString() const {
|
||||||
|
std::string result;
|
||||||
|
result.resize(binary_.size());
|
||||||
|
std::memcpy(const_cast<char*>(result.data()), binary_.data(), binary_.size());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
ShaderTranslator::ShaderTranslator() = default;
|
ShaderTranslator::ShaderTranslator() = default;
|
||||||
|
|
||||||
ShaderTranslator::~ShaderTranslator() = default;
|
ShaderTranslator::~ShaderTranslator() = default;
|
||||||
|
|
||||||
|
void ShaderTranslator::Reset() {
|
||||||
|
errors_.clear();
|
||||||
|
ucode_disasm_buffer_.Reset();
|
||||||
|
ucode_disasm_line_number_ = 0;
|
||||||
|
previous_ucode_disasm_scan_offset_ = 0;
|
||||||
|
total_attrib_count_ = 0;
|
||||||
|
vertex_bindings_.clear();
|
||||||
|
texture_bindings_.clear();
|
||||||
|
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
|
||||||
|
writes_color_targets_[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_ptr<TranslatedShader> ShaderTranslator::Translate(
|
std::unique_ptr<TranslatedShader> ShaderTranslator::Translate(
|
||||||
ShaderType shader_type, uint64_t ucode_data_hash,
|
ShaderType shader_type, uint64_t ucode_data_hash,
|
||||||
const uint32_t* ucode_dwords, size_t ucode_dword_count) {
|
const uint32_t* ucode_dwords, size_t ucode_dword_count) {
|
||||||
|
Reset();
|
||||||
|
|
||||||
shader_type_ = shader_type;
|
shader_type_ = shader_type;
|
||||||
ucode_dwords_ = ucode_dwords;
|
ucode_dwords_ = ucode_dwords;
|
||||||
ucode_dword_count_ = ucode_dword_count;
|
ucode_dword_count_ = ucode_dword_count;
|
||||||
|
@ -101,8 +123,12 @@ std::unique_ptr<TranslatedShader> ShaderTranslator::Translate(
|
||||||
new TranslatedShader(shader_type, ucode_data_hash, ucode_dwords,
|
new TranslatedShader(shader_type, ucode_data_hash, ucode_dwords,
|
||||||
ucode_dword_count, std::move(errors_)));
|
ucode_dword_count, std::move(errors_)));
|
||||||
translated_shader->binary_ = CompleteTranslation();
|
translated_shader->binary_ = CompleteTranslation();
|
||||||
|
translated_shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string();
|
||||||
translated_shader->vertex_bindings_ = std::move(vertex_bindings_);
|
translated_shader->vertex_bindings_ = std::move(vertex_bindings_);
|
||||||
translated_shader->texture_bindings_ = std::move(texture_bindings_);
|
translated_shader->texture_bindings_ = std::move(texture_bindings_);
|
||||||
|
for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
|
||||||
|
translated_shader->writes_color_targets_[i] = writes_color_targets_[i];
|
||||||
|
}
|
||||||
return translated_shader;
|
return translated_shader;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,6 +191,7 @@ void ShaderTranslator::GatherBindingInformation(
|
||||||
++instr_offset, sequence >>= 2) {
|
++instr_offset, sequence >>= 2) {
|
||||||
bool is_fetch = (sequence & 0x1) == 0x1;
|
bool is_fetch = (sequence & 0x1) == 0x1;
|
||||||
if (is_fetch) {
|
if (is_fetch) {
|
||||||
|
// Gather vertex and texture fetches.
|
||||||
auto fetch_opcode =
|
auto fetch_opcode =
|
||||||
static_cast<FetchOpcode>(ucode_dwords_[instr_offset * 3] & 0x1F);
|
static_cast<FetchOpcode>(ucode_dwords_[instr_offset * 3] & 0x1F);
|
||||||
if (fetch_opcode == FetchOpcode::kVertexFetch) {
|
if (fetch_opcode == FetchOpcode::kVertexFetch) {
|
||||||
|
@ -176,6 +203,20 @@ void ShaderTranslator::GatherBindingInformation(
|
||||||
*reinterpret_cast<const TextureFetchInstruction*>(
|
*reinterpret_cast<const TextureFetchInstruction*>(
|
||||||
ucode_dwords_ + instr_offset * 3));
|
ucode_dwords_ + instr_offset * 3));
|
||||||
}
|
}
|
||||||
|
} else if (is_pixel_shader()) {
|
||||||
|
// Gather up color targets written to.
|
||||||
|
auto& op = *reinterpret_cast<const AluInstruction*>(ucode_dwords_ +
|
||||||
|
instr_offset * 3);
|
||||||
|
if (op.has_vector_op() && op.is_export()) {
|
||||||
|
if (op.vector_dest() >= 0 && op.vector_dest() <= 3) {
|
||||||
|
writes_color_targets_[op.vector_dest()] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (op.has_scalar_op() && op.is_export()) {
|
||||||
|
if (op.vector_dest() >= 0 && op.vector_dest() <= 3) {
|
||||||
|
writes_color_targets_[op.vector_dest()] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -184,11 +225,39 @@ void ShaderTranslator::GatherBindingInformation(
|
||||||
|
|
||||||
void ShaderTranslator::GatherVertexBindingInformation(
|
void ShaderTranslator::GatherVertexBindingInformation(
|
||||||
const VertexFetchInstruction& op) {
|
const VertexFetchInstruction& op) {
|
||||||
TranslatedShader::VertexBinding binding;
|
if (!op.fetches_any_data()) {
|
||||||
binding.binding_index = vertex_bindings_.size();
|
return;
|
||||||
ParseVertexFetchInstruction(op, &binding.fetch_instr);
|
}
|
||||||
binding.fetch_constant = binding.fetch_instr.operands[1].storage_index;
|
|
||||||
vertex_bindings_.emplace_back(std::move(binding));
|
// Try to allocate an attribute on an existing binding.
|
||||||
|
// If no binding for this fetch slot is found create it.
|
||||||
|
using VertexBinding = TranslatedShader::VertexBinding;
|
||||||
|
VertexBinding::Attribute* attrib = nullptr;
|
||||||
|
for (auto& vertex_binding : vertex_bindings_) {
|
||||||
|
if (vertex_binding.fetch_constant == op.fetch_constant_index()) {
|
||||||
|
// It may not hold that all strides are equal, but I hope it does.
|
||||||
|
assert_true(!op.stride() || vertex_binding.stride_words == op.stride());
|
||||||
|
vertex_binding.attributes.push_back({});
|
||||||
|
attrib = &vertex_binding.attributes.back();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!attrib) {
|
||||||
|
assert_not_zero(op.stride());
|
||||||
|
TranslatedShader::VertexBinding vertex_binding;
|
||||||
|
vertex_binding.binding_index = static_cast<int>(vertex_bindings_.size());
|
||||||
|
vertex_binding.fetch_constant = op.fetch_constant_index();
|
||||||
|
vertex_binding.stride_words = op.stride();
|
||||||
|
vertex_binding.attributes.push_back({});
|
||||||
|
vertex_bindings_.emplace_back(std::move(vertex_binding));
|
||||||
|
attrib = &vertex_bindings_.back().attributes.back();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate attribute.
|
||||||
|
attrib->attrib_index = total_attrib_count_++;
|
||||||
|
ParseVertexFetchInstruction(op, &attrib->fetch_instr);
|
||||||
|
attrib->size_words =
|
||||||
|
GetVertexFormatSizeInWords(attrib->fetch_instr.attributes.data_format);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ShaderTranslator::GatherTextureBindingInformation(
|
void ShaderTranslator::GatherTextureBindingInformation(
|
||||||
|
|
|
@ -66,6 +66,10 @@ enum class SwizzleSource {
|
||||||
constexpr SwizzleSource GetSwizzleFromComponentIndex(int i) {
|
constexpr SwizzleSource GetSwizzleFromComponentIndex(int i) {
|
||||||
return static_cast<SwizzleSource>(i);
|
return static_cast<SwizzleSource>(i);
|
||||||
}
|
}
|
||||||
|
// Maps a component index (0-3) to its swizzle letter: 0 -> 'x', 1 -> 'y',
// 2 -> 'z', 3 -> 'w'. The index is not range-checked.
inline char GetCharForComponentIndex(int i) {
  const char* const component_letters = "xyzw";
  return component_letters[i];
}
|
||||||
inline char GetCharForSwizzle(SwizzleSource swizzle_source) {
|
inline char GetCharForSwizzle(SwizzleSource swizzle_source) {
|
||||||
const static char kChars[] = {'x', 'y', 'z', 'w', '0', '1'};
|
const static char kChars[] = {'x', 'y', 'z', 'w', '0', '1'};
|
||||||
return kChars[static_cast<int>(swizzle_source)];
|
return kChars[static_cast<int>(swizzle_source)];
|
||||||
|
@ -96,6 +100,16 @@ struct InstructionResult {
|
||||||
bool has_all_writes() const {
|
bool has_all_writes() const {
|
||||||
return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3];
|
return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3];
|
||||||
}
|
}
|
||||||
|
// Returns true if any non-constant components are written.
|
||||||
|
bool stores_non_constants() const {
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
if (write_mask[i] && components[i] != SwizzleSource::k0 &&
|
||||||
|
components[i] != SwizzleSource::k1) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// True if the components are in their 'standard' swizzle arrangement (xyzw).
|
// True if the components are in their 'standard' swizzle arrangement (xyzw).
|
||||||
bool is_standard_swizzle() const {
|
bool is_standard_swizzle() const {
|
||||||
return has_all_writes() && components[0] == SwizzleSource::kX &&
|
return has_all_writes() && components[0] == SwizzleSource::kX &&
|
||||||
|
@ -337,7 +351,7 @@ struct ParsedVertexFetchInstruction {
|
||||||
struct Attributes {
|
struct Attributes {
|
||||||
VertexFormat data_format = VertexFormat::kUndefined;
|
VertexFormat data_format = VertexFormat::kUndefined;
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
int stride = 0;
|
int stride = 0; // In dwords.
|
||||||
int exp_adjust = 0;
|
int exp_adjust = 0;
|
||||||
bool is_index_rounded = false;
|
bool is_index_rounded = false;
|
||||||
bool is_signed = false;
|
bool is_signed = false;
|
||||||
|
@ -450,12 +464,23 @@ class TranslatedShader {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct VertexBinding {
|
struct VertexBinding {
|
||||||
|
struct Attribute {
|
||||||
|
// Attribute index, 0-based in the entire shader.
|
||||||
|
int attrib_index;
|
||||||
|
// Fetch instruction with all parameters.
|
||||||
|
ParsedVertexFetchInstruction fetch_instr;
|
||||||
|
// Size of the attribute, in words.
|
||||||
|
uint32_t size_words;
|
||||||
|
};
|
||||||
|
|
||||||
// Index within the vertex binding listing.
|
// Index within the vertex binding listing.
|
||||||
size_t binding_index;
|
int binding_index;
|
||||||
// Fetch constant index [0-95].
|
// Fetch constant index [0-95].
|
||||||
uint32_t fetch_constant;
|
uint32_t fetch_constant;
|
||||||
// Fetch instruction with all parameters.
|
// Stride of the entire binding, in words.
|
||||||
ParsedVertexFetchInstruction fetch_instr;
|
uint32_t stride_words;
|
||||||
|
// Packed attributes within the binding buffer.
|
||||||
|
std::vector<Attribute> attributes;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TextureBinding {
|
struct TextureBinding {
|
||||||
|
@ -480,11 +505,16 @@ class TranslatedShader {
|
||||||
const std::vector<TextureBinding>& texture_bindings() const {
|
const std::vector<TextureBinding>& texture_bindings() const {
|
||||||
return texture_bindings_;
|
return texture_bindings_;
|
||||||
}
|
}
|
||||||
|
// Returns true if the shader writes to the given color target index [0-3].
|
||||||
|
bool writes_color_target(int i) const { return writes_color_targets_[i]; }
|
||||||
|
|
||||||
bool is_valid() const { return is_valid_; }
|
bool is_valid() const { return is_valid_; }
|
||||||
const std::vector<Error>& errors() const { return errors_; }
|
const std::vector<Error>& errors() const { return errors_; }
|
||||||
|
|
||||||
const std::vector<uint8_t>& binary() const { return binary_; }
|
const std::vector<uint8_t>& binary() const { return binary_; }
|
||||||
|
const std::string& ucode_disassembly() const { return ucode_disassembly_; }
|
||||||
|
|
||||||
|
std::string GetBinaryString() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class ShaderTranslator;
|
friend class ShaderTranslator;
|
||||||
|
@ -499,10 +529,12 @@ class TranslatedShader {
|
||||||
|
|
||||||
std::vector<VertexBinding> vertex_bindings_;
|
std::vector<VertexBinding> vertex_bindings_;
|
||||||
std::vector<TextureBinding> texture_bindings_;
|
std::vector<TextureBinding> texture_bindings_;
|
||||||
|
bool writes_color_targets_[4] = {false, false, false, false};
|
||||||
|
|
||||||
bool is_valid_ = false;
|
bool is_valid_ = false;
|
||||||
std::vector<Error> errors_;
|
std::vector<Error> errors_;
|
||||||
|
|
||||||
|
std::string ucode_disassembly_;
|
||||||
std::vector<uint8_t> binary_;
|
std::vector<uint8_t> binary_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -518,6 +550,9 @@ class ShaderTranslator {
|
||||||
protected:
|
protected:
|
||||||
ShaderTranslator();
|
ShaderTranslator();
|
||||||
|
|
||||||
|
// Resets translator state before beginning translation.
|
||||||
|
virtual void Reset();
|
||||||
|
|
||||||
// True if the current shader is a vertex shader.
|
// True if the current shader is a vertex shader.
|
||||||
bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; }
|
bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; }
|
||||||
// True if the current shader is a pixel shader.
|
// True if the current shader is a pixel shader.
|
||||||
|
@ -662,8 +697,10 @@ class ShaderTranslator {
|
||||||
ucode::VertexFetchInstruction previous_vfetch_full_;
|
ucode::VertexFetchInstruction previous_vfetch_full_;
|
||||||
|
|
||||||
// Detected binding information gathered before translation.
|
// Detected binding information gathered before translation.
|
||||||
|
int total_attrib_count_ = 0;
|
||||||
std::vector<TranslatedShader::VertexBinding> vertex_bindings_;
|
std::vector<TranslatedShader::VertexBinding> vertex_bindings_;
|
||||||
std::vector<TranslatedShader::TextureBinding> texture_bindings_;
|
std::vector<TranslatedShader::TextureBinding> texture_bindings_;
|
||||||
|
bool writes_color_targets_[4] = {false, false, false, false};
|
||||||
|
|
||||||
static const AluOpcodeInfo alu_vector_opcode_infos_[0x20];
|
static const AluOpcodeInfo alu_vector_opcode_infos_[0x20];
|
||||||
static const AluOpcodeInfo alu_scalar_opcode_infos_[0x40];
|
static const AluOpcodeInfo alu_scalar_opcode_infos_[0x40];
|
||||||
|
|
|
@ -96,7 +96,11 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
|
||||||
}
|
}
|
||||||
switch (op.storage_addressing_mode) {
|
switch (op.storage_addressing_mode) {
|
||||||
case InstructionStorageAddressingMode::kStatic:
|
case InstructionStorageAddressingMode::kStatic:
|
||||||
out->AppendFormat("%d", op.storage_index);
|
if (op.is_absolute_value) {
|
||||||
|
out->AppendFormat("[%d]", op.storage_index);
|
||||||
|
} else {
|
||||||
|
out->AppendFormat("%d", op.storage_index);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case InstructionStorageAddressingMode::kAddressAbsolute:
|
case InstructionStorageAddressingMode::kAddressAbsolute:
|
||||||
out->AppendFormat("[%d+a0]", op.storage_index);
|
out->AppendFormat("[%d+a0]", op.storage_index);
|
||||||
|
|
|
@ -486,11 +486,11 @@ void TraceViewer::DrawShaderUI(Shader* shader, ShaderDisplayType display_type) {
|
||||||
|
|
||||||
switch (display_type) {
|
switch (display_type) {
|
||||||
case ShaderDisplayType::kUcode: {
|
case ShaderDisplayType::kUcode: {
|
||||||
DrawMultilineString(shader->ucode_disassembly());
|
DrawMultilineString(shader->translated_shader()->ucode_disassembly());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ShaderDisplayType::kTranslated: {
|
case ShaderDisplayType::kTranslated: {
|
||||||
const auto& str = shader->translated_disassembly();
|
const auto& str = shader->translated_shader()->GetBinaryString();
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
bool done = false;
|
bool done = false;
|
||||||
while (!done && i < str.size()) {
|
while (!done && i < str.size()) {
|
||||||
|
@ -566,29 +566,33 @@ void TraceViewer::DrawBlendMode(uint32_t src_blend, uint32_t dest_blend,
|
||||||
ImGui::Text(op_template, src_str, dest_str);
|
ImGui::Text(op_template, src_str, dest_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TraceViewer::DrawTextureInfo(const Shader::SamplerDesc& desc) {
|
void TraceViewer::DrawTextureInfo(
|
||||||
|
const TranslatedShader::TextureBinding& texture_binding) {
|
||||||
auto& regs = *graphics_system_->register_file();
|
auto& regs = *graphics_system_->register_file();
|
||||||
|
|
||||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6;
|
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||||
|
texture_binding.fetch_constant * 6;
|
||||||
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(®s.values[r]);
|
auto group = reinterpret_cast<const xe_gpu_fetch_group_t*>(®s.values[r]);
|
||||||
auto& fetch = group->texture_fetch;
|
auto& fetch = group->texture_fetch;
|
||||||
if (fetch.type != 0x2) {
|
if (fetch.type != 0x2) {
|
||||||
DrawFailedTextureInfo(desc, "Invalid fetch type");
|
DrawFailedTextureInfo(texture_binding, "Invalid fetch type");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
TextureInfo texture_info;
|
TextureInfo texture_info;
|
||||||
if (!TextureInfo::Prepare(fetch, &texture_info)) {
|
if (!TextureInfo::Prepare(fetch, &texture_info)) {
|
||||||
DrawFailedTextureInfo(desc, "Unable to parse texture fetcher info");
|
DrawFailedTextureInfo(texture_binding,
|
||||||
|
"Unable to parse texture fetcher info");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
SamplerInfo sampler_info;
|
SamplerInfo sampler_info;
|
||||||
if (!SamplerInfo::Prepare(fetch, desc.tex_fetch, &sampler_info)) {
|
if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr,
|
||||||
DrawFailedTextureInfo(desc, "Unable to parse sampler info");
|
&sampler_info)) {
|
||||||
|
DrawFailedTextureInfo(texture_binding, "Unable to parse sampler info");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto texture = GetTextureEntry(texture_info, sampler_info);
|
auto texture = GetTextureEntry(texture_info, sampler_info);
|
||||||
if (!texture) {
|
if (!texture) {
|
||||||
DrawFailedTextureInfo(desc, "Failed to demand texture");
|
DrawFailedTextureInfo(texture_binding, "Failed to demand texture");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -599,7 +603,7 @@ void TraceViewer::DrawTextureInfo(const Shader::SamplerDesc& desc) {
|
||||||
// show viewer
|
// show viewer
|
||||||
}
|
}
|
||||||
ImGui::NextColumn();
|
ImGui::NextColumn();
|
||||||
ImGui::Text("Fetch Slot: %d", desc.fetch_slot);
|
ImGui::Text("Fetch Slot: %u", texture_binding.fetch_constant);
|
||||||
ImGui::Text("Guest Address: %.8X", texture_info.guest_address);
|
ImGui::Text("Guest Address: %.8X", texture_info.guest_address);
|
||||||
switch (texture_info.dimension) {
|
switch (texture_info.dimension) {
|
||||||
case Dimension::k1D:
|
case Dimension::k1D:
|
||||||
|
@ -628,21 +632,21 @@ void TraceViewer::DrawTextureInfo(const Shader::SamplerDesc& desc) {
|
||||||
ImGui::Columns(1);
|
ImGui::Columns(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TraceViewer::DrawFailedTextureInfo(const Shader::SamplerDesc& desc,
|
void TraceViewer::DrawFailedTextureInfo(
|
||||||
const char* message) {
|
const TranslatedShader::TextureBinding& texture_binding,
|
||||||
|
const char* message) {
|
||||||
// TODO(benvanik): better error info/etc.
|
// TODO(benvanik): better error info/etc.
|
||||||
ImGui::TextColored(kColorError, "ERROR: %s", message);
|
ImGui::TextColored(kColorError, "ERROR: %s", message);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TraceViewer::DrawVertexFetcher(Shader* shader,
|
void TraceViewer::DrawVertexFetcher(
|
||||||
const Shader::BufferDesc& desc,
|
Shader* shader, const TranslatedShader::VertexBinding& vertex_binding,
|
||||||
const xe_gpu_vertex_fetch_t* fetch) {
|
const xe_gpu_vertex_fetch_t* fetch) {
|
||||||
const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2);
|
const uint8_t* addr = memory_->TranslatePhysical(fetch->address << 2);
|
||||||
uint32_t vertex_count = (fetch->size * 4) / desc.stride_words;
|
uint32_t vertex_count = (fetch->size * 4) / vertex_binding.stride_words;
|
||||||
int column_count = 0;
|
int column_count = 0;
|
||||||
for (uint32_t el_index = 0; el_index < desc.element_count; ++el_index) {
|
for (const auto& attrib : vertex_binding.attributes) {
|
||||||
const auto& el = desc.elements[el_index];
|
switch (attrib.fetch_instr.attributes.data_format) {
|
||||||
switch (el.format) {
|
|
||||||
case VertexFormat::k_32:
|
case VertexFormat::k_32:
|
||||||
case VertexFormat::k_32_FLOAT:
|
case VertexFormat::k_32_FLOAT:
|
||||||
++column_count;
|
++column_count;
|
||||||
|
@ -679,9 +683,10 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
|
||||||
(display_start)*ImGui::GetTextLineHeight());
|
(display_start)*ImGui::GetTextLineHeight());
|
||||||
ImGui::Columns(column_count);
|
ImGui::Columns(column_count);
|
||||||
if (display_start <= 1) {
|
if (display_start <= 1) {
|
||||||
for (uint32_t el_index = 0; el_index < desc.element_count; ++el_index) {
|
for (size_t el_index = 0; el_index < vertex_binding.attributes.size();
|
||||||
const auto& el = desc.elements[el_index];
|
++el_index) {
|
||||||
switch (el.format) {
|
const auto& attrib = vertex_binding.attributes[el_index];
|
||||||
|
switch (attrib.fetch_instr.attributes.data_format) {
|
||||||
case VertexFormat::k_32:
|
case VertexFormat::k_32:
|
||||||
case VertexFormat::k_32_FLOAT:
|
case VertexFormat::k_32_FLOAT:
|
||||||
ImGui::Text("e%d.x", el_index);
|
ImGui::Text("e%d.x", el_index);
|
||||||
|
@ -729,13 +734,13 @@ void TraceViewer::DrawVertexFetcher(Shader* shader,
|
||||||
ImGui::Separator();
|
ImGui::Separator();
|
||||||
}
|
}
|
||||||
for (int i = display_start; i < display_end; ++i) {
|
for (int i = display_start; i < display_end; ++i) {
|
||||||
const uint8_t* vstart = addr + i * desc.stride_words * 4;
|
const uint8_t* vstart = addr + i * vertex_binding.stride_words * 4;
|
||||||
for (uint32_t el_index = 0; el_index < desc.element_count; ++el_index) {
|
for (const auto& attrib : vertex_binding.attributes) {
|
||||||
const auto& el = desc.elements[el_index];
|
#define LOADEL(type, wo) \
|
||||||
#define LOADEL(type, wo) \
|
GpuSwap(xe::load<type>(vstart + \
|
||||||
GpuSwap(xe::load<type>(vstart + (el.offset_words + wo) * 4), \
|
(attrib.fetch_instr.attributes.offset + wo) * 4), \
|
||||||
Endian(fetch->endian))
|
Endian(fetch->endian))
|
||||||
switch (el.format) {
|
switch (attrib.fetch_instr.attributes.data_format) {
|
||||||
case VertexFormat::k_32:
|
case VertexFormat::k_32:
|
||||||
ImGui::Text("%.8X", LOADEL(uint32_t, 0));
|
ImGui::Text("%.8X", LOADEL(uint32_t, 0));
|
||||||
ImGui::NextColumn();
|
ImGui::NextColumn();
|
||||||
|
@ -1406,16 +1411,15 @@ void TraceViewer::DrawStateUI() {
|
||||||
if (ImGui::CollapsingHeader("Vertex Buffers")) {
|
if (ImGui::CollapsingHeader("Vertex Buffers")) {
|
||||||
auto shader = command_processor->active_vertex_shader();
|
auto shader = command_processor->active_vertex_shader();
|
||||||
if (shader) {
|
if (shader) {
|
||||||
const auto& buffer_inputs = shader->buffer_inputs();
|
const auto& vertex_bindings =
|
||||||
for (uint32_t buffer_index = 0; buffer_index < buffer_inputs.count;
|
shader->translated_shader()->vertex_bindings();
|
||||||
++buffer_index) {
|
for (const auto& vertex_binding : vertex_bindings) {
|
||||||
const auto& desc = buffer_inputs.descs[buffer_index];
|
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||||
int r =
|
(vertex_binding.fetch_constant / 3) * 6;
|
||||||
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
|
|
||||||
const auto group =
|
const auto group =
|
||||||
reinterpret_cast<xe_gpu_fetch_group_t*>(®s.values[r]);
|
reinterpret_cast<xe_gpu_fetch_group_t*>(®s.values[r]);
|
||||||
const xe_gpu_vertex_fetch_t* fetch = nullptr;
|
const xe_gpu_vertex_fetch_t* fetch = nullptr;
|
||||||
switch (desc.fetch_slot % 3) {
|
switch (vertex_binding.fetch_constant % 3) {
|
||||||
case 0:
|
case 0:
|
||||||
fetch = &group->vertex_fetch_0;
|
fetch = &group->vertex_fetch_0;
|
||||||
break;
|
break;
|
||||||
|
@ -1428,13 +1432,14 @@ void TraceViewer::DrawStateUI() {
|
||||||
}
|
}
|
||||||
assert_true(fetch->endian == 2);
|
assert_true(fetch->endian == 2);
|
||||||
char tree_root_id[32];
|
char tree_root_id[32];
|
||||||
sprintf(tree_root_id, "#vertices_root_%d", desc.fetch_slot);
|
sprintf(tree_root_id, "#vertices_root_%d",
|
||||||
|
vertex_binding.fetch_constant);
|
||||||
if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s",
|
if (ImGui::TreeNode(tree_root_id, "vf%d: 0x%.8X (%db), %s",
|
||||||
desc.fetch_slot, fetch->address << 2,
|
vertex_binding.fetch_constant, fetch->address << 2,
|
||||||
fetch->size * 4,
|
fetch->size * 4,
|
||||||
kEndiannessNames[int(fetch->endian)])) {
|
kEndiannessNames[int(fetch->endian)])) {
|
||||||
ImGui::BeginChild("#vertices", ImVec2(0, 300));
|
ImGui::BeginChild("#vertices", ImVec2(0, 300));
|
||||||
DrawVertexFetcher(shader, desc, fetch);
|
DrawVertexFetcher(shader, vertex_binding, fetch);
|
||||||
ImGui::EndChild();
|
ImGui::EndChild();
|
||||||
ImGui::TreePop();
|
ImGui::TreePop();
|
||||||
}
|
}
|
||||||
|
@ -1446,10 +1451,11 @@ void TraceViewer::DrawStateUI() {
|
||||||
if (ImGui::CollapsingHeader("Vertex Textures")) {
|
if (ImGui::CollapsingHeader("Vertex Textures")) {
|
||||||
auto shader = command_processor->active_vertex_shader();
|
auto shader = command_processor->active_vertex_shader();
|
||||||
if (shader) {
|
if (shader) {
|
||||||
const auto& sampler_inputs = shader->sampler_inputs();
|
const auto& texture_bindings =
|
||||||
if (sampler_inputs.count) {
|
shader->translated_shader()->texture_bindings();
|
||||||
for (size_t i = 0; i < sampler_inputs.count; ++i) {
|
if (!texture_bindings.empty()) {
|
||||||
DrawTextureInfo(sampler_inputs.descs[i]);
|
for (const auto& texture_binding : texture_bindings) {
|
||||||
|
DrawTextureInfo(texture_binding);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ImGui::Text("No vertex shader samplers");
|
ImGui::Text("No vertex shader samplers");
|
||||||
|
@ -1461,10 +1467,11 @@ void TraceViewer::DrawStateUI() {
|
||||||
if (ImGui::CollapsingHeader("Textures")) {
|
if (ImGui::CollapsingHeader("Textures")) {
|
||||||
auto shader = command_processor->active_pixel_shader();
|
auto shader = command_processor->active_pixel_shader();
|
||||||
if (shader) {
|
if (shader) {
|
||||||
const auto& sampler_inputs = shader->sampler_inputs();
|
const auto& texture_bindings =
|
||||||
if (sampler_inputs.count) {
|
shader->translated_shader()->texture_bindings();
|
||||||
for (size_t i = 0; i < sampler_inputs.count; ++i) {
|
if (!texture_bindings.empty()) {
|
||||||
DrawTextureInfo(sampler_inputs.descs[i]);
|
for (const auto& texture_binding : texture_bindings) {
|
||||||
|
DrawTextureInfo(texture_binding);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ImGui::Text("No pixel shader samplers");
|
ImGui::Text("No pixel shader samplers");
|
||||||
|
|
|
@ -85,11 +85,13 @@ class TraceViewer {
|
||||||
void DrawBlendMode(uint32_t src_blend, uint32_t dest_blend,
|
void DrawBlendMode(uint32_t src_blend, uint32_t dest_blend,
|
||||||
uint32_t blend_op);
|
uint32_t blend_op);
|
||||||
|
|
||||||
void DrawTextureInfo(const Shader::SamplerDesc& desc);
|
void DrawTextureInfo(const TranslatedShader::TextureBinding& texture_binding);
|
||||||
void DrawFailedTextureInfo(const Shader::SamplerDesc& desc,
|
void DrawFailedTextureInfo(
|
||||||
const char* message);
|
const TranslatedShader::TextureBinding& texture_binding,
|
||||||
|
const char* message);
|
||||||
|
|
||||||
void DrawVertexFetcher(Shader* shader, const Shader::BufferDesc& desc,
|
void DrawVertexFetcher(Shader* shader,
|
||||||
|
const TranslatedShader::VertexBinding& vertex_binding,
|
||||||
const xenos::xe_gpu_vertex_fetch_t* fetch);
|
const xenos::xe_gpu_vertex_fetch_t* fetch);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -548,6 +548,29 @@ struct VertexFetchInstruction {
|
||||||
uint32_t src_swizzle() const { return data_.src_swiz; }
|
uint32_t src_swizzle() const { return data_.src_swiz; }
|
||||||
bool is_src_relative() const { return data_.src_reg_am; }
|
bool is_src_relative() const { return data_.src_reg_am; }
|
||||||
|
|
||||||
|
// Returns true if the fetch actually fetches data.
|
||||||
|
// This may be false if it's used only to populate constants.
|
||||||
|
bool fetches_any_data() const {
|
||||||
|
uint32_t dst_swiz = data_.dst_swiz;
|
||||||
|
bool fetches_any_data = false;
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
if ((dst_swiz & 0x7) == 4) {
|
||||||
|
// 0.0
|
||||||
|
} else if ((dst_swiz & 0x7) == 5) {
|
||||||
|
// 1.0
|
||||||
|
} else if ((dst_swiz & 0x7) == 6) {
|
||||||
|
// ?
|
||||||
|
} else if ((dst_swiz & 0x7) == 7) {
|
||||||
|
// Previous register value.
|
||||||
|
} else {
|
||||||
|
fetches_any_data = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
dst_swiz >>= 3;
|
||||||
|
}
|
||||||
|
return fetches_any_data;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t prefetch_count() const { return data_.prefetch_count; }
|
uint32_t prefetch_count() const { return data_.prefetch_count; }
|
||||||
bool is_mini_fetch() const { return data_.is_mini_fetch == 1; }
|
bool is_mini_fetch() const { return data_.is_mini_fetch == 1; }
|
||||||
|
|
||||||
|
|
|
@ -208,6 +208,60 @@ enum class VertexFormat : uint32_t {
|
||||||
k_32_32_32_FLOAT = 57,
|
k_32_32_32_FLOAT = 57,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Returns the number of components (1-4) carried by the given vertex format.
// Formats not listed here trip assert_unhandled_case and report 0.
inline int GetVertexFormatComponentCount(VertexFormat format) {
  int component_count = 0;
  switch (format) {
    // Single-component formats.
    case VertexFormat::k_32:
    case VertexFormat::k_32_FLOAT:
      component_count = 1;
      break;
    // Two-component formats.
    case VertexFormat::k_16_16:
    case VertexFormat::k_16_16_FLOAT:
    case VertexFormat::k_32_32:
    case VertexFormat::k_32_32_FLOAT:
      component_count = 2;
      break;
    // Three-component formats.
    case VertexFormat::k_10_11_11:
    case VertexFormat::k_11_11_10:
    case VertexFormat::k_32_32_32_FLOAT:
      component_count = 3;
      break;
    // Four-component formats.
    case VertexFormat::k_8_8_8_8:
    case VertexFormat::k_2_10_10_10:
    case VertexFormat::k_16_16_16_16:
    case VertexFormat::k_16_16_16_16_FLOAT:
    case VertexFormat::k_32_32_32_32:
    case VertexFormat::k_32_32_32_32_FLOAT:
      component_count = 4;
      break;
    default:
      assert_unhandled_case(format);
      break;
  }
  return component_count;
}
|
||||||
|
|
||||||
|
// Returns the storage size, in words, of one element of the given vertex
// format. Formats not listed here trip assert_unhandled_case and report 1.
inline int GetVertexFormatSizeInWords(VertexFormat format) {
  int size_words = 1;
  switch (format) {
    // Formats that fit in a single word.
    case VertexFormat::k_8_8_8_8:
    case VertexFormat::k_2_10_10_10:
    case VertexFormat::k_10_11_11:
    case VertexFormat::k_11_11_10:
    case VertexFormat::k_16_16:
    case VertexFormat::k_16_16_FLOAT:
    case VertexFormat::k_32:
    case VertexFormat::k_32_FLOAT:
      size_words = 1;
      break;
    // Two-word formats.
    case VertexFormat::k_16_16_16_16:
    case VertexFormat::k_16_16_16_16_FLOAT:
    case VertexFormat::k_32_32:
    case VertexFormat::k_32_32_FLOAT:
      size_words = 2;
      break;
    // Three-word formats.
    case VertexFormat::k_32_32_32_FLOAT:
      size_words = 3;
      break;
    // Four-word formats.
    case VertexFormat::k_32_32_32_32:
    case VertexFormat::k_32_32_32_32_FLOAT:
      size_words = 4;
      break;
    default:
      assert_unhandled_case(format);
      break;
  }
  return size_words;
}
|
||||||
|
|
||||||
namespace xenos {
|
namespace xenos {
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -231,37 +285,6 @@ enum class CopyCommand : uint32_t {
|
||||||
kNull = 3, // ?
|
kNull = 3, // ?
|
||||||
};
|
};
|
||||||
|
|
||||||
inline int GetVertexFormatComponentCount(VertexFormat format) {
|
|
||||||
switch (format) {
|
|
||||||
case VertexFormat::k_32:
|
|
||||||
case VertexFormat::k_32_FLOAT:
|
|
||||||
return 1;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_16_16:
|
|
||||||
case VertexFormat::k_16_16_FLOAT:
|
|
||||||
case VertexFormat::k_32_32:
|
|
||||||
case VertexFormat::k_32_32_FLOAT:
|
|
||||||
return 2;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_10_11_11:
|
|
||||||
case VertexFormat::k_11_11_10:
|
|
||||||
case VertexFormat::k_32_32_32_FLOAT:
|
|
||||||
return 3;
|
|
||||||
break;
|
|
||||||
case VertexFormat::k_8_8_8_8:
|
|
||||||
case VertexFormat::k_2_10_10_10:
|
|
||||||
case VertexFormat::k_16_16_16_16:
|
|
||||||
case VertexFormat::k_16_16_16_16_FLOAT:
|
|
||||||
case VertexFormat::k_32_32_32_32:
|
|
||||||
case VertexFormat::k_32_32_32_32_FLOAT:
|
|
||||||
return 4;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert_unhandled_case(format);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
|
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
|
||||||
(((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
|
(((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
|
||||||
((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))
|
((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))
|
||||||
|
|
Loading…
Reference in New Issue