From e8de42d9ea1b03261fb6abd9153abe36e314c3f3 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Tue, 23 Dec 2014 10:29:01 -0800 Subject: [PATCH] Copying in generic shader code. --- src/xenia/gpu/gl4/command_processor.cc | 67 ++++++- src/xenia/gpu/gl4/command_processor.h | 11 ++ src/xenia/gpu/gl4/gl4_shader.cc | 22 +++ src/xenia/gpu/gl4/gl4_shader.h | 32 ++++ src/xenia/gpu/gl4/sources.gypi | 2 + src/xenia/gpu/shader.cc | 256 +++++++++++++++++++++++++ src/xenia/gpu/shader.h | 105 ++++++++++ src/xenia/gpu/sources.gypi | 2 + src/xenia/gpu/ucode_disassembler.h | 2 +- src/xenia/gpu/xenos.h | 58 +++--- 10 files changed, 518 insertions(+), 39 deletions(-) create mode 100644 src/xenia/gpu/gl4/gl4_shader.cc create mode 100644 src/xenia/gpu/gl4/gl4_shader.h create mode 100644 src/xenia/gpu/shader.cc create mode 100644 src/xenia/gpu/shader.h diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index 19fb02745..6f7e964b4 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -16,6 +16,8 @@ #include #include +#include + #define XETRACECP(fmt, ...) 
\ if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__) @@ -41,7 +43,9 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system) write_ptr_index_event_(CreateEvent(NULL, FALSE, FALSE, NULL)), write_ptr_index_(0), bin_select_(0xFFFFFFFFull), - bin_mask_(0xFFFFFFFFull) { + bin_mask_(0xFFFFFFFFull), + active_vertex_shader_(nullptr), + active_pixel_shader_(nullptr) { LARGE_INTEGER perf_counter; QueryPerformanceCounter(&perf_counter); time_base_ = perf_counter.QuadPart; @@ -76,6 +80,9 @@ void CommandProcessor::Shutdown() { worker_running_ = false; SetEvent(write_ptr_index_event_); worker_thread_.join(); + + all_shaders_.clear(); + shader_cache_.clear(); } void CommandProcessor::WorkerMain() { @@ -918,10 +925,11 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t addr = addr_type & ~0x3; uint32_t start_size = reader->Read(); uint32_t start = start_size >> 16; - uint32_t size = start_size & 0xFFFF; // dwords + uint32_t size_dwords = start_size & 0xFFFF; // dwords assert_true(start == 0); - /*driver_->LoadShader(shader_type, - GpuToCpu(packet_ptr, addr), size * 4, start);*/ + LoadShader(shader_type, + reinterpret_cast(membase_ + GpuToCpu(packet_ptr, addr)), + size_dwords); return true; } @@ -936,13 +944,12 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE( auto shader_type = static_cast(dword0); uint32_t start_size = dword1; uint32_t start = start_size >> 16; - uint32_t size = start_size & 0xFFFF; // dwords + uint32_t size_dwords = start_size & 0xFFFF; // dwords assert_true(start == 0); - // TODO(benvanik): figure out if this could wrap. 
- reader->CheckRead(size); - /*driver_->LoadShader(shader_type, reader->ptr(), size * 4, - start);*/ - reader->Advance(size); + reader->CheckRead(size_dwords); + LoadShader(shader_type, reinterpret_cast(membase_ + reader->ptr()), + size_dwords); + reader->Advance(size_dwords); return true; } @@ -957,6 +964,46 @@ bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE( return true; } +bool CommandProcessor::LoadShader(ShaderType shader_type, + const uint32_t* address, + uint32_t dword_count) { + // Hash the input memory and look up the shader. + GL4Shader* shader_ptr = nullptr; + uint64_t hash = XXH64(address, dword_count * sizeof(uint32_t), 0); + auto it = shader_cache_.find(hash); + if (it != shader_cache_.end()) { + // Found in the cache. + // TODO(benvanik): compare bytes? Likelihood of collision is low. + shader_ptr = it->second; + } else { + // Not found in cache. + // No translation is performed here, as it depends on program_cntl. + auto shader = + std::make_unique(shader_type, hash, address, dword_count); + shader_ptr = shader.get(); + shader_cache_.insert({hash, shader_ptr}); + all_shaders_.emplace_back(std::move(shader)); + + XELOGGPU("Set %s shader at %0.8X (%db):\n%s", + shader_type == ShaderType::kVertex ? 
"vertex" : "pixel", + uint32_t(reinterpret_cast(address) - + reinterpret_cast(membase_)), + dword_count * 4, shader_ptr->ucode_disassembly().c_str()); + } + switch (shader_type) { + case ShaderType::kVertex: + active_vertex_shader_ = shader_ptr; + break; + case ShaderType::kPixel: + active_pixel_shader_ = shader_ptr; + break; + default: + assert_unhandled_case(shader_type); + return false; + } + return true; +} + } // namespace gl4 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index 25be16dfe..81be5e5b5 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -13,7 +13,10 @@ #include #include #include +#include +#include +#include #include #include #include @@ -107,6 +110,9 @@ class CommandProcessor { uint32_t packet_ptr, uint32_t packet, uint32_t count); + bool LoadShader(ShaderType shader_type, const uint32_t* address, + uint32_t dword_count); + Memory* memory_; uint8_t* membase_; GL4GraphicsSystem* graphics_system_; @@ -132,6 +138,11 @@ class CommandProcessor { uint64_t bin_select_; uint64_t bin_mask_; + + std::vector> all_shaders_; + std::unordered_map shader_cache_; + GL4Shader* active_vertex_shader_; + GL4Shader* active_pixel_shader_; }; } // namespace gl4 diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc new file mode 100644 index 000000000..a2f0dd5a2 --- /dev/null +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -0,0 +1,22 @@ +/** +****************************************************************************** +* Xenia : Xbox 360 Emulator Research Project * +****************************************************************************** +* Copyright 2014 Ben Vanik. All rights reserved. * +* Released under the BSD license - see LICENSE in the root for more details. 
* +****************************************************************************** +*/ + +#include + +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +bool GL4Shader::TranslateImpl() { return true; } + +} // namespace gl4 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/gl4/gl4_shader.h b/src/xenia/gpu/gl4/gl4_shader.h new file mode 100644 index 000000000..d33fc9239 --- /dev/null +++ b/src/xenia/gpu/gl4/gl4_shader.h @@ -0,0 +1,32 @@ +/** +****************************************************************************** +* Xenia : Xbox 360 Emulator Research Project * +****************************************************************************** +* Copyright 2014 Ben Vanik. All rights reserved. * +* Released under the BSD license - see LICENSE in the root for more details. * +****************************************************************************** +*/ + +#ifndef XENIA_GPU_GL4_GL4_SHADER_H_ +#define XENIA_GPU_GL4_GL4_SHADER_H_ + +#include +#include + +namespace xe { +namespace gpu { +namespace gl4 { + +class GL4Shader : public Shader { + public: + using Shader::Shader; + + protected: + bool TranslateImpl() override; +}; + +} // namespace gl4 +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_GL4_GL4_SHADER_H_ diff --git a/src/xenia/gpu/gl4/sources.gypi b/src/xenia/gpu/gl4/sources.gypi index e6b5bd3ea..0b8673696 100644 --- a/src/xenia/gpu/gl4/sources.gypi +++ b/src/xenia/gpu/gl4/sources.gypi @@ -8,6 +8,8 @@ 'gl4_gpu.h', 'gl4_graphics_system.cc', 'gl4_graphics_system.h', + 'gl4_shader.cc', + 'gl4_shader.h', 'gl_context.cc', 'gl_context.h', ], diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc new file mode 100644 index 000000000..d49fa5554 --- /dev/null +++ b/src/xenia/gpu/shader.cc @@ -0,0 +1,256 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** 
+ * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include + +namespace xe { +namespace gpu { + +using namespace xe::gpu::ucode; + +Shader::Shader(ShaderType shader_type, uint64_t data_hash, + const uint32_t* dword_ptr, uint32_t dword_count) + : shader_type_(shader_type), data_hash_(data_hash), is_valid_(false), alloc_counts_(), buffer_inputs_(), sampler_inputs_() { + data_.resize(dword_count); + poly::copy_and_swap(data_.data(), dword_ptr, dword_count); + + // Disassemble ucode and stash. + // TODO(benvanik): debug only. + ucode_disassembly_ = + DisassembleShader(shader_type_, data_.data(), data_.size()); + + // Gather input/output registers/etc. into the value-initialized (zeroed) tables. + GatherIO(); +} + +bool Shader::Translate() { + assert_false(is_valid_); + + // TODO(benvanik): disk cache/etc - lookup hash and load if found. + // TODO(benvanik): dump to disk. + + // Attempt implementation-specific translation. + // This may take a while, and probably will fail. + // TODO(benvanik): parallelize? (allow two translations at once, etc). + is_valid_ = TranslateImpl(); + return is_valid_; +} + +void Shader::GatherIO() { + // Process all execution blocks; each full 3-dword group holds two CF instructions. 
+ instr_cf_t cfa; + instr_cf_t cfb; + for (size_t idx = 0; idx + 2 < data_.size(); idx += 3) { + uint32_t dword_0 = data_[idx + 0]; + uint32_t dword_1 = data_[idx + 1]; + uint32_t dword_2 = data_[idx + 2]; + cfa.dword_0 = dword_0; + cfa.dword_1 = dword_1 & 0xFFFF; + cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); + cfb.dword_1 = dword_2 >> 16; + if (cfa.opc == ALLOC) { + GatherAlloc(&cfa.alloc); + } else if (cfa.is_exec()) { + GatherExec(&cfa.exec); + } + if (cfb.opc == ALLOC) { + GatherAlloc(&cfb.alloc); + } else if (cfb.is_exec()) { + GatherExec(&cfb.exec); + } + if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { + break; + } + } +} + +void Shader::GatherAlloc(const instr_cf_alloc_t* cf) { + allocs_.push_back(*cf); + + switch (cf->buffer_select) { + case SQ_POSITION: + // Position (SV_POSITION). + alloc_counts_.positions += cf->size + 1; + break; + case SQ_PARAMETER_PIXEL: + // Output to PS (if VS), or frag output (if PS). + alloc_counts_.params += cf->size + 1; + break; + case SQ_MEMORY: + // MEMEXPORT? + alloc_counts_.memories += cf->size + 1; + break; + } +} + +void Shader::GatherExec(const instr_cf_exec_t* cf) { + execs_.push_back(*cf); + + uint32_t sequence = cf->serialize; + for (uint32_t i = 0; i < cf->count; i++) { + uint32_t alu_off = (cf->address + i); + int sync = sequence & 0x2; + if (sequence & 0x1) { + auto fetch = reinterpret_cast(&data_[alu_off * 3]); + switch (fetch->opc) { + case VTX_FETCH: + GatherVertexFetch(&fetch->vtx); + break; + case TEX_FETCH: + GatherTextureFetch(&fetch->tex); + break; + case TEX_GET_BORDER_COLOR_FRAC: + case TEX_GET_COMP_TEX_LOD: + case TEX_GET_GRADIENTS: + case TEX_GET_WEIGHTS: + case TEX_SET_TEX_LOD: + case TEX_SET_GRADIENTS_H: + case TEX_SET_GRADIENTS_V: + default: + assert_always(); + break; + } + } else { + // TODO(benvanik): gather registers used, predicate bits used, etc. 
+ auto alu = reinterpret_cast(&data_[alu_off * 3]); + if (alu->vector_write_mask) { + if (alu->export_data && alu->vector_dest == 63) { + alloc_counts_.point_size = true; + } + } + if (alu->scalar_write_mask || !alu->vector_write_mask) { + if (alu->export_data && alu->scalar_dest == 63) { + alloc_counts_.point_size = true; + } + } + } + sequence >>= 2; + } +} + +void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) { + assert_true(shader_type_ == ShaderType::kVertex); + + // dst_reg/dst_swiz + // src_reg/src_swiz + // format = a2xx_sq_surfaceformat + // format_comp_all ? signed : unsigned + // num_format_all ? normalized + // stride + // offset + // const_index/const_index_sel -- fetch constant register + // num_format_all ? integer : fraction + // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default + + // Sometimes games have fetches that just produce constants. We can + // ignore those. + uint32_t dst_swiz = vtx->dst_swiz; + bool fetches_any_data = false; + for (int i = 0; i < 4; i++) { + if ((dst_swiz & 0x7) == 4) { + // 0.0 + } else if ((dst_swiz & 0x7) == 5) { + // 1.0 + } else if ((dst_swiz & 0x7) == 6) { + // ? + } else if ((dst_swiz & 0x7) == 7) { + // Previous register value. + } else { + fetches_any_data = true; + break; + } + dst_swiz >>= 3; + } + if (!fetches_any_data) { + return; + } + + uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; + auto& inputs = buffer_inputs_; + BufferDescElement* el = nullptr; + for (size_t n = 0; n < inputs.count; n++) { + auto& desc = inputs.descs[n]; + if (desc.fetch_slot == fetch_slot) { + assert_true(desc.element_count < poly::countof(desc.elements)); + // It may not hold that all strides are equal, but I hope it does. 
+ assert_true(!vtx->stride || desc.stride_words == vtx->stride); + el = &desc.elements[desc.element_count++]; + break; + } + } + if (!el) { + assert_not_zero(vtx->stride); + assert_true(inputs.count < poly::countof(inputs.descs)); + auto& desc = inputs.descs[inputs.count++]; + desc.input_index = inputs.count - 1; + desc.fetch_slot = fetch_slot; + desc.stride_words = vtx->stride; + el = &desc.elements[desc.element_count++]; + } + + el->vtx_fetch = *vtx; + el->format = vtx->format; + el->is_normalized = vtx->num_format_all == 0; + el->is_signed = vtx->format_comp_all == 1; + el->offset_words = vtx->offset; + el->size_words = 0; + switch (el->format) { + case FMT_8_8_8_8: + case FMT_2_10_10_10: + case FMT_10_11_11: + case FMT_11_11_10: + el->size_words = 1; + break; + case FMT_16_16: + case FMT_16_16_FLOAT: + el->size_words = 1; + break; + case FMT_16_16_16_16: + case FMT_16_16_16_16_FLOAT: + el->size_words = 2; + break; + case FMT_32: + case FMT_32_FLOAT: + el->size_words = 1; + break; + case FMT_32_32: + case FMT_32_32_FLOAT: + el->size_words = 2; + break; + case FMT_32_32_32_FLOAT: + el->size_words = 3; + break; + case FMT_32_32_32_32: + case FMT_32_32_32_32_FLOAT: + el->size_words = 4; + break; + default: + XELOGE("Unknown vertex format: %d", el->format); + assert_always(); + break; + } +} + +void Shader::GatherTextureFetch(const instr_fetch_tex_t* tex) { + // TODO(benvanik): check dest_swiz to see if we are writing anything. + + assert_true(sampler_inputs_.count < poly::countof(sampler_inputs_.descs)); + auto& input = sampler_inputs_.descs[sampler_inputs_.count++]; + input.input_index = sampler_inputs_.count - 1; + input.fetch_slot = tex->const_idx & 0xF; // ? + input.tex_fetch = *tex; + + // Format mangling, size estimation, etc. 
+} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h new file mode 100644 index 000000000..0b755e7c9 --- /dev/null +++ b/src/xenia/gpu/shader.h @@ -0,0 +1,105 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SHADER_H_ +#define XENIA_GPU_SHADER_H_ + +#include + +#include +#include + +namespace xe { +namespace gpu { + +class Shader { + public: + Shader(ShaderType shader_type, uint64_t data_hash, const uint32_t* dword_ptr, + uint32_t dword_count); + + ShaderType type() const { return shader_type_; } + bool is_valid() const { return is_valid_; } + const std::string& ucode_disassembly() const { return ucode_disassembly_; } + const std::string& translated_disassembly() const { + return translated_disassembly_; + } + + bool Translate(); + + struct BufferDescElement { + ucode::instr_fetch_vtx_t vtx_fetch; + uint32_t format; + uint32_t offset_words; + uint32_t size_words; + bool is_signed; + bool is_normalized; + }; + struct BufferDesc { + uint32_t input_index; + uint32_t fetch_slot; + uint32_t stride_words; + uint32_t element_count; + BufferDescElement elements[16]; + }; + struct BufferInputs { + uint32_t count; + BufferDesc descs[32]; + }; + const BufferInputs& buffer_inputs() const { return buffer_inputs_; } + + struct SamplerDesc { + uint32_t input_index; + uint32_t fetch_slot; + uint32_t format; + ucode::instr_fetch_tex_t tex_fetch; + }; + struct SamplerInputs { + uint32_t count; + SamplerDesc descs[32]; + }; + const SamplerInputs& sampler_inputs() const { return sampler_inputs_; } + + struct AllocCounts { + uint32_t 
+ positions; + uint32_t params; + uint32_t memories; + bool point_size; + }; + const AllocCounts& alloc_counts() const { return alloc_counts_; } + const std::vector& execs() const { return execs_; } + const std::vector& allocs() const { return allocs_; } + + protected: + virtual bool TranslateImpl() = 0; + + void GatherIO(); + void GatherAlloc(const ucode::instr_cf_alloc_t* cf); + void GatherExec(const ucode::instr_cf_exec_t* cf); + void GatherVertexFetch(const ucode::instr_fetch_vtx_t* vtx); + void GatherTextureFetch(const ucode::instr_fetch_tex_t* tex); + + ShaderType shader_type_; + uint64_t data_hash_; + std::vector data_; + bool is_valid_; + + std::string ucode_disassembly_; + std::string translated_disassembly_; + + AllocCounts alloc_counts_; + std::vector execs_; + std::vector allocs_; + BufferInputs buffer_inputs_; + SamplerInputs sampler_inputs_; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SHADER_H_ diff --git a/src/xenia/gpu/sources.gypi b/src/xenia/gpu/sources.gypi index d55d68409..416884d5d 100644 --- a/src/xenia/gpu/sources.gypi +++ b/src/xenia/gpu/sources.gypi @@ -9,6 +9,8 @@ 'register_file.cc', 'register_file.h', 'register_table.inc', + 'shader.cc', + 'shader.h', 'ucode.h', 'ucode_disassembler.cc', 'ucode_disassembler.h', diff --git a/src/xenia/gpu/ucode_disassembler.h b/src/xenia/gpu/ucode_disassembler.h index fe3cb2868..bd5ab4eff 100644 --- a/src/xenia/gpu/ucode_disassembler.h +++ b/src/xenia/gpu/ucode_disassembler.h @@ -18,7 +18,7 @@ namespace xe { namespace gpu { -std::string DisassembleShader(xenos::ShaderType type, const uint32_t* dwords, +std::string DisassembleShader(ShaderType type, const uint32_t* dwords, size_t dword_count); } // namespace gpu diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index e7c386cc5..449a7a9be 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -15,20 +15,12 @@ namespace xe { namespace gpu { -namespace xenos { enum class ShaderType : uint32_t { kVertex = 0, kPixel = 1, 
}; -typedef enum { - XE_GPU_INVALIDATE_MASK_VERTEX_SHADER = 1 << 8, - XE_GPU_INVALIDATE_MASK_PIXEL_SHADER = 1 << 9, - - XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, -} XE_GPU_INVALIDATE_MASK; - enum class PrimitiveType : uint32_t { kNone = 0x00, kPointList = 0x01, @@ -43,6 +35,15 @@ enum class PrimitiveType : uint32_t { kQuadList = 0x0D, }; +namespace xenos { + +typedef enum { + XE_GPU_INVALIDATE_MASK_VERTEX_SHADER = 1 << 8, + XE_GPU_INVALIDATE_MASK_PIXEL_SHADER = 1 << 9, + + XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, +} XE_GPU_INVALIDATE_MASK; + enum class Endian : uint32_t { kUnspecified = 0, k8in16 = 1, @@ -50,27 +51,28 @@ enum class Endian : uint32_t { k16in32 = 3, }; -#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \ - (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | ((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9)) +#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \ + (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \ + ((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9)) typedef enum { - XE_GPU_SWIZZLE_X = 0, - XE_GPU_SWIZZLE_R = 0, - XE_GPU_SWIZZLE_Y = 1, - XE_GPU_SWIZZLE_G = 1, - XE_GPU_SWIZZLE_Z = 2, - XE_GPU_SWIZZLE_B = 2, - XE_GPU_SWIZZLE_W = 3, - XE_GPU_SWIZZLE_A = 3, - XE_GPU_SWIZZLE_0 = 4, - XE_GPU_SWIZZLE_1 = 5, - XE_GPU_SWIZZLE_RGBA = XE_GPU_MAKE_SWIZZLE(R, G, B, A), - XE_GPU_SWIZZLE_BGRA = XE_GPU_MAKE_SWIZZLE(B, G, R, A), - XE_GPU_SWIZZLE_RGB1 = XE_GPU_MAKE_SWIZZLE(R, G, B, 1), - XE_GPU_SWIZZLE_BGR1 = XE_GPU_MAKE_SWIZZLE(B, G, R, 1), - XE_GPU_SWIZZLE_000R = XE_GPU_MAKE_SWIZZLE(0, 0, 0, R), - XE_GPU_SWIZZLE_RRR1 = XE_GPU_MAKE_SWIZZLE(R, R, R, 1), - XE_GPU_SWIZZLE_R111 = XE_GPU_MAKE_SWIZZLE(R, 1, 1, 1), - XE_GPU_SWIZZLE_R000 = XE_GPU_MAKE_SWIZZLE(R, 0, 0, 0), + XE_GPU_SWIZZLE_X = 0, + XE_GPU_SWIZZLE_R = 0, + XE_GPU_SWIZZLE_Y = 1, + XE_GPU_SWIZZLE_G = 1, + XE_GPU_SWIZZLE_Z = 2, + XE_GPU_SWIZZLE_B = 2, + XE_GPU_SWIZZLE_W = 3, + XE_GPU_SWIZZLE_A = 3, + XE_GPU_SWIZZLE_0 = 4, + XE_GPU_SWIZZLE_1 = 5, + XE_GPU_SWIZZLE_RGBA = XE_GPU_MAKE_SWIZZLE(R, 
G, B, A), + XE_GPU_SWIZZLE_BGRA = XE_GPU_MAKE_SWIZZLE(B, G, R, A), + XE_GPU_SWIZZLE_RGB1 = XE_GPU_MAKE_SWIZZLE(R, G, B, 1), + XE_GPU_SWIZZLE_BGR1 = XE_GPU_MAKE_SWIZZLE(B, G, R, 1), + XE_GPU_SWIZZLE_000R = XE_GPU_MAKE_SWIZZLE(0, 0, 0, R), + XE_GPU_SWIZZLE_RRR1 = XE_GPU_MAKE_SWIZZLE(R, R, R, 1), + XE_GPU_SWIZZLE_R111 = XE_GPU_MAKE_SWIZZLE(R, 1, 1, 1), + XE_GPU_SWIZZLE_R000 = XE_GPU_MAKE_SWIZZLE(R, 0, 0, 0), } XE_GPU_SWIZZLE; inline uint32_t GpuSwap(uint32_t value, Endian endianness) {