diff --git a/src/xenia/gpu/buffer.cc b/src/xenia/gpu/buffer.cc deleted file mode 100644 index 499cb43a6..000000000 --- a/src/xenia/gpu/buffer.cc +++ /dev/null @@ -1,42 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::xenos; - - -Buffer::Buffer( - const uint8_t* src_ptr, size_t length) : - src_(src_ptr), length_(length) { -} - -Buffer::~Buffer() { -} - -IndexBuffer::IndexBuffer(const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length) - : Buffer(src_ptr, length), - info_(info) { -} - -IndexBuffer::~IndexBuffer() {} - -VertexBuffer::VertexBuffer(const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length) - : Buffer(src_ptr, length), - info_(info) { -} - -VertexBuffer::~VertexBuffer() {} diff --git a/src/xenia/gpu/buffer.h b/src/xenia/gpu/buffer.h deleted file mode 100644 index 9c8e3c654..000000000 --- a/src/xenia/gpu/buffer.h +++ /dev/null @@ -1,91 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_BUFFER_H_ -#define XENIA_GPU_BUFFER_H_ - -#include -#include -#include - - -namespace xe { -namespace gpu { - - -class Buffer { -public: - Buffer(const uint8_t* src_ptr, size_t length); - virtual ~Buffer(); - - const uint8_t* src() const { return src_; } - size_t length() const { return length_; } - uint64_t hash() const { return hash_; } - - virtual bool FetchNew(uint64_t hash) = 0; - virtual bool FetchDirty(uint64_t hash) = 0; - -protected: - const uint8_t* src_; - size_t length_; - uint64_t hash_; -}; - - -struct IndexBufferInfo { - bool index_32bit; - uint32_t index_count; - uint32_t index_size; - uint32_t endianness; -}; - - -class IndexBuffer : public Buffer { -public: - IndexBuffer(const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length); - virtual ~IndexBuffer(); - -protected: - IndexBufferInfo info_; -}; - - -struct VertexBufferLayout { - uint32_t stride_words; - uint32_t element_count; - struct { - uint32_t format; - uint32_t offset_words; - uint32_t size_words; - } elements[16]; -}; - -struct VertexBufferInfo { - VertexBufferLayout layout; -}; - - -class VertexBuffer : public Buffer { -public: - VertexBuffer(const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length); - virtual ~VertexBuffer(); - -protected: - VertexBufferInfo info_; -}; - - - -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_BUFFER_H_ diff --git a/src/xenia/gpu/buffer_cache.cc b/src/xenia/gpu/buffer_cache.cc deleted file mode 100644 index cc963d817..000000000 --- a/src/xenia/gpu/buffer_cache.cc +++ /dev/null @@ -1,79 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include - - -using namespace std; -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::xenos; - - -BufferCache::BufferCache() { -} - -BufferCache::~BufferCache() { - Clear(); -} - -IndexBuffer* BufferCache::FetchIndexBuffer( - const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length) { - size_t key = hash_combine(info.endianness, info.index_32bit, info.index_count, info.index_size); - size_t hash = xe_hash64(src_ptr, length); - auto it = index_buffer_map_.find(key); - if (it != index_buffer_map_.end()) { - if (hash == it->second->hash()) { - return it->second; - } else { - return it->second->FetchDirty(hash) ? it->second : nullptr; - } - } else { - auto buffer = CreateIndexBuffer(info, src_ptr, length); - index_buffer_map_.insert({ key, buffer }); - if (!buffer->FetchNew(hash)) { - return nullptr; - } - return buffer; - } -} - -VertexBuffer* BufferCache::FetchVertexBuffer( - const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length) { - size_t key = reinterpret_cast(src_ptr); - size_t hash = xe_hash64(src_ptr, length); - auto it = vertex_buffer_map_.find(key); - if (it != vertex_buffer_map_.end()) { - if (hash == it->second->hash()) { - return it->second; - } else { - return it->second->FetchDirty(hash) ? it->second : nullptr; - } - } else { - auto buffer = CreateVertexBuffer(info, src_ptr, length); - vertex_buffer_map_.insert({ key, buffer }); - if (!buffer->FetchNew(hash)) { - return nullptr; - } - return buffer; - } -} - -void BufferCache::Clear() { - for (auto it = index_buffer_map_.begin(); - it != index_buffer_map_.end(); ++it) { - auto buffer = it->second; - delete buffer; - } - index_buffer_map_.clear(); -} diff --git a/src/xenia/gpu/buffer_cache.h b/src/xenia/gpu/buffer_cache.h deleted file mode 100644 index bcba6f9de..000000000 --- a/src/xenia/gpu/buffer_cache.h +++ /dev/null @@ -1,55 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_BUFFER_CACHE_H_ -#define XENIA_GPU_BUFFER_CACHE_H_ - -#include -#include -#include - - -namespace xe { -namespace gpu { - - -class BufferCache { -public: - BufferCache(); - virtual ~BufferCache(); - - IndexBuffer* FetchIndexBuffer( - const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length); - - VertexBuffer* FetchVertexBuffer( - const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length); - - void Clear(); - -protected: - virtual IndexBuffer* CreateIndexBuffer( - const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length) = 0; - virtual VertexBuffer* CreateVertexBuffer( - const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length) = 0; - -private: - std::unordered_map index_buffer_map_; - std::unordered_map vertex_buffer_map_; -}; - - -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_BUFFER_CACHE_H_ diff --git a/src/xenia/gpu/buffer_resource.cc b/src/xenia/gpu/buffer_resource.cc index d6019d95f..9f9accb9b 100644 --- a/src/xenia/gpu/buffer_resource.cc +++ b/src/xenia/gpu/buffer_resource.cc @@ -15,3 +15,42 @@ using namespace xe; using namespace xe::gpu; using namespace xe::gpu::xenos; + +BufferResource::BufferResource(const MemoryRange& memory_range) + : PagedResource(memory_range) { +} + +BufferResource::~BufferResource() = default; + +int BufferResource::Prepare() { + if (!handle()) { + if (CreateHandle()) { + XELOGE("Unable to create buffer handle"); + return 1; + } + } + + if (!dirtied_) { + return 0; + } + dirtied_ = false; + + // pass dirty regions? + return InvalidateRegion(memory_range_); +} + +IndexBufferResource::IndexBufferResource(const MemoryRange& memory_range, + const Info& info) + : BufferResource(memory_range), + info_(info) { +} + +IndexBufferResource::~IndexBufferResource() = default; + +VertexBufferResource::VertexBufferResource(const MemoryRange& memory_range, + const Info& info) + : BufferResource(memory_range), + info_(info) { +} + +VertexBufferResource::~VertexBufferResource() = default; diff --git a/src/xenia/gpu/buffer_resource.h b/src/xenia/gpu/buffer_resource.h index 385a5049a..a88d1ae06 100644 --- a/src/xenia/gpu/buffer_resource.h +++ b/src/xenia/gpu/buffer_resource.h @@ -10,7 +10,8 @@ #ifndef XENIA_GPU_BUFFER_RESOURCE_H_ #define XENIA_GPU_BUFFER_RESOURCE_H_ -#include +#include +#include #include @@ -18,8 +19,76 @@ namespace xe { namespace gpu { -class BufferResource : public Resource { +class BufferResource : public PagedResource { public: + BufferResource(const MemoryRange& memory_range); + ~BufferResource() override; + + virtual int Prepare(); + +protected: + virtual int CreateHandle() = 0; + virtual int InvalidateRegion(const MemoryRange& memory_range) = 0; +}; + + +enum IndexFormat { + INDEX_FORMAT_16BIT = 0, + INDEX_FORMAT_32BIT = 1, +}; + +class IndexBufferResource : public BufferResource { +public: + struct Info { + IndexFormat format; + xenos::XE_GPU_ENDIAN endianness; + }; + + IndexBufferResource(const MemoryRange& memory_range, + const Info& info); + ~IndexBufferResource() override; + + const Info& info() const { return info_; } + + bool Equals(const void* info_ptr, size_t info_length) override { + return info_length == sizeof(Info) && + memcmp(info_ptr, &info_, info_length) == 0; + } + +protected: + Info info_; +}; + + +class VertexBufferResource : public BufferResource { +public: + struct DeclElement { + xenos::instr_fetch_vtx_t vtx_fetch; + uint32_t format; + uint32_t offset_words; + uint32_t size_words; + bool is_signed; + bool is_normalized; + }; + struct Info { + uint32_t stride_words; + uint32_t element_count; + DeclElement elements[16]; + }; + + VertexBufferResource(const MemoryRange& memory_range, + const Info& info); + ~VertexBufferResource() override; + + const Info& info() const { return info_; } + + bool Equals(const void* info_ptr, size_t info_length) override { + return info_length == sizeof(Info) && + memcmp(info_ptr, &info_, info_length) == 0; + } + +protected: + Info info_; }; diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index bb7a02bc7..c7a6a166b 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -9,9 +9,782 @@ #include +#include +#include +#include +#include + -using namespace std; using namespace xe; using namespace xe::gpu; using namespace xe::gpu::xenos; + +#define XETRACECP(fmt, ...) if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__) + + +CommandProcessor::CommandProcessor( + GraphicsSystem* graphics_system, Memory* memory) : + graphics_system_(graphics_system), memory_(memory), driver_(0) { + write_ptr_index_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); + + primary_buffer_ptr_ = 0; + primary_buffer_size_ = 0; + read_ptr_index_ = 0; + read_ptr_update_freq_ = 0; + read_ptr_writeback_ptr_ = 0; + write_ptr_index_ = 0; + write_ptr_max_index_ = 0; + + LARGE_INTEGER perf_counter; + QueryPerformanceCounter(&perf_counter); + time_base_ = perf_counter.QuadPart; + counter_ = 0; +} + +CommandProcessor::~CommandProcessor() { + SetEvent(write_ptr_index_event_); + CloseHandle(write_ptr_index_event_); +} + +uint64_t CommandProcessor::QueryTime() { + LARGE_INTEGER perf_counter; + QueryPerformanceCounter(&perf_counter); + return perf_counter.QuadPart - time_base_; +} + +void CommandProcessor::Initialize(GraphicsDriver* driver, + uint32_t ptr, uint32_t page_count) { + driver_ = driver; + primary_buffer_ptr_ = ptr; + // Not sure this is correct, but it's a way to take the page_count back to + // the number of bytes allocated by the physical alloc. + uint32_t original_size = 1 << (0x1C - page_count - 1); + primary_buffer_size_ = original_size; + read_ptr_index_ = 0; + + // Tell the driver what to use for translation. + driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF); +} + +void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr, + uint32_t block_size) { + // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C + // ptr = RB_RPTR_ADDR, pointer to write back the address to. + read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr; + // CP_RB_CNTL Ring Buffer Control 0x704 + // block_size = RB_BLKSZ, number of quadwords read between updates of the + // read pointer. + read_ptr_update_freq_ = (uint32_t)pow(2.0, (double)block_size) / 4; +} + +void CommandProcessor::UpdateWritePointer(uint32_t value) { + write_ptr_max_index_ = MAX(write_ptr_max_index_, value); + write_ptr_index_ = value; + SetEvent(write_ptr_index_event_); +} + +void CommandProcessor::Pump() { + uint8_t* p = memory_->membase(); + + while (write_ptr_index_ == 0xBAADF00D || + read_ptr_index_ == write_ptr_index_) { + // Check if the pointer has moved. + // We wait a short bit here to yield time. Since we are also running the + // main window display we don't want to pause too long, though. + // YieldProcessor(); + const int wait_time_ms = 1; + if (WaitForSingleObject(write_ptr_index_event_, + wait_time_ms) == WAIT_TIMEOUT) { + return; + } + } + + // Bring local so we don't have to worry about them changing out from under + // us. + uint32_t write_ptr_index = write_ptr_index_; + uint32_t write_ptr_max_index = write_ptr_max_index_; + if (read_ptr_index_ == write_ptr_index) { + return; + } + + // Process the new commands. + XETRACECP("Command processor thread work"); + + // Execute. Note that we handle wraparound transparently. + ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index); + read_ptr_index_ = write_ptr_index; + + // TODO(benvanik): use read_ptr_update_freq_ and only issue after moving + // that many indices. + if (read_ptr_writeback_ptr_) { + XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_); + } +} + +void CommandProcessor::ExecutePrimaryBuffer( + uint32_t start_index, uint32_t end_index) { + SCOPE_profile_cpu_f("gpu"); + + // Adjust pointer base. + uint32_t ptr = primary_buffer_ptr_ + start_index * 4; + ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF); + uint32_t end_ptr = primary_buffer_ptr_ + end_index * 4; + end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF); + + XETRACECP("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)", + ptr, start_index, end_index); + + // Execute commands! + PacketArgs args; + args.ptr = ptr; + args.base_ptr = primary_buffer_ptr_; + args.max_address = primary_buffer_ptr_ + primary_buffer_size_; + args.ptr_mask = (primary_buffer_size_ / 4) - 1; + uint32_t n = 0; + while (args.ptr != end_ptr) { + n += ExecutePacket(args); + XEASSERT(args.ptr < args.max_address); + } + if (end_index > start_index) { + XEASSERT(n == (end_index - start_index)); + } + + XETRACECP(" ExecutePrimaryBuffer End"); +} + +void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) { + XETRACECP("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length); + + // Execute commands! + PacketArgs args; + args.ptr = ptr; + args.base_ptr = ptr; + args.max_address = ptr + length * 4; + args.ptr_mask = 0; + for (uint32_t n = 0; n < length;) { + n += ExecutePacket(args); + XEASSERT(n <= length); + } + + XETRACECP(" ExecuteIndirectBuffer End"); +} + +#define LOG_DATA(count) \ + for (uint32_t __m = 0; __m < count; __m++) { \ + XETRACECP("[%.8X] %.8X", \ + packet_ptr + (1 + __m) * 4, \ + XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \ + } + +void CommandProcessor::AdvancePtr(PacketArgs& args, uint32_t n) { + args.ptr = args.ptr + n * 4; + if (args.ptr_mask) { + args.ptr = + args.base_ptr + (((args.ptr - args.base_ptr) / 4) & args.ptr_mask) * 4; + } +} +#define ADVANCE_PTR(n) AdvancePtr(args, n) +#define PEEK_PTR() \ + XEGETUINT32BE(p + args.ptr) +#define READ_PTR() \ + XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1); + +uint32_t CommandProcessor::ExecutePacket(PacketArgs& args) { + uint8_t* p = memory_->membase(); + RegisterFile* regs = driver_->register_file(); + + uint32_t packet_ptr = args.ptr; + const uint8_t* packet_base = p + packet_ptr; + const uint32_t packet = PEEK_PTR(); + ADVANCE_PTR(1); + const uint32_t packet_type = packet >> 30; + if (packet == 0) { + XETRACECP("[%.8X] Packet(%.8X): 0?", + packet_ptr, packet); + return 1; + } + + switch (packet_type) { + case 0x00: + { + // Type-0 packet. + // Write count registers in sequence to the registers starting at + // (base_index << 2). + XETRACECP("[%.8X] Packet(%.8X): set registers:", + packet_ptr, packet); + uint32_t count = ((packet >> 16) & 0x3FFF) + 1; + uint32_t base_index = (packet & 0x7FFF); + uint32_t write_one_reg = (packet >> 15) & 0x1; + for (uint32_t m = 0; m < count; m++) { + uint32_t reg_data = PEEK_PTR(); + uint32_t target_index = write_one_reg ? base_index : base_index + m; + const char* reg_name = regs->GetRegisterName(target_index); + XETRACECP("[%.8X] %.8X -> %.4X %s", + args.ptr, + reg_data, target_index, reg_name ? reg_name : ""); + ADVANCE_PTR(1); + WriteRegister(packet_ptr, target_index, reg_data); + } + return 1 + count; + } + break; + case 0x01: + { + // Type-1 packet. + // Contains two registers of data. Type-0 should be more common. + XETRACECP("[%.8X] Packet(%.8X): set registers:", + packet_ptr, packet); + uint32_t reg_index_1 = packet & 0x7FF; + uint32_t reg_index_2 = (packet >> 11) & 0x7FF; + uint32_t reg_ptr_1 = args.ptr; + uint32_t reg_data_1 = READ_PTR(); + uint32_t reg_ptr_2 = args.ptr; + uint32_t reg_data_2 = READ_PTR(); + const char* reg_name_1 = regs->GetRegisterName(reg_index_1); + const char* reg_name_2 = regs->GetRegisterName(reg_index_2); + XETRACECP("[%.8X] %.8X -> %.4X %s", + reg_ptr_1, + reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : ""); + XETRACECP("[%.8X] %.8X -> %.4X %s", + reg_ptr_2, + reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : ""); + WriteRegister(packet_ptr, reg_index_1, reg_data_1); + WriteRegister(packet_ptr, reg_index_2, reg_data_2); + return 1 + 2; + } + break; + case 0x02: + // Type-2 packet. + // No-op. Do nothing. + XETRACECP("[%.8X] Packet(%.8X): padding", + packet_ptr, packet); + return 1; + case 0x03: + { + // Type-3 packet. + uint32_t count = ((packet >> 16) & 0x3FFF) + 1; + uint32_t opcode = (packet >> 8) & 0x7F; + // & 1 == predicate, maybe? + + switch (opcode) { + case PM4_ME_INIT: + // initialize CP's micro-engine + XETRACECP("[%.8X] Packet(%.8X): PM4_ME_INIT", + packet_ptr, packet); + LOG_DATA(count); + ADVANCE_PTR(count); + break; + + case PM4_NOP: + // skip N 32-bit words to get to the next packet + // No-op, ignore some data. + XETRACECP("[%.8X] Packet(%.8X): PM4_NOP", + packet_ptr, packet); + LOG_DATA(count); + ADVANCE_PTR(count); + break; + + case PM4_INTERRUPT: + // generate interrupt from the command stream + { + XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT", + packet_ptr, packet); + LOG_DATA(count); + uint32_t cpu_mask = READ_PTR(); + for (int n = 0; n < 6; n++) { + if (cpu_mask & (1 << n)) { + graphics_system_->DispatchInterruptCallback(1, n); + } + } + } + break; + + case PM4_INDIRECT_BUFFER: + // indirect buffer dispatch + { + uint32_t list_ptr = READ_PTR(); + uint32_t list_length = READ_PTR(); + XETRACECP("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", + packet_ptr, packet, list_ptr, list_length); + ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); + } + break; + + case PM4_WAIT_REG_MEM: + // wait until a register or memory location is a specific value + { + XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", + packet_ptr, packet); + LOG_DATA(count); + uint32_t wait_info = READ_PTR(); + uint32_t poll_reg_addr = READ_PTR(); + uint32_t ref = READ_PTR(); + uint32_t mask = READ_PTR(); + uint32_t wait = READ_PTR(); + bool matched = false; + do { + uint32_t value; + if (wait_info & 0x10) { + // Memory. + XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3); + poll_reg_addr &= ~0x3; + value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr)); + value = GpuSwap(value, endianness); + } else { + // Register. + XEASSERT(poll_reg_addr < RegisterFile::kRegisterCount); + + if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) { + // Waiting for coherency. We should have all the info we need + // now (base+size+mode), so kick it off. + MakeCoherent(); + } + + value = regs->values[poll_reg_addr].u32; + } + switch (wait_info & 0x7) { + case 0x0: // Never. + matched = false; + break; + case 0x1: // Less than reference. + matched = (value & mask) < ref; + break; + case 0x2: // Less than or equal to reference. + matched = (value & mask) <= ref; + break; + case 0x3: // Equal to reference. + matched = (value & mask) == ref; + break; + case 0x4: // Not equal to reference. + matched = (value & mask) != ref; + break; + case 0x5: // Greater than or equal to reference. + matched = (value & mask) >= ref; + break; + case 0x6: // Greater than reference. + matched = (value & mask) > ref; + break; + case 0x7: // Always + matched = true; + break; + } + if (!matched) { + // Wait. + if (wait >= 0x100) { + Sleep(wait / 0x100); + } else { + SwitchToThread(); + } + } + } while (!matched); + } + break; + + case PM4_REG_RMW: + // register read/modify/write + // ? (used during shader upload and edram setup) + { + XETRACECP("[%.8X] Packet(%.8X): PM4_REG_RMW", + packet_ptr, packet); + LOG_DATA(count); + uint32_t rmw_info = READ_PTR(); + uint32_t and_mask = READ_PTR(); + uint32_t or_mask = READ_PTR(); + uint32_t value = regs->values[rmw_info & 0x1FFF].u32; + if ((rmw_info >> 30) & 0x1) { + // | reg + value |= regs->values[or_mask & 0x1FFF].u32; + } else { + // | imm + value |= or_mask; + } + if ((rmw_info >> 31) & 0x1) { + // & reg + value &= regs->values[and_mask & 0x1FFF].u32; + } else { + // & imm + value &= and_mask; + } + WriteRegister(packet_ptr, rmw_info & 0x1FFF, value); + } + break; + + case PM4_COND_WRITE: + // conditional write to memory or register + { + XETRACECP("[%.8X] Packet(%.8X): PM4_COND_WRITE", + packet_ptr, packet); + LOG_DATA(count); + uint32_t wait_info = READ_PTR(); + uint32_t poll_reg_addr = READ_PTR(); + uint32_t ref = READ_PTR(); + uint32_t mask = READ_PTR(); + uint32_t write_reg_addr = READ_PTR(); + uint32_t write_data = READ_PTR(); + uint32_t value; + if (wait_info & 0x10) { + // Memory. + XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3); + poll_reg_addr &= ~0x3; + value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr)); + value = GpuSwap(value, endianness); + } else { + // Register. + XEASSERT(poll_reg_addr < RegisterFile::kRegisterCount); + value = regs->values[poll_reg_addr].u32; + } + bool matched = false; + switch (wait_info & 0x7) { + case 0x0: // Never. + matched = false; + break; + case 0x1: // Less than reference. + matched = (value & mask) < ref; + break; + case 0x2: // Less than or equal to reference. + matched = (value & mask) <= ref; + break; + case 0x3: // Equal to reference. + matched = (value & mask) == ref; + break; + case 0x4: // Not equal to reference. + matched = (value & mask) != ref; + break; + case 0x5: // Greater than or equal to reference. + matched = (value & mask) >= ref; + break; + case 0x6: // Greater than reference. + matched = (value & mask) > ref; + break; + case 0x7: // Always + matched = true; + break; + } + if (matched) { + // Write. + if (wait_info & 0x100) { + // Memory. + XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3); + write_reg_addr &= ~0x3; + write_data = GpuSwap(write_data, endianness); + XESETUINT32LE(p + GpuToCpu(packet_ptr, write_reg_addr), + write_data); + } else { + // Register. + WriteRegister(packet_ptr, write_reg_addr, write_data); + } + } + } + break; + + case PM4_EVENT_WRITE: + // generate an event that creates a write to memory when completed + { + XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)", + packet_ptr, packet); + LOG_DATA(count); + uint32_t initiator = READ_PTR(); + if (count == 1) { + // Just an event flag? Where does this write? + } else { + // Write to an address. + XEASSERTALWAYS(); + ADVANCE_PTR(count - 1); + } + } + break; + case PM4_EVENT_WRITE_SHD: + // generate a VS|PS_done event + { + XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", + packet_ptr, packet); + LOG_DATA(count); + uint32_t initiator = READ_PTR(); + uint32_t address = READ_PTR(); + uint32_t value = READ_PTR(); + // Writeback initiator. + WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR, + initiator & 0x1F); + uint32_t data_value; + if ((initiator >> 31) & 0x1) { + // Write counter (GPU vblank counter?). + data_value = counter_; + } else { + // Write value. + data_value = value; + } + XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3); + address &= ~0x3; + data_value = GpuSwap(data_value, endianness); + XESETUINT32LE(p + GpuToCpu(address), data_value); + } + break; + + case PM4_DRAW_INDX: + // initiate fetch of index buffer and draw + { + XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX", + packet_ptr, packet); + LOG_DATA(count); + // d0 = viz query info + uint32_t d0 = READ_PTR(); + uint32_t d1 = READ_PTR(); + uint32_t index_count = d1 >> 16; + uint32_t prim_type = d1 & 0x3F; + uint32_t src_sel = (d1 >> 6) & 0x3; + if (!driver_->PrepareDraw(draw_command_)) { + draw_command_.prim_type = (XE_GPU_PRIMITIVE_TYPE)prim_type; + draw_command_.start_index = 0; + draw_command_.index_count = index_count; + draw_command_.base_vertex = 0; + if (src_sel == 0x0) { + // Indexed draw. + // TODO(benvanik): detect subregions of larger index buffers! + uint32_t index_base = READ_PTR(); + uint32_t index_size = READ_PTR(); + uint32_t endianness = index_size >> 29; + index_size &= 0x00FFFFFF; + bool index_32bit = (d1 >> 11) & 0x1; + index_size *= index_32bit ? 4 : 2; + driver_->PrepareDrawIndexBuffer( + draw_command_, + index_base, index_size, + (XE_GPU_ENDIAN)endianness, + index_32bit ? INDEX_FORMAT_32BIT : INDEX_FORMAT_16BIT); + } else if (src_sel == 0x2) { + // Auto draw. + draw_command_.index_buffer = nullptr; + } else { + // Unknown source select. + XEASSERTALWAYS(); + } + driver_->Draw(draw_command_); + } else { + if (src_sel == 0x0) { + ADVANCE_PTR(2); // skip + } + } + } + break; + case PM4_DRAW_INDX_2: + // draw using supplied indices in packet + { + XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2", + packet_ptr, packet); + LOG_DATA(count); + uint32_t d0 = READ_PTR(); + uint32_t index_count = d0 >> 16; + uint32_t prim_type = d0 & 0x3F; + uint32_t src_sel = (d0 >> 6) & 0x3; + XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex' + if (!driver_->PrepareDraw(draw_command_)) { + draw_command_.prim_type = (XE_GPU_PRIMITIVE_TYPE)prim_type; + draw_command_.start_index = 0; + draw_command_.index_count = index_count; + draw_command_.base_vertex = 0; + draw_command_.index_buffer = nullptr; + driver_->Draw(draw_command_); + } + } + break; + + case PM4_SET_CONSTANT: + // load constant into chip and to memory + { + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_CONSTANT", + packet_ptr, packet); + // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) + // reg - 0x2000 + uint32_t offset_type = READ_PTR(); + uint32_t index = offset_type & 0x7FF; + uint32_t type = (offset_type >> 16) & 0xFF; + switch (type) { + case 0x4: // REGISTER + index += 0x2000; // registers + for (uint32_t n = 0; n < count - 1; n++, index++) { + uint32_t data = READ_PTR(); + const char* reg_name = regs->GetRegisterName(index); + XETRACECP("[%.8X] %.8X -> %.4X %s", + packet_ptr + (1 + n) * 4, + data, index, reg_name ? reg_name : ""); + WriteRegister(packet_ptr, index, data); + } + break; + default: + XEASSERTALWAYS(); + break; + } + } + break; + case PM4_LOAD_ALU_CONSTANT: + // load constants from memory + { + XETRACECP("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT", + packet_ptr, packet); + uint32_t address = READ_PTR(); + address &= 0x3FFFFFFF; + uint32_t offset_type = READ_PTR(); + uint32_t index = offset_type & 0x7FF; + uint32_t size = READ_PTR(); + size &= 0xFFF; + index += 0x4000; // alu constants + for (uint32_t n = 0; n < size; n++, index++) { + uint32_t data = XEGETUINT32BE( + p + GpuToCpu(packet_ptr, address + n * 4)); + const char* reg_name = regs->GetRegisterName(index); + XETRACECP("[%.8X] %.8X -> %.4X %s", + packet_ptr, + data, index, reg_name ? reg_name : ""); + WriteRegister(packet_ptr, index, data); + } + } + break; + + case PM4_IM_LOAD: + // load sequencer instruction memory (pointer-based) + { + XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD", + packet_ptr, packet); + LOG_DATA(count); + uint32_t addr_type = READ_PTR(); + uint32_t type = addr_type & 0x3; + uint32_t addr = addr_type & ~0x3; + uint32_t start_size = READ_PTR(); + uint32_t start = start_size >> 16; + uint32_t size = start_size & 0xFFFF; // dwords + XEASSERT(start == 0); + driver_->LoadShader((XE_GPU_SHADER_TYPE)type, + GpuToCpu(packet_ptr, addr), size * 4, start); + } + break; + case PM4_IM_LOAD_IMMEDIATE: + // load sequencer instruction memory (code embedded in packet) + { + XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", + packet_ptr, packet); + LOG_DATA(count); + uint32_t type = READ_PTR(); + uint32_t start_size = READ_PTR(); + uint32_t start = start_size >> 16; + uint32_t size = start_size & 0xFFFF; // dwords + XEASSERT(start == 0); + // TODO(benvanik): figure out if this could wrap. + XEASSERT(args.ptr + size * 4 < args.max_address); + driver_->LoadShader((XE_GPU_SHADER_TYPE)type, + args.ptr, size * 4, start); + ADVANCE_PTR(size); + } + break; + + case PM4_INVALIDATE_STATE: + // selective invalidation of state pointers + { + XETRACECP("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE", + packet_ptr, packet); + LOG_DATA(count); + uint32_t mask = READ_PTR(); + //driver_->InvalidateState(mask); + } + break; + + case PM4_SET_BIN_MASK_LO: + { + uint32_t value = READ_PTR(); + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X", + packet_ptr, packet, value); + } + break; + case PM4_SET_BIN_MASK_HI: + { + uint32_t value = READ_PTR(); + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X", + packet_ptr, packet, value); + } + break; + case PM4_SET_BIN_SELECT_LO: + { + uint32_t value = READ_PTR(); + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X", + packet_ptr, packet, value); + } + break; + case PM4_SET_BIN_SELECT_HI: + { + uint32_t value = READ_PTR(); + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X", + packet_ptr, packet, value); + } + break; + + // Ignored packets - useful if breaking on the default handler below. + case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000 + XETRACECP("[%.8X] Packet(%.8X): unknown!", + packet_ptr, packet); + LOG_DATA(count); + ADVANCE_PTR(count); + break; + + default: + XETRACECP("[%.8X] Packet(%.8X): unknown!", + packet_ptr, packet); + LOG_DATA(count); + ADVANCE_PTR(count); + break; + } + + return 1 + count; + } + break; + } + + return 0; +} + +void CommandProcessor::WriteRegister( + uint32_t packet_ptr, uint32_t index, uint32_t value) { + RegisterFile* regs = driver_->register_file(); + XEASSERT(index < RegisterFile::kRegisterCount); + regs->values[index].u32 = value; + + // If this is a COHER register, set the dirty flag. + // This will block the command processor the next time it WAIT_MEM_REGs and + // allow us to synchronize the memory. + if (index == XE_GPU_REG_COHER_STATUS_HOST) { + regs->values[index].u32 |= 0x80000000ul; + } + + // Scratch register writeback. + if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { + uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; + if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) { + // Enabled - write to address. + uint8_t* p = memory_->membase(); + uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32; + uint32_t mem_addr = scratch_addr + (scratch_reg * 4); + XESETUINT32BE(p + GpuToCpu(primary_buffer_ptr_, mem_addr), value); + } + } +} + +void CommandProcessor::MakeCoherent() { + RegisterFile* regs = driver_->register_file(); + auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; + auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; + auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; + + // Status host often has 0x01000000 or 0x03000000. + // This is likely toggling VC (vertex cache) or TC (texture cache). + // Or, it also has a direction in here maybe - there is probably + // some way to check for dest coherency (what all the COHER_DEST_BASE_* + // registers are for). + + // TODO(benvanik): notify resource cache of base->size and type. + XETRACECP("Make %.8X -> %.8X (%db) coherent", + base_host, base_host + size_host, size_host); + driver_->resource_cache()->SyncRange(base_host, size_host); + + // Mark coherent. + status_host &= ~0x80000000ul; + regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host; +} diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 65d5dfc71..ba081aefb 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -11,15 +11,70 @@ #define XENIA_GPU_COMMAND_PROCESSOR_H_ #include +#include +#include #include namespace xe { namespace gpu { +class GraphicsDriver; +class GraphicsSystem; + class CommandProcessor { public: + CommandProcessor(GraphicsSystem* graphics_system, Memory* memory); + virtual ~CommandProcessor(); + + Memory* memory() const { return memory_; } + + uint64_t QueryTime(); + uint32_t counter() const { return counter_; } + void increment_counter() { counter_++; } + + void Initialize(GraphicsDriver* driver, uint32_t ptr, uint32_t page_count); + void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); + + void UpdateWritePointer(uint32_t value); + + void Pump(); + +private: + typedef struct { + uint32_t ptr; + uint32_t base_ptr; + uint32_t max_address; + uint32_t ptr_mask; + } PacketArgs; + + void AdvancePtr(PacketArgs& args, uint32_t n); + void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index); + void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length); + uint32_t ExecutePacket(PacketArgs& args); + void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value); + void MakeCoherent(); + + Memory* memory_; + GraphicsSystem* graphics_system_; + GraphicsDriver* driver_; + + uint64_t time_base_; + uint32_t counter_; + + uint32_t primary_buffer_ptr_; + uint32_t primary_buffer_size_; + + uint32_t read_ptr_index_; + uint32_t read_ptr_update_freq_; + uint32_t read_ptr_writeback_ptr_; + + HANDLE write_ptr_index_event_; + volatile uint32_t write_ptr_index_; + volatile uint32_t write_ptr_max_index_; + + DrawCommand draw_command_; }; diff --git a/src/xenia/gpu/d3d11/d3d11_buffer.cc b/src/xenia/gpu/d3d11/d3d11_buffer.cc deleted file mode 100644 index 84c0d901e..000000000 --- a/src/xenia/gpu/d3d11/d3d11_buffer.cc +++ /dev/null @@ -1,150 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::d3d11; -using namespace xe::gpu::xenos; - - -D3D11IndexBuffer::D3D11IndexBuffer( - D3D11BufferCache* buffer_cache, - const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length) - : IndexBuffer(info, src_ptr, length), - buffer_cache_(buffer_cache), - handle_(nullptr) { -} - -D3D11IndexBuffer::~D3D11IndexBuffer() { - XESAFERELEASE(handle_); -} - -bool D3D11IndexBuffer::FetchNew(uint64_t hash) { - hash_ = hash; - - D3D11_BUFFER_DESC buffer_desc; - xe_zero_struct(&buffer_desc, sizeof(buffer_desc)); - buffer_desc.ByteWidth = info_.index_size; - buffer_desc.Usage = D3D11_USAGE_DYNAMIC; - buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER; - buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - HRESULT hr = buffer_cache_->device()->CreateBuffer(&buffer_desc, NULL, &handle_); - if (FAILED(hr)) { - XELOGW("D3D11: failed to create index buffer"); - return false; - } - - return FetchDirty(hash); -} - -bool D3D11IndexBuffer::FetchDirty(uint64_t hash) { - hash_ = hash; - - // All that's done so far: - XEASSERT(info_.endianness == 0x2); - - D3D11_MAPPED_SUBRESOURCE res; - HRESULT hr = buffer_cache_->context()->Map( - handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); - if (FAILED(hr)) { - XELOGE("D3D11: unable to map index buffer"); - return false; - } - - if (info_.index_32bit) { - const uint32_t* src = reinterpret_cast(src_); - uint32_t* dest = reinterpret_cast(res.pData); - for (uint32_t n = 0; n < info_.index_count; n++) { - uint32_t d = { XESWAP32(src[n]) }; - dest[n] = d; - } - } else { - const uint16_t* src = reinterpret_cast(src_); - uint16_t* dest = reinterpret_cast(res.pData); - for (uint32_t n = 0; n < info_.index_count; n++) { - uint16_t d = XESWAP16(src[n]); - dest[n] = d; - } - } - buffer_cache_->context()->Unmap(handle_, 0); - - return true; -} - - -D3D11VertexBuffer::D3D11VertexBuffer( - D3D11BufferCache* buffer_cache, - const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length) - : VertexBuffer(info, src_ptr, length), - buffer_cache_(buffer_cache), - handle_(nullptr) { -} - -D3D11VertexBuffer::~D3D11VertexBuffer() { - XESAFERELEASE(handle_); -} - -bool D3D11VertexBuffer::FetchNew(uint64_t hash) { - hash_ = hash; - - D3D11_BUFFER_DESC buffer_desc; - xe_zero_struct(&buffer_desc, sizeof(buffer_desc)); - buffer_desc.ByteWidth = static_cast(length_); - buffer_desc.Usage = D3D11_USAGE_DYNAMIC; - buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; - buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - HRESULT hr = buffer_cache_->device()->CreateBuffer(&buffer_desc, NULL, &handle_); - if (FAILED(hr)) { - XELOGW("D3D11: failed to create index buffer"); - return false; - } - - return FetchDirty(hash); -} - -bool D3D11VertexBuffer::FetchDirty(uint64_t hash) { - hash_ = hash; - - D3D11_MAPPED_SUBRESOURCE res; - HRESULT hr = buffer_cache_->context()->Map( - handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); - if (FAILED(hr)) { - XELOGE("D3D11: unable to map vertex buffer"); - return false; - } - uint8_t* dest = reinterpret_cast(res.pData); - - // TODO(benvanik): rewrite to be faster/special case common/etc - uint32_t stride = info_.layout.stride_words; - size_t count = (length_ / 4) / stride; - for (size_t n = 0; n < info_.layout.element_count; n++) { - const auto& el = info_.layout.elements[n]; - const uint32_t* src_ptr = (const uint32_t*)(src_ + el.offset_words * 4); - uint32_t* dest_ptr = (uint32_t*)(dest + el.offset_words * 4); - uint32_t o = 0; - for (uint32_t i = 0; i < count; i++) { - for (uint32_t j = 0; j < el.size_words; j++) { - dest_ptr[o + j] = XESWAP32(src_ptr[o + j]); - } - o += stride; - } - } - - - buffer_cache_->context()->Unmap(handle_, 0); - return true; -} diff --git a/src/xenia/gpu/d3d11/d3d11_buffer.h b/src/xenia/gpu/d3d11/d3d11_buffer.h deleted file mode 100644 index 924fb3da4..000000000 --- a/src/xenia/gpu/d3d11/d3d11_buffer.h +++ /dev/null @@ -1,69 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_H_ -#define XENIA_GPU_D3D11_D3D11_BUFFER_H_ - -#include - -#include -#include - -#include - - -namespace xe { -namespace gpu { -namespace d3d11 { - -class D3D11BufferCache; - - -class D3D11IndexBuffer : public IndexBuffer { -public: - D3D11IndexBuffer(D3D11BufferCache* buffer_cache, - const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length); - virtual ~D3D11IndexBuffer(); - - ID3D11Buffer* handle() const { return handle_; } - - bool FetchNew(uint64_t hash) override; - bool FetchDirty(uint64_t hash) override; - -private: - D3D11BufferCache* buffer_cache_; - ID3D11Buffer* handle_; -}; - - -class D3D11VertexBuffer : public VertexBuffer { -public: - D3D11VertexBuffer(D3D11BufferCache* buffer_cache, - const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length); - virtual ~D3D11VertexBuffer(); - - ID3D11Buffer* handle() const { return handle_; } - - bool FetchNew(uint64_t hash) override; - bool FetchDirty(uint64_t hash) override; - -private: - D3D11BufferCache* buffer_cache_; - ID3D11Buffer* handle_; -}; - - -} // namespace d3d11 -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_D3D11_D3D11_BUFFER_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc b/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc deleted file mode 100644 index 48eb8fbf8..000000000 --- a/src/xenia/gpu/d3d11/d3d11_buffer_cache.cc +++ /dev/null @@ -1,44 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::d3d11; -using namespace xe::gpu::xenos; - - -D3D11BufferCache::D3D11BufferCache(ID3D11DeviceContext* context, - ID3D11Device* device) - : context_(context), device_(device) { - context->AddRef(); - device_->AddRef(); -} - -D3D11BufferCache::~D3D11BufferCache() { - XESAFERELEASE(device_); - XESAFERELEASE(context_); -} - -IndexBuffer* D3D11BufferCache::CreateIndexBuffer( - const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length) { - return new D3D11IndexBuffer(this, info, src_ptr, length); -} - -VertexBuffer* D3D11BufferCache::CreateVertexBuffer( - const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length) { - return new D3D11VertexBuffer(this, info, src_ptr, length); -} diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_cache.h b/src/xenia/gpu/d3d11/d3d11_buffer_cache.h deleted file mode 100644 index 284536ab7..000000000 --- a/src/xenia/gpu/d3d11/d3d11_buffer_cache.h +++ /dev/null @@ -1,53 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_ -#define XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_ - -#include - -#include -#include - -#include - - -namespace xe { -namespace gpu { -namespace d3d11 { - - -class D3D11BufferCache : public BufferCache { -public: - D3D11BufferCache(ID3D11DeviceContext* context, ID3D11Device* device); - virtual ~D3D11BufferCache(); - - ID3D11DeviceContext* context() const { return context_; } - ID3D11Device* device() const { return device_; } - -protected: - IndexBuffer* CreateIndexBuffer( - const IndexBufferInfo& info, - const uint8_t* src_ptr, size_t length) override; - VertexBuffer* CreateVertexBuffer( - const VertexBufferInfo& info, - const uint8_t* src_ptr, size_t length) override; - -protected: - ID3D11DeviceContext* context_; - ID3D11Device* device_; -}; - - -} // namespace d3d11 -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_resource.cc b/src/xenia/gpu/d3d11/d3d11_buffer_resource.cc new file mode 100644 index 000000000..8f03cfe58 --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_buffer_resource.cc @@ -0,0 +1,149 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include + + +using namespace xe; +using namespace xe::gpu; +using namespace xe::gpu::d3d11; +using namespace xe::gpu::xenos; + + +D3D11IndexBufferResource::D3D11IndexBufferResource( + D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info) + : IndexBufferResource(memory_range, info), + resource_cache_(resource_cache), + handle_(nullptr) { +} + +D3D11IndexBufferResource::~D3D11IndexBufferResource() { + XESAFERELEASE(handle_); +} + +int D3D11IndexBufferResource::CreateHandle() { + D3D11_BUFFER_DESC buffer_desc; + xe_zero_struct(&buffer_desc, sizeof(buffer_desc)); + buffer_desc.ByteWidth = static_cast(memory_range_.length); + buffer_desc.Usage = D3D11_USAGE_DYNAMIC; + buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + HRESULT hr = resource_cache_->device()->CreateBuffer( + &buffer_desc, nullptr, &handle_); + if (FAILED(hr)) { + XELOGW("D3D11: failed to create index buffer"); + return 1; + } + return 0; +} + +int D3D11IndexBufferResource::InvalidateRegion( + const MemoryRange& memory_range) { + SCOPE_profile_cpu_f("gpu"); + + // All that's done so far: + XEASSERT(info_.endianness == 0x2); + + D3D11_MAPPED_SUBRESOURCE res; + HRESULT hr = resource_cache_->context()->Map( + handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); + if (FAILED(hr)) { + XELOGE("D3D11: unable to map index buffer"); + return 1; + } + + if (info_.format == INDEX_FORMAT_32BIT) { + uint32_t index_count = memory_range_.length / 4; + const uint32_t* src = reinterpret_cast( + memory_range_.host_base); + uint32_t* dest = reinterpret_cast(res.pData); + for (uint32_t n = 0; n < index_count; n++) { + dest[n] = XESWAP32(src[n]); + } + } else { + uint32_t index_count = memory_range_.length / 2; + const uint16_t* src = reinterpret_cast( + memory_range_.host_base); + uint16_t* dest = reinterpret_cast(res.pData); + for (uint32_t n = 0; n < index_count; n++) { + dest[n] = XESWAP16(src[n]); + } + } + resource_cache_->context()->Unmap(handle_, 0); + + return 0; +} + +D3D11VertexBufferResource::D3D11VertexBufferResource( + D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info) + : VertexBufferResource(memory_range, info), + resource_cache_(resource_cache), + handle_(nullptr) { +} + +D3D11VertexBufferResource::~D3D11VertexBufferResource() { + XESAFERELEASE(handle_); +} + +int D3D11VertexBufferResource::CreateHandle() { + D3D11_BUFFER_DESC buffer_desc; + xe_zero_struct(&buffer_desc, sizeof(buffer_desc)); + buffer_desc.ByteWidth = static_cast(memory_range_.length); + buffer_desc.Usage = D3D11_USAGE_DYNAMIC; + buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + HRESULT hr = resource_cache_->device()->CreateBuffer( + &buffer_desc, nullptr, &handle_); + if (FAILED(hr)) { + XELOGW("D3D11: failed to create vertex buffer"); + return 1; + } + return 0; +} + +int D3D11VertexBufferResource::InvalidateRegion( + const MemoryRange& memory_range) { + SCOPE_profile_cpu_f("gpu"); + + D3D11_MAPPED_SUBRESOURCE res; + HRESULT hr = resource_cache_->context()->Map( + handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); + if (FAILED(hr)) { + XELOGE("D3D11: unable to map vertex buffer"); + return 1; + } + uint8_t* dest = reinterpret_cast(res.pData); + + // TODO(benvanik): rewrite to be faster/special case common/etc + uint32_t stride = info_.stride_words; + size_t count = (memory_range_.length / 4) / stride; + for (size_t n = 0; n < info_.element_count; n++) { + const auto& el = info_.elements[n]; + const uint32_t* src_ptr = (const uint32_t*)( + memory_range_.host_base + el.offset_words * 4); + uint32_t* dest_ptr = (uint32_t*)(dest + el.offset_words * 4); + uint32_t o = 0; + for (uint32_t i = 0; i < count; i++) { + for (uint32_t j = 0; j < el.size_words; j++) { + dest_ptr[o + j] = XESWAP32(src_ptr[o + j]); + } + o += stride; + } + } + + resource_cache_->context()->Unmap(handle_, 0); + return 0; +} diff --git a/src/xenia/gpu/d3d11/d3d11_buffer_resource.h b/src/xenia/gpu/d3d11/d3d11_buffer_resource.h new file mode 100644 index 000000000..2e8071ae1 --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_buffer_resource.h @@ -0,0 +1,69 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_ +#define XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_ + +#include +#include + +#include + + +namespace xe { +namespace gpu { +namespace d3d11 { + +class D3D11ResourceCache; + + +class D3D11IndexBufferResource : public IndexBufferResource { +public: + D3D11IndexBufferResource(D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info); + ~D3D11IndexBufferResource() override; + + void* handle() const override { return handle_; } + +protected: + int CreateHandle() override; + int InvalidateRegion(const MemoryRange& memory_range) override; + +private: + D3D11ResourceCache* resource_cache_; + ID3D11Buffer* handle_; +}; + + +class D3D11VertexBufferResource : public VertexBufferResource { +public: + D3D11VertexBufferResource(D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info); + ~D3D11VertexBufferResource() override; + + void* handle() const override { return handle_; } + +protected: + int CreateHandle() override; + int InvalidateRegion(const MemoryRange& memory_range) override; + +private: + D3D11ResourceCache* resource_cache_; + ID3D11Buffer* handle_; +}; + + +} // namespace d3d11 +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc index ba677f7a0..d8660cbfe 100644 --- a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc +++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc @@ -10,7 +10,8 @@ #include #include -#include +#include +#include #include #include @@ -22,8 +23,8 @@ using namespace xe::gpu::d3d11; using namespace xe::gpu::xenos; -D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device, uint64_t hash) : - hash_(hash), handle_(NULL) { +D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device) + : handle_(nullptr) { device_ = device; device_->AddRef(); } @@ -33,7 +34,7 @@ D3D11GeometryShader::~D3D11GeometryShader() { XESAFERELEASE(device_); } -int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) { +int D3D11GeometryShader::Prepare(D3D11VertexShaderResource* vertex_shader) { SCOPE_profile_cpu_f("gpu"); if (handle_) { @@ -94,11 +95,12 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) { if (FLAGS_dump_shaders.size()) { base_path = FLAGS_dump_shaders.c_str(); } + uint64_t hash = xe_hash64(shader_source, xestrlena(shader_source)); // ? char file_name[XE_MAX_PATH]; xesnprintfa(file_name, XECOUNT(file_name), "%s/gen_%.16llX.gs", base_path, - hash_); + hash); if (FLAGS_dump_shaders.size()) { FILE* f = fopen(file_name, "w"); @@ -128,7 +130,7 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) { return shader_blob; } -int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader, +int D3D11GeometryShader::Generate(D3D11VertexShaderResource* vertex_shader, alloy::StringBuffer* output) { output->Append( "struct VERTEX {\n" @@ -138,7 +140,7 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader, // TODO(benvanik): only add used ones? output->Append( " float4 o[%d] : XE_O;\n", - D3D11Shader::MAX_INTERPOLATORS); + D3D11ShaderTranslator::kMaxInterpolators); } if (alloc_counts.point_size) { output->Append( @@ -156,15 +158,15 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader, D3D11PointSpriteGeometryShader::D3D11PointSpriteGeometryShader( - ID3D11Device* device, uint64_t hash) : - D3D11GeometryShader(device, hash) { + ID3D11Device* device) : D3D11GeometryShader(device) { } D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() { } -int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader, - alloy::StringBuffer* output) { +int D3D11PointSpriteGeometryShader::Generate( + D3D11VertexShaderResource* vertex_shader, + alloy::StringBuffer* output) { SCOPE_profile_cpu_f("gpu"); if (D3D11GeometryShader::Generate(vertex_shader, output)) { return 1; @@ -211,15 +213,15 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader, D3D11RectListGeometryShader::D3D11RectListGeometryShader( - ID3D11Device* device, uint64_t hash) : - D3D11GeometryShader(device, hash) { + ID3D11Device* device) : D3D11GeometryShader(device) { } D3D11RectListGeometryShader::~D3D11RectListGeometryShader() { } -int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader, - alloy::StringBuffer* output) { +int D3D11RectListGeometryShader::Generate( + D3D11VertexShaderResource* vertex_shader, + alloy::StringBuffer* output) { SCOPE_profile_cpu_f("gpu"); if (D3D11GeometryShader::Generate(vertex_shader, output)) { return 1; @@ -256,15 +258,15 @@ int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader, D3D11QuadListGeometryShader::D3D11QuadListGeometryShader( - ID3D11Device* device, uint64_t hash) : - D3D11GeometryShader(device, hash) { + ID3D11Device* device) : D3D11GeometryShader(device) { } D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() { } -int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader, - alloy::StringBuffer* output) { +int D3D11QuadListGeometryShader::Generate( + D3D11VertexShaderResource* vertex_shader, + alloy::StringBuffer* output) { SCOPE_profile_cpu_f("gpu"); if (D3D11GeometryShader::Generate(vertex_shader, output)) { return 1; diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.h b/src/xenia/gpu/d3d11/d3d11_geometry_shader.h index cdfebad5f..89529b2a4 100644 --- a/src/xenia/gpu/d3d11/d3d11_geometry_shader.h +++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.h @@ -21,7 +21,7 @@ namespace xe { namespace gpu { namespace d3d11 { -class D3D11VertexShader; +class D3D11VertexShaderResource; class D3D11GeometryShader { @@ -30,53 +30,52 @@ public: ID3D11GeometryShader* handle() const { return handle_; } - int Prepare(D3D11VertexShader* vertex_shader); + int Prepare(D3D11VertexShaderResource* vertex_shader); protected: - D3D11GeometryShader(ID3D11Device* device, uint64_t hash); + D3D11GeometryShader(ID3D11Device* device); ID3D10Blob* Compile(const char* shader_source); - virtual int Generate(D3D11VertexShader* vertex_shader, + virtual int Generate(D3D11VertexShaderResource* vertex_shader, alloy::StringBuffer* output); protected: ID3D11Device* device_; - uint64_t hash_; ID3D11GeometryShader* handle_; }; class D3D11PointSpriteGeometryShader : public D3D11GeometryShader { public: - D3D11PointSpriteGeometryShader(ID3D11Device* device, uint64_t hash); - virtual ~D3D11PointSpriteGeometryShader(); + D3D11PointSpriteGeometryShader(ID3D11Device* device); + ~D3D11PointSpriteGeometryShader() override; protected: - virtual int Generate(D3D11VertexShader* vertex_shader, - alloy::StringBuffer* output); + int Generate(D3D11VertexShaderResource* vertex_shader, + alloy::StringBuffer* output) override; }; class D3D11RectListGeometryShader : public D3D11GeometryShader { public: - D3D11RectListGeometryShader(ID3D11Device* device, uint64_t hash); - virtual ~D3D11RectListGeometryShader(); + D3D11RectListGeometryShader(ID3D11Device* device); + ~D3D11RectListGeometryShader() override; protected: - virtual int Generate(D3D11VertexShader* vertex_shader, - alloy::StringBuffer* output); + int Generate(D3D11VertexShaderResource* vertex_shader, + alloy::StringBuffer* output) override; }; class D3D11QuadListGeometryShader : public D3D11GeometryShader { public: - D3D11QuadListGeometryShader(ID3D11Device* device, uint64_t hash); - virtual ~D3D11QuadListGeometryShader(); + D3D11QuadListGeometryShader(ID3D11Device* device); + ~D3D11QuadListGeometryShader() override; protected: - virtual int Generate(D3D11VertexShader* vertex_shader, - alloy::StringBuffer* output); + int Generate(D3D11VertexShaderResource* vertex_shader, + alloy::StringBuffer* output) override; }; diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc index 886643e32..a671b4626 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc @@ -10,13 +10,12 @@ #include #include -#include -#include +#include +#include +#include #include -#include -#include -#include -#include +#include + using namespace xe; using namespace xe::gpu; @@ -35,9 +34,8 @@ D3D11GraphicsDriver::D3D11GraphicsDriver( device_ = device; device_->AddRef(); device_->GetImmediateContext(&context_); - buffer_cache_ = new D3D11BufferCache(context_, device_); - shader_cache_ = new D3D11ShaderCache(device_); - texture_cache_ = new D3D11TextureCache(memory_, context_, device_); + + resource_cache_ = new D3D11ResourceCache(memory, device_, context_); xe_zero_struct(&state_, sizeof(state_)); @@ -64,7 +62,29 @@ D3D11GraphicsDriver::D3D11GraphicsDriver( buffer_desc.ByteWidth = (32) * sizeof(int); hr = device_->CreateBuffer( &buffer_desc, NULL, &state_.constant_buffers.gs_consts); +} +D3D11GraphicsDriver::~D3D11GraphicsDriver() { + RebuildRenderTargets(0, 0); + XESAFERELEASE(state_.constant_buffers.float_constants); + XESAFERELEASE(state_.constant_buffers.bool_constants); + XESAFERELEASE(state_.constant_buffers.loop_constants); + XESAFERELEASE(state_.constant_buffers.vs_consts); + XESAFERELEASE(state_.constant_buffers.gs_consts); + XESAFERELEASE(invalid_texture_view_); + XESAFERELEASE(invalid_texture_sampler_state_); + delete resource_cache_; + XESAFERELEASE(context_); + XESAFERELEASE(device_); + XESAFERELEASE(swap_chain_); +} + +int D3D11GraphicsDriver::Initialize() { + InitializeInvalidTexture(); + return 0; +} + +void D3D11GraphicsDriver::InitializeInvalidTexture() { // TODO(benvanik): pattern? D3D11_TEXTURE2D_DESC texture_desc; xe_zero_struct(&texture_desc, sizeof(texture_desc)); @@ -90,7 +110,7 @@ D3D11GraphicsDriver::D3D11GraphicsDriver( initial_data.SysMemSlicePitch = 0; initial_data.pSysMem = texture_data; ID3D11Texture2D* texture = NULL; - hr = device_->CreateTexture2D( + HRESULT hr = device_->CreateTexture2D( &texture_desc, &initial_data, (ID3D11Texture2D**)&texture); if (FAILED(hr)) { XEFATAL("D3D11: unable to create invalid texture"); @@ -130,315 +150,53 @@ D3D11GraphicsDriver::D3D11GraphicsDriver( } } -D3D11GraphicsDriver::~D3D11GraphicsDriver() { - RebuildRenderTargets(0, 0); - XESAFERELEASE(state_.constant_buffers.float_constants); - XESAFERELEASE(state_.constant_buffers.bool_constants); - XESAFERELEASE(state_.constant_buffers.loop_constants); - XESAFERELEASE(state_.constant_buffers.vs_consts); - XESAFERELEASE(state_.constant_buffers.gs_consts); - XESAFERELEASE(invalid_texture_view_); - XESAFERELEASE(invalid_texture_sampler_state_); - delete buffer_cache_; - delete texture_cache_; - delete shader_cache_; - XESAFERELEASE(context_); - XESAFERELEASE(device_); - XESAFERELEASE(swap_chain_); -} - -void D3D11GraphicsDriver::Initialize() { -} - -void D3D11GraphicsDriver::InvalidateState( - uint32_t mask) { - if (mask == XE_GPU_INVALIDATE_MASK_ALL) { - XETRACED3D("D3D11: (invalidate all)"); - } - if (mask & XE_GPU_INVALIDATE_MASK_VERTEX_SHADER) { - XETRACED3D("D3D11: invalidate vertex shader"); - } - if (mask & XE_GPU_INVALIDATE_MASK_PIXEL_SHADER) { - XETRACED3D("D3D11: invalidate pixel shader"); - } -} - -void D3D11GraphicsDriver::SetShader( - XE_GPU_SHADER_TYPE type, - uint32_t address, - uint32_t start, - uint32_t length) { - // Find or create shader in the cache. - uint8_t* p = memory_->Translate(address); - Shader* shader = shader_cache_->FindOrCreate( - type, p, length); - - if (!shader->is_prepared()) { - // Disassemble. - const char* source = shader->disasm_src(); - if (!source) { - source = ""; - } - XETRACED3D("D3D11: set shader %d at %0.8X (%db):\n%s", - type, address, length, source); - } - - // Stash for later. - switch (type) { - case XE_GPU_SHADER_TYPE_VERTEX: - state_.vertex_shader = (D3D11VertexShader*)shader; - break; - case XE_GPU_SHADER_TYPE_PIXEL: - state_.pixel_shader = (D3D11PixelShader*)shader; - break; - } -} - -int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) { +int D3D11GraphicsDriver::Draw(const DrawCommand& command) { SCOPE_profile_cpu_f("gpu"); - RegisterFile& rf = register_file_; - - // Ignore copies. - uint32_t enable_mode = rf.values[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7; - if (enable_mode != 4) { - XELOGW("D3D11: ignoring draw with enable mode %d", enable_mode); - return 1; - } - - uint32_t state_overrides = 0; - if (prim_type == XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST) { - // Rect lists aren't culled. There may be other things they skip too. - state_overrides |= STATE_OVERRIDE_DISABLE_CULLING; - } - // Misc state. - if (UpdateState(state_overrides)) { + if (UpdateState(command)) { return 1; } // Build constant buffers. - if (UpdateConstantBuffers()) { + if (SetupConstantBuffers(command)) { return 1; } // Bind shaders. - if (BindShaders()) { + if (SetupShaders(command)) { return 1; } - // Switch primitive topology. - // Some are unsupported on D3D11 and must be emulated. - D3D11_PRIMITIVE_TOPOLOGY primitive_topology; - D3D11GeometryShader* geometry_shader = NULL; - switch (prim_type) { - case XE_GPU_PRIMITIVE_TYPE_POINT_LIST: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; - if (state_.vertex_shader) { - if (state_.vertex_shader->DemandGeometryShader( - D3D11VertexShader::POINT_SPRITE_SHADER, &geometry_shader)) { - return 1; - } - } - break; - case XE_GPU_PRIMITIVE_TYPE_LINE_LIST: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; - break; - case XE_GPU_PRIMITIVE_TYPE_LINE_STRIP: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; - break; - case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_LIST: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - break; - case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_STRIP: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; - break; - case XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; - if (state_.vertex_shader) { - if (state_.vertex_shader->DemandGeometryShader( - D3D11VertexShader::RECT_LIST_SHADER, &geometry_shader)) { - return 1; - } - } - break; - case XE_GPU_PRIMITIVE_TYPE_QUAD_LIST: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; - if (state_.vertex_shader) { - if (state_.vertex_shader->DemandGeometryShader( - D3D11VertexShader::QUAD_LIST_SHADER, &geometry_shader)) { - return 1; - } - } - break; - default: - case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_FAN: - case XE_GPU_PRIMITIVE_TYPE_UNKNOWN_07: - case XE_GPU_PRIMITIVE_TYPE_LINE_LOOP: - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; - XELOGE("D3D11: unsupported primitive type %d", prim_type); - break; + // Bind vertex buffers/index buffer. + if (SetupInputAssembly(command)) { + return 1; } - context_->IASetPrimitiveTopology(primitive_topology); - if (geometry_shader) { - context_->GSSetShader(geometry_shader->handle(), NULL, NULL); - context_->GSSetConstantBuffers( - 0, 1, &state_.constant_buffers.gs_consts); + // Bind texture fetchers. + if (SetupSamplers(command)) { + return 1; + } + + if (command.index_buffer) { + // Have an actual index buffer. + XETRACED3D("D3D11: draw indexed %d (indicies [%d,%d] (%d))", + command.prim_type, command.start_index, + command.start_index + command.index_count, command.index_count); + context_->DrawIndexed(command.index_count, command.start_index, + command.base_vertex); } else { - context_->GSSetShader(NULL, NULL, NULL); + // Auto draw. + XETRACED3D("D3D11: draw indexed auto %d (indicies [%d,%d] (%d))", + command.prim_type, command.start_index, + command.start_index + command.index_count, command.index_count); + context_->Draw(command.index_count, 0); } - // Setup all fetchers (vertices/textures). - if (PrepareFetchers()) { - return 1; - } - - // All ready to draw (except index buffer)! - return 0; } -void D3D11GraphicsDriver::DrawIndexBuffer( - XE_GPU_PRIMITIVE_TYPE prim_type, - bool index_32bit, uint32_t index_count, - uint32_t index_base, uint32_t index_size, uint32_t endianness) { - SCOPE_profile_cpu_f("gpu"); - - RegisterFile& rf = register_file_; - - XETRACED3D("D3D11: draw indexed %d (%d indicies) from %.8X", - prim_type, index_count, index_base); - - // Setup shaders/etc. - if (SetupDraw(prim_type)) { - return; - } - - // Setup index buffer. - if (PrepareIndexBuffer( - index_32bit, index_count, index_base, index_size, endianness)) { - return; - } - - // Issue draw. - uint32_t start_index = rf.values[XE_GPU_REG_VGT_INDX_OFFSET].u32; - uint32_t base_vertex = 0; - context_->DrawIndexed(index_count, start_index, base_vertex); -} - -void D3D11GraphicsDriver::DrawIndexAuto( - XE_GPU_PRIMITIVE_TYPE prim_type, - uint32_t index_count) { - SCOPE_profile_cpu_f("gpu"); - - RegisterFile& rf = register_file_; - - XETRACED3D("D3D11: draw indexed %d (%d indicies)", - prim_type, index_count); - - // Setup shaders/etc. - if (SetupDraw(prim_type)) { - return; - } - - // Issue draw. - uint32_t start_index = rf.values[XE_GPU_REG_VGT_INDX_OFFSET].u32; - uint32_t base_vertex = 0; - //context_->DrawIndexed(index_count, start_index, base_vertex); - context_->Draw(index_count, 0); -} - -int D3D11GraphicsDriver::RebuildRenderTargets( - uint32_t width, uint32_t height) { - if (width == render_targets_.width && - height == render_targets_.height) { - // Cached copies are good. - return 0; - } - - SCOPE_profile_cpu_f("gpu"); - - // Remove old versions. - for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) { - auto& cb = render_targets_.color_buffers[n]; - XESAFERELEASE(cb.buffer); - XESAFERELEASE(cb.color_view_8888); - } - XESAFERELEASE(render_targets_.depth_buffer); - XESAFERELEASE(render_targets_.depth_view_d28s8); - XESAFERELEASE(render_targets_.depth_view_d28fs8); - - render_targets_.width = width; - render_targets_.height = height; - - if (!width || !height) { - // This should only happen when cleaning up. - return 0; - } - - for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) { - auto& cb = render_targets_.color_buffers[n]; - D3D11_TEXTURE2D_DESC color_buffer_desc; - xe_zero_struct(&color_buffer_desc, sizeof(color_buffer_desc)); - color_buffer_desc.Width = width; - color_buffer_desc.Height = height; - color_buffer_desc.MipLevels = 1; - color_buffer_desc.ArraySize = 1; - color_buffer_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - color_buffer_desc.SampleDesc.Count = 1; - color_buffer_desc.SampleDesc.Quality = 0; - color_buffer_desc.Usage = D3D11_USAGE_DEFAULT; - color_buffer_desc.BindFlags = - D3D11_BIND_SHADER_RESOURCE | - D3D11_BIND_RENDER_TARGET; - color_buffer_desc.CPUAccessFlags = 0; - color_buffer_desc.MiscFlags = 0; - device_->CreateTexture2D( - &color_buffer_desc, NULL, &cb.buffer); - - D3D11_RENDER_TARGET_VIEW_DESC render_target_view_desc; - xe_zero_struct(&render_target_view_desc, sizeof(render_target_view_desc)); - render_target_view_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - render_target_view_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; - // render_target_view_desc.Buffer ? - device_->CreateRenderTargetView( - cb.buffer, - &render_target_view_desc, - &cb.color_view_8888); - } - - D3D11_TEXTURE2D_DESC depth_stencil_desc; - xe_zero_struct(&depth_stencil_desc, sizeof(depth_stencil_desc)); - depth_stencil_desc.Width = width; - depth_stencil_desc.Height = height; - depth_stencil_desc.MipLevels = 1; - depth_stencil_desc.ArraySize = 1; - depth_stencil_desc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - depth_stencil_desc.SampleDesc.Count = 1; - depth_stencil_desc.SampleDesc.Quality = 0; - depth_stencil_desc.Usage = D3D11_USAGE_DEFAULT; - depth_stencil_desc.BindFlags = - D3D11_BIND_DEPTH_STENCIL; - depth_stencil_desc.CPUAccessFlags = 0; - depth_stencil_desc.MiscFlags = 0; - device_->CreateTexture2D( - &depth_stencil_desc, NULL, &render_targets_.depth_buffer); - - D3D11_DEPTH_STENCIL_VIEW_DESC depth_stencil_view_desc; - xe_zero_struct(&depth_stencil_view_desc, sizeof(depth_stencil_view_desc)); - depth_stencil_view_desc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; - depth_stencil_view_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D; - depth_stencil_view_desc.Flags = 0; - device_->CreateDepthStencilView( - render_targets_.depth_buffer, - &depth_stencil_view_desc, - &render_targets_.depth_view_d28s8); - - return 0; -} - -int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { +int D3D11GraphicsDriver::UpdateState(const DrawCommand& command) { SCOPE_profile_cpu_f("gpu"); // Most information comes from here: @@ -449,8 +207,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { RegisterFile& rf = register_file_; - uint32_t window_scissor_tl = rf.values[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t window_scissor_br = rf.values[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + uint32_t window_scissor_tl = register_file_[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; + uint32_t window_scissor_br = register_file_[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; //uint32_t window_width = // (window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF); //uint32_t window_height = @@ -466,16 +224,16 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { // RB_SURFACE_INFO ? // Enable buffers. - uint32_t enable_mode = rf.values[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7; + uint32_t enable_mode = register_file_[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7; // 4 = color + depth // 6 = copy ? // color_info[0-3] has format 8888 uint32_t color_info[4] = { - rf.values[XE_GPU_REG_RB_COLOR_INFO].u32, - rf.values[XE_GPU_REG_RB_COLOR1_INFO].u32, - rf.values[XE_GPU_REG_RB_COLOR2_INFO].u32, - rf.values[XE_GPU_REG_RB_COLOR3_INFO].u32, + register_file_[XE_GPU_REG_RB_COLOR_INFO].u32, + register_file_[XE_GPU_REG_RB_COLOR1_INFO].u32, + register_file_[XE_GPU_REG_RB_COLOR2_INFO].u32, + register_file_[XE_GPU_REG_RB_COLOR3_INFO].u32, }; ID3D11RenderTargetView* render_target_views[4] = { 0 }; for (int n = 0; n < XECOUNT(color_info); n++) { @@ -494,7 +252,7 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { } // depth_info has format 24_8 - uint32_t depth_info = rf.values[XE_GPU_REG_RB_DEPTH_INFO].u32; + uint32_t depth_info = register_file_[XE_GPU_REG_RB_DEPTH_INFO].u32; uint32_t depth_format = (depth_info >> 16) & 0x1; ID3D11DepthStencilView* depth_stencil_view = 0; switch (depth_format) { @@ -514,7 +272,7 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { context_->OMSetRenderTargets(4, render_target_views, depth_stencil_view); // General rasterizer state. - uint32_t mode_control = rf.values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + uint32_t mode_control = register_file_[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; D3D11_RASTERIZER_DESC rasterizer_desc; xe_zero_struct(&rasterizer_desc, sizeof(rasterizer_desc)); rasterizer_desc.FillMode = D3D11_FILL_SOLID; // D3D11_FILL_WIREFRAME; @@ -529,7 +287,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { rasterizer_desc.CullMode = D3D11_CULL_BACK; break; } - if (state_overrides & STATE_OVERRIDE_DISABLE_CULLING) { + if (command.prim_type == XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST) { + // Rect lists aren't culled. There may be other things they skip too. rasterizer_desc.CullMode = D3D11_CULL_NONE; } rasterizer_desc.FrontCounterClockwise = (mode_control & 0x4) == 0; @@ -547,7 +306,7 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { // Viewport. // If we have resized the window we will want to change this. - uint32_t window_offset = rf.values[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; + uint32_t window_offset = register_file_[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; // signed? uint32_t window_offset_x = window_offset & 0x7FFF; uint32_t window_offset_y = (window_offset >> 16) & 0x7FFF; @@ -555,19 +314,19 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { // ? // TODO(benvanik): figure out how to emulate viewports in D3D11. Could use // viewport above to scale, though that doesn't support negatives/etc. - uint32_t vte_control = rf.values[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + uint32_t vte_control = register_file_[XE_GPU_REG_PA_CL_VTE_CNTL].u32; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; - float vport_xscale = rf.values[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; // 640 + float vport_xscale = register_file_[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; // 640 bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; - float vport_xoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640 + float vport_xoffset = register_file_[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640 bool vport_yscale_enable = (vte_control & (1 << 2)) > 0; - float vport_yscale = rf.values[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // -360 + float vport_yscale = register_file_[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // -360 bool vport_yoffset_enable = (vte_control & (1 << 3)) > 0; - float vport_yoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360 + float vport_yoffset = register_file_[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360 bool vport_zscale_enable = (vte_control & (1 << 4)) > 0; - float vport_zscale = rf.values[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; // 1 + float vport_zscale = register_file_[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; // 1 bool vport_zoffset_enable = (vte_control & (1 << 5)) > 0; - float vport_zoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0 + float vport_zoffset = register_file_[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0 // TODO(benvanik): compute viewport values. D3D11_VIEWPORT viewport; @@ -619,8 +378,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { // Scissoring. // TODO(benvanik): pull from scissor registers. // ScissorEnable must be set in raster state above. - uint32_t screen_scissor_tl = rf.values[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; - uint32_t screen_scissor_br = rf.values[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32; + uint32_t screen_scissor_tl = register_file_[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; + uint32_t screen_scissor_br = register_file_[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32; if (screen_scissor_tl != 0 && screen_scissor_br != 0x20002000) { D3D11_RECT scissor_rect; scissor_rect.top = (screen_scissor_tl >> 16) & 0x7FFF; @@ -654,8 +413,8 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { }; // Depth-stencil state. - uint32_t depth_control = rf.values[XE_GPU_REG_RB_DEPTHCONTROL].u32; - uint32_t stencil_ref_mask = rf.values[XE_GPU_REG_RB_STENCILREFMASK].u32; + uint32_t depth_control = register_file_[XE_GPU_REG_RB_DEPTHCONTROL].u32; + uint32_t stencil_ref_mask = register_file_[XE_GPU_REG_RB_STENCILREFMASK].u32; D3D11_DEPTH_STENCIL_DESC depth_stencil_desc; xe_zero_struct(&depth_stencil_desc, sizeof(depth_stencil_desc)); // A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE @@ -727,22 +486,22 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { // alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // Not in D3D11! // http://msdn.microsoft.com/en-us/library/windows/desktop/bb205120(v=vs.85).aspx - uint32_t color_control = rf.values[XE_GPU_REG_RB_COLORCONTROL].u32; + uint32_t color_control = register_file_[XE_GPU_REG_RB_COLORCONTROL].u32; // Blend state. - uint32_t color_mask = rf.values[XE_GPU_REG_RB_COLOR_MASK].u32; + uint32_t color_mask = register_file_[XE_GPU_REG_RB_COLOR_MASK].u32; uint32_t sample_mask = 0xFFFFFFFF; // ? float blend_factor[4] = { - rf.values[XE_GPU_REG_RB_BLEND_RED].f32, - rf.values[XE_GPU_REG_RB_BLEND_GREEN].f32, - rf.values[XE_GPU_REG_RB_BLEND_BLUE].f32, - rf.values[XE_GPU_REG_RB_BLEND_ALPHA].f32, + register_file_[XE_GPU_REG_RB_BLEND_RED].f32, + register_file_[XE_GPU_REG_RB_BLEND_GREEN].f32, + register_file_[XE_GPU_REG_RB_BLEND_BLUE].f32, + register_file_[XE_GPU_REG_RB_BLEND_ALPHA].f32, }; uint32_t blend_control[4] = { - rf.values[XE_GPU_REG_RB_BLENDCONTROL_0].u32, - rf.values[XE_GPU_REG_RB_BLENDCONTROL_1].u32, - rf.values[XE_GPU_REG_RB_BLENDCONTROL_2].u32, - rf.values[XE_GPU_REG_RB_BLENDCONTROL_3].u32, + register_file_[XE_GPU_REG_RB_BLENDCONTROL_0].u32, + register_file_[XE_GPU_REG_RB_BLENDCONTROL_1].u32, + register_file_[XE_GPU_REG_RB_BLENDCONTROL_2].u32, + register_file_[XE_GPU_REG_RB_BLENDCONTROL_3].u32, }; D3D11_BLEND_DESC blend_desc; xe_zero_struct(&blend_desc, sizeof(blend_desc)); @@ -782,60 +541,43 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { return 0; } -int D3D11GraphicsDriver::UpdateConstantBuffers() { +int D3D11GraphicsDriver::SetupConstantBuffers(const DrawCommand& command) { SCOPE_profile_cpu_f("gpu"); - RegisterFile& rf = register_file_; - D3D11_MAPPED_SUBRESOURCE res; context_->Map( state_.constant_buffers.float_constants, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); memcpy(res.pData, - &rf.values[XE_GPU_REG_SHADER_CONSTANT_000_X], - (512 * 4) * sizeof(float)); + command.float4_constants.values, + command.float4_constants.count * 4 * sizeof(float)); context_->Unmap(state_.constant_buffers.float_constants, 0); context_->Map( state_.constant_buffers.loop_constants, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); memcpy(res.pData, - &rf.values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00], - (32) * sizeof(int)); + command.loop_constants.values, + command.loop_constants.count * sizeof(int)); context_->Unmap(state_.constant_buffers.loop_constants, 0); context_->Map( state_.constant_buffers.bool_constants, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); memcpy(res.pData, - &rf.values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031], - (8) * sizeof(int)); + command.bool_constants.values, + command.bool_constants.count * sizeof(int)); context_->Unmap(state_.constant_buffers.bool_constants, 0); return 0; } -int D3D11GraphicsDriver::BindShaders() { +int D3D11GraphicsDriver::SetupShaders(const DrawCommand& command) { SCOPE_profile_cpu_f("gpu"); - RegisterFile& rf = register_file_; - xe_gpu_program_cntl_t program_cntl; - program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; - - // Vertex shader setup. - D3D11VertexShader* vs = state_.vertex_shader; - if (vs) { - if (!vs->is_prepared()) { - // Prepare for use. - if (vs->Prepare(&program_cntl)) { - XELOGGPU("D3D11: failed to prepare vertex shader"); - state_.vertex_shader = NULL; - return 1; - } - } - - // Bind. - context_->VSSetShader(vs->handle(), NULL, 0); + if (command.vertex_shader) { + context_->VSSetShader( + command.vertex_shader->handle_as(), nullptr, 0); // Set constant buffers. ID3D11Buffer* vs_constant_buffers[] = { @@ -844,31 +586,22 @@ int D3D11GraphicsDriver::BindShaders() { state_.constant_buffers.loop_constants, state_.constant_buffers.vs_consts, }; - context_->VSSetConstantBuffers( - 0, XECOUNT(vs_constant_buffers), vs_constant_buffers); + context_->VSSetConstantBuffers(0, XECOUNT(vs_constant_buffers), + vs_constant_buffers); // Setup input layout (as encoded in vertex shader). + auto vs = static_cast(command.vertex_shader); context_->IASetInputLayout(vs->input_layout()); } else { - context_->VSSetShader(NULL, NULL, 0); - context_->IASetInputLayout(NULL); + context_->VSSetShader(nullptr, nullptr, 0); + context_->IASetInputLayout(nullptr); return 1; } // Pixel shader setup. - D3D11PixelShader* ps = state_.pixel_shader; - if (ps) { - if (!ps->is_prepared()) { - // Prepare for use. - if (ps->Prepare(&program_cntl, vs)) { - XELOGGPU("D3D11: failed to prepare pixel shader"); - state_.pixel_shader = NULL; - return 1; - } - } - - // Bind. - context_->PSSetShader(ps->handle(), NULL, 0); + if (command.pixel_shader) { + context_->PSSetShader( + command.pixel_shader->handle_as(), nullptr, 0); // Set constant buffers. ID3D11Buffer* vs_constant_buffers[] = { @@ -876,232 +609,233 @@ int D3D11GraphicsDriver::BindShaders() { state_.constant_buffers.bool_constants, state_.constant_buffers.loop_constants, }; - context_->PSSetConstantBuffers( - 0, XECOUNT(vs_constant_buffers), vs_constant_buffers); + context_->PSSetConstantBuffers(0, XECOUNT(vs_constant_buffers), + vs_constant_buffers); } else { - context_->PSSetShader(NULL, NULL, 0); + context_->PSSetShader(nullptr, nullptr, 0); return 1; } return 0; } -int D3D11GraphicsDriver::PrepareFetchers() { +int D3D11GraphicsDriver::SetupInputAssembly(const DrawCommand& command) { SCOPE_profile_cpu_f("gpu"); - // Input assembly. - XEASSERTNOTNULL(state_.vertex_shader); - auto vtx_inputs = state_.vertex_shader->GetVertexBufferInputs(); - for (size_t n = 0; n < vtx_inputs->count; n++) { - auto input = vtx_inputs->descs[n]; - if (PrepareVertexBuffer(input)) { - XELOGE("D3D11: unable to prepare vertex buffer"); - return 1; - } - } - - // All texture inputs. - if (PrepareTextureFetchers()) { - XELOGE("D3D11: unable to prepare texture fetchers"); - return 1; - } - - // Vertex texture samplers. - auto tex_inputs = state_.vertex_shader->GetTextureBufferInputs(); - for (size_t n = 0; n < tex_inputs->count; n++) { - auto input = tex_inputs->descs[n]; - if (PrepareTextureSampler(XE_GPU_SHADER_TYPE_VERTEX, input)) { - XELOGE("D3D11: unable to prepare texture buffer"); - return 1; - } - } - - // Pixel shader texture sampler. - XEASSERTNOTNULL(state_.pixel_shader); - tex_inputs = state_.pixel_shader->GetTextureBufferInputs(); - for (size_t n = 0; n < tex_inputs->count; n++) { - auto input = tex_inputs->descs[n]; - if (PrepareTextureSampler(XE_GPU_SHADER_TYPE_PIXEL, input)) { - XELOGE("D3D11: unable to prepare texture buffer"); - return 1; - } - } - - return 0; -} - -int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) { - SCOPE_profile_cpu_f("gpu"); - - D3D11VertexShader* vs = state_.vertex_shader; + auto vs = static_cast(command.vertex_shader); if (!vs) { return 1; } - RegisterFile& rf = register_file_; - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6; - xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r]; - xe_gpu_vertex_fetch_t* fetch = NULL; - switch (desc.fetch_slot % 3) { - case 0: - fetch = &group->vertex_fetch_0; + // Switch primitive topology. + // Some are unsupported on D3D11 and must be emulated. + D3D11_PRIMITIVE_TOPOLOGY primitive_topology; + D3D11GeometryShader* geometry_shader = NULL; + switch (command.prim_type) { + case XE_GPU_PRIMITIVE_TYPE_POINT_LIST: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + if (vs->DemandGeometryShader( + D3D11VertexShaderResource::POINT_SPRITE_SHADER, &geometry_shader)) { + return 1; + } break; - case 1: - fetch = &group->vertex_fetch_1; + case XE_GPU_PRIMITIVE_TYPE_LINE_LIST: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; - case 2: - fetch = &group->vertex_fetch_2; + case XE_GPU_PRIMITIVE_TYPE_LINE_STRIP: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + break; + case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_LIST: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + break; + case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_STRIP: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + break; + case XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + if (vs->DemandGeometryShader( + D3D11VertexShaderResource::RECT_LIST_SHADER, &geometry_shader)) { + return 1; + } + break; + case XE_GPU_PRIMITIVE_TYPE_QUAD_LIST: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; + if (vs->DemandGeometryShader( + D3D11VertexShaderResource::QUAD_LIST_SHADER, &geometry_shader)) { + return 1; + } + break; + default: + case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_FAN: + case XE_GPU_PRIMITIVE_TYPE_UNKNOWN_07: + case XE_GPU_PRIMITIVE_TYPE_LINE_LOOP: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + XELOGE("D3D11: unsupported primitive type %d", command.prim_type); break; } - XEASSERTNOTNULL(fetch); - // If this assert doesn't hold, maybe we just abort? - XEASSERT(fetch->type == 0x3); - XEASSERTNOTZERO(fetch->size); + context_->IASetPrimitiveTopology(primitive_topology); - VertexBufferInfo info; - // TODO(benvanik): make these structs the same so we can share. - info.layout.stride_words = desc.stride_words; - info.layout.element_count = desc.element_count; - for (uint32_t i = 0; i < desc.element_count; ++i) { - const auto& src_el = desc.elements[i]; - auto& dest_el = info.layout.elements[i]; - dest_el.format = src_el.format; - dest_el.offset_words = src_el.offset_words; - dest_el.size_words = src_el.size_words; + // Set the geometry shader, if we are emulating a primitive type. + if (geometry_shader) { + context_->GSSetShader(geometry_shader->handle(), NULL, NULL); + context_->GSSetConstantBuffers(0, 1, &state_.constant_buffers.gs_consts); + } else { + context_->GSSetShader(NULL, NULL, NULL); } - uint32_t address = (fetch->address << 2) + address_translation_; - const uint8_t* src = reinterpret_cast( - memory_->Translate(address)); - - VertexBuffer* vertex_buffer = buffer_cache_->FetchVertexBuffer( - info, src, fetch->size * 4); - if (!vertex_buffer) { - XELOGE("D3D11: unable to create vertex fetch buffer"); - return 1; + // Index buffer, if any. May be auto draw. + if (command.index_buffer) { + DXGI_FORMAT format; + switch (command.index_buffer->info().format) { + case INDEX_FORMAT_16BIT: + format = DXGI_FORMAT_R16_UINT; + break; + case INDEX_FORMAT_32BIT: + format = DXGI_FORMAT_R32_UINT; + break; + } + context_->IASetIndexBuffer( + command.index_buffer->handle_as(), + format, 0); + } else { + context_->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); } - auto d3d_vb = static_cast(vertex_buffer); - // TODO(benvanik): always dword aligned? - uint32_t stride = desc.stride_words * 4; - uint32_t offset = 0; - int vb_slot = desc.input_index; - ID3D11Buffer* buffers[] = { d3d_vb->handle() }; - context_->IASetVertexBuffers(vb_slot, XECOUNT(buffers), buffers, - &stride, &offset); - - return 0; -} - -int D3D11GraphicsDriver::PrepareTextureFetchers() { - SCOPE_profile_cpu_f("gpu"); - - RegisterFile& rf = register_file_; - - for (int n = 0; n < XECOUNT(state_.texture_fetchers); n++) { - auto& fetcher = state_.texture_fetchers[n]; - - // TODO(benvanik): quick validate without refetching. - fetcher.enabled = false; - fetcher.view = NULL; - - int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + n * 6; - xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r]; - auto& fetch = group->texture_fetch; - if (fetch.type != 0x2) { - continue; - } - - // Stash a copy of the fetch register. - fetcher.fetch = fetch; - - // Fetch texture from the cache. - uint32_t address = (fetch.address << 12) + address_translation_; - auto texture_view = texture_cache_->FetchTexture(address, fetch); - if (!texture_view) { - XELOGW("D3D11: unable to fetch texture at %.8X", address); - continue; - } - if (texture_view->format == DXGI_FORMAT_UNKNOWN) { - XELOGW("D3D11: unknown texture format %d", fetch.format); - continue; - } - fetcher.view = static_cast(texture_view); - - // Only enable if we get all the way down here successfully. - fetcher.enabled = true; + // All vertex buffers. + for (auto i = 0; i < command.vertex_buffer_count; ++i) { + const auto& vb = command.vertex_buffers[i]; + auto buffer = vb.buffer->handle_as(); + auto stride = vb.stride; + auto offset = vb.offset; + context_->IASetVertexBuffers(vb.input_index, 1, &buffer, + &stride, &offset); } return 0; } -int D3D11GraphicsDriver::PrepareTextureSampler( - xenos::XE_GPU_SHADER_TYPE shader_type, Shader::tex_buffer_desc_t& desc) { +int D3D11GraphicsDriver::SetupSamplers(const DrawCommand& command) { SCOPE_profile_cpu_f("gpu"); - // If the fetcher is disabled or invalid, set some default textures. - auto& fetcher = state_.texture_fetchers[desc.fetch_slot]; - if (!fetcher.enabled || - fetcher.view->format == DXGI_FORMAT_UNKNOWN) { - XELOGW("D3D11: ignoring texture fetch: disabled or an unknown format"); - if (shader_type == XE_GPU_SHADER_TYPE_VERTEX) { - context_->VSSetShaderResources(desc.input_index, - 1, &invalid_texture_view_); - context_->VSSetSamplers(desc.input_index, - 1, &invalid_texture_sampler_state_); + for (auto i = 0; i < command.vertex_shader_sampler_count; ++i) { + const auto& input = command.vertex_shader_samplers[i]; + if (input.texture) { + auto texture = input.texture->handle_as(); + context_->VSSetShaderResources(input.input_index, 1, &texture); } else { - context_->PSSetShaderResources(desc.input_index, - 1, &invalid_texture_view_); - context_->PSSetSamplers(desc.input_index, - 1, &invalid_texture_sampler_state_); + context_->VSSetShaderResources(input.input_index, 1, &invalid_texture_view_); } + if (input.sampler_state) { + auto sampler_state = input.sampler_state->handle_as(); + context_->VSSetSamplers(input.input_index, 1, &sampler_state); + } else { + context_->VSSetSamplers(input.input_index, 1, &invalid_texture_sampler_state_); + } + } + + for (auto i = 0; i < command.pixel_shader_sampler_count; ++i) { + const auto& input = command.pixel_shader_samplers[i]; + if (input.texture) { + auto texture = input.texture->handle_as(); + context_->PSSetShaderResources(input.input_index, 1, &texture); + } else { + context_->PSSetShaderResources(input.input_index, 1, &invalid_texture_view_); + } + if (input.sampler_state) { + auto sampler_state = input.sampler_state->handle_as(); + context_->PSSetSamplers(input.input_index, 1, &sampler_state); + } else { + context_->PSSetSamplers(input.input_index, 1, &invalid_texture_sampler_state_); + } + } + + return 0; +} + +int D3D11GraphicsDriver::RebuildRenderTargets(uint32_t width, + uint32_t height) { + if (width == render_targets_.width && + height == render_targets_.height) { + // Cached copies are good. return 0; } - // Get and set the real shader resource views/samplers. - if (shader_type == XE_GPU_SHADER_TYPE_VERTEX) { - context_->VSSetShaderResources(desc.input_index, 1, &fetcher.view->srv); - } else { - context_->PSSetShaderResources(desc.input_index, 1, &fetcher.view->srv); - } - ID3D11SamplerState* sampler_state = texture_cache_->GetSamplerState( - fetcher.fetch, desc); - if (!sampler_state) { - XELOGW("D3D11: failed to set sampler state; ignoring texture"); - return 1; - } - if (shader_type == XE_GPU_SHADER_TYPE_VERTEX) { - context_->VSSetSamplers(desc.input_index, 1, &sampler_state); - } else { - context_->PSSetSamplers(desc.input_index, 1, &sampler_state); - } - - return 0; -} - -int D3D11GraphicsDriver::PrepareIndexBuffer( - bool index_32bit, uint32_t index_count, - uint32_t index_base, uint32_t index_size, uint32_t endianness) { SCOPE_profile_cpu_f("gpu"); - RegisterFile& rf = register_file_; + // Remove old versions. + for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) { + auto& cb = render_targets_.color_buffers[n]; + XESAFERELEASE(cb.buffer); + XESAFERELEASE(cb.color_view_8888); + } + XESAFERELEASE(render_targets_.depth_buffer); + XESAFERELEASE(render_targets_.depth_view_d28s8); + XESAFERELEASE(render_targets_.depth_view_d28fs8); - uint32_t address = index_base + address_translation_; + render_targets_.width = width; + render_targets_.height = height; - IndexBufferInfo info; - info.endianness = endianness; - info.index_32bit = index_32bit; - info.index_count = index_count; - info.index_size = index_size; - auto ib = static_cast(buffer_cache_->FetchIndexBuffer( - info, memory_->Translate(address), index_size)); - if (!ib) { - return 1; + if (!width || !height) { + // This should only happen when cleaning up. + return 0; } - DXGI_FORMAT format; - format = index_32bit ? DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT; - context_->IASetIndexBuffer(ib->handle(), format, 0); + for (int n = 0; n < XECOUNT(render_targets_.color_buffers); n++) { + auto& cb = render_targets_.color_buffers[n]; + D3D11_TEXTURE2D_DESC color_buffer_desc; + xe_zero_struct(&color_buffer_desc, sizeof(color_buffer_desc)); + color_buffer_desc.Width = width; + color_buffer_desc.Height = height; + color_buffer_desc.MipLevels = 1; + color_buffer_desc.ArraySize = 1; + color_buffer_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + color_buffer_desc.SampleDesc.Count = 1; + color_buffer_desc.SampleDesc.Quality = 0; + color_buffer_desc.Usage = D3D11_USAGE_DEFAULT; + color_buffer_desc.BindFlags = + D3D11_BIND_SHADER_RESOURCE | + D3D11_BIND_RENDER_TARGET; + color_buffer_desc.CPUAccessFlags = 0; + color_buffer_desc.MiscFlags = 0; + device_->CreateTexture2D( + &color_buffer_desc, NULL, &cb.buffer); + + D3D11_RENDER_TARGET_VIEW_DESC render_target_view_desc; + xe_zero_struct(&render_target_view_desc, sizeof(render_target_view_desc)); + render_target_view_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + render_target_view_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + // render_target_view_desc.Buffer ? + device_->CreateRenderTargetView( + cb.buffer, + &render_target_view_desc, + &cb.color_view_8888); + } + + D3D11_TEXTURE2D_DESC depth_stencil_desc; + xe_zero_struct(&depth_stencil_desc, sizeof(depth_stencil_desc)); + depth_stencil_desc.Width = width; + depth_stencil_desc.Height = height; + depth_stencil_desc.MipLevels = 1; + depth_stencil_desc.ArraySize = 1; + depth_stencil_desc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + depth_stencil_desc.SampleDesc.Count = 1; + depth_stencil_desc.SampleDesc.Quality = 0; + depth_stencil_desc.Usage = D3D11_USAGE_DEFAULT; + depth_stencil_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL; + depth_stencil_desc.CPUAccessFlags = 0; + depth_stencil_desc.MiscFlags = 0; + device_->CreateTexture2D( + &depth_stencil_desc, NULL, &render_targets_.depth_buffer); + + D3D11_DEPTH_STENCIL_VIEW_DESC depth_stencil_view_desc; + xe_zero_struct(&depth_stencil_view_desc, sizeof(depth_stencil_view_desc)); + depth_stencil_view_desc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; + depth_stencil_view_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D; + depth_stencil_view_desc.Flags = 0; + device_->CreateDepthStencilView( + render_targets_.depth_buffer, + &depth_stencil_view_desc, + &render_targets_.depth_view_d28s8); return 0; } diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h index 2f2316488..4faa493ee 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h @@ -13,8 +13,8 @@ #include #include -#include #include +#include #include #include @@ -24,13 +24,6 @@ namespace xe { namespace gpu { namespace d3d11 { -class D3D11BufferCache; -class D3D11PixelShader; -class D3D11ShaderCache; -class D3D11TextureCache; -struct D3D11TextureView; -class D3D11VertexShader; - class D3D11GraphicsDriver : public GraphicsDriver { public: @@ -38,48 +31,32 @@ public: Memory* memory, IDXGISwapChain* swap_chain, ID3D11Device* device); virtual ~D3D11GraphicsDriver(); - virtual void Initialize(); + ResourceCache* resource_cache() const override { return resource_cache_; } - virtual void InvalidateState( - uint32_t mask); - virtual void SetShader( - xenos::XE_GPU_SHADER_TYPE type, - uint32_t address, - uint32_t start, - uint32_t length); - virtual void DrawIndexBuffer( - xenos::XE_GPU_PRIMITIVE_TYPE prim_type, - bool index_32bit, uint32_t index_count, - uint32_t index_base, uint32_t index_size, uint32_t endianness); - virtual void DrawIndexAuto( - xenos::XE_GPU_PRIMITIVE_TYPE prim_type, - uint32_t index_count); + int Initialize() override; + + int Draw(const DrawCommand& command) override; // TODO(benvanik): figure this out. - virtual int Resolve(); + int Resolve() override; private: - int SetupDraw(xenos::XE_GPU_PRIMITIVE_TYPE prim_type); + void InitializeInvalidTexture(); + + int UpdateState(const DrawCommand& command); + int SetupConstantBuffers(const DrawCommand& command); + int SetupShaders(const DrawCommand& command); + int SetupInputAssembly(const DrawCommand& command); + int SetupSamplers(const DrawCommand& command); + int RebuildRenderTargets(uint32_t width, uint32_t height); - int UpdateState(uint32_t state_overrides = 0); - int UpdateConstantBuffers(); - int BindShaders(); - int PrepareFetchers(); - int PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc); - int PrepareTextureFetchers(); - int PrepareTextureSampler(xenos::XE_GPU_SHADER_TYPE shader_type, - Shader::tex_buffer_desc_t& desc); - int PrepareIndexBuffer( - bool index_32bit, uint32_t index_count, - uint32_t index_base, uint32_t index_size, uint32_t endianness); private: IDXGISwapChain* swap_chain_; ID3D11Device* device_; ID3D11DeviceContext* context_; - D3D11BufferCache* buffer_cache_; - D3D11ShaderCache* shader_cache_; - D3D11TextureCache* texture_cache_; + + D3D11ResourceCache* resource_cache_; ID3D11ShaderResourceView* invalid_texture_view_; ID3D11SamplerState* invalid_texture_sampler_state_; @@ -97,9 +74,6 @@ private: } render_targets_; struct { - D3D11VertexShader* vertex_shader; - D3D11PixelShader* pixel_shader; - struct { ID3D11Buffer* float_constants; ID3D11Buffer* bool_constants; @@ -107,17 +81,7 @@ private: ID3D11Buffer* vs_consts; ID3D11Buffer* gs_consts; } constant_buffers; - - struct { - bool enabled; - xenos::xe_gpu_texture_fetch_t fetch; - D3D11TextureView* view; - } texture_fetchers[32]; } state_; - - enum StateOverrides { - STATE_OVERRIDE_DISABLE_CULLING = (1 << 0), - }; }; diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_system.cc b/src/xenia/gpu/d3d11/d3d11_graphics_system.cc index 553ed8828..7258195d3 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_system.cc +++ b/src/xenia/gpu/d3d11/d3d11_graphics_system.cc @@ -146,12 +146,18 @@ void D3D11GraphicsSystem::Initialize() { XEASSERTNULL(driver_); driver_ = new D3D11GraphicsDriver( memory_, window_->swap_chain(), device_); + if (driver_->Initialize()) { + XELOGE("Unable to initialize D3D11 driver"); + return; + } // Initial vsync kick. DispatchInterruptCallback(0); } void D3D11GraphicsSystem::Pump() { + SCOPE_profile_cpu_f("gpu"); + if (swap_pending_) { swap_pending_ = false; diff --git a/src/xenia/gpu/d3d11/d3d11_resource_cache.cc b/src/xenia/gpu/d3d11/d3d11_resource_cache.cc new file mode 100644 index 000000000..145e3d395 --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_resource_cache.cc @@ -0,0 +1,71 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include +#include +#include +#include + + +using namespace xe; +using namespace xe::gpu; +using namespace xe::gpu::d3d11; + + +D3D11ResourceCache::D3D11ResourceCache(Memory* memory, + ID3D11Device* device, + ID3D11DeviceContext* context) + : ResourceCache(memory), + device_(device), context_(context) { + device_->AddRef(); + context_->AddRef(); +} + +D3D11ResourceCache::~D3D11ResourceCache() { + XESAFERELEASE(device_); + XESAFERELEASE(context_); +} + +VertexShaderResource* D3D11ResourceCache::CreateVertexShader( + const MemoryRange& memory_range, + const VertexShaderResource::Info& info) { + return new D3D11VertexShaderResource(this, memory_range, info); +} + +PixelShaderResource* D3D11ResourceCache::CreatePixelShader( + const MemoryRange& memory_range, + const PixelShaderResource::Info& info) { + return new D3D11PixelShaderResource(this, memory_range, info); +} + +TextureResource* D3D11ResourceCache::CreateTexture( + const MemoryRange& memory_range, + const TextureResource::Info& info) { + return new D3D11TextureResource(this, memory_range, info); +} + +SamplerStateResource* D3D11ResourceCache::CreateSamplerState( + const SamplerStateResource::Info& info) { + return new D3D11SamplerStateResource(this, info); +} + +IndexBufferResource* D3D11ResourceCache::CreateIndexBuffer( + const MemoryRange& memory_range, + const IndexBufferResource::Info& info) { + return new D3D11IndexBufferResource(this, memory_range, info); +} + +VertexBufferResource* D3D11ResourceCache::CreateVertexBuffer( + const MemoryRange& memory_range, + const VertexBufferResource::Info& info) { + return new D3D11VertexBufferResource(this, memory_range, info); +} diff --git a/src/xenia/gpu/d3d11/d3d11_resource_cache.h b/src/xenia/gpu/d3d11/d3d11_resource_cache.h new file mode 100644 index 000000000..27248eb9c --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_resource_cache.h @@ -0,0 +1,64 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_ +#define XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_ + +#include + +#include + +#include + + +namespace xe { +namespace gpu { +namespace d3d11 { + + +class D3D11ResourceCache : public ResourceCache { +public: + D3D11ResourceCache(Memory* memory, + ID3D11Device* device, ID3D11DeviceContext* context); + virtual ~D3D11ResourceCache(); + + ID3D11Device* device() const { return device_; } + ID3D11DeviceContext* context() const { return context_; } + +protected: + VertexShaderResource* CreateVertexShader( + const MemoryRange& memory_range, + const VertexShaderResource::Info& info) override; + PixelShaderResource* CreatePixelShader( + const MemoryRange& memory_range, + const PixelShaderResource::Info& info) override; + TextureResource* CreateTexture( + const MemoryRange& memory_range, + const TextureResource::Info& info) override; + SamplerStateResource* CreateSamplerState( + const SamplerStateResource::Info& info) override; + IndexBufferResource* CreateIndexBuffer( + const MemoryRange& memory_range, + const IndexBufferResource::Info& info) override; + VertexBufferResource* CreateVertexBuffer( + const MemoryRange& memory_range, + const VertexBufferResource::Info& info) override; + +private: + ID3D11Device* device_; + ID3D11DeviceContext* context_; +}; + + +} // namespace d3d11 +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_texture_cache.cc b/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.cc similarity index 51% rename from src/xenia/gpu/d3d11/d3d11_texture_cache.cc rename to src/xenia/gpu/d3d11/d3d11_sampler_state_resource.cc index eb3442bfc..7fb09858a 100644 --- a/src/xenia/gpu/d3d11/d3d11_texture_cache.cc +++ b/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.cc @@ -7,53 +7,36 @@ ****************************************************************************** */ -#include +#include -#include +#include +using namespace std; using namespace xe; using namespace xe::gpu; using namespace xe::gpu::d3d11; +using namespace xe::gpu::xenos; -D3D11TextureCache::D3D11TextureCache( - Memory* memory, - ID3D11DeviceContext* context, ID3D11Device* device) - : TextureCache(memory), - context_(context), device_(device) { - context_->AddRef(); - device_->AddRef(); +D3D11SamplerStateResource::D3D11SamplerStateResource( + D3D11ResourceCache* resource_cache, const Info& info) + : SamplerStateResource(info), + resource_cache_(resource_cache), + handle_(nullptr) { } -D3D11TextureCache::~D3D11TextureCache() { - for (auto it = samplers_.begin(); it != samplers_.end(); ++it) { - auto& cached_state = it->second; - XESAFERELEASE(cached_state.state); +D3D11SamplerStateResource::~D3D11SamplerStateResource() { + XESAFERELEASE(handle_); +} + +int D3D11SamplerStateResource::Prepare() { + if (handle_) { + return 0; } - samplers_.clear(); - XESAFERELEASE(device_); - XESAFERELEASE(context_); -} - -Texture* D3D11TextureCache::CreateTexture( - uint32_t address, const uint8_t* host_address, - const xenos::xe_gpu_texture_fetch_t& fetch) { - return new D3D11Texture(this, address, host_address); -} - -ID3D11SamplerState* D3D11TextureCache::GetSamplerState( - const xenos::xe_gpu_texture_fetch_t& fetch, - const Shader::tex_buffer_desc_t& desc) { D3D11_SAMPLER_DESC sampler_desc; xe_zero_struct(&sampler_desc, sizeof(sampler_desc)); - uint32_t min_filter = desc.tex_fetch.min_filter == 3 ? - fetch.min_filter : desc.tex_fetch.min_filter; - uint32_t mag_filter = desc.tex_fetch.mag_filter == 3 ? - fetch.mag_filter : desc.tex_fetch.mag_filter; - uint32_t mip_filter = desc.tex_fetch.mip_filter == 3 ? - fetch.mip_filter : desc.tex_fetch.mip_filter; // MIN, MAG, MIP static const D3D11_FILTER filter_matrix[2][2][3] = { { @@ -87,7 +70,8 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState( }, }, }; - sampler_desc.Filter = filter_matrix[min_filter][mag_filter][mip_filter]; + sampler_desc.Filter = + filter_matrix[info_.min_filter][info_.mag_filter][info_.mip_filter]; static const D3D11_TEXTURE_ADDRESS_MODE mode_map[] = { D3D11_TEXTURE_ADDRESS_WRAP, D3D11_TEXTURE_ADDRESS_MIRROR, @@ -98,9 +82,9 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState( D3D11_TEXTURE_ADDRESS_BORDER, // ? D3D11_TEXTURE_ADDRESS_MIRROR, // ? }; - sampler_desc.AddressU = mode_map[fetch.clamp_x]; - sampler_desc.AddressV = mode_map[fetch.clamp_y]; - sampler_desc.AddressW = mode_map[fetch.clamp_z]; + sampler_desc.AddressU = mode_map[info_.clamp_u]; + sampler_desc.AddressV = mode_map[info_.clamp_v]; + sampler_desc.AddressW = mode_map[info_.clamp_w]; sampler_desc.MipLODBias; sampler_desc.MaxAnisotropy = 1; sampler_desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; @@ -111,29 +95,12 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState( sampler_desc.MinLOD; sampler_desc.MaxLOD; - // TODO(benvanik): do this earlier without having to setup the whole struct? - size_t hash = hash_combine( - sampler_desc.Filter, - sampler_desc.AddressU, - sampler_desc.AddressV, - sampler_desc.AddressW); - auto range = samplers_.equal_range(hash); - for (auto it = range.first; it != range.second; ++it) { - const auto& cached_state = it->second; - // TODO(benvanik): faster compare? - if (memcmp(&sampler_desc, &cached_state.desc, sizeof(sampler_desc)) == 0) { - return cached_state.state; - } - } - - ID3D11SamplerState* sampler_state = NULL; - HRESULT hr = device_->CreateSamplerState(&sampler_desc, &sampler_state); + HRESULT hr = resource_cache_->device()->CreateSamplerState( + &sampler_desc, &handle_); if (FAILED(hr)) { XELOGE("D3D11: unable to create sampler state"); - return nullptr; + return 1; } - samplers_.insert({ hash, { sampler_desc, sampler_state } }); - - return sampler_state; + return 0; } diff --git a/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.h b/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.h new file mode 100644 index 000000000..6097339b4 --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_sampler_state_resource.h @@ -0,0 +1,48 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_ +#define XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_ + +#include +#include +#include + +#include + + +namespace xe { +namespace gpu { +namespace d3d11 { + +class D3D11ResourceCache; + + +class D3D11SamplerStateResource : public SamplerStateResource { +public: + D3D11SamplerStateResource(D3D11ResourceCache* resource_cache, + const Info& info); + ~D3D11SamplerStateResource() override; + + void* handle() const override { return handle_; } + + int Prepare() override; + +protected: + D3D11ResourceCache* resource_cache_; + ID3D11SamplerState* handle_; +}; + + +} // namespace d3d11 +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_shader.cc b/src/xenia/gpu/d3d11/d3d11_shader.cc deleted file mode 100644 index 97e0cb295..000000000 --- a/src/xenia/gpu/d3d11/d3d11_shader.cc +++ /dev/null @@ -1,2059 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include -#include -#include - -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::d3d11; -using namespace xe::gpu::xenos; - - -namespace { - -const int OUTPUT_CAPACITY = 64 * 1024; - -int GetFormatComponentCount(uint32_t format) { - switch (format) { - case FMT_32: - case FMT_32_FLOAT: - return 1; - case FMT_16_16: - case FMT_16_16_FLOAT: - case FMT_32_32: - case FMT_32_32_FLOAT: - return 2; - case FMT_10_11_11: - case FMT_11_11_10: - case FMT_32_32_32_FLOAT: - return 3; - case FMT_8_8_8_8: - case FMT_2_10_10_10: - case FMT_16_16_16_16: - case FMT_16_16_16_16_FLOAT: - case FMT_32_32_32_32: - case FMT_32_32_32_32_FLOAT: - return 4; - default: - XELOGE("Unknown vertex format: %d", format); - XEASSERTALWAYS(); - return 4; - } -} - -const char* GetFormatTypeName( - uint32_t format, uint32_t format_comp_all, uint32_t num_format_all) { - switch (format) { - case FMT_32: - return format_comp_all ? "int" : "uint"; - case FMT_32_FLOAT: - return "float"; - case FMT_16_16: - case FMT_32_32: - if (!num_format_all) { - return format_comp_all ? "snorm float2" : "unorm float2"; - } else { - return format_comp_all ? "int2" : "uint2"; - } - case FMT_16_16_FLOAT: - case FMT_32_32_FLOAT: - return "float2"; - case FMT_10_11_11: - case FMT_11_11_10: - return "int3"; // ? - case FMT_32_32_32_FLOAT: - return "float3"; - case FMT_8_8_8_8: - case FMT_2_10_10_10: - case FMT_16_16_16_16: - case FMT_32_32_32_32: - if (!num_format_all) { - return format_comp_all ? "snorm float4" : "unorm float4"; - } else { - return format_comp_all ? "int4" : "uint4"; - } - case FMT_16_16_16_16_FLOAT: - case FMT_32_32_32_32_FLOAT: - return "float4"; - default: - XELOGE("Unknown vertex format: %d", format); - XEASSERTALWAYS(); - return "float4"; - } -} - -} // anonymous namespace - - -struct xe::gpu::d3d11::Output { - char buffer[OUTPUT_CAPACITY]; - size_t capacity; - size_t offset; - Output() : - capacity(OUTPUT_CAPACITY), - offset(0) { - buffer[0] = 0; - } - void append(const char* format, ...) { - va_list args; - va_start(args, format); - int len = xevsnprintfa( - buffer + offset, capacity - offset, format, args); - va_end(args); - offset += len; - buffer[offset] = 0; - } -}; - - -D3D11Shader::D3D11Shader( - ID3D11Device* device, - XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length, - uint64_t hash) : - translated_src_(NULL), - Shader(type, src_ptr, length, hash) { - device_ = device; - device_->AddRef(); -} - -D3D11Shader::~D3D11Shader() { - if (translated_src_) { - xe_free(translated_src_); - } - XESAFERELEASE(device_); -} - -void D3D11Shader::set_translated_src(char* value) { - if (translated_src_) { - xe_free(translated_src_); - } - translated_src_ = xestrdupa(value); -} - -ID3D10Blob* D3D11Shader::Compile(const char* shader_source) { - SCOPE_profile_cpu_f("gpu"); - - // TODO(benvanik): pick shared runtime mode defines. - D3D10_SHADER_MACRO defines[] = { - "TEST_DEFINE", "1", - 0, 0, - }; - - uint32_t flags1 = 0; - flags1 |= D3D10_SHADER_DEBUG; - flags1 |= D3D10_SHADER_ENABLE_STRICTNESS; - uint32_t flags2 = 0; - - // Create a name. - const char* base_path = ""; - if (FLAGS_dump_shaders.size()) { - base_path = FLAGS_dump_shaders.c_str(); - } - char file_name[XE_MAX_PATH]; - xesnprintfa(file_name, XECOUNT(file_name), - "%s/gen_%.16llX.%s", - base_path, - hash_, - type_ == XE_GPU_SHADER_TYPE_VERTEX ? "vs" : "ps"); - - if (FLAGS_dump_shaders.size()) { - FILE* f = fopen(file_name, "w"); - fprintf(f, shader_source); - fprintf(f, "\n\n"); - fprintf(f, "/*\n"); - fprintf(f, disasm_src_); - fprintf(f, " */\n"); - fclose(f); - } - - // Compile shader to bytecode blob. - ID3D10Blob* shader_blob = 0; - ID3D10Blob* error_blob = 0; - HRESULT hr = D3DCompile( - shader_source, strlen(shader_source), - file_name, - defines, NULL, - "main", - type_ == XE_GPU_SHADER_TYPE_VERTEX ? - "vs_5_0" : "ps_5_0", - flags1, flags2, - &shader_blob, &error_blob); - if (error_blob) { - char* msg = (char*)error_blob->GetBufferPointer(); - XELOGE("D3D11: shader compile failed with %s", msg); - } - XESAFERELEASE(error_blob); - if (FAILED(hr)) { - return NULL; - } - return shader_blob; -} - -void D3D11Shader::AppendTextureHeader(Output* output) { - bool fetch_setup[32] = { false }; - - // 1 texture per constant slot, 1 sampler per fetch. - for (uint32_t n = 0; n < tex_buffer_inputs_.count; n++) { - auto& input = tex_buffer_inputs_.descs[n]; - auto& fetch = input.tex_fetch; - - // Add texture, if needed. - if (!fetch_setup[fetch.const_idx]) { - fetch_setup[fetch.const_idx] = true; - const char* texture_type = NULL; - switch (fetch.dimension) { - case DIMENSION_1D: - texture_type = "Texture1D"; - break; - default: - case DIMENSION_2D: - texture_type = "Texture2D"; - break; - case DIMENSION_3D: - texture_type = "Texture3D"; - break; - case DIMENSION_CUBE: - texture_type = "TextureCube"; - break; - } - output->append("%s x_texture_%d;\n", texture_type, fetch.const_idx); - } - - // Add sampler. - output->append("SamplerState x_sampler_%d;\n", n); - } -} - - -D3D11VertexShader::D3D11VertexShader( - ID3D11Device* device, - const uint8_t* src_ptr, size_t length, - uint64_t hash) : - handle_(0), input_layout_(0), - D3D11Shader(device, XE_GPU_SHADER_TYPE_VERTEX, - src_ptr, length, hash) { - xe_zero_struct(geometry_shaders_, sizeof(geometry_shaders_)); -} - -D3D11VertexShader::~D3D11VertexShader() { - for (size_t n = 0; n < XECOUNT(geometry_shaders_); n++) { - delete geometry_shaders_[n]; - } - XESAFERELEASE(input_layout_); - XESAFERELEASE(handle_); -} - -int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) { - SCOPE_profile_cpu_f("gpu"); - if (handle_) { - return 0; - } - - // TODO(benvanik): look in file based on hash/etc. - void* byte_code = NULL; - size_t byte_code_length = 0; - - // Translate and compile source. - const char* shader_source = Translate(program_cntl); - if (!shader_source) { - return 1; - } - ID3D10Blob* shader_blob = Compile(shader_source); - if (!shader_blob) { - return 1; - } - byte_code_length = shader_blob->GetBufferSize(); - byte_code = xe_malloc(byte_code_length); - xe_copy_struct( - byte_code, shader_blob->GetBufferPointer(), byte_code_length); - XESAFERELEASE(shader_blob); - - // Create shader. - HRESULT hr = device_->CreateVertexShader( - byte_code, byte_code_length, - NULL, - &handle_); - if (FAILED(hr)) { - XELOGE("D3D11: failed to create vertex shader"); - xe_free(byte_code); - return 1; - } - - // Create input layout. - size_t element_count = 0; - for (uint32_t n = 0; n < vtx_buffer_inputs_.count; n++) { - element_count += vtx_buffer_inputs_.descs[n].element_count; - } - if (!element_count) { - XELOGW("D3D11: vertex shader with zero inputs -- retaining previous values?"); - input_layout_ = NULL; - return 0; - } - - D3D11_INPUT_ELEMENT_DESC* element_descs = - (D3D11_INPUT_ELEMENT_DESC*)xe_alloca( - sizeof(D3D11_INPUT_ELEMENT_DESC) * element_count); - uint32_t el_index = 0; - for (uint32_t n = 0; n < vtx_buffer_inputs_.count; n++) { - auto& input = vtx_buffer_inputs_.descs[n]; - for (uint32_t m = 0; m < input.element_count; m++) { - auto& el = input.elements[m]; - uint32_t vb_slot = input.input_index; - uint32_t num_format_all = el.vtx_fetch.num_format_all; - uint32_t format_comp_all = el.vtx_fetch.format_comp_all; - DXGI_FORMAT vtx_format; - switch (el.format) { - case FMT_8_8_8_8: - if (!num_format_all) { - vtx_format = format_comp_all ? - DXGI_FORMAT_R8G8B8A8_SNORM : DXGI_FORMAT_R8G8B8A8_UNORM; - } else { - vtx_format = format_comp_all ? - DXGI_FORMAT_R8G8B8A8_SINT : DXGI_FORMAT_R8G8B8A8_UINT; - } - break; - case FMT_2_10_10_10: - if (!num_format_all) { - vtx_format = DXGI_FORMAT_R10G10B10A2_UNORM; - } else { - vtx_format = DXGI_FORMAT_R10G10B10A2_UINT; - } - break; - // DXGI_FORMAT_R11G11B10_FLOAT? - case FMT_16_16: - if (!num_format_all) { - vtx_format = format_comp_all ? - DXGI_FORMAT_R16G16_SNORM : DXGI_FORMAT_R16G16_UNORM; - } else { - vtx_format = format_comp_all ? - DXGI_FORMAT_R16G16_SINT : DXGI_FORMAT_R16G16_UINT; - } - break; - case FMT_16_16_16_16: - if (!num_format_all) { - vtx_format = format_comp_all ? - DXGI_FORMAT_R16G16B16A16_SNORM : DXGI_FORMAT_R16G16B16A16_UNORM; - } else { - vtx_format = format_comp_all ? - DXGI_FORMAT_R16G16B16A16_SINT : DXGI_FORMAT_R16G16B16A16_UINT; - } - break; - case FMT_16_16_FLOAT: - vtx_format = DXGI_FORMAT_R16G16_FLOAT; - break; - case FMT_16_16_16_16_FLOAT: - vtx_format = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - case FMT_32: - vtx_format = format_comp_all ? - DXGI_FORMAT_R32_SINT : DXGI_FORMAT_R32_UINT; - break; - case FMT_32_32: - vtx_format = format_comp_all ? - DXGI_FORMAT_R32G32_SINT : DXGI_FORMAT_R32G32_UINT; - break; - case FMT_32_32_32_32: - vtx_format = format_comp_all ? - DXGI_FORMAT_R32G32B32A32_SINT : DXGI_FORMAT_R32G32B32A32_UINT; - break; - case FMT_32_FLOAT: - vtx_format = DXGI_FORMAT_R32_FLOAT; - break; - case FMT_32_32_FLOAT: - vtx_format = DXGI_FORMAT_R32G32_FLOAT; - break; - case FMT_32_32_32_FLOAT: - vtx_format = DXGI_FORMAT_R32G32B32_FLOAT; - break; - case FMT_32_32_32_32_FLOAT: - vtx_format = DXGI_FORMAT_R32G32B32A32_FLOAT; - break; - default: - XEASSERTALWAYS(); - break; - } - element_descs[el_index].SemanticName = "XE_VF"; - element_descs[el_index].SemanticIndex = el_index; - element_descs[el_index].Format = vtx_format; - element_descs[el_index].InputSlot = vb_slot; - element_descs[el_index].AlignedByteOffset = el.offset_words * 4; - element_descs[el_index].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; - element_descs[el_index].InstanceDataStepRate = 0; - el_index++; - } - } - hr = device_->CreateInputLayout( - element_descs, - (UINT)element_count, - byte_code, byte_code_length, - &input_layout_); - if (FAILED(hr)) { - XELOGE("D3D11: failed to create vertex shader input layout"); - xe_free(byte_code); - return 1; - } - - xe_free(byte_code); - - is_prepared_ = true; - return 0; -} - -const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) { - SCOPE_profile_cpu_f("gpu"); - - Output* output = new Output(); - xe_gpu_translate_ctx_t ctx; - ctx.output = output; - ctx.type = type_; - ctx.tex_fetch_index = 0; - - // Add constants buffers. - // We could optimize this by only including used buffers, but the compiler - // seems to do a good job of doing this for us. - // It also does read detection, so c[512] can end up c[4] in the asm - - // instead of doing this optimization ourselves we could maybe just query - // this from the compiler. - output->append( - "cbuffer float_consts : register(b0) {\n" - " float4 c[512];\n" - "};\n"); - // TODO(benvanik): add bool/loop constants. - - AppendTextureHeader(output); - - // Transform utilities. We adjust the output position in various ways - // as we can't do this via D3D11 APIs. - output->append( - "cbuffer vs_consts : register(b3) {\n" - " float4 window;\n" // x,y,w,h - " float4 viewport_z_enable;\n" // min,(max - min),?,enabled - " float4 viewport_size;\n" // x,y,w,h - "};" - "float4 applyViewport(float4 pos) {\n" - " if (viewport_z_enable.w) {\n" - //" pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n" - //" pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n" - //" pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n" - // w? - " } else {\n" - " pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + float2(-1.0, 1.0);\n" - " pos.zw = float2(0.0, 1.0);\n" - " }\n" - " pos.xy += window.xy;\n" - " return pos;\n" - "}\n"); - - // Add vertex shader input. - output->append( - "struct VS_INPUT {\n"); - uint32_t el_index = 0; - for (uint32_t n = 0; n < vtx_buffer_inputs_.count; n++) { - auto& input = vtx_buffer_inputs_.descs[n]; - for (uint32_t m = 0; m < input.element_count; m++) { - auto& el = input.elements[m]; - auto& vtx = el.vtx_fetch; - const char* type_name = GetFormatTypeName( - el.format, el.vtx_fetch.format_comp_all, el.vtx_fetch.num_format_all); - uint32_t fetch_slot = vtx.const_index * 3 + vtx.const_index_sel; - output->append( - " %s vf%u_%d : XE_VF%u;\n", - type_name, fetch_slot, vtx.offset, el_index); - el_index++; - } - } - output->append( - "};\n"); - - // Add vertex shader output (pixel shader input). - output->append( - "struct VS_OUTPUT {\n"); - if (alloc_counts_.positions) { - XEASSERT(alloc_counts_.positions == 1); - output->append( - " float4 oPos : SV_POSITION;\n"); - } - if (alloc_counts_.params) { - output->append( - " float4 o[%d] : XE_O;\n", - MAX_INTERPOLATORS); - } - if (alloc_counts_.point_size) { - output->append( - " float4 oPointSize : PSIZE;\n"); - } - output->append( - "};\n"); - - // Vertex shader main() header. - output->append( - "VS_OUTPUT main(VS_INPUT i) {\n" - " VS_OUTPUT o;\n"); - - // Always write position, as some shaders seem to only write certain values. - output->append( - " o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n"); - if (alloc_counts_.point_size) { - output->append( - " o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n"); - } - - // TODO(benvanik): remove this, if possible (though the compiler may be smart - // enough to do it for us). - if (alloc_counts_.params) { - for (uint32_t n = 0; n < MAX_INTERPOLATORS; n++) { - output->append( - " o.o[%d] = float4(0.0, 0.0, 0.0, 0.0);\n", n); - } - } - - // Add temporaries for any registers we may use. - uint32_t temp_regs = program_cntl->vs_regs + program_cntl->ps_regs; - for (uint32_t n = 0; n <= temp_regs; n++) { - output->append( - " float4 r%d = c[%d];\n", n, n); - } - output->append(" float4 t;\n"); - - // Execute blocks. - for (std::vector::iterator it = execs_.begin(); - it != execs_.end(); ++it) { - instr_cf_exec_t& cf = *it; - // TODO(benvanik): figure out how sequences/jmps/loops/etc work. - if (TranslateExec(ctx, cf)) { - delete output; - return NULL; - } - } - - // main footer. - output->append( - " o.oPos = applyViewport(o.oPos);\n" - " return o;\n" - "};\n"); - - set_translated_src(output->buffer); - delete output; - return translated_src_; -} - -int D3D11VertexShader::DemandGeometryShader(GeometryShaderType type, - D3D11GeometryShader** out_shader) { - if (geometry_shaders_[type]) { - *out_shader = geometry_shaders_[type]; - return 0; - } - - // Demand generate. - D3D11GeometryShader* shader = NULL; - switch (type) { - case POINT_SPRITE_SHADER: - shader = new D3D11PointSpriteGeometryShader(device_, hash_); - break; - case RECT_LIST_SHADER: - shader = new D3D11RectListGeometryShader(device_, hash_); - break; - case QUAD_LIST_SHADER: - shader = new D3D11QuadListGeometryShader(device_, hash_); - break; - default: - XEASSERTALWAYS(); - return 1; - } - if (!shader) { - return 1; - } - - if (shader->Prepare(this)) { - delete shader; - return 1; - } - - geometry_shaders_[type] = shader; - *out_shader = geometry_shaders_[type]; - return 0; -} - - -D3D11PixelShader::D3D11PixelShader( - ID3D11Device* device, - const uint8_t* src_ptr, size_t length, - uint64_t hash) : - handle_(0), - D3D11Shader(device, XE_GPU_SHADER_TYPE_PIXEL, - src_ptr, length, hash) { -} - -D3D11PixelShader::~D3D11PixelShader() { - XESAFERELEASE(handle_); -} - -int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl, - D3D11VertexShader* input_shader) { - SCOPE_profile_cpu_f("gpu"); - if (handle_) { - return 0; - } - - // TODO(benvanik): look in file based on hash/etc. - void* byte_code = NULL; - size_t byte_code_length = 0; - - // Translate and compile source. - const char* shader_source = Translate(program_cntl, input_shader); - if (!shader_source) { - return 1; - } - ID3D10Blob* shader_blob = Compile(shader_source); - if (!shader_blob) { - return 1; - } - byte_code_length = shader_blob->GetBufferSize(); - byte_code = xe_malloc(byte_code_length); - xe_copy_struct( - byte_code, shader_blob->GetBufferPointer(), byte_code_length); - XESAFERELEASE(shader_blob); - - // Create shader. - HRESULT hr = device_->CreatePixelShader( - byte_code, byte_code_length, - NULL, - &handle_); - if (FAILED(hr)) { - XELOGE("D3D11: failed to create pixel shader"); - xe_free(byte_code); - return 1; - } - - xe_free(byte_code); - - is_prepared_ = true; - return 0; -} - -const char* D3D11PixelShader::Translate( - xe_gpu_program_cntl_t* program_cntl, D3D11VertexShader* input_shader) { - SCOPE_profile_cpu_f("gpu"); - Output* output = new Output(); - xe_gpu_translate_ctx_t ctx; - ctx.output = output; - ctx.type = type_; - ctx.tex_fetch_index = 0; - - // We need an input VS to make decisions here. - // TODO(benvanik): do we need to pair VS/PS up and store the combination? - // If the same PS is used with different VS that output different amounts - // (and less than the number of required registers), things may die. - XEASSERTNOTNULL(input_shader); - const Shader::alloc_counts_t& input_alloc_counts = - input_shader->alloc_counts(); - - // Add constants buffers. - // We could optimize this by only including used buffers, but the compiler - // seems to do a good job of doing this for us. - // It also does read detection, so c[512] can end up c[4] in the asm - - // instead of doing this optimization ourselves we could maybe just query - // this from the compiler. - output->append( - "cbuffer float_consts : register(b0) {\n" - " float4 c[512];\n" - "};\n"); - // TODO(benvanik): add bool/loop constants. - - AppendTextureHeader(output); - - // Add vertex shader output (pixel shader input). - output->append( - "struct VS_OUTPUT {\n"); - if (input_alloc_counts.positions) { - XEASSERT(input_alloc_counts.positions == 1); - output->append( - " float4 oPos : SV_POSITION;\n"); - } - if (input_alloc_counts.params) { - output->append( - " float4 o[%d] : XE_O;\n", - MAX_INTERPOLATORS); - } - output->append( - "};\n"); - - // Add pixel shader output. - output->append( - "struct PS_OUTPUT {\n"); - for (uint32_t n = 0; n < alloc_counts_.params; n++) { - output->append( - " float4 oC%d : SV_TARGET%d;\n", n, n); - if (program_cntl->ps_export_depth) { - // Is this per render-target? - output->append( - " float oD%d : SV_DEPTH%d;\n", n, n); - } - } - output->append( - "};\n"); - - // Pixel shader main() header. - output->append( - "PS_OUTPUT main(VS_OUTPUT i) {\n" - " PS_OUTPUT o;\n"); - - // Add temporary registers. - uint32_t temp_regs = program_cntl->vs_regs + program_cntl->ps_regs; - for (uint32_t n = 0; n <= MAX(15, temp_regs); n++) { - output->append( - " float4 r%d = c[%d];\n", n, n); - } - output->append(" float4 t;\n"); - - // Bring registers local. - if (input_alloc_counts.params) { - for (uint32_t n = 0; n < MAX_INTERPOLATORS; n++) { - output->append( - " r%d = i.o[%d];\n", n, n); - } - } - - // Execute blocks. - for (std::vector::iterator it = execs_.begin(); - it != execs_.end(); ++it) { - instr_cf_exec_t& cf = *it; - // TODO(benvanik): figure out how sequences/jmps/loops/etc work. - if (TranslateExec(ctx, cf)) { - delete output; - return NULL; - } - } - - // main footer. - output->append( - " return o;\n" - "}\n"); - - set_translated_src(output->buffer); - delete output; - return translated_src_; -} - - -namespace { - -static const char chan_names[] = { - 'x', 'y', 'z', 'w', - // these only apply to FETCH dst's, and we shouldn't be using them: - '0', '1', '?', '_', -}; - -void AppendSrcReg( - xe_gpu_translate_ctx_t& ctx, - uint32_t num, uint32_t type, - uint32_t swiz, uint32_t negate, uint32_t abs) { - if (negate) { - ctx.output->append("-"); - } - if (abs) { - ctx.output->append("abs("); - } - if (type) { - // Register. - ctx.output->append("r%u", num); - } else { - // Constant. - ctx.output->append("c[%u]", num); - } - if (swiz) { - ctx.output->append("."); - for (int i = 0; i < 4; i++) { - ctx.output->append("%c", chan_names[(swiz + i) & 0x3]); - swiz >>= 2; - } - } - if (abs) { - ctx.output->append(")"); - } -} - -void AppendDestRegName( - xe_gpu_translate_ctx_t& ctx, - uint32_t num, uint32_t dst_exp) { - if (!dst_exp) { - // Register. - ctx.output->append("r%u", num); - } else { - // Export. - switch (ctx.type) { - case XE_GPU_SHADER_TYPE_VERTEX: - switch (num) { - case 62: - ctx.output->append("o.oPos"); - break; - case 63: - ctx.output->append("o.oPointSize"); - break; - default: - // Varying. - ctx.output->append("o.o[%u]", num);; - break; - } - break; - case XE_GPU_SHADER_TYPE_PIXEL: - switch (num) { - case 0: - ctx.output->append("o.oC0"); - break; - default: - // TODO(benvanik): other render targets? - // TODO(benvanik): depth? - XEASSERTALWAYS(); - break; - } - break; - } - } -} - -void AppendDestReg( - xe_gpu_translate_ctx_t& ctx, - uint32_t num, uint32_t mask, uint32_t dst_exp) { - if (mask != 0xF) { - // If masking, store to a temporary variable and clean it up later. - ctx.output->append("t"); - } else { - // Store directly to output. - AppendDestRegName(ctx, num, dst_exp); - } -} - -void AppendDestRegPost( - xe_gpu_translate_ctx_t& ctx, - uint32_t num, uint32_t mask, uint32_t dst_exp) { - if (mask != 0xF) { - // Masking. - ctx.output->append(" "); - AppendDestRegName(ctx, num, dst_exp); - ctx.output->append(" = float4("); - for (int i = 0; i < 4; i++) { - // TODO(benvanik): mask out values? mix in old value as temp? - // ctx.output->append("%c", (mask & 0x1) ? chan_names[i] : 'w'); - if (!(mask & 0x1)) { - AppendDestRegName(ctx, num, dst_exp); - } else { - ctx.output->append("t"); - } - ctx.output->append(".%c", chan_names[i]); - mask >>= 1; - if (i < 3) { - ctx.output->append(", "); - } - } - ctx.output->append(");\n"); - } -} - -void print_srcreg( - Output* output, - uint32_t num, uint32_t type, - uint32_t swiz, uint32_t negate, uint32_t abs) { - if (negate) { - output->append("-"); - } - if (abs) { - output->append("|"); - } - output->append("%c%u", type ? 'R' : 'C', num); - if (swiz) { - output->append("."); - for (int i = 0; i < 4; i++) { - output->append("%c", chan_names[(swiz + i) & 0x3]); - swiz >>= 2; - } - } - if (abs) { - output->append("|"); - } -} - -void print_dstreg( - Output* output, uint32_t num, uint32_t mask, uint32_t dst_exp) { - output->append("%s%u", dst_exp ? "export" : "R", num); - if (mask != 0xf) { - output->append("."); - for (int i = 0; i < 4; i++) { - output->append("%c", (mask & 0x1) ? chan_names[i] : '_'); - mask >>= 1; - } - } -} - -void print_export_comment( - Output* output, uint32_t num, XE_GPU_SHADER_TYPE type) { - const char *name = NULL; - switch (type) { - case XE_GPU_SHADER_TYPE_VERTEX: - switch (num) { - case 62: name = "gl_Position"; break; - case 63: name = "gl_PointSize"; break; - } - break; - case XE_GPU_SHADER_TYPE_PIXEL: - switch (num) { - case 0: name = "gl_FragColor"; break; - } - break; - } - /* if we had a symbol table here, we could look - * up the name of the varying.. - */ - if (name) { - output->append("\t; %s", name); - } -} - -int TranslateALU_ADDv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(" + "); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(")"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_MULv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(" * "); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(")"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_MAXv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - if (alu.src1_reg == alu.src2_reg && - alu.src1_sel == alu.src2_sel && - alu.src1_swiz == alu.src2_swiz && - alu.src1_reg_negate == alu.src2_reg_negate && - alu.src1_reg_abs == alu.src2_reg_abs) { - // This is a mov. - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - } else { - ctx.output->append("max("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(", "); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(")"); - } - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_MINv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("min("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(", "); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(")"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_SETXXv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu, const char* op) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("float4(("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").x %s (", op); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").x ? 1.0 : 0.0, ("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").y %s (", op); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").y ? 1.0 : 0.0, ("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").z %s (", op); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").z ? 1.0 : 0.0, ("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").w %s (", op); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").w ? 1.0 : 0.0)"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} -int TranslateALU_SETEv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SETXXv(ctx, alu, "=="); -} -int TranslateALU_SETGTv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SETXXv(ctx, alu, ">"); -} -int TranslateALU_SETGTEv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SETXXv(ctx, alu, ">="); -} -int TranslateALU_SETNEv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SETXXv(ctx, alu, "!="); -} - -int TranslateALU_FRACv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("frac("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(")"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_TRUNCv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("trunc("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(")"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_FLOORv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("floor("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(")"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_MULADDv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("mad("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(", "); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(", "); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(")"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_CNDXXv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu, const char* op) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as values. - ctx.output->append("float4(("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").x %s 0.0 ? (", op); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").x : ("); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(").x, ("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").y %s 0.0 ? (", op); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").y : ("); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(").y, ("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").z %s 0.0 ? (", op); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").z : ("); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(").z, ("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").w %s 0.0 ? (", op); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").w : ("); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(").w)"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} -int TranslateALU_CNDEv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_CNDXXv(ctx, alu, "=="); -} -int TranslateALU_CNDGTEv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_CNDXXv(ctx, alu, ">="); -} -int TranslateALU_CNDGTv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_CNDXXv(ctx, alu, ">"); -} - -int TranslateALU_DOT4v( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("dot("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(", "); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(")"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_DOT3v( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("dot(float4("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").xyz, float4("); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").xyz)"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_DOT2ADDv( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("dot(float4("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(").xy, float4("); - AppendSrcReg(ctx, alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); - ctx.output->append(").xy) + "); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".x"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -// CUBEv - -int TranslateALU_MAX4v( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.vector_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("max("); - ctx.output->append("max("); - ctx.output->append("max("); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(".x, "); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(".y), "); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(".z), "); - AppendSrcReg(ctx, alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); - ctx.output->append(".w)"); - if (alu.vector_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.vector_dest, alu.vector_write_mask, alu.export_data); - return 0; -} - -// ... - -int TranslateALU_MAXs( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.scalar_clamp) { - ctx.output->append("saturate("); - } - if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { - // This is a mov. - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - } else { - ctx.output->append("max("); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".x, "); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".y).xxxx"); - } - if (alu.scalar_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_MINs( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.scalar_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("min("); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".x, "); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".y).xxxx"); - if (alu.scalar_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_SETXXs( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu, const char* op) { - AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.scalar_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("(("); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".x %s 0.0) ? 1.0 : 0.0).xxxx", op); - if (alu.scalar_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - return 0; -} -int TranslateALU_SETEs( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SETXXs(ctx, alu, "=="); -} -int TranslateALU_SETGTs( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SETXXs(ctx, alu, ">"); -} -int TranslateALU_SETGTEs( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SETXXs(ctx, alu, ">="); -} -int TranslateALU_SETNEs( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SETXXs(ctx, alu, "!="); -} - -int TranslateALU_RECIP_IEEE( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.scalar_clamp) { - ctx.output->append("saturate("); - } - ctx.output->append("(1.0 / "); - AppendSrcReg(ctx, alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(")"); - if (alu.scalar_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - return 0; -} - -int TranslateALU_MUL_CONST_0( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.scalar_clamp) { - ctx.output->append("saturate("); - } - uint32_t src3_swiz = alu.src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); - ctx.output->append("("); - AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".%c * ", chan_names[swiz_a]); - AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".%c", chan_names[swiz_b]); - ctx.output->append(").xxxx"); - if (alu.scalar_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - return 0; -} -int TranslateALU_MUL_CONST_1( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_MUL_CONST_0(ctx, alu); -} - -int TranslateALU_ADD_CONST_0( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.scalar_clamp) { - ctx.output->append("saturate("); - } - uint32_t src3_swiz = alu.src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); - ctx.output->append("("); - AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".%c + ", chan_names[swiz_a]); - AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".%c", chan_names[swiz_b]); - ctx.output->append(").xxxx"); - if (alu.scalar_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - return 0; -} -int TranslateALU_ADD_CONST_1( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_ADD_CONST_0(ctx, alu); -} - -int TranslateALU_SUB_CONST_0( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - AppendDestReg(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - ctx.output->append(" = "); - if (alu.scalar_clamp) { - ctx.output->append("saturate("); - } - uint32_t src3_swiz = alu.src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); - ctx.output->append("("); - AppendSrcReg(ctx, alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".%c - ", chan_names[swiz_a]); - AppendSrcReg(ctx, reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); - ctx.output->append(".%c", chan_names[swiz_b]); - ctx.output->append(").xxxx"); - if (alu.scalar_clamp) { - ctx.output->append(")"); - } - ctx.output->append(";\n"); - AppendDestRegPost(ctx, alu.scalar_dest, alu.scalar_write_mask, alu.export_data); - return 0; -} -int TranslateALU_SUB_CONST_1( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu) { - return TranslateALU_SUB_CONST_0(ctx, alu); -} - -typedef int (*xe_gpu_translate_alu_fn)( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t& alu); -typedef struct { - uint32_t num_srcs; - const char* name; - xe_gpu_translate_alu_fn fn; -} xe_gpu_translate_alu_info_t; -#define ALU_INSTR(opc, num_srcs) \ - { num_srcs, #opc, 0 } -#define ALU_INSTR_IMPL(opc, num_srcs) \ - { num_srcs, #opc, TranslateALU_##opc } -static xe_gpu_translate_alu_info_t vector_alu_instrs[0x20] = { - ALU_INSTR_IMPL(ADDv, 2), // 0 - ALU_INSTR_IMPL(MULv, 2), // 1 - ALU_INSTR_IMPL(MAXv, 2), // 2 - ALU_INSTR_IMPL(MINv, 2), // 3 - ALU_INSTR_IMPL(SETEv, 2), // 4 - ALU_INSTR_IMPL(SETGTv, 2), // 5 - ALU_INSTR_IMPL(SETGTEv, 2), // 6 - ALU_INSTR_IMPL(SETNEv, 2), // 7 - ALU_INSTR_IMPL(FRACv, 1), // 8 - ALU_INSTR_IMPL(TRUNCv, 1), // 9 - ALU_INSTR_IMPL(FLOORv, 1), // 10 - ALU_INSTR_IMPL(MULADDv, 3), // 11 - ALU_INSTR_IMPL(CNDEv, 3), // 12 - ALU_INSTR_IMPL(CNDGTEv, 3), // 13 - ALU_INSTR_IMPL(CNDGTv, 3), // 14 - ALU_INSTR_IMPL(DOT4v, 2), // 15 - ALU_INSTR_IMPL(DOT3v, 2), // 16 - ALU_INSTR_IMPL(DOT2ADDv, 3), // 17 -- ??? - ALU_INSTR(CUBEv, 2), // 18 - ALU_INSTR_IMPL(MAX4v, 1), // 19 - ALU_INSTR(PRED_SETE_PUSHv, 2), // 20 - ALU_INSTR(PRED_SETNE_PUSHv, 2), // 21 - ALU_INSTR(PRED_SETGT_PUSHv, 2), // 22 - ALU_INSTR(PRED_SETGTE_PUSHv, 2), // 23 - ALU_INSTR(KILLEv, 2), // 24 - ALU_INSTR(KILLGTv, 2), // 25 - ALU_INSTR(KILLGTEv, 2), // 26 - ALU_INSTR(KILLNEv, 2), // 27 - ALU_INSTR(DSTv, 2), // 28 - ALU_INSTR(MOVAv, 1), // 29 -}; -static xe_gpu_translate_alu_info_t scalar_alu_instrs[0x40] = { - ALU_INSTR(ADDs, 1), // 0 - ALU_INSTR(ADD_PREVs, 1), // 1 - ALU_INSTR(MULs, 1), // 2 - ALU_INSTR(MUL_PREVs, 1), // 3 - ALU_INSTR(MUL_PREV2s, 1), // 4 - ALU_INSTR_IMPL(MAXs, 1), // 5 - ALU_INSTR_IMPL(MINs, 1), // 6 - ALU_INSTR_IMPL(SETEs, 1), // 7 - ALU_INSTR_IMPL(SETGTs, 1), // 8 - ALU_INSTR_IMPL(SETGTEs, 1), // 9 - ALU_INSTR_IMPL(SETNEs, 1), // 10 - ALU_INSTR(FRACs, 1), // 11 - ALU_INSTR(TRUNCs, 1), // 12 - ALU_INSTR(FLOORs, 1), // 13 - ALU_INSTR(EXP_IEEE, 1), // 14 - ALU_INSTR(LOG_CLAMP, 1), // 15 - ALU_INSTR(LOG_IEEE, 1), // 16 - ALU_INSTR(RECIP_CLAMP, 1), // 17 - ALU_INSTR(RECIP_FF, 1), // 18 - ALU_INSTR_IMPL(RECIP_IEEE, 1), // 19 - ALU_INSTR(RECIPSQ_CLAMP, 1), // 20 - ALU_INSTR(RECIPSQ_FF, 1), // 21 - ALU_INSTR(RECIPSQ_IEEE, 1), // 22 - ALU_INSTR(MOVAs, 1), // 23 - ALU_INSTR(MOVA_FLOORs, 1), // 24 - ALU_INSTR(SUBs, 1), // 25 - ALU_INSTR(SUB_PREVs, 1), // 26 - ALU_INSTR(PRED_SETEs, 1), // 27 - ALU_INSTR(PRED_SETNEs, 1), // 28 - ALU_INSTR(PRED_SETGTs, 1), // 29 - ALU_INSTR(PRED_SETGTEs, 1), // 30 - ALU_INSTR(PRED_SET_INVs, 1), // 31 - ALU_INSTR(PRED_SET_POPs, 1), // 32 - ALU_INSTR(PRED_SET_CLRs, 1), // 33 - ALU_INSTR(PRED_SET_RESTOREs, 1), // 34 - ALU_INSTR(KILLEs, 1), // 35 - ALU_INSTR(KILLGTs, 1), // 36 - ALU_INSTR(KILLGTEs, 1), // 37 - ALU_INSTR(KILLNEs, 1), // 38 - ALU_INSTR(KILLONEs, 1), // 39 - ALU_INSTR(SQRT_IEEE, 1), // 40 - { 0, 0, false }, - ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42 - ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43 - ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44 - ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45 - ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46 - ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47 - ALU_INSTR(SIN, 1), // 48 - ALU_INSTR(COS, 1), // 49 - ALU_INSTR(RETAIN_PREV, 1), // 50 -}; -#undef ALU_INSTR - -int TranslateALU( - xe_gpu_translate_ctx_t& ctx, const instr_alu_t* alu, int sync) { - Output* output = ctx.output; - - if (!alu->scalar_write_mask && !alu->vector_write_mask) { - output->append(" // \n"); - return 0; - } - - if (alu->vector_write_mask) { - // Disassemble vector op. - xe_gpu_translate_alu_info_t& iv = vector_alu_instrs[alu->vector_opc]; - output->append(" // %sALU:\t", sync ? "(S)" : " "); - output->append("%s", iv.name); - if (alu->pred_select & 0x2) { - // seems to work similar to conditional execution in ARM instruction - // set, so let's use a similar syntax for now: - output->append((alu->pred_select & 0x1) ? "EQ" : "NE"); - } - output->append("\t"); - print_dstreg(output, - alu->vector_dest, alu->vector_write_mask, alu->export_data); - output->append(" = "); - if (iv.num_srcs == 3) { - print_srcreg(output, - alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->src3_reg_abs); - output->append(", "); - } - print_srcreg(output, - alu->src1_reg, alu->src1_sel, alu->src1_swiz, - alu->src1_reg_negate, alu->src1_reg_abs); - if (iv.num_srcs > 1) { - output->append(", "); - print_srcreg(output, - alu->src2_reg, alu->src2_sel, alu->src2_swiz, - alu->src2_reg_negate, alu->src2_reg_abs); - } - if (alu->vector_clamp) { - output->append(" CLAMP"); - } - if (alu->export_data) { - print_export_comment(output, alu->vector_dest, ctx.type); - } - output->append("\n"); - - // Translate vector op. - if (iv.fn) { - output->append(" "); - if (iv.fn(ctx, *alu)) { - return 1; - } - } else { - output->append(" // \n"); - } - } - - if (alu->scalar_write_mask || !alu->vector_write_mask) { - // 2nd optional scalar op: - - // Disassemble scalar op. - xe_gpu_translate_alu_info_t& is = scalar_alu_instrs[alu->scalar_opc]; - output->append(" // "); - output->append("\t"); - if (is.name) { - output->append("\t \t%s\t", is.name); - } else { - output->append("\t \tOP(%u)\t", alu->scalar_opc); - } - print_dstreg(output, - alu->scalar_dest, alu->scalar_write_mask, alu->export_data); - output->append(" = "); - if (is.num_srcs == 2) { - // ADD_CONST_0 dest, [const], [reg] - uint32_t src3_swiz = alu->src3_swiz & ~0x3C; - uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; - uint32_t swiz_b = (src3_swiz & 0x3); - print_srcreg(output, - alu->src3_reg, 0, 0, - alu->src3_reg_negate, alu->src3_reg_abs); - output->append(".%c", chan_names[swiz_a]); - output->append(", "); - uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1); - print_srcreg(output, - reg2, 1, 0, - alu->src3_reg_negate, alu->src3_reg_abs); - output->append(".%c", chan_names[swiz_b]); - } else { - print_srcreg(output, - alu->src3_reg, alu->src3_sel, alu->src3_swiz, - alu->src3_reg_negate, alu->src3_reg_abs); - } - if (alu->scalar_clamp) { - output->append(" CLAMP"); - } - if (alu->export_data) { - print_export_comment(output, alu->scalar_dest, ctx.type); - } - output->append("\n"); - - // Translate scalar op. - if (is.fn) { - output->append(" "); - if (is.fn(ctx, *alu)) { - return 1; - } - } else { - output->append(" // \n"); - } - } - - return 0; -} - -struct { - const char *name; -} fetch_types[0xff] = { -#define TYPE(id) { #id } - TYPE(FMT_1_REVERSE), // 0 - {0}, - TYPE(FMT_8), // 2 - {0}, - {0}, - {0}, - TYPE(FMT_8_8_8_8), // 6 - TYPE(FMT_2_10_10_10), // 7 - {0}, - {0}, - TYPE(FMT_8_8), // 10 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_16), // 24 - TYPE(FMT_16_16), // 25 - TYPE(FMT_16_16_16_16), // 26 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_32), // 33 - TYPE(FMT_32_32), // 34 - TYPE(FMT_32_32_32_32), // 35 - TYPE(FMT_32_FLOAT), // 36 - TYPE(FMT_32_32_FLOAT), // 37 - TYPE(FMT_32_32_32_32_FLOAT), // 38 - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - {0}, - TYPE(FMT_32_32_32_FLOAT), // 57 -#undef TYPE -}; - -void print_fetch_dst(Output* output, uint32_t dst_reg, uint32_t dst_swiz) { - output->append("\tR%u.", dst_reg); - for (int i = 0; i < 4; i++) { - output->append("%c", chan_names[dst_swiz & 0x7]); - dst_swiz >>= 3; - } -} - -void AppendFetchDest(Output* output, uint32_t dst_reg, uint32_t dst_swiz) { - output->append("r%u.", dst_reg); - for (int i = 0; i < 4; i++) { - output->append("%c", chan_names[dst_swiz & 0x7]); - dst_swiz >>= 3; - } -} - -int TranslateVertexFetch( - xe_gpu_translate_ctx_t& ctx, const instr_fetch_vtx_t* vtx, int sync) { - Output* output = ctx.output; - - // Disassemble. - output->append(" // %sFETCH:\t", sync ? "(S)" : " "); - if (vtx->pred_select) { - output->append(vtx->pred_condition ? "EQ" : "NE"); - } - print_fetch_dst(output, vtx->dst_reg, vtx->dst_swiz); - output->append(" = R%u.", vtx->src_reg); - output->append("%c", chan_names[vtx->src_swiz & 0x3]); - if (fetch_types[vtx->format].name) { - output->append(" %s", fetch_types[vtx->format].name); - } else { - output->append(" TYPE(0x%x)", vtx->format); - } - output->append(" %s", vtx->format_comp_all ? "SIGNED" : "UNSIGNED"); - if (!vtx->num_format_all) { - output->append(" NORMALIZED"); - } - output->append(" STRIDE(%u)", vtx->stride); - if (vtx->offset) { - output->append(" OFFSET(%u)", vtx->offset); - } - output->append(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel); - if (1) { - // XXX - output->append(" src_reg_am=%u", vtx->src_reg_am); - output->append(" dst_reg_am=%u", vtx->dst_reg_am); - output->append(" num_format_all=%u", vtx->num_format_all); - output->append(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all); - output->append(" exp_adjust_all=%u", vtx->exp_adjust_all); - } - output->append("\n"); - - // Translate. - output->append(" "); - output->append("r%u.xyzw", vtx->dst_reg); - output->append(" = float4("); - uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; - // TODO(benvanik): detect xyzw = xyzw, etc. - // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc - uint32_t component_count = GetFormatComponentCount(vtx->format); - uint32_t dst_swiz = vtx->dst_swiz; - for (int i = 0; i < 4; i++) { - if ((dst_swiz & 0x7) == 4) { - output->append("0.0"); - } else if ((dst_swiz & 0x7) == 5) { - output->append("1.0"); - } else if ((dst_swiz & 0x7) == 6) { - // ? - output->append("?"); - } else if ((dst_swiz & 0x7) == 7) { - output->append("r%u.%c", vtx->dst_reg, chan_names[i]); - } else { - output->append("i.vf%u_%d.%c", - fetch_slot, vtx->offset, - chan_names[dst_swiz & 0x3]); - } - if (i < 3) { - output->append(", "); - } - dst_swiz >>= 3; - } - output->append(");\n"); - return 0; -} - -int TranslateTextureFetch( - xe_gpu_translate_ctx_t& ctx, const instr_fetch_tex_t* tex, int sync) { - Output* output = ctx.output; - - // Disassemble. - static const char *filter[] = { - "POINT", // TEX_FILTER_POINT - "LINEAR", // TEX_FILTER_LINEAR - "BASEMAP", // TEX_FILTER_BASEMAP - }; - static const char *aniso_filter[] = { - "DISABLED", // ANISO_FILTER_DISABLED - "MAX_1_1", // ANISO_FILTER_MAX_1_1 - "MAX_2_1", // ANISO_FILTER_MAX_2_1 - "MAX_4_1", // ANISO_FILTER_MAX_4_1 - "MAX_8_1", // ANISO_FILTER_MAX_8_1 - "MAX_16_1", // ANISO_FILTER_MAX_16_1 - }; - static const char *arbitrary_filter[] = { - "2x4_SYM", // ARBITRARY_FILTER_2X4_SYM - "2x4_ASYM", // ARBITRARY_FILTER_2X4_ASYM - "4x2_SYM", // ARBITRARY_FILTER_4X2_SYM - "4x2_ASYM", // ARBITRARY_FILTER_4X2_ASYM - "4x4_SYM", // ARBITRARY_FILTER_4X4_SYM - "4x4_ASYM", // ARBITRARY_FILTER_4X4_ASYM - }; - static const char *sample_loc[] = { - "CENTROID", // SAMPLE_CENTROID - "CENTER", // SAMPLE_CENTER - }; - uint32_t src_swiz = tex->src_swiz; - output->append(" // %sFETCH:\t", sync ? "(S)" : " "); - if (tex->pred_select) { - output->append(tex->pred_condition ? "EQ" : "NE"); - } - print_fetch_dst(output, tex->dst_reg, tex->dst_swiz); - output->append(" = R%u.", tex->src_reg); - for (int i = 0; i < 3; i++) { - output->append("%c", chan_names[src_swiz & 0x3]); - src_swiz >>= 2; - } - output->append(" CONST(%u)", tex->const_idx); - if (tex->fetch_valid_only) { - output->append(" VALID_ONLY"); - } - if (tex->tx_coord_denorm) { - output->append(" DENORM"); - } - if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) { - output->append(" MAG(%s)", filter[tex->mag_filter]); - } - if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) { - output->append(" MIN(%s)", filter[tex->min_filter]); - } - if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) { - output->append(" MIP(%s)", filter[tex->mip_filter]); - } - if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) { - output->append(" ANISO(%s)", aniso_filter[tex->aniso_filter]); - } - if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) { - output->append(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]); - } - if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) { - output->append(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); - } - if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) { - output->append(" VOL_MIN(%s)", filter[tex->vol_min_filter]); - } - if (!tex->use_comp_lod) { - output->append(" LOD(%u)", tex->use_comp_lod); - output->append(" LOD_BIAS(%u)", tex->lod_bias); - } - if (tex->use_reg_lod) { - output->append(" REG_LOD(%u)", tex->use_reg_lod); - } - if (tex->use_reg_gradients) { - output->append(" USE_REG_GRADIENTS"); - } - output->append(" LOCATION(%s)", sample_loc[tex->sample_location]); - if (tex->offset_x || tex->offset_y || tex->offset_z) { - output->append(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z); - } - output->append("\n"); - - int src_component_count = 0; - switch (tex->dimension) { - case DIMENSION_1D: - src_component_count = 1; - break; - default: - case DIMENSION_2D: - src_component_count = 2; - break; - case DIMENSION_3D: - src_component_count = 3; - break; - case DIMENSION_CUBE: - src_component_count = 3; - break; - } - - // Translate. - output->append(" "); - output->append("r%u.xyzw", tex->dst_reg); - output->append(" = "); - output->append( - "x_texture_%d.Sample(x_sampler_%d, r%u.", - tex->const_idx, - ctx.tex_fetch_index++, // hacky way to line up to tex buffers - tex->src_reg); - src_swiz = tex->src_swiz; - for (int i = 0; i < src_component_count; i++) { - output->append("%c", chan_names[src_swiz & 0x3]); - src_swiz >>= 2; - } - output->append(")."); - - // Pass one over dest does xyzw and fakes the special values. - // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc - uint32_t dst_swiz = tex->dst_swiz; - for (int i = 0; i < 4; i++) { - output->append("%c", chan_names[dst_swiz & 0x3]); - dst_swiz >>= 3; - } - output->append(";\n"); - // Do another pass to set constant values. - dst_swiz = tex->dst_swiz; - for (int i = 0; i < 4; i++) { - if ((dst_swiz & 0x7) == 4) { - output->append(" r%u.%c = 0.0;\n", tex->dst_reg, chan_names[i]); - } else if ((dst_swiz & 0x7) == 5) { - output->append(" r%u.%c = 1.0;\n", tex->dst_reg, chan_names[i]); - } - dst_swiz >>= 3; - } - return 0; -} - -struct { - const char *name; -} cf_instructions[] = { -#define INSTR(opc, fxn) { #opc } - INSTR(NOP, print_cf_nop), - INSTR(EXEC, print_cf_exec), - INSTR(EXEC_END, print_cf_exec), - INSTR(COND_EXEC, print_cf_exec), - INSTR(COND_EXEC_END, print_cf_exec), - INSTR(COND_PRED_EXEC, print_cf_exec), - INSTR(COND_PRED_EXEC_END, print_cf_exec), - INSTR(LOOP_START, print_cf_loop), - INSTR(LOOP_END, print_cf_loop), - INSTR(COND_CALL, print_cf_jmp_call), - INSTR(RETURN, print_cf_jmp_call), - INSTR(COND_JMP, print_cf_jmp_call), - INSTR(ALLOC, print_cf_alloc), - INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), - INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), - INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ?? -#undef INSTR -}; - -} // anonymous namespace - - -int D3D11Shader::TranslateExec(xe_gpu_translate_ctx_t& ctx, const instr_cf_exec_t& cf) { - Output* output = ctx.output; - - output->append( - " // %s ADDR(0x%x) CNT(0x%x)", - cf_instructions[cf.opc].name, cf.address, cf.count); - if (cf.yeild) { - output->append(" YIELD"); - } - uint8_t vc = cf.vc_hi | (cf.vc_lo << 2); - if (vc) { - output->append(" VC(0x%x)", vc); - } - if (cf.bool_addr) { - output->append(" BOOL_ADDR(0x%x)", cf.bool_addr); - } - if (cf.address_mode == ABSOLUTE_ADDR) { - output->append(" ABSOLUTE_ADDR"); - } - if (cf.is_cond_exec()) { - output->append(" COND(%d)", cf.condition); - } - output->append("\n"); - - uint32_t sequence = cf.serialize; - for (uint32_t i = 0; i < cf.count; i++) { - uint32_t alu_off = (cf.address + i); - int sync = sequence & 0x2; - if (sequence & 0x1) { - const instr_fetch_t* fetch = - (const instr_fetch_t*)(dwords_ + alu_off * 3); - switch (fetch->opc) { - case VTX_FETCH: - if (TranslateVertexFetch(ctx, &fetch->vtx, sync)) { - return 1; - } - break; - case TEX_FETCH: - if (TranslateTextureFetch(ctx, &fetch->tex, sync)) { - return 1; - } - break; - case TEX_GET_BORDER_COLOR_FRAC: - case TEX_GET_COMP_TEX_LOD: - case TEX_GET_GRADIENTS: - case TEX_GET_WEIGHTS: - case TEX_SET_TEX_LOD: - case TEX_SET_GRADIENTS_H: - case TEX_SET_GRADIENTS_V: - default: - XEASSERTALWAYS(); - break; - } - } else { - const instr_alu_t* alu = - (const instr_alu_t*)(dwords_ + alu_off * 3); - if (TranslateALU(ctx, alu, sync)) { - return 1; - } - } - sequence >>= 2; - } - - return 0; -} diff --git a/src/xenia/gpu/d3d11/d3d11_shader.h b/src/xenia/gpu/d3d11/d3d11_shader.h deleted file mode 100644 index 0b0bb492c..000000000 --- a/src/xenia/gpu/d3d11/d3d11_shader.h +++ /dev/null @@ -1,125 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_D3D11_D3D11_SHADER_H_ -#define XENIA_GPU_D3D11_D3D11_SHADER_H_ - -#include - -#include -#include - -#include - - -namespace xe { -namespace gpu { -namespace d3d11 { - -struct Output; - -typedef struct { - Output* output; - xenos::XE_GPU_SHADER_TYPE type; - uint32_t tex_fetch_index; -} xe_gpu_translate_ctx_t; - -class D3D11GeometryShader; - - -class D3D11Shader : public Shader { -public: - virtual ~D3D11Shader(); - - const static uint32_t MAX_INTERPOLATORS = 16; - -protected: - D3D11Shader( - ID3D11Device* device, - xenos::XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length, - uint64_t hash); - - const char* translated_src() const { return translated_src_; } - void set_translated_src(char* value); - - void AppendTextureHeader(Output* output); - int TranslateExec( - xe_gpu_translate_ctx_t& ctx, const xenos::instr_cf_exec_t& cf); - - ID3D10Blob* Compile(const char* shader_source); - -protected: - ID3D11Device* device_; - - char* translated_src_; -}; - - -class D3D11VertexShader : public D3D11Shader { -public: - D3D11VertexShader( - ID3D11Device* device, - const uint8_t* src_ptr, size_t length, - uint64_t hash); - virtual ~D3D11VertexShader(); - - ID3D11VertexShader* handle() const { return handle_; } - ID3D11InputLayout* input_layout() const { return input_layout_; } - - int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl); - - enum GeometryShaderType { - POINT_SPRITE_SHADER, - RECT_LIST_SHADER, - QUAD_LIST_SHADER, - - MAX_GEOMETRY_SHADER_TYPE, - }; - int DemandGeometryShader(GeometryShaderType type, - D3D11GeometryShader** out_shader); - -private: - const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl); - -private: - ID3D11VertexShader* handle_; - ID3D11InputLayout* input_layout_; - D3D11GeometryShader* geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE]; -}; - - -class D3D11PixelShader : public D3D11Shader { -public: - D3D11PixelShader( - ID3D11Device* device, - const uint8_t* src_ptr, size_t length, - uint64_t hash); - virtual ~D3D11PixelShader(); - - ID3D11PixelShader* handle() const { return handle_; } - - int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl, - D3D11VertexShader* input_shader); - -private: - const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl, - D3D11VertexShader* input_shader); - -private: - ID3D11PixelShader* handle_; -}; - - -} // namespace d3d11 -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_D3D11_D3D11_SHADER_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_shader_cache.cc b/src/xenia/gpu/d3d11/d3d11_shader_cache.cc deleted file mode 100644 index 7f6a5a722..000000000 --- a/src/xenia/gpu/d3d11/d3d11_shader_cache.cc +++ /dev/null @@ -1,45 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::d3d11; -using namespace xe::gpu::xenos; - - -D3D11ShaderCache::D3D11ShaderCache(ID3D11Device* device) { - device_ = device; - device_->AddRef(); -} - -D3D11ShaderCache::~D3D11ShaderCache() { - device_->Release(); -} - -Shader* D3D11ShaderCache::CreateCore( - xenos::XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length, - uint64_t hash) { - switch (type) { - case XE_GPU_SHADER_TYPE_VERTEX: - return new D3D11VertexShader( - device_, src_ptr, length, hash); - case XE_GPU_SHADER_TYPE_PIXEL: - return new D3D11PixelShader( - device_, src_ptr, length, hash); - default: - XEASSERTALWAYS(); - return NULL; - } -} \ No newline at end of file diff --git a/src/xenia/gpu/d3d11/d3d11_shader_cache.h b/src/xenia/gpu/d3d11/d3d11_shader_cache.h deleted file mode 100644 index 8c33523b4..000000000 --- a/src/xenia/gpu/d3d11/d3d11_shader_cache.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_ -#define XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_ - -#include - -#include - -#include - - -namespace xe { -namespace gpu { -namespace d3d11 { - - -class D3D11ShaderCache : public ShaderCache { -public: - D3D11ShaderCache(ID3D11Device* device); - virtual ~D3D11ShaderCache(); - -protected: - Shader* CreateCore( - xenos::XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length, - uint64_t hash) override; - -protected: - ID3D11Device* device_; -}; - - -} // namespace d3d11 -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_shader_resource.cc b/src/xenia/gpu/d3d11/d3d11_shader_resource.cc new file mode 100644 index 000000000..e4be7e2cf --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_shader_resource.cc @@ -0,0 +1,381 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include +#include +#include +#include + +#include + +using namespace xe; +using namespace xe::gpu; +using namespace xe::gpu::d3d11; +using namespace xe::gpu::xenos; + + +namespace { + +ID3D10Blob* D3D11ShaderCompile(XE_GPU_SHADER_TYPE type, + const char* shader_source, + const char* disasm_source) { + SCOPE_profile_cpu_f("gpu"); + + // TODO(benvanik): pick shared runtime mode defines. + D3D10_SHADER_MACRO defines[] = { + "TEST_DEFINE", "1", + 0, 0, + }; + + uint32_t flags1 = 0; + flags1 |= D3D10_SHADER_DEBUG; + flags1 |= D3D10_SHADER_ENABLE_STRICTNESS; + uint32_t flags2 = 0; + + // Create a name. + const char* base_path = ""; + if (FLAGS_dump_shaders.size()) { + base_path = FLAGS_dump_shaders.c_str(); + } + size_t hash = xe_hash64(disasm_source, xestrlena(disasm_source)); // ? + char file_name[XE_MAX_PATH]; + xesnprintfa(file_name, XECOUNT(file_name), + "%s/gen_%.16llX.%s", + base_path, + hash, + type == XE_GPU_SHADER_TYPE_VERTEX ? "vs" : "ps"); + + if (FLAGS_dump_shaders.size()) { + FILE* f = fopen(file_name, "w"); + fprintf(f, shader_source); + fprintf(f, "\n\n"); + fprintf(f, "/*\n"); + fprintf(f, disasm_source); + fprintf(f, " */\n"); + fclose(f); + } + + // Compile shader to bytecode blob. + ID3D10Blob* shader_blob = 0; + ID3D10Blob* error_blob = 0; + HRESULT hr = D3DCompile( + shader_source, strlen(shader_source), + file_name, + defines, nullptr, + "main", + type == XE_GPU_SHADER_TYPE_VERTEX ? "vs_5_0" : "ps_5_0", + flags1, flags2, + &shader_blob, &error_blob); + if (error_blob) { + char* msg = (char*)error_blob->GetBufferPointer(); + XELOGE("D3D11: shader compile failed with %s", msg); + } + XESAFERELEASE(error_blob); + if (FAILED(hr)) { + return nullptr; + } + return shader_blob; +} + +} // namespace + + +D3D11VertexShaderResource::D3D11VertexShaderResource( + D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info) + : VertexShaderResource(memory_range, info), + resource_cache_(resource_cache), + handle_(nullptr), + input_layout_(nullptr), + translated_src_(nullptr) { + xe_zero_struct(geometry_shaders_, sizeof(geometry_shaders_)); +} + +D3D11VertexShaderResource::~D3D11VertexShaderResource() { + XESAFERELEASE(handle_); + XESAFERELEASE(input_layout_); + for (int i = 0; i < XECOUNT(geometry_shaders_); ++i) { + delete geometry_shaders_[i]; + } + xe_free(translated_src_); +} + +int D3D11VertexShaderResource::Prepare( + const xe_gpu_program_cntl_t& program_cntl) { + SCOPE_profile_cpu_f("gpu"); + if (is_prepared_ || handle_) { + return 0; + } + + // TODO(benvanik): look in file based on hash/etc. + void* byte_code = NULL; + size_t byte_code_length = 0; + + // Translate and compile source. + D3D11ShaderTranslator translator; + int ret = translator.TranslateVertexShader(this, program_cntl); + if (ret) { + XELOGE("D3D11: failed to translate vertex shader"); + return ret; + } + translated_src_ = xestrdupa(translator.translated_src()); + + ID3D10Blob* shader_blob = D3D11ShaderCompile( + XE_GPU_SHADER_TYPE_VERTEX, translated_src_, disasm_src()); + if (!shader_blob) { + return 1; + } + byte_code_length = shader_blob->GetBufferSize(); + byte_code = xe_malloc(byte_code_length); + xe_copy_struct( + byte_code, shader_blob->GetBufferPointer(), byte_code_length); + XESAFERELEASE(shader_blob); + + // Create shader. + HRESULT hr = resource_cache_->device()->CreateVertexShader( + byte_code, byte_code_length, + nullptr, + &handle_); + if (FAILED(hr)) { + XELOGE("D3D11: failed to create vertex shader"); + xe_free(byte_code); + return 1; + } + + // Create input layout. + ret = CreateInputLayout(byte_code, byte_code_length); + xe_free(byte_code); + if (ret) { + return 1; + } + is_prepared_ = true; + return 0; +} + +int D3D11VertexShaderResource::CreateInputLayout(const void* byte_code, + size_t byte_code_length) { + size_t element_count = 0; + const auto& inputs = buffer_inputs(); + for (uint32_t n = 0; n < inputs.count; n++) { + element_count += inputs.descs[n].info.element_count; + } + if (!element_count) { + XELOGW("D3D11: vertex shader with zero inputs -- retaining previous values?"); + input_layout_ = NULL; + return 0; + } + + D3D11_INPUT_ELEMENT_DESC* element_descs = + (D3D11_INPUT_ELEMENT_DESC*)xe_alloca( + sizeof(D3D11_INPUT_ELEMENT_DESC) * element_count); + uint32_t el_index = 0; + for (uint32_t n = 0; n < inputs.count; n++) { + const auto& input = inputs.descs[n]; + for (uint32_t m = 0; m < input.info.element_count; m++) { + const auto& el = input.info.elements[m]; + uint32_t vb_slot = input.input_index; + DXGI_FORMAT vtx_format; + switch (el.format) { + case FMT_8_8_8_8: + if (el.is_normalized) { + vtx_format = el.is_signed ? + DXGI_FORMAT_R8G8B8A8_SNORM : DXGI_FORMAT_R8G8B8A8_UNORM; + } else { + vtx_format = el.is_signed ? + DXGI_FORMAT_R8G8B8A8_SINT : DXGI_FORMAT_R8G8B8A8_UINT; + } + break; + case FMT_2_10_10_10: + if (el.is_normalized) { + vtx_format = DXGI_FORMAT_R10G10B10A2_UNORM; + } else { + vtx_format = DXGI_FORMAT_R10G10B10A2_UINT; + } + break; + // DXGI_FORMAT_R11G11B10_FLOAT? + case FMT_16_16: + if (el.is_normalized) { + vtx_format = el.is_signed ? + DXGI_FORMAT_R16G16_SNORM : DXGI_FORMAT_R16G16_UNORM; + } else { + vtx_format = el.is_signed ? + DXGI_FORMAT_R16G16_SINT : DXGI_FORMAT_R16G16_UINT; + } + break; + case FMT_16_16_16_16: + if (el.is_normalized) { + vtx_format = el.is_signed ? + DXGI_FORMAT_R16G16B16A16_SNORM : DXGI_FORMAT_R16G16B16A16_UNORM; + } else { + vtx_format = el.is_signed ? + DXGI_FORMAT_R16G16B16A16_SINT : DXGI_FORMAT_R16G16B16A16_UINT; + } + break; + case FMT_16_16_FLOAT: + vtx_format = DXGI_FORMAT_R16G16_FLOAT; + break; + case FMT_16_16_16_16_FLOAT: + vtx_format = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + case FMT_32: + vtx_format = el.is_signed ? + DXGI_FORMAT_R32_SINT : DXGI_FORMAT_R32_UINT; + break; + case FMT_32_32: + vtx_format = el.is_signed ? + DXGI_FORMAT_R32G32_SINT : DXGI_FORMAT_R32G32_UINT; + break; + case FMT_32_32_32_32: + vtx_format = el.is_signed ? + DXGI_FORMAT_R32G32B32A32_SINT : DXGI_FORMAT_R32G32B32A32_UINT; + break; + case FMT_32_FLOAT: + vtx_format = DXGI_FORMAT_R32_FLOAT; + break; + case FMT_32_32_FLOAT: + vtx_format = DXGI_FORMAT_R32G32_FLOAT; + break; + case FMT_32_32_32_FLOAT: + vtx_format = DXGI_FORMAT_R32G32B32_FLOAT; + break; + case FMT_32_32_32_32_FLOAT: + vtx_format = DXGI_FORMAT_R32G32B32A32_FLOAT; + break; + default: + XEASSERTALWAYS(); + break; + } + element_descs[el_index].SemanticName = "XE_VF"; + element_descs[el_index].SemanticIndex = el_index; + element_descs[el_index].Format = vtx_format; + element_descs[el_index].InputSlot = vb_slot; + element_descs[el_index].AlignedByteOffset = el.offset_words * 4; + element_descs[el_index].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + element_descs[el_index].InstanceDataStepRate = 0; + el_index++; + } + } + HRESULT hr = resource_cache_->device()->CreateInputLayout( + element_descs, + (UINT)element_count, + byte_code, byte_code_length, + &input_layout_); + if (FAILED(hr)) { + XELOGE("D3D11: failed to create vertex shader input layout"); + return 1; + } + + return 0; +} + +int D3D11VertexShaderResource::DemandGeometryShader( + GeometryShaderType type, D3D11GeometryShader** out_shader) { + if (geometry_shaders_[type]) { + *out_shader = geometry_shaders_[type]; + return 0; + } + + // Demand generate. + auto device = resource_cache_->device(); + D3D11GeometryShader* shader = nullptr; + switch (type) { + case POINT_SPRITE_SHADER: + shader = new D3D11PointSpriteGeometryShader(device); + break; + case RECT_LIST_SHADER: + shader = new D3D11RectListGeometryShader(device); + break; + case QUAD_LIST_SHADER: + shader = new D3D11QuadListGeometryShader(device); + break; + default: + XEASSERTALWAYS(); + return 1; + } + if (!shader) { + return 1; + } + + if (shader->Prepare(this)) { + delete shader; + return 1; + } + + geometry_shaders_[type] = shader; + *out_shader = geometry_shaders_[type]; + return 0; +} + +D3D11PixelShaderResource::D3D11PixelShaderResource( + D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info) + : PixelShaderResource(memory_range, info), + resource_cache_(resource_cache), + handle_(nullptr), + translated_src_(nullptr) { +} + +D3D11PixelShaderResource::~D3D11PixelShaderResource() { + XESAFERELEASE(handle_); + xe_free(translated_src_); +} + +int D3D11PixelShaderResource::Prepare(const xe_gpu_program_cntl_t& program_cntl, + VertexShaderResource* input_shader) { + SCOPE_profile_cpu_f("gpu"); + if (is_prepared_ || handle_) { + return 0; + } + + // TODO(benvanik): look in file based on hash/etc. + void* byte_code = NULL; + size_t byte_code_length = 0; + + // Translate and compile source. + D3D11ShaderTranslator translator; + int ret = translator.TranslatePixelShader(this, + program_cntl, + input_shader->alloc_counts()); + if (ret) { + XELOGE("D3D11: failed to translate pixel shader"); + return ret; + } + translated_src_ = xestrdupa(translator.translated_src()); + + ID3D10Blob* shader_blob = D3D11ShaderCompile( + XE_GPU_SHADER_TYPE_PIXEL, translated_src_, disasm_src()); + if (!shader_blob) { + return 1; + } + byte_code_length = shader_blob->GetBufferSize(); + byte_code = xe_malloc(byte_code_length); + xe_copy_struct( + byte_code, shader_blob->GetBufferPointer(), byte_code_length); + XESAFERELEASE(shader_blob); + + // Create shader. + HRESULT hr = resource_cache_->device()->CreatePixelShader( + byte_code, byte_code_length, + nullptr, + &handle_); + if (FAILED(hr)) { + XELOGE("D3D11: failed to create pixel shader"); + xe_free(byte_code); + return 1; + } + + xe_free(byte_code); + is_prepared_ = true; + return 0; +} diff --git a/src/xenia/gpu/d3d11/d3d11_shader_resource.h b/src/xenia/gpu/d3d11/d3d11_shader_resource.h new file mode 100644 index 000000000..5c0da8242 --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_shader_resource.h @@ -0,0 +1,91 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_ +#define XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_ + +#include +#include + +#include + + +namespace xe { +namespace gpu { +namespace d3d11 { + +class D3D11GeometryShader; +class D3D11ResourceCache; + +struct Output; +typedef struct { + Output* output; + xenos::XE_GPU_SHADER_TYPE type; + uint32_t tex_fetch_index; +} xe_gpu_translate_ctx_t; + +class D3D11VertexShaderResource : public VertexShaderResource { +public: + D3D11VertexShaderResource(D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info); + ~D3D11VertexShaderResource() override; + + void* handle() const override { return handle_; } + ID3D11InputLayout* input_layout() const { return input_layout_; } + const char* translated_src() const { return translated_src_; } + + int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl) override; + + enum GeometryShaderType { + POINT_SPRITE_SHADER, + RECT_LIST_SHADER, + QUAD_LIST_SHADER, + MAX_GEOMETRY_SHADER_TYPE, // keep at the end + }; + int DemandGeometryShader(GeometryShaderType type, + D3D11GeometryShader** out_shader); + +private: + int CreateInputLayout(const void* byte_code, size_t byte_code_length); + + D3D11ResourceCache* resource_cache_; + ID3D11VertexShader* handle_; + ID3D11InputLayout* input_layout_; + D3D11GeometryShader* geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE]; + char* translated_src_; +}; + + +class D3D11PixelShaderResource : public PixelShaderResource { +public: + D3D11PixelShaderResource(D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info); + ~D3D11PixelShaderResource() override; + + void* handle() const override { return handle_; } + const char* translated_src() const { return translated_src_; } + + int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl, + VertexShaderResource* vertex_shader) override; + +private: + D3D11ResourceCache* resource_cache_; + ID3D11PixelShader* handle_; + char* translated_src_; +}; + + +} // namespace d3d11 +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_shader_translator.cc b/src/xenia/gpu/d3d11/d3d11_shader_translator.cc new file mode 100644 index 000000000..dde024356 --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_shader_translator.cc @@ -0,0 +1,1625 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include +#include +#include + + +using namespace xe; +using namespace xe::gpu; +using namespace xe::gpu::d3d11; +using namespace xe::gpu::xenos; + + +namespace { + +const char* GetFormatTypeName(const VertexBufferResource::DeclElement& el) { + switch (el.format) { + case FMT_32: + return el.is_signed ? "int" : "uint"; + case FMT_32_FLOAT: + return "float"; + case FMT_16_16: + case FMT_32_32: + if (el.is_normalized) { + return el.is_signed ? "snorm float2" : "unorm float2"; + } else { + return el.is_signed ? "int2" : "uint2"; + } + case FMT_16_16_FLOAT: + case FMT_32_32_FLOAT: + return "float2"; + case FMT_10_11_11: + case FMT_11_11_10: + return "int3"; // ? + case FMT_32_32_32_FLOAT: + return "float3"; + case FMT_8_8_8_8: + case FMT_2_10_10_10: + case FMT_16_16_16_16: + case FMT_32_32_32_32: + if (el.is_normalized) { + return el.is_signed ? "snorm float4" : "unorm float4"; + } else { + return el.is_signed ? "int4" : "uint4"; + } + case FMT_16_16_16_16_FLOAT: + case FMT_32_32_32_32_FLOAT: + return "float4"; + default: + XELOGE("Unknown vertex format: %d", el.format); + XEASSERTALWAYS(); + return "float4"; + } +} + +} // anonymous namespace + +D3D11ShaderTranslator::D3D11ShaderTranslator() + : capacity_(kCapacity), offset_(0) { + buffer_[0] = 0; +} + +int D3D11ShaderTranslator::TranslateVertexShader( + VertexShaderResource* vertex_shader, + const xe_gpu_program_cntl_t& program_cntl) { + SCOPE_profile_cpu_f("gpu"); + + type_ = XE_GPU_SHADER_TYPE_VERTEX; + tex_fetch_index_ = 0; + dwords_ = vertex_shader->dwords(); + + // Add constants buffers. + // We could optimize this by only including used buffers, but the compiler + // seems to do a good job of doing this for us. + // It also does read detection, so c[512] can end up c[4] in the asm - + // instead of doing this optimization ourselves we could maybe just query + // this from the compiler. + append( + "cbuffer float_consts : register(b0) {\n" + " float4 c[512];\n" + "};\n"); + // TODO(benvanik): add bool/loop constants. + + AppendTextureHeader(vertex_shader->sampler_inputs()); + + // Transform utilities. We adjust the output position in various ways + // as we can't do this via D3D11 APIs. + append( + "cbuffer vs_consts : register(b3) {\n" + " float4 window;\n" // x,y,w,h + " float4 viewport_z_enable;\n" // min,(max - min),?,enabled + " float4 viewport_size;\n" // x,y,w,h + "};" + "float4 applyViewport(float4 pos) {\n" + " if (viewport_z_enable.w) {\n" + //" pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n" + //" pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n" + //" pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n" + // w? + " } else {\n" + " pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + float2(-1.0, 1.0);\n" + " pos.zw = float2(0.0, 1.0);\n" + " }\n" + " pos.xy += window.xy;\n" + " return pos;\n" + "}\n"); + + // Add vertex shader input. + append( + "struct VS_INPUT {\n"); + uint32_t el_index = 0; + const auto& buffer_inputs = vertex_shader->buffer_inputs(); + for (uint32_t n = 0; n < buffer_inputs.count; n++) { + const auto& input = buffer_inputs.descs[n]; + for (uint32_t m = 0; m < input.info.element_count; m++) { + const auto& el = input.info.elements[m]; + const char* type_name = GetFormatTypeName(el); + const auto& fetch = el.vtx_fetch; + uint32_t fetch_slot = fetch.const_index * 3 + fetch.const_index_sel; + append( + " %s vf%u_%d : XE_VF%u;\n", + type_name, fetch_slot, fetch.offset, el_index); + el_index++; + } + } + append( + "};\n"); + + // Add vertex shader output (pixel shader input). + const auto& alloc_counts = vertex_shader->alloc_counts(); + append( + "struct VS_OUTPUT {\n"); + if (alloc_counts.positions) { + XEASSERT(alloc_counts.positions == 1); + append( + " float4 oPos : SV_POSITION;\n"); + } + if (alloc_counts.params) { + append( + " float4 o[%d] : XE_O;\n", + kMaxInterpolators); + } + if (alloc_counts.point_size) { + append( + " float4 oPointSize : PSIZE;\n"); + } + append( + "};\n"); + + // Vertex shader main() header. + append( + "VS_OUTPUT main(VS_INPUT i) {\n" + " VS_OUTPUT o;\n"); + + // Always write position, as some shaders seem to only write certain values. + append( + " o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n"); + if (alloc_counts.point_size) { + append( + " o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n"); + } + + // TODO(benvanik): remove this, if possible (though the compiler may be smart + // enough to do it for us). + if (alloc_counts.params) { + for (uint32_t n = 0; n < kMaxInterpolators; n++) { + append( + " o.o[%d] = float4(0.0, 0.0, 0.0, 0.0);\n", n); + } + } + + // Add temporaries for any registers we may use. + uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; + for (uint32_t n = 0; n <= temp_regs; n++) { + append( + " float4 r%d = c[%d];\n", n, n); + } + append(" float4 t;\n"); + + // Execute blocks. + const auto& execs = vertex_shader->execs(); + for (auto it = execs.begin(); it != execs.end(); ++it) { + const instr_cf_exec_t& cf = *it; + // TODO(benvanik): figure out how sequences/jmps/loops/etc work. + if (TranslateExec(cf)) { + return 1; + } + } + + // main footer. + append( + " o.oPos = applyViewport(o.oPos);\n" + " return o;\n" + "};\n"); + + return 0; +} + +int D3D11ShaderTranslator::TranslatePixelShader( + PixelShaderResource* pixel_shader, + const xe_gpu_program_cntl_t& program_cntl, + const VertexShaderResource::AllocCounts& alloc_counts) { + SCOPE_profile_cpu_f("gpu"); + + // We need an input VS to make decisions here. + // TODO(benvanik): do we need to pair VS/PS up and store the combination? + // If the same PS is used with different VS that output different amounts + // (and less than the number of required registers), things may die. + + type_ = XE_GPU_SHADER_TYPE_PIXEL; + tex_fetch_index_ = 0; + dwords_ = pixel_shader->dwords(); + + // Add constants buffers. + // We could optimize this by only including used buffers, but the compiler + // seems to do a good job of doing this for us. + // It also does read detection, so c[512] can end up c[4] in the asm - + // instead of doing this optimization ourselves we could maybe just query + // this from the compiler. + append( + "cbuffer float_consts : register(b0) {\n" + " float4 c[512];\n" + "};\n"); + // TODO(benvanik): add bool/loop constants. + + AppendTextureHeader(pixel_shader->sampler_inputs()); + + // Add vertex shader output (pixel shader input). + append( + "struct VS_OUTPUT {\n"); + if (alloc_counts.positions) { + XEASSERT(alloc_counts.positions == 1); + append( + " float4 oPos : SV_POSITION;\n"); + } + if (alloc_counts.params) { + append( + " float4 o[%d] : XE_O;\n", + kMaxInterpolators); + } + append( + "};\n"); + + // Add pixel shader output. + append( + "struct PS_OUTPUT {\n"); + for (uint32_t n = 0; n < alloc_counts.params; n++) { + append( + " float4 oC%d : SV_TARGET%d;\n", n, n); + if (program_cntl.ps_export_depth) { + // Is this per render-target? + append( + " float oD%d : SV_DEPTH%d;\n", n, n); + } + } + append( + "};\n"); + + // Pixel shader main() header. + append( + "PS_OUTPUT main(VS_OUTPUT i) {\n" + " PS_OUTPUT o;\n"); + + // Add temporary registers. + uint32_t temp_regs = program_cntl.vs_regs + program_cntl.ps_regs; + for (uint32_t n = 0; n <= MAX(15, temp_regs); n++) { + append( + " float4 r%d = c[%d];\n", n, n); + } + append(" float4 t;\n"); + + // Bring registers local. + if (alloc_counts.params) { + for (uint32_t n = 0; n < kMaxInterpolators; n++) { + append( + " r%d = i.o[%d];\n", n, n); + } + } + + // Execute blocks. + const auto& execs = pixel_shader->execs(); + for (auto it = execs.begin(); it != execs.end(); ++it) { + const instr_cf_exec_t& cf = *it; + // TODO(benvanik): figure out how sequences/jmps/loops/etc work. + if (TranslateExec(cf)) { + return 1; + } + } + + // main footer. + append( + " return o;\n" + "}\n"); + + return 0; +} + +void D3D11ShaderTranslator::AppendTextureHeader( + const ShaderResource::SamplerInputs& sampler_inputs) { + bool fetch_setup[32] = { false }; + + // 1 texture per constant slot, 1 sampler per fetch. + for (uint32_t n = 0; n < sampler_inputs.count; n++) { + const auto& input = sampler_inputs.descs[n]; + const auto& fetch = input.tex_fetch; + + // Add texture, if needed. + if (!fetch_setup[fetch.const_idx]) { + fetch_setup[fetch.const_idx] = true; + const char* texture_type = NULL; + switch (fetch.dimension) { + case DIMENSION_1D: + texture_type = "Texture1D"; + break; + default: + case DIMENSION_2D: + texture_type = "Texture2D"; + break; + case DIMENSION_3D: + texture_type = "Texture3D"; + break; + case DIMENSION_CUBE: + texture_type = "TextureCube"; + break; + } + append("%s x_texture_%d;\n", texture_type, fetch.const_idx); + } + + // Add sampler. + append("SamplerState x_sampler_%d;\n", n); + } +} + +namespace { + +static const char chan_names[] = { + 'x', 'y', 'z', 'w', + // these only apply to FETCH dst's, and we shouldn't be using them: + '0', '1', '?', '_', +}; + +} // namespace + +void D3D11ShaderTranslator::AppendSrcReg(uint32_t num, uint32_t type, + uint32_t swiz, uint32_t negate, + uint32_t abs) { + if (negate) { + append("-"); + } + if (abs) { + append("abs("); + } + if (type) { + // Register. + append("r%u", num); + } else { + // Constant. + append("c[%u]", num); + } + if (swiz) { + append("."); + for (int i = 0; i < 4; i++) { + append("%c", chan_names[(swiz + i) & 0x3]); + swiz >>= 2; + } + } + if (abs) { + append(")"); + } +} + +void D3D11ShaderTranslator::AppendDestRegName(uint32_t num, uint32_t dst_exp) { + if (!dst_exp) { + // Register. + append("r%u", num); + } else { + // Export. + switch (type_) { + case XE_GPU_SHADER_TYPE_VERTEX: + switch (num) { + case 62: + append("o.oPos"); + break; + case 63: + append("o.oPointSize"); + break; + default: + // Varying. + append("o.o[%u]", num);; + break; + } + break; + case XE_GPU_SHADER_TYPE_PIXEL: + switch (num) { + case 0: + append("o.oC0"); + break; + default: + // TODO(benvanik): other render targets? + // TODO(benvanik): depth? + XEASSERTALWAYS(); + break; + } + break; + } + } +} + +void D3D11ShaderTranslator::AppendDestReg(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + if (mask != 0xF) { + // If masking, store to a temporary variable and clean it up later. + append("t"); + } else { + // Store directly to output. + AppendDestRegName(num, dst_exp); + } +} + +void D3D11ShaderTranslator::AppendDestRegPost(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + if (mask != 0xF) { + // Masking. + append(" "); + AppendDestRegName(num, dst_exp); + append(" = float4("); + for (int i = 0; i < 4; i++) { + // TODO(benvanik): mask out values? mix in old value as temp? + // append("%c", (mask & 0x1) ? chan_names[i] : 'w'); + if (!(mask & 0x1)) { + AppendDestRegName(num, dst_exp); + } else { + append("t"); + } + append(".%c", chan_names[i]); + mask >>= 1; + if (i < 3) { + append(", "); + } + } + append(");\n"); + } +} + +void D3D11ShaderTranslator::PrintSrcReg(uint32_t num, uint32_t type, + uint32_t swiz, uint32_t negate, + uint32_t abs) { + if (negate) { + append("-"); + } + if (abs) { + append("|"); + } + append("%c%u", type ? 'R' : 'C', num); + if (swiz) { + append("."); + for (int i = 0; i < 4; i++) { + append("%c", chan_names[(swiz + i) & 0x3]); + swiz >>= 2; + } + } + if (abs) { + append("|"); + } +} + +void D3D11ShaderTranslator::PrintDstReg(uint32_t num, uint32_t mask, + uint32_t dst_exp) { + append("%s%u", dst_exp ? "export" : "R", num); + if (mask != 0xf) { + append("."); + for (int i = 0; i < 4; i++) { + append("%c", (mask & 0x1) ? chan_names[i] : '_'); + mask >>= 1; + } + } +} + +void D3D11ShaderTranslator::PrintExportComment(uint32_t num) { + const char *name = NULL; + switch (type_) { + case XE_GPU_SHADER_TYPE_VERTEX: + switch (num) { + case 62: name = "gl_Position"; break; + case 63: name = "gl_PointSize"; break; + } + break; + case XE_GPU_SHADER_TYPE_PIXEL: + switch (num) { + case 0: name = "gl_FragColor"; break; + } + break; + } + /* if we had a symbol table here, we could look + * up the name of the varying.. + */ + if (name) { + append("\t; %s", name); + } +} + +int D3D11ShaderTranslator::TranslateALU_ADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(" + "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(")"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_MULv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(" * "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(")"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_MAXv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + if (alu.src1_reg == alu.src2_reg && + alu.src1_sel == alu.src2_sel && + alu.src1_swiz == alu.src2_swiz && + alu.src1_reg_negate == alu.src2_reg_negate && + alu.src1_reg_abs == alu.src2_reg_abs) { + // This is a mov. + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + } else { + append("max("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(")"); + } + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_MINv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("min("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(")"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_SETXXv(const instr_alu_t& alu, const char* op) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("float4(("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").x %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").x ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").y %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").y ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").z %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").z ? 1.0 : 0.0, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").w %s (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").w ? 1.0 : 0.0)"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} +int D3D11ShaderTranslator::TranslateALU_SETEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, "=="); +} +int D3D11ShaderTranslator::TranslateALU_SETGTv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, ">"); +} +int D3D11ShaderTranslator::TranslateALU_SETGTEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, ">="); +} +int D3D11ShaderTranslator::TranslateALU_SETNEv(const instr_alu_t& alu) { + return TranslateALU_SETXXv(alu, "!="); +} + +int D3D11ShaderTranslator::TranslateALU_FRACv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("frac("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(")"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_TRUNCv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("trunc("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(")"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_FLOORv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("floor("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(")"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_MULADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("mad("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(", "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(")"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_CNDXXv(const instr_alu_t& alu, const char* op) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + // TODO(benvanik): check argument order - could be 3 as compare and 1 and 2 as values. + append("float4(("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").x %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").x : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(").x, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").y %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").y : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(").y, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").z %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").z : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(").z, ("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").w %s 0.0 ? (", op); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").w : ("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(").w)"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} +int D3D11ShaderTranslator::TranslateALU_CNDEv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, "=="); +} +int D3D11ShaderTranslator::TranslateALU_CNDGTEv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, ">="); +} +int D3D11ShaderTranslator::TranslateALU_CNDGTv(const instr_alu_t& alu) { + return TranslateALU_CNDXXv(alu, ">"); +} + +int D3D11ShaderTranslator::TranslateALU_DOT4v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("dot("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(", "); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(")"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_DOT3v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("dot(float4("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").xyz, float4("); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").xyz)"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_DOT2ADDv(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("dot(float4("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(").xy, float4("); + AppendSrcReg(alu.src2_reg, alu.src2_sel, alu.src2_swiz, alu.src2_reg_negate, alu.src2_reg_abs); + append(").xy) + "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(".x"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +// CUBEv + +int D3D11ShaderTranslator::TranslateALU_MAX4v(const instr_alu_t& alu) { + AppendDestReg(alu.vector_dest, alu.vector_write_mask, alu.export_data); + append(" = "); + if (alu.vector_clamp) { + append("saturate("); + } + append("max("); + append("max("); + append("max("); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(".x, "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(".y), "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(".z), "); + AppendSrcReg(alu.src1_reg, alu.src1_sel, alu.src1_swiz, alu.src1_reg_negate, alu.src1_reg_abs); + append(".w)"); + if (alu.vector_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.vector_dest, alu.vector_write_mask, alu.export_data); + return 0; +} + +// ... + +int D3D11ShaderTranslator::TranslateALU_MAXs(const instr_alu_t& alu) { + AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + append(" = "); + if (alu.scalar_clamp) { + append("saturate("); + } + if ((alu.src3_swiz & 0x3) == (((alu.src3_swiz >> 2) + 1) & 0x3)) { + // This is a mov. + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + } else { + append("max("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(".x, "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(".y).xxxx"); + } + if (alu.scalar_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_MINs(const instr_alu_t& alu) { + AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + append(" = "); + if (alu.scalar_clamp) { + append("saturate("); + } + append("min("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(".x, "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(".y).xxxx"); + if (alu.scalar_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_SETXXs(const instr_alu_t& alu, const char* op) { + AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + append(" = "); + if (alu.scalar_clamp) { + append("saturate("); + } + append("(("); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(".x %s 0.0) ? 1.0 : 0.0).xxxx", op); + if (alu.scalar_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} +int D3D11ShaderTranslator::TranslateALU_SETEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, "=="); +} +int D3D11ShaderTranslator::TranslateALU_SETGTs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, ">"); +} +int D3D11ShaderTranslator::TranslateALU_SETGTEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, ">="); +} +int D3D11ShaderTranslator::TranslateALU_SETNEs(const instr_alu_t& alu) { + return TranslateALU_SETXXs(alu, "!="); +} + +int D3D11ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) { + AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + append(" = "); + if (alu.scalar_clamp) { + append("saturate("); + } + append("(1.0 / "); + AppendSrcReg(alu.src3_reg, alu.src3_sel, alu.src3_swiz, alu.src3_reg_negate, alu.src3_reg_abs); + append(")"); + if (alu.scalar_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} + +int D3D11ShaderTranslator::TranslateALU_MUL_CONST_0(const instr_alu_t& alu) { + AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + append(" = "); + if (alu.scalar_clamp) { + append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + append(".%c * ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + append(".%c", chan_names[swiz_b]); + append(").xxxx"); + if (alu.scalar_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} +int D3D11ShaderTranslator::TranslateALU_MUL_CONST_1(const instr_alu_t& alu) { + return TranslateALU_MUL_CONST_0(alu); +} + +int D3D11ShaderTranslator::TranslateALU_ADD_CONST_0(const instr_alu_t& alu) { + AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + append(" = "); + if (alu.scalar_clamp) { + append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + append(".%c + ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + append(".%c", chan_names[swiz_b]); + append(").xxxx"); + if (alu.scalar_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} +int D3D11ShaderTranslator::TranslateALU_ADD_CONST_1(const instr_alu_t& alu) { + return TranslateALU_ADD_CONST_0(alu); +} + +int D3D11ShaderTranslator::TranslateALU_SUB_CONST_0(const instr_alu_t& alu) { + AppendDestReg(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + append(" = "); + if (alu.scalar_clamp) { + append("saturate("); + } + uint32_t src3_swiz = alu.src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + uint32_t reg2 = (alu.scalar_opc & 1) | (alu.src3_swiz & 0x3C) | (alu.src3_sel << 1); + append("("); + AppendSrcReg(alu.src3_reg, 0, 0, alu.src3_reg_negate, alu.src3_reg_abs); + append(".%c - ", chan_names[swiz_a]); + AppendSrcReg(reg2, 1, 0, alu.src3_reg_negate, alu.src3_reg_abs); + append(".%c", chan_names[swiz_b]); + append(").xxxx"); + if (alu.scalar_clamp) { + append(")"); + } + append(";\n"); + AppendDestRegPost(alu.scalar_dest, alu.scalar_write_mask, alu.export_data); + return 0; +} +int D3D11ShaderTranslator::TranslateALU_SUB_CONST_1(const instr_alu_t& alu) { + return TranslateALU_SUB_CONST_0(alu); +} + +namespace { + +typedef int (D3D11ShaderTranslator::*TranslateFn)(const instr_alu_t& alu); +typedef struct { + uint32_t num_srcs; + const char* name; + TranslateFn fn; +} TranslateInfo; +#define ALU_INSTR(opc, num_srcs) \ + { num_srcs, #opc, nullptr } +#define ALU_INSTR_IMPL(opc, num_srcs) \ + { num_srcs, #opc, &D3D11ShaderTranslator::TranslateALU_##opc } + +} // namespace + +int D3D11ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) { + static TranslateInfo vector_alu_instrs[0x20] = { + ALU_INSTR_IMPL(ADDv, 2), // 0 + ALU_INSTR_IMPL(MULv, 2), // 1 + ALU_INSTR_IMPL(MAXv, 2), // 2 + ALU_INSTR_IMPL(MINv, 2), // 3 + ALU_INSTR_IMPL(SETEv, 2), // 4 + ALU_INSTR_IMPL(SETGTv, 2), // 5 + ALU_INSTR_IMPL(SETGTEv, 2), // 6 + ALU_INSTR_IMPL(SETNEv, 2), // 7 + ALU_INSTR_IMPL(FRACv, 1), // 8 + ALU_INSTR_IMPL(TRUNCv, 1), // 9 + ALU_INSTR_IMPL(FLOORv, 1), // 10 + ALU_INSTR_IMPL(MULADDv, 3), // 11 + ALU_INSTR_IMPL(CNDEv, 3), // 12 + ALU_INSTR_IMPL(CNDGTEv, 3), // 13 + ALU_INSTR_IMPL(CNDGTv, 3), // 14 + ALU_INSTR_IMPL(DOT4v, 2), // 15 + ALU_INSTR_IMPL(DOT3v, 2), // 16 + ALU_INSTR_IMPL(DOT2ADDv, 3), // 17 -- ??? + ALU_INSTR(CUBEv, 2), // 18 + ALU_INSTR_IMPL(MAX4v, 1), // 19 + ALU_INSTR(PRED_SETE_PUSHv, 2), // 20 + ALU_INSTR(PRED_SETNE_PUSHv, 2), // 21 + ALU_INSTR(PRED_SETGT_PUSHv, 2), // 22 + ALU_INSTR(PRED_SETGTE_PUSHv, 2), // 23 + ALU_INSTR(KILLEv, 2), // 24 + ALU_INSTR(KILLGTv, 2), // 25 + ALU_INSTR(KILLGTEv, 2), // 26 + ALU_INSTR(KILLNEv, 2), // 27 + ALU_INSTR(DSTv, 2), // 28 + ALU_INSTR(MOVAv, 1), // 29 + }; + static TranslateInfo scalar_alu_instrs[0x40] = { + ALU_INSTR(ADDs, 1), // 0 + ALU_INSTR(ADD_PREVs, 1), // 1 + ALU_INSTR(MULs, 1), // 2 + ALU_INSTR(MUL_PREVs, 1), // 3 + ALU_INSTR(MUL_PREV2s, 1), // 4 + ALU_INSTR_IMPL(MAXs, 1), // 5 + ALU_INSTR_IMPL(MINs, 1), // 6 + ALU_INSTR_IMPL(SETEs, 1), // 7 + ALU_INSTR_IMPL(SETGTs, 1), // 8 + ALU_INSTR_IMPL(SETGTEs, 1), // 9 + ALU_INSTR_IMPL(SETNEs, 1), // 10 + ALU_INSTR(FRACs, 1), // 11 + ALU_INSTR(TRUNCs, 1), // 12 + ALU_INSTR(FLOORs, 1), // 13 + ALU_INSTR(EXP_IEEE, 1), // 14 + ALU_INSTR(LOG_CLAMP, 1), // 15 + ALU_INSTR(LOG_IEEE, 1), // 16 + ALU_INSTR(RECIP_CLAMP, 1), // 17 + ALU_INSTR(RECIP_FF, 1), // 18 + ALU_INSTR_IMPL(RECIP_IEEE, 1), // 19 + ALU_INSTR(RECIPSQ_CLAMP, 1), // 20 + ALU_INSTR(RECIPSQ_FF, 1), // 21 + ALU_INSTR(RECIPSQ_IEEE, 1), // 22 + ALU_INSTR(MOVAs, 1), // 23 + ALU_INSTR(MOVA_FLOORs, 1), // 24 + ALU_INSTR(SUBs, 1), // 25 + ALU_INSTR(SUB_PREVs, 1), // 26 + ALU_INSTR(PRED_SETEs, 1), // 27 + ALU_INSTR(PRED_SETNEs, 1), // 28 + ALU_INSTR(PRED_SETGTs, 1), // 29 + ALU_INSTR(PRED_SETGTEs, 1), // 30 + ALU_INSTR(PRED_SET_INVs, 1), // 31 + ALU_INSTR(PRED_SET_POPs, 1), // 32 + ALU_INSTR(PRED_SET_CLRs, 1), // 33 + ALU_INSTR(PRED_SET_RESTOREs, 1), // 34 + ALU_INSTR(KILLEs, 1), // 35 + ALU_INSTR(KILLGTs, 1), // 36 + ALU_INSTR(KILLGTEs, 1), // 37 + ALU_INSTR(KILLNEs, 1), // 38 + ALU_INSTR(KILLONEs, 1), // 39 + ALU_INSTR(SQRT_IEEE, 1), // 40 + { 0, 0, false }, + ALU_INSTR_IMPL(MUL_CONST_0, 2), // 42 + ALU_INSTR_IMPL(MUL_CONST_1, 2), // 43 + ALU_INSTR_IMPL(ADD_CONST_0, 2), // 44 + ALU_INSTR_IMPL(ADD_CONST_1, 2), // 45 + ALU_INSTR_IMPL(SUB_CONST_0, 2), // 46 + ALU_INSTR_IMPL(SUB_CONST_1, 2), // 47 + ALU_INSTR(SIN, 1), // 48 + ALU_INSTR(COS, 1), // 49 + ALU_INSTR(RETAIN_PREV, 1), // 50 + }; +#undef ALU_INSTR +#undef ALU_INSTR_IMPL + + if (!alu->scalar_write_mask && !alu->vector_write_mask) { + append(" // \n"); + return 0; + } + + if (alu->vector_write_mask) { + // Disassemble vector op. + const auto& iv = vector_alu_instrs[alu->vector_opc]; + append(" // %sALU:\t", sync ? "(S)" : " "); + append("%s", iv.name); + if (alu->pred_select & 0x2) { + // seems to work similar to conditional execution in ARM instruction + // set, so let's use a similar syntax for now: + append((alu->pred_select & 0x1) ? "EQ" : "NE"); + } + append("\t"); + PrintDstReg(alu->vector_dest, alu->vector_write_mask, alu->export_data); + append(" = "); + if (iv.num_srcs == 3) { + PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->src3_reg_abs); + append(", "); + } + PrintSrcReg(alu->src1_reg, alu->src1_sel, alu->src1_swiz, + alu->src1_reg_negate, alu->src1_reg_abs); + if (iv.num_srcs > 1) { + append(", "); + PrintSrcReg(alu->src2_reg, alu->src2_sel, alu->src2_swiz, + alu->src2_reg_negate, alu->src2_reg_abs); + } + if (alu->vector_clamp) { + append(" CLAMP"); + } + if (alu->export_data) { + PrintExportComment(alu->vector_dest); + } + append("\n"); + + // Translate vector op. + if (iv.fn) { + append(" "); + if ((this->*iv.fn)(*alu)) { + return 1; + } + } else { + append(" // \n"); + } + } + + if (alu->scalar_write_mask || !alu->vector_write_mask) { + // 2nd optional scalar op: + + // Disassemble scalar op. + const auto& is = scalar_alu_instrs[alu->scalar_opc]; + append(" // "); + append("\t"); + if (is.name) { + append("\t \t%s\t", is.name); + } else { + append("\t \tOP(%u)\t", alu->scalar_opc); + } + PrintDstReg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data); + append(" = "); + if (is.num_srcs == 2) { + // ADD_CONST_0 dest, [const], [reg] + uint32_t src3_swiz = alu->src3_swiz & ~0x3C; + uint32_t swiz_a = ((src3_swiz >> 6) - 1) & 0x3; + uint32_t swiz_b = (src3_swiz & 0x3); + PrintSrcReg(alu->src3_reg, 0, 0, + alu->src3_reg_negate, alu->src3_reg_abs); + append(".%c", chan_names[swiz_a]); + append(", "); + uint32_t reg2 = (alu->scalar_opc & 1) | (alu->src3_swiz & 0x3C) | (alu->src3_sel << 1); + PrintSrcReg(reg2, 1, 0, + alu->src3_reg_negate, alu->src3_reg_abs); + append(".%c", chan_names[swiz_b]); + } else { + PrintSrcReg(alu->src3_reg, alu->src3_sel, alu->src3_swiz, + alu->src3_reg_negate, alu->src3_reg_abs); + } + if (alu->scalar_clamp) { + append(" CLAMP"); + } + if (alu->export_data) { + PrintExportComment(alu->scalar_dest); + } + append("\n"); + + // Translate scalar op. + if (is.fn) { + append(" "); + if ((this->*is.fn)(*alu)) { + return 1; + } + } else { + append(" // \n"); + } + } + + return 0; +} + +void D3D11ShaderTranslator::PrintDestFecth(uint32_t dst_reg, + uint32_t dst_swiz) { + append("\tR%u.", dst_reg); + for (int i = 0; i < 4; i++) { + append("%c", chan_names[dst_swiz & 0x7]); + dst_swiz >>= 3; + } +} + +void D3D11ShaderTranslator::AppendFetchDest(uint32_t dst_reg, + uint32_t dst_swiz) { + append("r%u.", dst_reg); + for (int i = 0; i < 4; i++) { + append("%c", chan_names[dst_swiz & 0x7]); + dst_swiz >>= 3; + } +} + +int D3D11ShaderTranslator::GetFormatComponentCount(uint32_t format) { + switch (format) { + case FMT_32: + case FMT_32_FLOAT: + return 1; + case FMT_16_16: + case FMT_16_16_FLOAT: + case FMT_32_32: + case FMT_32_32_FLOAT: + return 2; + case FMT_10_11_11: + case FMT_11_11_10: + case FMT_32_32_32_FLOAT: + return 3; + case FMT_8_8_8_8: + case FMT_2_10_10_10: + case FMT_16_16_16_16: + case FMT_16_16_16_16_FLOAT: + case FMT_32_32_32_32: + case FMT_32_32_32_32_FLOAT: + return 4; + default: + XELOGE("Unknown vertex format: %d", format); + XEASSERTALWAYS(); + return 4; + } +} + +int D3D11ShaderTranslator::TranslateExec(const instr_cf_exec_t& cf) { + static const struct { + const char *name; + } cf_instructions[] = { + #define INSTR(opc, fxn) { #opc } + INSTR(NOP, print_cf_nop), + INSTR(EXEC, print_cf_exec), + INSTR(EXEC_END, print_cf_exec), + INSTR(COND_EXEC, print_cf_exec), + INSTR(COND_EXEC_END, print_cf_exec), + INSTR(COND_PRED_EXEC, print_cf_exec), + INSTR(COND_PRED_EXEC_END, print_cf_exec), + INSTR(LOOP_START, print_cf_loop), + INSTR(LOOP_END, print_cf_loop), + INSTR(COND_CALL, print_cf_jmp_call), + INSTR(RETURN, print_cf_jmp_call), + INSTR(COND_JMP, print_cf_jmp_call), + INSTR(ALLOC, print_cf_alloc), + INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec), + INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec), + INSTR(MARK_VS_FETCH_DONE, print_cf_nop), // ?? + #undef INSTR + }; + + append( + " // %s ADDR(0x%x) CNT(0x%x)", + cf_instructions[cf.opc].name, cf.address, cf.count); + if (cf.yeild) { + append(" YIELD"); + } + uint8_t vc = cf.vc_hi | (cf.vc_lo << 2); + if (vc) { + append(" VC(0x%x)", vc); + } + if (cf.bool_addr) { + append(" BOOL_ADDR(0x%x)", cf.bool_addr); + } + if (cf.address_mode == ABSOLUTE_ADDR) { + append(" ABSOLUTE_ADDR"); + } + if (cf.is_cond_exec()) { + append(" COND(%d)", cf.condition); + } + append("\n"); + + uint32_t sequence = cf.serialize; + for (uint32_t i = 0; i < cf.count; i++) { + uint32_t alu_off = (cf.address + i); + int sync = sequence & 0x2; + if (sequence & 0x1) { + const instr_fetch_t* fetch = + (const instr_fetch_t*)(dwords_ + alu_off * 3); + switch (fetch->opc) { + case VTX_FETCH: + if (TranslateVertexFetch(&fetch->vtx, sync)) { + return 1; + } + break; + case TEX_FETCH: + if (TranslateTextureFetch(&fetch->tex, sync)) { + return 1; + } + break; + case TEX_GET_BORDER_COLOR_FRAC: + case TEX_GET_COMP_TEX_LOD: + case TEX_GET_GRADIENTS: + case TEX_GET_WEIGHTS: + case TEX_SET_TEX_LOD: + case TEX_SET_GRADIENTS_H: + case TEX_SET_GRADIENTS_V: + default: + XEASSERTALWAYS(); + break; + } + } else { + const instr_alu_t* alu = + (const instr_alu_t*)(dwords_ + alu_off * 3); + if (TranslateALU(alu, sync)) { + return 1; + } + } + sequence >>= 2; + } + + return 0; +} + +int D3D11ShaderTranslator::TranslateVertexFetch(const instr_fetch_vtx_t* vtx, + int sync) { + static const struct { + const char *name; + } fetch_types[0xff] = { + #define TYPE(id) { #id } + TYPE(FMT_1_REVERSE), // 0 + {0}, + TYPE(FMT_8), // 2 + {0}, + {0}, + {0}, + TYPE(FMT_8_8_8_8), // 6 + TYPE(FMT_2_10_10_10), // 7 + {0}, + {0}, + TYPE(FMT_8_8), // 10 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_16), // 24 + TYPE(FMT_16_16), // 25 + TYPE(FMT_16_16_16_16), // 26 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_32), // 33 + TYPE(FMT_32_32), // 34 + TYPE(FMT_32_32_32_32), // 35 + TYPE(FMT_32_FLOAT), // 36 + TYPE(FMT_32_32_FLOAT), // 37 + TYPE(FMT_32_32_32_32_FLOAT), // 38 + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + {0}, + TYPE(FMT_32_32_32_FLOAT), // 57 + #undef TYPE + }; + + // Disassemble. + append(" // %sFETCH:\t", sync ? "(S)" : " "); + if (vtx->pred_select) { + append(vtx->pred_condition ? "EQ" : "NE"); + } + PrintDestFecth(vtx->dst_reg, vtx->dst_swiz); + append(" = R%u.", vtx->src_reg); + append("%c", chan_names[vtx->src_swiz & 0x3]); + if (fetch_types[vtx->format].name) { + append(" %s", fetch_types[vtx->format].name); + } else { + append(" TYPE(0x%x)", vtx->format); + } + append(" %s", vtx->format_comp_all ? "SIGNED" : "UNSIGNED"); + if (!vtx->num_format_all) { + append(" NORMALIZED"); + } + append(" STRIDE(%u)", vtx->stride); + if (vtx->offset) { + append(" OFFSET(%u)", vtx->offset); + } + append(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel); + if (1) { + // XXX + append(" src_reg_am=%u", vtx->src_reg_am); + append(" dst_reg_am=%u", vtx->dst_reg_am); + append(" num_format_all=%u", vtx->num_format_all); + append(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all); + append(" exp_adjust_all=%u", vtx->exp_adjust_all); + } + append("\n"); + + // Translate. + append(" "); + append("r%u.xyzw", vtx->dst_reg); + append(" = float4("); + uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; + // TODO(benvanik): detect xyzw = xyzw, etc. + // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc + uint32_t component_count = GetFormatComponentCount(vtx->format); + uint32_t dst_swiz = vtx->dst_swiz; + for (int i = 0; i < 4; i++) { + if ((dst_swiz & 0x7) == 4) { + append("0.0"); + } else if ((dst_swiz & 0x7) == 5) { + append("1.0"); + } else if ((dst_swiz & 0x7) == 6) { + // ? + append("?"); + } else if ((dst_swiz & 0x7) == 7) { + append("r%u.%c", vtx->dst_reg, chan_names[i]); + } else { + append("i.vf%u_%d.%c", + fetch_slot, vtx->offset, + chan_names[dst_swiz & 0x3]); + } + if (i < 3) { + append(", "); + } + dst_swiz >>= 3; + } + append(");\n"); + return 0; +} + +int D3D11ShaderTranslator::TranslateTextureFetch(const instr_fetch_tex_t* tex, + int sync) { + // Disassemble. + static const char *filter[] = { + "POINT", // TEX_FILTER_POINT + "LINEAR", // TEX_FILTER_LINEAR + "BASEMAP", // TEX_FILTER_BASEMAP + }; + static const char *aniso_filter[] = { + "DISABLED", // ANISO_FILTER_DISABLED + "MAX_1_1", // ANISO_FILTER_MAX_1_1 + "MAX_2_1", // ANISO_FILTER_MAX_2_1 + "MAX_4_1", // ANISO_FILTER_MAX_4_1 + "MAX_8_1", // ANISO_FILTER_MAX_8_1 + "MAX_16_1", // ANISO_FILTER_MAX_16_1 + }; + static const char *arbitrary_filter[] = { + "2x4_SYM", // ARBITRARY_FILTER_2X4_SYM + "2x4_ASYM", // ARBITRARY_FILTER_2X4_ASYM + "4x2_SYM", // ARBITRARY_FILTER_4X2_SYM + "4x2_ASYM", // ARBITRARY_FILTER_4X2_ASYM + "4x4_SYM", // ARBITRARY_FILTER_4X4_SYM + "4x4_ASYM", // ARBITRARY_FILTER_4X4_ASYM + }; + static const char *sample_loc[] = { + "CENTROID", // SAMPLE_CENTROID + "CENTER", // SAMPLE_CENTER + }; + uint32_t src_swiz = tex->src_swiz; + append(" // %sFETCH:\t", sync ? "(S)" : " "); + if (tex->pred_select) { + append(tex->pred_condition ? "EQ" : "NE"); + } + PrintDestFecth(tex->dst_reg, tex->dst_swiz); + append(" = R%u.", tex->src_reg); + for (int i = 0; i < 3; i++) { + append("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + append(" CONST(%u)", tex->const_idx); + if (tex->fetch_valid_only) { + append(" VALID_ONLY"); + } + if (tex->tx_coord_denorm) { + append(" DENORM"); + } + if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) { + append(" MAG(%s)", filter[tex->mag_filter]); + } + if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) { + append(" MIN(%s)", filter[tex->min_filter]); + } + if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) { + append(" MIP(%s)", filter[tex->mip_filter]); + } + if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) { + append(" ANISO(%s)", aniso_filter[tex->aniso_filter]); + } + if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) { + append(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]); + } + if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) { + append(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); + } + if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) { + append(" VOL_MIN(%s)", filter[tex->vol_min_filter]); + } + if (!tex->use_comp_lod) { + append(" LOD(%u)", tex->use_comp_lod); + append(" LOD_BIAS(%u)", tex->lod_bias); + } + if (tex->use_reg_lod) { + append(" REG_LOD(%u)", tex->use_reg_lod); + } + if (tex->use_reg_gradients) { + append(" USE_REG_GRADIENTS"); + } + append(" LOCATION(%s)", sample_loc[tex->sample_location]); + if (tex->offset_x || tex->offset_y || tex->offset_z) { + append(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z); + } + append("\n"); + + int src_component_count = 0; + switch (tex->dimension) { + case DIMENSION_1D: + src_component_count = 1; + break; + default: + case DIMENSION_2D: + src_component_count = 2; + break; + case DIMENSION_3D: + src_component_count = 3; + break; + case DIMENSION_CUBE: + src_component_count = 3; + break; + } + + // Translate. + append(" "); + append("r%u.xyzw", tex->dst_reg); + append(" = "); + append( + "x_texture_%d.Sample(x_sampler_%d, r%u.", + tex->const_idx, + tex_fetch_index_++, // hacky way to line up to tex buffers + tex->src_reg); + src_swiz = tex->src_swiz; + for (int i = 0; i < src_component_count; i++) { + append("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + append(")."); + + // Pass one over dest does xyzw and fakes the special values. + // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc + uint32_t dst_swiz = tex->dst_swiz; + for (int i = 0; i < 4; i++) { + append("%c", chan_names[dst_swiz & 0x3]); + dst_swiz >>= 3; + } + append(";\n"); + // Do another pass to set constant values. + dst_swiz = tex->dst_swiz; + for (int i = 0; i < 4; i++) { + if ((dst_swiz & 0x7) == 4) { + append(" r%u.%c = 0.0;\n", tex->dst_reg, chan_names[i]); + } else if ((dst_swiz & 0x7) == 5) { + append(" r%u.%c = 1.0;\n", tex->dst_reg, chan_names[i]); + } + dst_swiz >>= 3; + } + return 0; +} diff --git a/src/xenia/gpu/d3d11/d3d11_shader_translator.h b/src/xenia/gpu/d3d11/d3d11_shader_translator.h new file mode 100644 index 000000000..ad85c7775 --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_shader_translator.h @@ -0,0 +1,125 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_ +#define XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_ + +#include +#include + +#include + + +namespace xe { +namespace gpu { +namespace d3d11 { + + +class D3D11ShaderTranslator { +public: + const static uint32_t kMaxInterpolators = 16; + + D3D11ShaderTranslator(); + + int TranslateVertexShader(VertexShaderResource* vertex_shader, + const xenos::xe_gpu_program_cntl_t& program_cntl); + int TranslatePixelShader( + PixelShaderResource* pixel_shader, + const xenos::xe_gpu_program_cntl_t& program_cntl, + const VertexShaderResource::AllocCounts& alloc_counts); + + const char* translated_src() const { return buffer_; } + +private: + xenos::XE_GPU_SHADER_TYPE type_; + uint32_t tex_fetch_index_; + const uint32_t* dwords_; + + static const int kCapacity = 64 * 1024; + char buffer_[kCapacity]; + size_t capacity_; + size_t offset_; + void append(const char* format, ...) { + va_list args; + va_start(args, format); + int len = xevsnprintfa(buffer_ + offset_, capacity_ - offset_, + format, args); + va_end(args); + offset_ += len; + buffer_[offset_] = 0; + } + + void AppendTextureHeader( + const ShaderResource::SamplerInputs& sampler_inputs); + + void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, + uint32_t abs); + void AppendDestRegName(uint32_t num, uint32_t dst_exp); + void AppendDestReg(uint32_t num, uint32_t mask, uint32_t dst_exp); + void AppendDestRegPost(uint32_t num, uint32_t mask, uint32_t dst_exp); + void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate, + uint32_t abs); + void PrintDstReg(uint32_t num, uint32_t mask, uint32_t dst_exp); + void PrintExportComment(uint32_t num); + + int TranslateALU(const xenos::instr_alu_t* alu, int sync); + int TranslateALU_ADDv(const xenos::instr_alu_t& alu); + int TranslateALU_MULv(const xenos::instr_alu_t& alu); + int TranslateALU_MAXv(const xenos::instr_alu_t& alu); + int TranslateALU_MINv(const xenos::instr_alu_t& alu); + int TranslateALU_SETXXv(const xenos::instr_alu_t& alu, const char* op); + int TranslateALU_SETEv(const xenos::instr_alu_t& alu); + int TranslateALU_SETGTv(const xenos::instr_alu_t& alu); + int TranslateALU_SETGTEv(const xenos::instr_alu_t& alu); + int TranslateALU_SETNEv(const xenos::instr_alu_t& alu); + int TranslateALU_FRACv(const xenos::instr_alu_t& alu); + int TranslateALU_TRUNCv(const xenos::instr_alu_t& alu); + int TranslateALU_FLOORv(const xenos::instr_alu_t& alu); + int TranslateALU_MULADDv(const xenos::instr_alu_t& alu); + int TranslateALU_CNDXXv(const xenos::instr_alu_t& alu, const char* op); + int TranslateALU_CNDEv(const xenos::instr_alu_t& alu); + int TranslateALU_CNDGTEv(const xenos::instr_alu_t& alu); + int TranslateALU_CNDGTv(const xenos::instr_alu_t& alu); + int TranslateALU_DOT4v(const xenos::instr_alu_t& alu); + int TranslateALU_DOT3v(const xenos::instr_alu_t& alu); + int TranslateALU_DOT2ADDv(const xenos::instr_alu_t& alu); + // CUBEv + int TranslateALU_MAX4v(const xenos::instr_alu_t& alu); + // ... + int TranslateALU_MAXs(const xenos::instr_alu_t& alu); + int TranslateALU_MINs(const xenos::instr_alu_t& alu); + int TranslateALU_SETXXs(const xenos::instr_alu_t& alu, const char* op); + int TranslateALU_SETEs(const xenos::instr_alu_t& alu); + int TranslateALU_SETGTs(const xenos::instr_alu_t& alu); + int TranslateALU_SETGTEs(const xenos::instr_alu_t& alu); + int TranslateALU_SETNEs(const xenos::instr_alu_t& alu); + int TranslateALU_RECIP_IEEE(const xenos::instr_alu_t& alu); + int TranslateALU_MUL_CONST_0(const xenos::instr_alu_t& alu); + int TranslateALU_MUL_CONST_1(const xenos::instr_alu_t& alu); + int TranslateALU_ADD_CONST_0(const xenos::instr_alu_t& alu); + int TranslateALU_ADD_CONST_1(const xenos::instr_alu_t& alu); + int TranslateALU_SUB_CONST_0(const xenos::instr_alu_t& alu); + int TranslateALU_SUB_CONST_1(const xenos::instr_alu_t& alu); + + void PrintDestFecth(uint32_t dst_reg, uint32_t dst_swiz); + void AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz); + int GetFormatComponentCount(uint32_t format); + + int TranslateExec(const xenos::instr_cf_exec_t& cf); + int TranslateVertexFetch(const xenos::instr_fetch_vtx_t* vtx, int sync); + int TranslateTextureFetch(const xenos::instr_fetch_tex_t* tex, int sync); +}; + + +} // namespace d3d11 +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_texture.cc b/src/xenia/gpu/d3d11/d3d11_texture.cc deleted file mode 100644 index 809a971ac..000000000 --- a/src/xenia/gpu/d3d11/d3d11_texture.cc +++ /dev/null @@ -1,264 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include -#include -#include -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::d3d11; -using namespace xe::gpu::xenos; - - -D3D11Texture::D3D11Texture(D3D11TextureCache* cache, uint32_t address, - const uint8_t* host_address) - : Texture(address, host_address), - cache_(cache) { -} - -D3D11Texture::~D3D11Texture() { -} - -TextureView* D3D11Texture::FetchNew( - const xenos::xe_gpu_texture_fetch_t& fetch) { - D3D11TextureView* view = new D3D11TextureView(); - if (!FillViewInfo(view, fetch)) { - return nullptr; - } - - D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc; - xe_zero_struct(&srv_desc, sizeof(srv_desc)); - // TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?) - srv_desc.Format = view->format; - - D3D_SRV_DIMENSION dimension = D3D11_SRV_DIMENSION_UNKNOWN; - switch (view->dimensions) { - case DIMENSION_1D: - srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; - srv_desc.Texture1D.MipLevels = 1; - srv_desc.Texture1D.MostDetailedMip = 0; - if (!CreateTexture1D(view, fetch)) { - XELOGE("D3D11: failed to fetch Texture1D"); - return nullptr; - } - break; - case DIMENSION_2D: - srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; - srv_desc.Texture2D.MipLevels = 1; - srv_desc.Texture2D.MostDetailedMip = 0; - if (!CreateTexture2D(view, fetch)) { - XELOGE("D3D11: failed to fetch Texture2D"); - return nullptr; - } - break; - case DIMENSION_3D: - srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; - srv_desc.Texture3D.MipLevels = 1; - srv_desc.Texture3D.MostDetailedMip = 0; - if (!CreateTexture3D(view, fetch)) { - XELOGE("D3D11: failed to fetch Texture3D"); - return nullptr; - } - break; - case DIMENSION_CUBE: - srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; - srv_desc.TextureCube.MipLevels = 1; - srv_desc.TextureCube.MostDetailedMip = 0; - if (!CreateTextureCube(view, fetch)) { - XELOGE("D3D11: failed to fetch TextureCube"); - return nullptr; - } - break; - } - - HRESULT hr = cache_->device()->CreateShaderResourceView( - view->resource, &srv_desc, &view->srv); - if (FAILED(hr)) { - XELOGE("D3D11: unable to create texture resource view"); - return nullptr; - } - - return view; -} - -bool D3D11Texture::FetchDirty( - TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { - auto d3d_view = static_cast(view); - switch (view->dimensions) { - case DIMENSION_1D: - return FetchTexture1D(d3d_view, fetch); - case DIMENSION_2D: - return FetchTexture2D(d3d_view, fetch); - case DIMENSION_3D: - return FetchTexture3D(d3d_view, fetch); - case DIMENSION_CUBE: - return FetchTextureCube(d3d_view, fetch); - } - return false; -} - -bool D3D11Texture::CreateTexture1D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { - uint32_t width = 1 + fetch.size_1d.width; - - D3D11_TEXTURE1D_DESC texture_desc; - xe_zero_struct(&texture_desc, sizeof(texture_desc)); - texture_desc.Width = width; - texture_desc.MipLevels = 1; - texture_desc.ArraySize = 1; - texture_desc.Format = view->format; - texture_desc.Usage = D3D11_USAGE_DYNAMIC; - texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS? - HRESULT hr = cache_->device()->CreateTexture1D( - &texture_desc, NULL, (ID3D11Texture1D**)&view->resource); - if (FAILED(hr)) { - return false; - } - - return FetchTexture1D(view, fetch); -} - -bool D3D11Texture::FetchTexture1D( - D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { - SCOPE_profile_cpu_f("gpu"); - - // TODO(benvanik): upload! - XELOGE("D3D11: FetchTexture1D not yet implemented"); - return false; -} - -bool D3D11Texture::CreateTexture2D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { - XEASSERTTRUE(fetch.dimension == 1); - - D3D11_TEXTURE2D_DESC texture_desc; - xe_zero_struct(&texture_desc, sizeof(texture_desc)); - texture_desc.Width = view->sizes_2d.output_width; - texture_desc.Height = view->sizes_2d.output_height; - texture_desc.MipLevels = 1; - texture_desc.ArraySize = 1; - texture_desc.Format = view->format; - texture_desc.SampleDesc.Count = 1; - texture_desc.SampleDesc.Quality = 0; - texture_desc.Usage = D3D11_USAGE_DYNAMIC; - texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS? - HRESULT hr = cache_->device()->CreateTexture2D( - &texture_desc, NULL, (ID3D11Texture2D**)&view->resource); - if (FAILED(hr)) { - return false; - } - - return FetchTexture2D(view, fetch); -} - -bool D3D11Texture::FetchTexture2D( - D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { - SCOPE_profile_cpu_f("gpu"); - - XEASSERTTRUE(fetch.dimension == 1); - - auto sizes = GetTextureSizes2D(view); - - // TODO(benvanik): all mip levels. - D3D11_MAPPED_SUBRESOURCE res; - HRESULT hr = cache_->context()->Map(view->resource, 0, - D3D11_MAP_WRITE_DISCARD, 0, &res); - if (FAILED(hr)) { - XELOGE("D3D11: failed to map texture"); - return false; - } - - const uint8_t* src = cache_->memory()->Translate(address_); - uint8_t* dest = (uint8_t*)res.pData; - - //memset(dest, 0, output_pitch * (output_height / view->block_size)); // TODO(gibbed): remove me later - - uint32_t output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch; - if (!fetch.tiled) { - dest = (uint8_t*)res.pData; - for (uint32_t y = 0; y < sizes.block_height; y++) { - for (uint32_t x = 0; x < sizes.logical_pitch; x += view->texel_pitch) { - TextureSwap(dest + x, src + x, view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness); - } - src += sizes.input_pitch; - dest += output_pitch; - } - } else { - auto bpp = (view->texel_pitch >> 2) + ((view->texel_pitch >> 1) >> (view->texel_pitch >> 2)); - for (uint32_t y = 0, output_base_offset = 0; - y < sizes.block_height; - y++, output_base_offset += output_pitch) { - auto input_base_offset = TiledOffset2DOuter(y, (sizes.input_width / view->block_size), bpp); - for (uint32_t x = 0, output_offset = output_base_offset; - x < sizes.block_width; - x++, output_offset += view->texel_pitch) { - auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp; - TextureSwap(dest + output_offset, - src + input_offset * view->texel_pitch, - view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness); - } - } - } - cache_->context()->Unmap(view->resource, 0); - return true; -} - -bool D3D11Texture::CreateTexture3D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { - XELOGE("D3D11: CreateTexture3D not yet implemented"); - XEASSERTALWAYS(); - return false; -} - -bool D3D11Texture::FetchTexture3D( - D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { - SCOPE_profile_cpu_f("gpu"); - - XELOGE("D3D11: FetchTexture3D not yet implemented"); - XEASSERTALWAYS(); - return false; - //D3D11_TEXTURE3D_DESC texture_desc; - //xe_zero_struct(&texture_desc, sizeof(texture_desc)); - //texture_desc.Width; - //texture_desc.Height; - //texture_desc.Depth; - //texture_desc.MipLevels; - //texture_desc.Format; - //texture_desc.Usage; - //texture_desc.BindFlags; - //texture_desc.CPUAccessFlags; - //texture_desc.MiscFlags; - //hr = device_->CreateTexture3D( - // &texture_desc, &initial_data, (ID3D11Texture3D**)&view->resource); -} - -bool D3D11Texture::CreateTextureCube( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { - XELOGE("D3D11: CreateTextureCube not yet implemented"); - XEASSERTALWAYS(); - return false; -} - -bool D3D11Texture::FetchTextureCube( - D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { - SCOPE_profile_cpu_f("gpu"); - - XELOGE("D3D11: FetchTextureCube not yet implemented"); - XEASSERTALWAYS(); - return false; -} diff --git a/src/xenia/gpu/d3d11/d3d11_texture.h b/src/xenia/gpu/d3d11/d3d11_texture.h deleted file mode 100644 index a8ee91662..000000000 --- a/src/xenia/gpu/d3d11/d3d11_texture.h +++ /dev/null @@ -1,78 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_H_ -#define XENIA_GPU_D3D11_D3D11_TEXTURE_H_ - -#include - -#include - -#include - - -namespace xe { -namespace gpu { -namespace d3d11 { - -class D3D11TextureCache; - - -struct D3D11TextureView : TextureView { - ID3D11Resource* resource; - ID3D11ShaderResourceView* srv; - - D3D11TextureView() - : resource(nullptr), srv(nullptr) {} - virtual ~D3D11TextureView() { - XESAFERELEASE(srv); - XESAFERELEASE(resource); - } -}; - - -class D3D11Texture : public Texture { -public: - D3D11Texture(D3D11TextureCache* cache, uint32_t address, - const uint8_t* host_address); - virtual ~D3D11Texture(); - -protected: - TextureView* FetchNew( - const xenos::xe_gpu_texture_fetch_t& fetch) override; - bool FetchDirty( - TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) override; - - bool CreateTexture1D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); - bool FetchTexture1D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); - bool CreateTexture2D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); - bool FetchTexture2D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); - bool CreateTexture3D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); - bool FetchTexture3D( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); - bool CreateTextureCube( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); - bool FetchTextureCube( - D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); - - D3D11TextureCache* cache_; -}; - - -} // namespace d3d11 -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_texture_cache.h b/src/xenia/gpu/d3d11/d3d11_texture_cache.h deleted file mode 100644 index 63f275d02..000000000 --- a/src/xenia/gpu/d3d11/d3d11_texture_cache.h +++ /dev/null @@ -1,61 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_ -#define XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_ - -#include - -#include -#include -#include - -#include - - -namespace xe { -namespace gpu { -namespace d3d11 { - - -class D3D11TextureCache : public TextureCache { -public: - D3D11TextureCache(Memory* memory, - ID3D11DeviceContext* context, ID3D11Device* device); - virtual ~D3D11TextureCache(); - - ID3D11DeviceContext* context() const { return context_; } - ID3D11Device* device() const { return device_; } - - ID3D11SamplerState* GetSamplerState( - const xenos::xe_gpu_texture_fetch_t& fetch, - const Shader::tex_buffer_desc_t& desc); - -protected: - Texture* CreateTexture(uint32_t address, const uint8_t* host_address, - const xenos::xe_gpu_texture_fetch_t& fetch) override; - -private: - ID3D11DeviceContext* context_; - ID3D11Device* device_; - - struct CachedSamplerState { - D3D11_SAMPLER_DESC desc; - ID3D11SamplerState* state; - }; - std::unordered_multimap samplers_; -}; - - -} // namespace d3d11 -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_texture_resource.cc b/src/xenia/gpu/d3d11/d3d11_texture_resource.cc new file mode 100644 index 000000000..a90c60b0d --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_texture_resource.cc @@ -0,0 +1,219 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include + + +using namespace xe; +using namespace xe::gpu; +using namespace xe::gpu::d3d11; +using namespace xe::gpu::xenos; + + +D3D11TextureResource::D3D11TextureResource( + D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info) + : TextureResource(memory_range, info), + resource_cache_(resource_cache), + texture_(nullptr), + handle_(nullptr) { +} + +D3D11TextureResource::~D3D11TextureResource() { + XESAFERELEASE(texture_); + XESAFERELEASE(handle_); +} + +int D3D11TextureResource::CreateHandle() { + SCOPE_profile_cpu_f("gpu"); + + D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc; + xe_zero_struct(&srv_desc, sizeof(srv_desc)); + // TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?) + srv_desc.Format = info_.format; + + D3D_SRV_DIMENSION dimension = D3D11_SRV_DIMENSION_UNKNOWN; + switch (info_.dimension) { + case TEXTURE_DIMENSION_1D: + srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + srv_desc.Texture1D.MipLevels = 1; + srv_desc.Texture1D.MostDetailedMip = 0; + if (CreateHandle1D()) { + XELOGE("D3D11: failed to create Texture1D"); + return 1; + } + break; + case TEXTURE_DIMENSION_2D: + srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srv_desc.Texture2D.MipLevels = 1; + srv_desc.Texture2D.MostDetailedMip = 0; + if (CreateHandle2D()) { + XELOGE("D3D11: failed to create Texture2D"); + return 1; + } + break; + case TEXTURE_DIMENSION_3D: + srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + srv_desc.Texture3D.MipLevels = 1; + srv_desc.Texture3D.MostDetailedMip = 0; + if (CreateHandle3D()) { + XELOGE("D3D11: failed to create Texture3D"); + return 1; + } + break; + case TEXTURE_DIMENSION_CUBE: + srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; + srv_desc.TextureCube.MipLevels = 1; + srv_desc.TextureCube.MostDetailedMip = 0; + if (CreateHandleCube()) { + XELOGE("D3D11: failed to create TextureCube"); + return 1; + } + break; + } + + HRESULT hr = resource_cache_->device()->CreateShaderResourceView( + texture_, &srv_desc, &handle_); + if (FAILED(hr)) { + XELOGE("D3D11: unable to create texture resource view"); + return 1; + } + return 0; +} + +int D3D11TextureResource::CreateHandle1D() { + uint32_t width = 1 + info_.size_1d.width; + + D3D11_TEXTURE1D_DESC texture_desc; + xe_zero_struct(&texture_desc, sizeof(texture_desc)); + texture_desc.Width = width; + texture_desc.MipLevels = 1; + texture_desc.ArraySize = 1; + texture_desc.Format = info_.format; + texture_desc.Usage = D3D11_USAGE_DYNAMIC; + texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS? + HRESULT hr = resource_cache_->device()->CreateTexture1D( + &texture_desc, NULL, (ID3D11Texture1D**)&texture_); + if (FAILED(hr)) { + return 1; + } + return 0; +} + +int D3D11TextureResource::CreateHandle2D() { + D3D11_TEXTURE2D_DESC texture_desc; + xe_zero_struct(&texture_desc, sizeof(texture_desc)); + texture_desc.Width = info_.size_2d.output_width; + texture_desc.Height = info_.size_2d.output_height; + texture_desc.MipLevels = 1; + texture_desc.ArraySize = 1; + texture_desc.Format = info_.format; + texture_desc.SampleDesc.Count = 1; + texture_desc.SampleDesc.Quality = 0; + texture_desc.Usage = D3D11_USAGE_DYNAMIC; + texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS? + HRESULT hr = resource_cache_->device()->CreateTexture2D( + &texture_desc, NULL, (ID3D11Texture2D**)&texture_); + if (FAILED(hr)) { + return 1; + } + return 0; +} + +int D3D11TextureResource::CreateHandle3D() { + XELOGE("D3D11: CreateTexture3D not yet implemented"); + XEASSERTALWAYS(); + return 1; +} + +int D3D11TextureResource::CreateHandleCube() { + XELOGE("D3D11: CreateTextureCube not yet implemented"); + XEASSERTALWAYS(); + return 1; +} + +int D3D11TextureResource::InvalidateRegion(const MemoryRange& memory_range) { + SCOPE_profile_cpu_f("gpu"); + + switch (info_.dimension) { + case TEXTURE_DIMENSION_1D: + return InvalidateRegion1D(memory_range); + case TEXTURE_DIMENSION_2D: + return InvalidateRegion2D(memory_range); + case TEXTURE_DIMENSION_3D: + return InvalidateRegion3D(memory_range); + case TEXTURE_DIMENSION_CUBE: + return InvalidateRegionCube(memory_range); + } + return 1; +} + +int D3D11TextureResource::InvalidateRegion1D(const MemoryRange& memory_range) { + return 1; +} + +int D3D11TextureResource::InvalidateRegion2D(const MemoryRange& memory_range) { + // TODO(benvanik): all mip levels. + D3D11_MAPPED_SUBRESOURCE res; + HRESULT hr = resource_cache_->context()->Map( + texture_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); + if (FAILED(hr)) { + XELOGE("D3D11: failed to map texture"); + return 1; + } + + const uint8_t* src = memory_range_.host_base; + uint8_t* dest = (uint8_t*)res.pData; + + uint32_t output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch; + if (!info_.is_tiled) { + dest = (uint8_t*)res.pData; + for (uint32_t y = 0; y < info_.size_2d.block_height; y++) { + for (uint32_t x = 0; x < info_.size_2d.logical_pitch; x += info_.texel_pitch) { + TextureSwap(dest + x, src + x, info_.texel_pitch); + } + src += info_.size_2d.input_pitch; + dest += output_pitch; + } + } else { + auto bpp = (info_.texel_pitch >> 2) + ((info_.texel_pitch >> 1) >> (info_.texel_pitch >> 2)); + for (uint32_t y = 0, output_base_offset = 0; + y < info_.size_2d.block_height; + y++, output_base_offset += output_pitch) { + auto input_base_offset = TiledOffset2DOuter(y, (info_.size_2d.input_width / info_.block_size), bpp); + for (uint32_t x = 0, output_offset = output_base_offset; + x < info_.size_2d.block_width; + x++, output_offset += info_.texel_pitch) { + auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp; + TextureSwap(dest + output_offset, + src + input_offset * info_.texel_pitch, + info_.texel_pitch); + } + } + } + resource_cache_->context()->Unmap(texture_, 0); + return 0; +} + +int D3D11TextureResource::InvalidateRegion3D(const MemoryRange& memory_range) { + return 1; +} + +int D3D11TextureResource::InvalidateRegionCube( + const MemoryRange& memory_range) { + return 1; +} diff --git a/src/xenia/gpu/d3d11/d3d11_texture_resource.h b/src/xenia/gpu/d3d11/d3d11_texture_resource.h new file mode 100644 index 000000000..4e59662a4 --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_texture_resource.h @@ -0,0 +1,60 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_ +#define XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_ + +#include +#include + +#include + + +namespace xe { +namespace gpu { +namespace d3d11 { + +class D3D11ResourceCache; + + +class D3D11TextureResource : public TextureResource { +public: + D3D11TextureResource(D3D11ResourceCache* resource_cache, + const MemoryRange& memory_range, + const Info& info); + ~D3D11TextureResource() override; + + void* handle() const override { return handle_; } + +protected: + int CreateHandle() override; + int CreateHandle1D(); + int CreateHandle2D(); + int CreateHandle3D(); + int CreateHandleCube(); + + int InvalidateRegion(const MemoryRange& memory_range) override; + int InvalidateRegion1D(const MemoryRange& memory_range); + int InvalidateRegion2D(const MemoryRange& memory_range); + int InvalidateRegion3D(const MemoryRange& memory_range); + int InvalidateRegionCube(const MemoryRange& memory_range); + +private: + D3D11ResourceCache* resource_cache_; + ID3D11Resource* texture_; + ID3D11ShaderResourceView* handle_; +}; + + +} // namespace d3d11 +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_ diff --git a/src/xenia/gpu/d3d11/sources.gypi b/src/xenia/gpu/d3d11/sources.gypi index 6dc7ae242..b6b6d76c1 100644 --- a/src/xenia/gpu/d3d11/sources.gypi +++ b/src/xenia/gpu/d3d11/sources.gypi @@ -1,10 +1,8 @@ # Copyright 2013 Ben Vanik. All Rights Reserved. { 'sources': [ - 'd3d11_buffer.cc', - 'd3d11_buffer.h', - 'd3d11_buffer_cache.cc', - 'd3d11_buffer_cache.h', + 'd3d11_buffer_resource.cc', + 'd3d11_buffer_resource.h', 'd3d11_geometry_shader.cc', 'd3d11_geometry_shader.h', 'd3d11_gpu-private.h', @@ -16,14 +14,16 @@ 'd3d11_graphics_system.h', 'd3d11_profiler_display.cc', 'd3d11_profiler_display.h', - 'd3d11_shader.cc', - 'd3d11_shader.h', - 'd3d11_shader_cache.cc', - 'd3d11_shader_cache.h', - 'd3d11_texture.cc', - 'd3d11_texture.h', - 'd3d11_texture_cache.cc', - 'd3d11_texture_cache.h', + 'd3d11_resource_cache.cc', + 'd3d11_resource_cache.h', + 'd3d11_sampler_state_resource.cc', + 'd3d11_sampler_state_resource.h', + 'd3d11_shader_resource.cc', + 'd3d11_shader_resource.h', + 'd3d11_shader_translator.cc', + 'd3d11_shader_translator.h', + 'd3d11_texture_resource.cc', + 'd3d11_texture_resource.h', 'd3d11_window.cc', 'd3d11_window.h', ], diff --git a/src/xenia/gpu/xenos/registers.cc b/src/xenia/gpu/draw_command.cc similarity index 56% rename from src/xenia/gpu/xenos/registers.cc rename to src/xenia/gpu/draw_command.cc index 5d4e99106..468c4ed08 100644 --- a/src/xenia/gpu/xenos/registers.cc +++ b/src/xenia/gpu/draw_command.cc @@ -1,27 +1,17 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::xenos; - - -const char* xe::gpu::xenos::GetRegisterName(uint32_t index) { - switch (index) { -#define XE_GPU_REGISTER(index, type, name) \ - case index: return #name; -#include -#undef XE_GPU_REGISTER - default: - return NULL; - } -} +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + + +using namespace std; +using namespace xe; +using namespace xe::gpu; +using namespace xe::gpu::xenos; + diff --git a/src/xenia/gpu/draw_command.h b/src/xenia/gpu/draw_command.h new file mode 100644 index 000000000..ac5b07fe6 --- /dev/null +++ b/src/xenia/gpu/draw_command.h @@ -0,0 +1,78 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_DRAW_COMMAND_H_ +#define XENIA_GPU_DRAW_COMMAND_H_ + +#include +#include +#include +#include +#include +#include + + +namespace xe { +namespace gpu { + + +// TODO(benvanik): move more of the enums in here? +struct DrawCommand { + xenos::XE_GPU_PRIMITIVE_TYPE prim_type; + uint32_t start_index; + uint32_t index_count; + uint32_t base_vertex; + + VertexShaderResource* vertex_shader; + PixelShaderResource* pixel_shader; + + // TODO(benvanik): dirty tracking/max ranges/etc. + struct { + float* values; + size_t count; + } float4_constants; + struct { + uint32_t* values; + size_t count; + } loop_constants; + struct { + uint32_t* values; + size_t count; + } bool_constants; + + // Index buffer, if present. If index_count > 0 then auto draw. + IndexBufferResource* index_buffer; + + // Vertex buffers. + struct { + uint32_t input_index; + VertexBufferResource* buffer; + uint32_t stride; + uint32_t offset; + } vertex_buffers[96]; + size_t vertex_buffer_count; + + // Texture samplers. + struct SamplerInput { + uint32_t input_index; + TextureResource* texture; + SamplerStateResource* sampler_state; + }; + SamplerInput vertex_shader_samplers[32]; + size_t vertex_shader_sampler_count; + SamplerInput pixel_shader_samplers[32]; + size_t pixel_shader_sampler_count; +}; + + +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_DRAW_COMMAND_H_ diff --git a/src/xenia/gpu/graphics_driver.cc b/src/xenia/gpu/graphics_driver.cc index 65dddea49..e398839b8 100644 --- a/src/xenia/gpu/graphics_driver.cc +++ b/src/xenia/gpu/graphics_driver.cc @@ -12,12 +12,300 @@ using namespace xe; using namespace xe::gpu; +using namespace xe::gpu::xenos; GraphicsDriver::GraphicsDriver(Memory* memory) : - memory_(memory), address_translation_(0) { - memset(®ister_file_, 0, sizeof(register_file_)); + memory_(memory), address_translation_(0) { } GraphicsDriver::~GraphicsDriver() { } + +int GraphicsDriver::LoadShader(XE_GPU_SHADER_TYPE type, + uint32_t address, uint32_t length, + uint32_t start) { + MemoryRange memory_range( + memory_->Translate(address), + address, length); + + ShaderResource* shader = nullptr; + if (type == XE_GPU_SHADER_TYPE_VERTEX) { + VertexShaderResource::Info info; + shader = vertex_shader_ = resource_cache()->FetchVertexShader(memory_range, + info); + if (!vertex_shader_) { + XELOGE("Unable to fetch vertex shader"); + return 1; + } + } else { + PixelShaderResource::Info info; + shader = pixel_shader_ = resource_cache()->FetchPixelShader(memory_range, + info); + if (!pixel_shader_) { + XELOGE("Unable to fetch pixel shader"); + return 1; + } + } + + if (!shader->is_prepared()) { + // Disassemble. + const char* source = shader->disasm_src(); + XELOGGPU("Set shader %d at %0.8X (%db):\n%s", + type, address, length, + source ? source : ""); + } + + return 0; +} + +int GraphicsDriver::PrepareDraw(DrawCommand& command) { + SCOPE_profile_cpu_f("gpu"); + + // Ignore copies for now. + uint32_t enable_mode = register_file_[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7; + if (enable_mode != 4) { + XELOGW("GPU: ignoring draw with enable mode %d", enable_mode); + return 1; + } + + // Reset the things we don't modify so that we have clean state. + command.prim_type = XE_GPU_PRIMITIVE_TYPE_POINT_LIST; + command.index_count = 0; + command.index_buffer = nullptr; + + // Generic stuff. + command.start_index = register_file_[XE_GPU_REG_VGT_INDX_OFFSET].u32; + command.base_vertex = 0; + + int ret; + ret = PopulateState(command); + if (ret) { + XELOGE("Unable to prepare draw state"); + return ret; + } + ret = PopulateConstantBuffers(command); + if (ret) { + XELOGE("Unable to prepare draw constant buffers"); + return ret; + } + ret = PopulateShaders(command); + if (ret) { + XELOGE("Unable to prepare draw shaders"); + return ret; + } + ret = PopulateInputAssembly(command); + if (ret) { + XELOGE("Unable to prepare draw input assembly"); + return ret; + } + ret = PopulateSamplers(command); + if (ret) { + XELOGE("Unable to prepare draw samplers"); + return ret; + } + return 0; +} + +int GraphicsDriver::PrepareDrawIndexBuffer( + DrawCommand& command, + uint32_t address, uint32_t length, + xenos::XE_GPU_ENDIAN endianness, + IndexFormat format) { + SCOPE_profile_cpu_f("gpu"); + + address += address_translation_; + MemoryRange memory_range(memory_->Translate(address), address, length); + + IndexBufferResource::Info info; + info.endianness = endianness; + info.format = format; + + command.index_buffer = + resource_cache()->FetchIndexBuffer(memory_range, info); + if (!command.index_buffer) { + return 1; + } + return 0; +} + +int GraphicsDriver::PopulateState(DrawCommand& command) { + return 0; +} + +int GraphicsDriver::PopulateConstantBuffers(DrawCommand& command) { + command.float4_constants.count = 512; + command.float4_constants.values = + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_000_X].f32; + command.loop_constants.count = 32; + command.loop_constants.values = + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32; + command.bool_constants.count = 8; + command.bool_constants.values = + ®ister_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32; + return 0; +} + +int GraphicsDriver::PopulateShaders(DrawCommand& command) { + SCOPE_profile_cpu_f("gpu"); + + if (!vertex_shader_) { + XELOGE("No vertex shader bound; ignoring"); + return 1; + } + if (!pixel_shader_) { + XELOGE("No pixel shader bound; ignoring"); + return 1; + } + + xe_gpu_program_cntl_t program_cntl; + program_cntl.dword_0 = register_file_[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; + if (!vertex_shader_->is_prepared()) { + if (vertex_shader_->Prepare(program_cntl)) { + XELOGE("Unable to prepare vertex shader"); + return 1; + } + } + if (!pixel_shader_->is_prepared()) { + if (pixel_shader_->Prepare(program_cntl, vertex_shader_)) { + XELOGE("Unable to prepare pixel shader"); + return 1; + } + } + + command.vertex_shader = vertex_shader_; + command.pixel_shader = pixel_shader_; + + return 0; +} + +int GraphicsDriver::PopulateInputAssembly(DrawCommand& command) { + SCOPE_profile_cpu_f("gpu"); + + const auto& buffer_inputs = command.vertex_shader->buffer_inputs(); + command.vertex_buffer_count = buffer_inputs.count; + for (size_t n = 0; n < buffer_inputs.count; n++) { + const auto& desc = buffer_inputs.descs[n]; + + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6; + auto group = reinterpret_cast(®ister_file_.values[r]); + xe_gpu_vertex_fetch_t* fetch = nullptr; + switch (desc.fetch_slot % 3) { + case 0: + fetch = &group->vertex_fetch_0; + break; + case 1: + fetch = &group->vertex_fetch_1; + break; + case 2: + fetch = &group->vertex_fetch_2; + break; + } + XEASSERTNOTNULL(fetch); + // If this assert doesn't hold, maybe we just abort? + XEASSERT(fetch->type == 0x3); + XEASSERTNOTZERO(fetch->size); + + const auto& info = desc.info; + + MemoryRange memory_range; + memory_range.guest_base = (fetch->address << 2) + address_translation_; + memory_range.host_base = memory_->Translate(memory_range.guest_base); + memory_range.length = fetch->size * 4; + // TODO(benvanik): if the memory range is within the command buffer, we + // should use a cached transient buffer. + + auto buffer = resource_cache()->FetchVertexBuffer(memory_range, info); + if (!buffer) { + XELOGE("Unable to create vertex fetch buffer"); + return 1; + } + + command.vertex_buffers[n].input_index = desc.input_index; + command.vertex_buffers[n].buffer = buffer; + command.vertex_buffers[n].stride = desc.info.stride_words * 4; + command.vertex_buffers[n].offset = 0; + } + return 0; +} + +int GraphicsDriver::PopulateSamplers(DrawCommand& command) { + SCOPE_profile_cpu_f("gpu"); + + // Vertex texture samplers. + const auto& vertex_sampler_inputs = command.vertex_shader->sampler_inputs(); + command.vertex_shader_sampler_count = vertex_sampler_inputs.count; + for (size_t i = 0; i < command.vertex_shader_sampler_count; ++i) { + if (PopulateSamplerSet(vertex_sampler_inputs.descs[i], + command.vertex_shader_samplers[i])) { + return 1; + } + } + + // Pixel shader texture sampler. + const auto& pixel_sampler_inputs = command.pixel_shader->sampler_inputs(); + command.pixel_shader_sampler_count = pixel_sampler_inputs.count; + for (size_t i = 0; i < command.pixel_shader_sampler_count; ++i) { + if (PopulateSamplerSet(pixel_sampler_inputs.descs[i], + command.pixel_shader_samplers[i])) { + return 1; + } + } + + return 0; +} + +int GraphicsDriver::PopulateSamplerSet( + const ShaderResource::SamplerDesc& src_input, + DrawCommand::SamplerInput& dst_input) { + int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + src_input.fetch_slot * 6; + const auto group = (const xe_gpu_fetch_group_t*)®ister_file_.values[r]; + const xenos::xe_gpu_texture_fetch_t& fetch = group->texture_fetch; + if (fetch.type != 0x2) { + return 0; + } + + dst_input.input_index = src_input.input_index; + dst_input.texture = nullptr; + dst_input.sampler_state = nullptr; + + TextureResource::Info info; + if (!TextureResource::Info::Prepare(fetch, info)) { + XELOGE("D3D11: unable to parse texture fetcher info"); + return 0; // invalid texture used + } + if (info.format == DXGI_FORMAT_UNKNOWN) { + XELOGW("D3D11: unknown texture format %d", info.format); + return 0; // invalid texture used + } + + // TODO(benvanik): quick validate without refetching intraframe. + // Fetch texture from the cache. + MemoryRange memory_range; + memory_range.guest_base = (fetch.address << 12) + address_translation_; + memory_range.host_base = memory_->Translate(memory_range.guest_base); + memory_range.length = info.input_length; + + auto texture = resource_cache()->FetchTexture(memory_range, info); + if (!texture) { + XELOGW("D3D11: unable to fetch texture"); + return 0; // invalid texture used + } + + SamplerStateResource::Info sampler_info; + if (!SamplerStateResource::Info::Prepare(fetch, + src_input.tex_fetch, + sampler_info)) { + XELOGW("D3D11: unable to parse sampler info"); + return 0; // invalid texture used + } + auto sampler_state = resource_cache()->FetchSamplerState(sampler_info); + if (!sampler_state) { + XELOGW("D3D11: unable to fetch sampler"); + return 0; // invalid texture used + } + + dst_input.texture = texture; + dst_input.sampler_state = sampler_state; + return 0; +} diff --git a/src/xenia/gpu/graphics_driver.h b/src/xenia/gpu/graphics_driver.h index 675a5a7c2..23cb24972 100644 --- a/src/xenia/gpu/graphics_driver.h +++ b/src/xenia/gpu/graphics_driver.h @@ -11,7 +11,9 @@ #define XENIA_GPU_GRAPHICS_DRIVER_H_ #include -#include +#include +#include +#include #include @@ -24,38 +26,45 @@ public: virtual ~GraphicsDriver(); Memory* memory() const { return memory_; } - xenos::RegisterFile* register_file() { return ®ister_file_; }; + virtual ResourceCache* resource_cache() const = 0; + RegisterFile* register_file() { return ®ister_file_; }; void set_address_translation(uint32_t value) { address_translation_ = value; } - virtual void Initialize() = 0; + virtual int Initialize() = 0; - virtual void InvalidateState( - uint32_t mask) = 0; - virtual void SetShader( - xenos::XE_GPU_SHADER_TYPE type, - uint32_t address, - uint32_t start, - uint32_t length) = 0; - virtual void DrawIndexBuffer( - xenos::XE_GPU_PRIMITIVE_TYPE prim_type, - bool index_32bit, uint32_t index_count, - uint32_t index_base, uint32_t index_size, uint32_t endianness) = 0; - //virtual void DrawIndexImmediate(); - virtual void DrawIndexAuto( - xenos::XE_GPU_PRIMITIVE_TYPE prim_type, - uint32_t index_count) = 0; + int LoadShader(xenos::XE_GPU_SHADER_TYPE type, + uint32_t address, uint32_t length, + uint32_t start); + + int PrepareDraw(DrawCommand& command); + int PrepareDrawIndexBuffer(DrawCommand& command, + uint32_t address, uint32_t length, + xenos::XE_GPU_ENDIAN endianness, + IndexFormat format); + virtual int Draw(const DrawCommand& command) = 0; virtual int Resolve() = 0; +private: + int PopulateState(DrawCommand& command); + int PopulateConstantBuffers(DrawCommand& command); + int PopulateShaders(DrawCommand& command); + int PopulateInputAssembly(DrawCommand& command); + int PopulateSamplers(DrawCommand& command); + int PopulateSamplerSet(const ShaderResource::SamplerDesc& src_input, + DrawCommand::SamplerInput& dst_input); + protected: GraphicsDriver(Memory* memory); Memory* memory_; - - xenos::RegisterFile register_file_; + RegisterFile register_file_; uint32_t address_translation_; + + VertexShaderResource* vertex_shader_; + PixelShaderResource* pixel_shader_; }; diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index c0a614d35..be3e4e0de 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -11,9 +11,10 @@ #include #include +#include +#include #include -#include -#include +#include using namespace xe; @@ -24,10 +25,10 @@ using namespace xe::gpu::xenos; GraphicsSystem::GraphicsSystem(Emulator* emulator) : emulator_(emulator), memory_(emulator->memory()), - thread_(0), running_(false), driver_(0), worker_(0), + thread_(nullptr), running_(false), driver_(nullptr), + command_processor_(nullptr), interrupt_callback_(0), interrupt_callback_data_(0), - last_interrupt_time_(0), swap_pending_(false), - thread_wait_(NULL) { + last_interrupt_time_(0), swap_pending_(false), thread_wait_(nullptr) { // Create the run loop used for any windows/etc. // This must be done on the thread we create the driver. run_loop_ = xe_run_loop_create(); @@ -42,7 +43,7 @@ X_STATUS GraphicsSystem::Setup() { processor_ = emulator_->processor(); // Create worker. - worker_ = new RingBufferWorker(this, memory_); + command_processor_ = new CommandProcessor(this, memory_); // Let the processor know we want register access callbacks. emulator_->memory()->AddMappedRange( @@ -77,15 +78,18 @@ void GraphicsSystem::ThreadStart() { // Main run loop. while (running_) { // Peek main run loop. - if (xe_run_loop_pump(run_loop)) { - break; + { + SCOPE_profile_cpu_i("gpu", "GraphicsSystemRunLoopPump"); + if (xe_run_loop_pump(run_loop)) { + break; + } } if (!running_) { break; } // Pump worker. - worker_->Pump(); + command_processor_->Pump(); if (!running_) { break; @@ -107,7 +111,7 @@ void GraphicsSystem::Shutdown() { xe_thread_join(thread_); xe_thread_release(thread_); - delete worker_; + delete command_processor_; xe_run_loop_release(run_loop_); } @@ -125,17 +129,19 @@ void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) { Sleep(0); } XEASSERTNOTNULL(driver_); - worker_->Initialize(driver_, ptr, page_count); + command_processor_->Initialize(driver_, ptr, page_count); } void GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) { - worker_->EnableReadPointerWriteBack(ptr, block_size); + command_processor_->EnableReadPointerWriteBack(ptr, block_size); } uint64_t GraphicsSystem::ReadRegister(uint64_t addr) { uint32_t r = addr & 0xFFFF; - XELOGGPU("ReadRegister(%.4X)", r); + if (FLAGS_trace_ring_buffer) { + XELOGGPU("ReadRegister(%.4X)", r); + } RegisterFile* regs = driver_->register_file(); @@ -148,31 +154,33 @@ uint64_t GraphicsSystem::ReadRegister(uint64_t addr) { return 1; } - XEASSERT(r >= 0 && r < kXEGpuRegisterCount); + XEASSERT(r >= 0 && r < RegisterFile::kRegisterCount); return regs->values[r].u32; } void GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) { uint32_t r = addr & 0xFFFF; - XELOGGPU("WriteRegister(%.4X, %.8X)", r, value); + if (FLAGS_trace_ring_buffer) { + XELOGGPU("WriteRegister(%.4X, %.8X)", r, value); + } RegisterFile* regs = driver_->register_file(); switch (r) { case 0x0714: // CP_RB_WPTR - worker_->UpdateWritePointer((uint32_t)value); + command_processor_->UpdateWritePointer((uint32_t)value); break; default: XELOGW("Unknown GPU register %.4X write: %.8X", r, value); break; } - XEASSERT(r >= 0 && r < kXEGpuRegisterCount); + XEASSERT(r >= 0 && r < RegisterFile::kRegisterCount); regs->values[r].u32 = (uint32_t)value; } void GraphicsSystem::MarkVblank() { - worker_->increment_counter(); + command_processor_->increment_counter(); } void GraphicsSystem::DispatchInterruptCallback( diff --git a/src/xenia/gpu/graphics_system.h b/src/xenia/gpu/graphics_system.h index c7c72fea5..8c0a542c8 100644 --- a/src/xenia/gpu/graphics_system.h +++ b/src/xenia/gpu/graphics_system.h @@ -21,8 +21,8 @@ XEDECLARECLASS2(xe, cpu, Processor); namespace xe { namespace gpu { +class CommandProcessor; class GraphicsDriver; -class RingBufferWorker; class GraphicsSystem { @@ -78,7 +78,7 @@ protected: bool running_; GraphicsDriver* driver_; - RingBufferWorker* worker_; + CommandProcessor* command_processor_; uint32_t interrupt_callback_; uint32_t interrupt_callback_data_; diff --git a/src/xenia/gpu/nop/nop_graphics_driver.cc b/src/xenia/gpu/nop/nop_graphics_driver.cc index 69f88fa95..b710b85e4 100644 --- a/src/xenia/gpu/nop/nop_graphics_driver.cc +++ b/src/xenia/gpu/nop/nop_graphics_driver.cc @@ -10,7 +10,6 @@ #include #include -#include using namespace xe; @@ -19,69 +18,19 @@ using namespace xe::gpu::nop; using namespace xe::gpu::xenos; -NopGraphicsDriver::NopGraphicsDriver(Memory* memory) : - GraphicsDriver(memory) { - shader_cache_ = new ShaderCache(); +NopGraphicsDriver::NopGraphicsDriver(Memory* memory) + : GraphicsDriver(memory), resource_cache_(nullptr) { } NopGraphicsDriver::~NopGraphicsDriver() { - delete shader_cache_; } -void NopGraphicsDriver::Initialize() { +int NopGraphicsDriver::Initialize() { + return 0; } -void NopGraphicsDriver::InvalidateState( - uint32_t mask) { - if (mask == XE_GPU_INVALIDATE_MASK_ALL) { - XELOGGPU("NOP: (invalidate all)"); - } - if (mask & XE_GPU_INVALIDATE_MASK_VERTEX_SHADER) { - XELOGGPU("NOP: invalidate vertex shader"); - } - if (mask & XE_GPU_INVALIDATE_MASK_PIXEL_SHADER) { - XELOGGPU("NOP: invalidate pixel shader"); - } -} - -void NopGraphicsDriver::SetShader( - XE_GPU_SHADER_TYPE type, - uint32_t address, - uint32_t start, - uint32_t length) { - // Find or create shader in the cache. - uint8_t* p = memory_->Translate(address); - Shader* shader = shader_cache_->FindOrCreate( - type, p, length); - - // Disassemble. - const char* source = shader->disasm_src(); - if (!source) { - source = ""; - } - XELOGGPU("NOP: set shader %d at %0.8X (%db):\n%s", - type, address, length, source); -} - -void NopGraphicsDriver::DrawIndexBuffer( - XE_GPU_PRIMITIVE_TYPE prim_type, - bool index_32bit, uint32_t index_count, - uint32_t index_base, uint32_t index_size, uint32_t endianness) { - XELOGGPU("NOP: draw index buffer"); -} - -void NopGraphicsDriver::DrawIndexAuto( - XE_GPU_PRIMITIVE_TYPE prim_type, - uint32_t index_count) { - XELOGGPU("NOP: draw indexed %d (%d indicies)", - prim_type, index_count); - - // TODO(benvanik): - // program control - // context misc - // interpolator control - // shader constants / bools / integers - // fetch constants +int NopGraphicsDriver::Draw(const DrawCommand& command) { + return 0; } int NopGraphicsDriver::Resolve() { diff --git a/src/xenia/gpu/nop/nop_graphics_driver.h b/src/xenia/gpu/nop/nop_graphics_driver.h index d345c8159..9463a0cd5 100644 --- a/src/xenia/gpu/nop/nop_graphics_driver.h +++ b/src/xenia/gpu/nop/nop_graphics_driver.h @@ -19,9 +19,6 @@ namespace xe { namespace gpu { - -class ShaderCache; - namespace nop { @@ -30,27 +27,16 @@ public: NopGraphicsDriver(Memory* memory); virtual ~NopGraphicsDriver(); - virtual void Initialize(); + ResourceCache* resource_cache() const override { return resource_cache_; } - virtual void InvalidateState( - uint32_t mask); - virtual void SetShader( - xenos::XE_GPU_SHADER_TYPE type, - uint32_t address, - uint32_t start, - uint32_t length); - virtual void DrawIndexBuffer( - xenos::XE_GPU_PRIMITIVE_TYPE prim_type, - bool index_32bit, uint32_t index_count, - uint32_t index_base, uint32_t index_size, uint32_t endianness); - virtual void DrawIndexAuto( - xenos::XE_GPU_PRIMITIVE_TYPE prim_type, - uint32_t index_count); + int Initialize() override; - virtual int Resolve(); + int Draw(const DrawCommand& command) override; + + int Resolve() override; protected: - ShaderCache* shader_cache_; + ResourceCache* resource_cache_; }; diff --git a/src/xenia/gpu/register_file.cc b/src/xenia/gpu/register_file.cc index f6f119376..288881d58 100644 --- a/src/xenia/gpu/register_file.cc +++ b/src/xenia/gpu/register_file.cc @@ -10,8 +10,21 @@ #include -using namespace std; using namespace xe; using namespace xe::gpu; -using namespace xe::gpu::xenos; + +RegisterFile::RegisterFile() { + xe_zero_struct(values, sizeof(values)); +} + +const char* RegisterFile::GetRegisterName(uint32_t index) { + switch (index) { +#define XE_GPU_REGISTER(index, type, name) \ + case index: return #name; +#include +#undef XE_GPU_REGISTER + default: + return NULL; + } +} diff --git a/src/xenia/gpu/register_file.h b/src/xenia/gpu/register_file.h index 2a530995f..3ab23b4fa 100644 --- a/src/xenia/gpu/register_file.h +++ b/src/xenia/gpu/register_file.h @@ -11,15 +11,36 @@ #define XENIA_GPU_REGISTER_FILE_H_ #include -#include namespace xe { namespace gpu { +enum Register { +#define XE_GPU_REGISTER(index, type, name) \ + XE_GPU_REG_##name = index, +#include +#undef XE_GPU_REGISTER +}; + + class RegisterFile { public: + RegisterFile(); + + const char* GetRegisterName(uint32_t index); + + static const size_t kRegisterCount = 0x5003; + union RegisterValue { + uint32_t u32; + float f32; + }; + RegisterValue values[kRegisterCount]; + + RegisterValue& operator[](Register reg) { + return values[reg]; + } }; diff --git a/src/xenia/gpu/resource.cc b/src/xenia/gpu/resource.cc index 88966aac5..35ef82bb6 100644 --- a/src/xenia/gpu/resource.cc +++ b/src/xenia/gpu/resource.cc @@ -15,3 +15,23 @@ using namespace xe; using namespace xe::gpu; using namespace xe::gpu::xenos; + +HashedResource::HashedResource(const MemoryRange& memory_range) + : memory_range_(memory_range) { +} + +HashedResource::~HashedResource() = default; + +PagedResource::PagedResource(const MemoryRange& memory_range) + : memory_range_(memory_range), dirtied_(true) { +} + +PagedResource::~PagedResource() = default; + +void PagedResource::MarkDirty(uint32_t lo_address, uint32_t hi_address) { + dirtied_ = true; +} + +StaticResource::StaticResource() = default; + +StaticResource::~StaticResource() = default; diff --git a/src/xenia/gpu/resource.h b/src/xenia/gpu/resource.h index e9a0be7fa..1fb56b3d8 100644 --- a/src/xenia/gpu/resource.h +++ b/src/xenia/gpu/resource.h @@ -18,8 +18,82 @@ namespace xe { namespace gpu { +struct MemoryRange { + uint8_t* host_base; + uint32_t guest_base; + uint32_t length; + + MemoryRange() : host_base(nullptr), guest_base(0), length(0) {} + MemoryRange(const MemoryRange& other) + : host_base(other.host_base), guest_base(other.guest_base), + length(other.length) {} + MemoryRange(uint8_t* _host_base, uint32_t _guest_base, uint32_t _length) + : host_base(_host_base), guest_base(_guest_base), length(_length) {} +}; + + class Resource { public: + virtual ~Resource() = default; + + virtual void* handle() const = 0; + + template + T* handle_as() { + return reinterpret_cast(handle()); + } + +protected: + Resource() = default; + + // last use/LRU stuff +}; + + +class HashedResource : public Resource { +public: + ~HashedResource() override; + + const MemoryRange& memory_range() const { return memory_range_; } + +protected: + HashedResource(const MemoryRange& memory_range); + + MemoryRange memory_range_; + // key +}; + + +class PagedResource : public Resource { +public: + ~PagedResource() override; + + const MemoryRange& memory_range() const { return memory_range_; } + + template + bool Equals(const T& info) { + return Equals(&info, sizeof(info)); + } + virtual bool Equals(const void* info_ptr, size_t info_length) = 0; + + bool is_dirty() const { return dirtied_; } + void MarkDirty(uint32_t lo_address, uint32_t hi_address); + +protected: + PagedResource(const MemoryRange& memory_range); + + MemoryRange memory_range_; + bool dirtied_; + // dirtied pages list +}; + + +class StaticResource : public Resource { +public: + ~StaticResource() override; + +protected: + StaticResource(); }; diff --git a/src/xenia/gpu/resource_cache.cc b/src/xenia/gpu/resource_cache.cc index 7a9a1c24d..c317a12be 100644 --- a/src/xenia/gpu/resource_cache.cc +++ b/src/xenia/gpu/resource_cache.cc @@ -15,3 +15,140 @@ using namespace xe; using namespace xe::gpu; using namespace xe::gpu::xenos; + +ResourceCache::ResourceCache(Memory* memory) + : memory_(memory) { +} + +ResourceCache::~ResourceCache() { + for (auto it = resources_.begin(); it != resources_.end(); ++it) { + Resource* resource = *it; + delete resource; + } + resources_.clear(); +} + +VertexShaderResource* ResourceCache::FetchVertexShader( + const MemoryRange& memory_range, + const VertexShaderResource::Info& info) { + return FetchHashedResource( + memory_range, info, &ResourceCache::CreateVertexShader); +} + +PixelShaderResource* ResourceCache::FetchPixelShader( + const MemoryRange& memory_range, + const PixelShaderResource::Info& info) { + return FetchHashedResource( + memory_range, info, &ResourceCache::CreatePixelShader); +} + +TextureResource* ResourceCache::FetchTexture( + const MemoryRange& memory_range, + const TextureResource::Info& info) { + auto resource = FetchPagedResource( + memory_range, info, &ResourceCache::CreateTexture); + if (!resource) { + return nullptr; + } + if (resource->Prepare()) { + XELOGE("Unable to prepare texture"); + return nullptr; + } + return resource; +} + +SamplerStateResource* ResourceCache::FetchSamplerState( + const SamplerStateResource::Info& info) { + auto key = info.hash(); + auto it = static_resources_.find(key); + if (it != static_resources_.end()) { + return static_cast(it->second); + } + auto resource = CreateSamplerState(info); + if (resource->Prepare()) { + XELOGE("Unable to prepare sampler state"); + return nullptr; + } + static_resources_.insert({ key, resource }); + resources_.push_back(resource); + return resource; +} + +IndexBufferResource* ResourceCache::FetchIndexBuffer( + const MemoryRange& memory_range, + const IndexBufferResource::Info& info) { + auto resource = FetchPagedResource( + memory_range, info, &ResourceCache::CreateIndexBuffer); + if (!resource) { + return nullptr; + } + if (resource->Prepare()) { + XELOGE("Unable to prepare index buffer"); + return nullptr; + } + return resource; +} + +VertexBufferResource* ResourceCache::FetchVertexBuffer( + const MemoryRange& memory_range, + const VertexBufferResource::Info& info) { + auto resource = FetchPagedResource( + memory_range, info, &ResourceCache::CreateVertexBuffer); + if (!resource) { + return nullptr; + } + if (resource->Prepare()) { + XELOGE("Unable to prepare vertex buffer"); + return nullptr; + } + return resource; +} + +uint64_t ResourceCache::HashRange(const MemoryRange& memory_range) { + // We could do something smarter here to potentially early exit. + return xe_hash64(memory_range.host_base, memory_range.length); +} + +void ResourceCache::SyncRange(uint32_t address, int length) { + // Scan the page table in sync with our resource list. This means + // we have O(n) complexity for updates, though we could definitely + // make this faster/cleaner. + // TODO(benvanik): actually do this right. + // For now we assume the page table in the range of our resources + // will not be changing, which allows us to do a foreach(res) and reload + // and then clear the table. + + // total bytes = (512 * 1024 * 1024) / (16 * 1024) = 32768 + // each byte = 1 page + // Walk as qwords so we can clear things up faster. + uint64_t* page_table = reinterpret_cast( + memory_->Translate(memory_->page_table())); + int page_size = 16 * 1024; // 16KB pages + + uint32_t lo_address = address % 0x20000000; + uint32_t hi_address = lo_address + length; + hi_address = (hi_address / page_size) * page_size + page_size; + int start_page = lo_address / page_size; + int end_page = hi_address / page_size; + + auto it = paged_resources_.upper_bound(lo_address); + auto end_it = paged_resources_.lower_bound(hi_address); + while (it != end_it) { + const auto& memory_range = it->second->memory_range(); + int lo_page = (memory_range.guest_base % 0x20000000) / page_size; + int hi_page = lo_page + (memory_range.length / page_size); + for (int i = lo_page / 8; i <= hi_page / 8; ++i) { + uint64_t page_flags = page_table[i]; + if (page_flags) { + // Dirty! + it->second->MarkDirty(i * 8 * page_size, (i * 8 + 7) * page_size); + } + } + ++it; + } + + // Reset page table. + for (auto i = start_page / 8; i <= end_page / 8; ++i) { + page_table[i] = 0; + } +} diff --git a/src/xenia/gpu/resource_cache.h b/src/xenia/gpu/resource_cache.h index 7caaad51f..be95f0861 100644 --- a/src/xenia/gpu/resource_cache.h +++ b/src/xenia/gpu/resource_cache.h @@ -10,7 +10,14 @@ #ifndef XENIA_GPU_RESOURCE_CACHE_H_ #define XENIA_GPU_RESOURCE_CACHE_H_ +#include + #include +#include +#include +#include +#include +#include #include @@ -20,6 +27,96 @@ namespace gpu { class ResourceCache { public: + virtual ~ResourceCache(); + + VertexShaderResource* FetchVertexShader( + const MemoryRange& memory_range, + const VertexShaderResource::Info& info); + PixelShaderResource* FetchPixelShader( + const MemoryRange& memory_range, + const PixelShaderResource::Info& info); + + TextureResource* FetchTexture( + const MemoryRange& memory_range, + const TextureResource::Info& info); + SamplerStateResource* FetchSamplerState( + const SamplerStateResource::Info& info); + + IndexBufferResource* FetchIndexBuffer( + const MemoryRange& memory_range, + const IndexBufferResource::Info& info); + VertexBufferResource* FetchVertexBuffer( + const MemoryRange& memory_range, + const VertexBufferResource::Info& info); + + uint64_t HashRange(const MemoryRange& memory_range); + + void SyncRange(uint32_t address, int length); + +protected: + ResourceCache(Memory* memory); + + template + T* FetchHashedResource(const MemoryRange& memory_range, + const typename T::Info& info, + const V& factory) { + // TODO(benvanik): if there's no way it's changed and it's been checked, + // just lookup. This way we don't rehash 100x a frame. + auto key = HashRange(memory_range); + auto it = hashed_resources_.find(key); + if (it != hashed_resources_.end()) { + return static_cast(it->second); + } + auto resource = (this->*factory)(memory_range, info); + hashed_resources_.insert({ key, resource }); + resources_.push_back(resource); + return resource; + } + + template + T* FetchPagedResource(const MemoryRange& memory_range, + const typename T::Info& info, + const V& factory) { + uint32_t lo_address = memory_range.guest_base % 0x20000000; + auto key = uint64_t(lo_address); + auto range = paged_resources_.equal_range(key); + for (auto it = range.first; it != range.second; ++it) { + if (it->second->memory_range().length == memory_range.length && + it->second->Equals(info)) { + return static_cast(it->second); + } + } + auto resource = (this->*factory)(memory_range, info); + paged_resources_.insert({ key, resource }); + resources_.push_back(resource); + return resource; + } + + virtual VertexShaderResource* CreateVertexShader( + const MemoryRange& memory_range, + const VertexShaderResource::Info& info) = 0; + virtual PixelShaderResource* CreatePixelShader( + const MemoryRange& memory_range, + const PixelShaderResource::Info& info) = 0; + virtual TextureResource* CreateTexture( + const MemoryRange& memory_range, + const TextureResource::Info& info) = 0; + virtual SamplerStateResource* CreateSamplerState( + const SamplerStateResource::Info& info) = 0; + virtual IndexBufferResource* CreateIndexBuffer( + const MemoryRange& memory_range, + const IndexBufferResource::Info& info) = 0; + virtual VertexBufferResource* CreateVertexBuffer( + const MemoryRange& memory_range, + const VertexBufferResource::Info& info) = 0; + +private: + Memory* memory_; + + std::vector resources_; + std::unordered_map hashed_resources_; + std::unordered_map static_resources_; + std::multimap paged_resources_; }; diff --git a/src/xenia/gpu/ring_buffer_worker.cc b/src/xenia/gpu/ring_buffer_worker.cc deleted file mode 100644 index 9999601bb..000000000 --- a/src/xenia/gpu/ring_buffer_worker.cc +++ /dev/null @@ -1,741 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include -#include -#include -#include -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::xenos; - - -#define XETRACERB(fmt, ...) if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__) - - -RingBufferWorker::RingBufferWorker( - GraphicsSystem* graphics_system, Memory* memory) : - graphics_system_(graphics_system), memory_(memory), driver_(0) { - write_ptr_index_event_ = CreateEvent( - NULL, FALSE, FALSE, NULL); - - primary_buffer_ptr_ = 0; - primary_buffer_size_ = 0; - read_ptr_index_ = 0; - read_ptr_update_freq_ = 0; - read_ptr_writeback_ptr_ = 0; - write_ptr_index_ = 0; - write_ptr_max_index_ = 0; - - LARGE_INTEGER perf_counter; - QueryPerformanceCounter(&perf_counter); - time_base_ = perf_counter.QuadPart; - counter_ = 0; -} - -RingBufferWorker::~RingBufferWorker() { - SetEvent(write_ptr_index_event_); - CloseHandle(write_ptr_index_event_); -} - -uint64_t RingBufferWorker::QueryTime() { - LARGE_INTEGER perf_counter; - QueryPerformanceCounter(&perf_counter); - return perf_counter.QuadPart - time_base_; -} - -void RingBufferWorker::Initialize(GraphicsDriver* driver, - uint32_t ptr, uint32_t page_count) { - driver_ = driver; - primary_buffer_ptr_ = ptr; - // Not sure this is correct, but it's a way to take the page_count back to - // the number of bytes allocated by the physical alloc. - uint32_t original_size = 1 << (0x1C - page_count - 1); - primary_buffer_size_ = original_size; - read_ptr_index_ = 0; - - // Tell the driver what to use for translation. - driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF); -} - -void RingBufferWorker::EnableReadPointerWriteBack(uint32_t ptr, - uint32_t block_size) { - // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C - // ptr = RB_RPTR_ADDR, pointer to write back the address to. - read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr; - // CP_RB_CNTL Ring Buffer Control 0x704 - // block_size = RB_BLKSZ, number of quadwords read between updates of the - // read pointer. - read_ptr_update_freq_ = (uint32_t)pow(2.0, (double)block_size) / 4; -} - -void RingBufferWorker::UpdateWritePointer(uint32_t value) { - write_ptr_max_index_ = MAX(write_ptr_max_index_, value); - write_ptr_index_ = value; - SetEvent(write_ptr_index_event_); -} - -void RingBufferWorker::Pump() { - uint8_t* p = memory_->membase(); - - if (write_ptr_index_ == 0xBAADF00D || - read_ptr_index_ == write_ptr_index_) { - // Check if the pointer has moved. - // We wait a short bit here to yield time. Since we are also running the - // main window display we don't want to pause too long, though. - const int wait_time_ms = 1; - if (WaitForSingleObject(write_ptr_index_event_, - wait_time_ms) == WAIT_TIMEOUT) { - return; - } - } - - // Bring local so we don't have to worry about them changing out from under - // us. - uint32_t write_ptr_index = write_ptr_index_; - uint32_t write_ptr_max_index = write_ptr_max_index_; - if (read_ptr_index_ == write_ptr_index) { - return; - } - - // Process the new commands. - XETRACERB("Ring buffer thread work"); - - // Execute. Note that we handle wraparound transparently. - ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index); - read_ptr_index_ = write_ptr_index; - - // TODO(benvanik): use read_ptr_update_freq_ and only issue after moving - // that many indices. - if (read_ptr_writeback_ptr_) { - XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_); - } -} - -void RingBufferWorker::ExecutePrimaryBuffer( - uint32_t start_index, uint32_t end_index) { - SCOPE_profile_cpu_f("gpu"); - - // Adjust pointer base. - uint32_t ptr = primary_buffer_ptr_ + start_index * 4; - ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF); - uint32_t end_ptr = primary_buffer_ptr_ + end_index * 4; - end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF); - - XETRACERB("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)", - ptr, start_index, end_index); - - // Execute commands! - PacketArgs args; - args.ptr = ptr; - args.base_ptr = primary_buffer_ptr_; - args.max_address = primary_buffer_ptr_ + primary_buffer_size_ * 4; - args.ptr_mask = (primary_buffer_size_ / 4) - 1; - uint32_t n = 0; - while (args.ptr != end_ptr) { - n += ExecutePacket(args); - } - if (end_index > start_index) { - XEASSERT(n == (end_index - start_index)); - } - - XETRACERB(" ExecutePrimaryBuffer End"); -} - -void RingBufferWorker::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) { - XETRACERB("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length); - - // Execute commands! - PacketArgs args; - args.ptr = ptr; - args.base_ptr = ptr; - args.max_address = ptr + length * 4; - args.ptr_mask = 0; - for (uint32_t n = 0; n < length;) { - n += ExecutePacket(args); - XEASSERT(n <= length); - } - - XETRACERB(" ExecuteIndirectBuffer End"); -} - -#define LOG_DATA(count) \ - for (uint32_t __m = 0; __m < count; __m++) { \ - XETRACERB("[%.8X] %.8X", \ - packet_ptr + (1 + __m) * 4, \ - XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \ - } - -void RingBufferWorker::AdvancePtr(PacketArgs& args, uint32_t n) { - args.ptr = args.ptr + n * 4; - if (args.ptr_mask) { - args.ptr = - args.base_ptr + (((args.ptr - args.base_ptr) / 4) & args.ptr_mask) * 4; - } -} -#define ADVANCE_PTR(n) AdvancePtr(args, n) -#define PEEK_PTR() \ - XEGETUINT32BE(p + args.ptr) -#define READ_PTR() \ - XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1); - -uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { - uint8_t* p = memory_->membase(); - RegisterFile* regs = driver_->register_file(); - - uint32_t packet_ptr = args.ptr; - const uint8_t* packet_base = p + packet_ptr; - const uint32_t packet = PEEK_PTR(); - ADVANCE_PTR(1); - const uint32_t packet_type = packet >> 30; - if (packet == 0) { - XETRACERB("[%.8X] Packet(%.8X): 0?", - packet_ptr, packet); - return 1; - } - - switch (packet_type) { - case 0x00: - { - // Type-0 packet. - // Write count registers in sequence to the registers starting at - // (base_index << 2). - XETRACERB("[%.8X] Packet(%.8X): set registers:", - packet_ptr, packet); - uint32_t count = ((packet >> 16) & 0x3FFF) + 1; - uint32_t base_index = (packet & 0x7FFF); - uint32_t write_one_reg = (packet >> 15) & 0x1; - for (uint32_t m = 0; m < count; m++) { - uint32_t reg_data = PEEK_PTR(); - uint32_t target_index = write_one_reg ? base_index : base_index + m; - const char* reg_name = xenos::GetRegisterName(target_index); - XETRACERB("[%.8X] %.8X -> %.4X %s", - args.ptr, - reg_data, target_index, reg_name ? reg_name : ""); - ADVANCE_PTR(1); - WriteRegister(packet_ptr, target_index, reg_data); - } - return 1 + count; - } - break; - case 0x01: - { - // Type-1 packet. - // Contains two registers of data. Type-0 should be more common. - XETRACERB("[%.8X] Packet(%.8X): set registers:", - packet_ptr, packet); - uint32_t reg_index_1 = packet & 0x7FF; - uint32_t reg_index_2 = (packet >> 11) & 0x7FF; - uint32_t reg_ptr_1 = args.ptr; - uint32_t reg_data_1 = READ_PTR(); - uint32_t reg_ptr_2 = args.ptr; - uint32_t reg_data_2 = READ_PTR(); - const char* reg_name_1 = xenos::GetRegisterName(reg_index_1); - const char* reg_name_2 = xenos::GetRegisterName(reg_index_2); - XETRACERB("[%.8X] %.8X -> %.4X %s", - reg_ptr_1, - reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : ""); - XETRACERB("[%.8X] %.8X -> %.4X %s", - reg_ptr_2, - reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : ""); - WriteRegister(packet_ptr, reg_index_1, reg_data_1); - WriteRegister(packet_ptr, reg_index_2, reg_data_2); - return 1 + 2; - } - break; - case 0x02: - // Type-2 packet. - // No-op. Do nothing. - XETRACERB("[%.8X] Packet(%.8X): padding", - packet_ptr, packet); - return 1; - case 0x03: - { - // Type-3 packet. - uint32_t count = ((packet >> 16) & 0x3FFF) + 1; - uint32_t opcode = (packet >> 8) & 0x7F; - // & 1 == predicate, maybe? - - switch (opcode) { - case PM4_ME_INIT: - // initialize CP's micro-engine - XETRACERB("[%.8X] Packet(%.8X): PM4_ME_INIT", - packet_ptr, packet); - LOG_DATA(count); - ADVANCE_PTR(count); - break; - - case PM4_NOP: - // skip N 32-bit words to get to the next packet - // No-op, ignore some data. - XETRACERB("[%.8X] Packet(%.8X): PM4_NOP", - packet_ptr, packet); - LOG_DATA(count); - ADVANCE_PTR(count); - break; - - case PM4_INTERRUPT: - // generate interrupt from the command stream - { - XETRACERB("[%.8X] Packet(%.8X): PM4_INTERRUPT", - packet_ptr, packet); - LOG_DATA(count); - uint32_t cpu_mask = READ_PTR(); - for (int n = 0; n < 6; n++) { - if (cpu_mask & (1 << n)) { - graphics_system_->DispatchInterruptCallback(1, n); - } - } - } - break; - - case PM4_INDIRECT_BUFFER: - // indirect buffer dispatch - { - uint32_t list_ptr = READ_PTR(); - uint32_t list_length = READ_PTR(); - XETRACERB("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", - packet_ptr, packet, list_ptr, list_length); - ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); - } - break; - - case PM4_WAIT_REG_MEM: - // wait until a register or memory location is a specific value - { - XETRACERB("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", - packet_ptr, packet); - LOG_DATA(count); - uint32_t wait_info = READ_PTR(); - uint32_t poll_reg_addr = READ_PTR(); - uint32_t ref = READ_PTR(); - uint32_t mask = READ_PTR(); - uint32_t wait = READ_PTR(); - bool matched = false; - do { - uint32_t value; - if (wait_info & 0x10) { - // Memory. - XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3); - poll_reg_addr &= ~0x3; - value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr)); - value = GpuSwap(value, endianness); - } else { - // Register. - XEASSERT(poll_reg_addr < kXEGpuRegisterCount); - value = regs->values[poll_reg_addr].u32; - } - switch (wait_info & 0x7) { - case 0x0: // Never. - matched = false; - break; - case 0x1: // Less than reference. - matched = (value & mask) < ref; - break; - case 0x2: // Less than or equal to reference. - matched = (value & mask) <= ref; - break; - case 0x3: // Equal to reference. - matched = (value & mask) == ref; - break; - case 0x4: // Not equal to reference. - matched = (value & mask) != ref; - break; - case 0x5: // Greater than or equal to reference. - matched = (value & mask) >= ref; - break; - case 0x6: // Greater than reference. - matched = (value & mask) > ref; - break; - case 0x7: // Always - matched = true; - break; - } - if (!matched) { - // Wait. - if (wait >= 0x100) { - Sleep(wait / 0x100); - } else { - SwitchToThread(); - } - } - } while (!matched); - } - break; - - case PM4_REG_RMW: - // register read/modify/write - // ? (used during shader upload and edram setup) - { - XETRACERB("[%.8X] Packet(%.8X): PM4_REG_RMW", - packet_ptr, packet); - LOG_DATA(count); - uint32_t rmw_info = READ_PTR(); - uint32_t and_mask = READ_PTR(); - uint32_t or_mask = READ_PTR(); - uint32_t value = regs->values[rmw_info & 0x1FFF].u32; - if ((rmw_info >> 30) & 0x1) { - // | reg - value |= regs->values[or_mask & 0x1FFF].u32; - } else { - // | imm - value |= or_mask; - } - if ((rmw_info >> 31) & 0x1) { - // & reg - value &= regs->values[and_mask & 0x1FFF].u32; - } else { - // & imm - value &= and_mask; - } - WriteRegister(packet_ptr, rmw_info & 0x1FFF, value); - } - break; - - case PM4_COND_WRITE: - // conditional write to memory or register - { - XETRACERB("[%.8X] Packet(%.8X): PM4_COND_WRITE", - packet_ptr, packet); - LOG_DATA(count); - uint32_t wait_info = READ_PTR(); - uint32_t poll_reg_addr = READ_PTR(); - uint32_t ref = READ_PTR(); - uint32_t mask = READ_PTR(); - uint32_t write_reg_addr = READ_PTR(); - uint32_t write_data = READ_PTR(); - uint32_t value; - if (wait_info & 0x10) { - // Memory. - XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3); - poll_reg_addr &= ~0x3; - value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr)); - value = GpuSwap(value, endianness); - } else { - // Register. - XEASSERT(poll_reg_addr < kXEGpuRegisterCount); - value = regs->values[poll_reg_addr].u32; - } - bool matched = false; - switch (wait_info & 0x7) { - case 0x0: // Never. - matched = false; - break; - case 0x1: // Less than reference. - matched = (value & mask) < ref; - break; - case 0x2: // Less than or equal to reference. - matched = (value & mask) <= ref; - break; - case 0x3: // Equal to reference. - matched = (value & mask) == ref; - break; - case 0x4: // Not equal to reference. - matched = (value & mask) != ref; - break; - case 0x5: // Greater than or equal to reference. - matched = (value & mask) >= ref; - break; - case 0x6: // Greater than reference. - matched = (value & mask) > ref; - break; - case 0x7: // Always - matched = true; - break; - } - if (matched) { - // Write. - if (wait_info & 0x100) { - // Memory. - XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3); - write_reg_addr &= ~0x3; - write_data = GpuSwap(write_data, endianness); - XESETUINT32LE(p + GpuToCpu(packet_ptr, write_reg_addr), - write_data); - } else { - // Register. - WriteRegister(packet_ptr, write_reg_addr, write_data); - } - } - } - break; - - case PM4_EVENT_WRITE: - // generate an event that creates a write to memory when completed - { - XETRACERB("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)", - packet_ptr, packet); - LOG_DATA(count); - uint32_t initiator = READ_PTR(); - if (count == 1) { - // Just an event flag? Where does this write? - } else { - // Write to an address. - XEASSERTALWAYS(); - ADVANCE_PTR(count - 1); - } - } - break; - case PM4_EVENT_WRITE_SHD: - // generate a VS|PS_done event - { - XETRACERB("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", - packet_ptr, packet); - LOG_DATA(count); - uint32_t initiator = READ_PTR(); - uint32_t address = READ_PTR(); - uint32_t value = READ_PTR(); - // Writeback initiator. - WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR, - initiator & 0x1F); - uint32_t data_value; - if ((initiator >> 31) & 0x1) { - // Write counter (GPU vblank counter?). - data_value = counter_; - } else { - // Write value. - data_value = value; - } - XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3); - address &= ~0x3; - data_value = GpuSwap(data_value, endianness); - XESETUINT32LE(p + GpuToCpu(address), data_value); - } - break; - - case PM4_DRAW_INDX: - // initiate fetch of index buffer and draw - { - XETRACERB("[%.8X] Packet(%.8X): PM4_DRAW_INDX", - packet_ptr, packet); - LOG_DATA(count); - // d0 = viz query info - uint32_t d0 = READ_PTR(); - uint32_t d1 = READ_PTR(); - uint32_t index_count = d1 >> 16; - uint32_t prim_type = d1 & 0x3F; - uint32_t src_sel = (d1 >> 6) & 0x3; - if (src_sel == 0x0) { - uint32_t index_base = READ_PTR(); - uint32_t index_size = READ_PTR(); - uint32_t endianness = index_size >> 29; - index_size &= 0x00FFFFFF; - bool index_32bit = (d1 >> 11) & 0x1; - index_size *= index_32bit ? 4 : 2; - driver_->DrawIndexBuffer( - (XE_GPU_PRIMITIVE_TYPE)prim_type, - index_32bit, index_count, index_base, index_size, endianness); - } else if (src_sel == 0x2) { - driver_->DrawIndexAuto( - (XE_GPU_PRIMITIVE_TYPE)prim_type, - index_count); - } else { - // Unknown source select. - XEASSERTALWAYS(); - } - } - break; - case PM4_DRAW_INDX_2: - // draw using supplied indices in packet - { - XETRACERB("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2", - packet_ptr, packet); - LOG_DATA(count); - uint32_t d0 = READ_PTR(); - uint32_t index_count = d0 >> 16; - uint32_t prim_type = d0 & 0x3F; - uint32_t src_sel = (d0 >> 6) & 0x3; - XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex' - driver_->DrawIndexAuto( - (XE_GPU_PRIMITIVE_TYPE)prim_type, - index_count); - } - break; - - case PM4_SET_CONSTANT: - // load constant into chip and to memory - { - XETRACERB("[%.8X] Packet(%.8X): PM4_SET_CONSTANT", - packet_ptr, packet); - // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) - // reg - 0x2000 - uint32_t offset_type = READ_PTR(); - uint32_t index = offset_type & 0x7FF; - uint32_t type = (offset_type >> 16) & 0xFF; - switch (type) { - case 0x4: // REGISTER - index += 0x2000; // registers - for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = READ_PTR(); - const char* reg_name = xenos::GetRegisterName(index); - XETRACERB("[%.8X] %.8X -> %.4X %s", - packet_ptr + (1 + n) * 4, - data, index, reg_name ? reg_name : ""); - WriteRegister(packet_ptr, index, data); - } - break; - default: - XEASSERTALWAYS(); - break; - } - } - break; - case PM4_LOAD_ALU_CONSTANT: - // load constants from memory - { - XETRACERB("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT", - packet_ptr, packet); - uint32_t address = READ_PTR(); - address &= 0x3FFFFFFF; - uint32_t offset_type = READ_PTR(); - uint32_t index = offset_type & 0x7FF; - uint32_t size = READ_PTR(); - size &= 0xFFF; - index += 0x4000; // alu constants - for (uint32_t n = 0; n < size; n++, index++) { - uint32_t data = XEGETUINT32BE( - p + GpuToCpu(packet_ptr, address + n * 4)); - const char* reg_name = xenos::GetRegisterName(index); - XETRACERB("[%.8X] %.8X -> %.4X %s", - packet_ptr, - data, index, reg_name ? reg_name : ""); - WriteRegister(packet_ptr, index, data); - } - } - break; - - case PM4_IM_LOAD: - // load sequencer instruction memory (pointer-based) - { - XETRACERB("[%.8X] Packet(%.8X): PM4_IM_LOAD", - packet_ptr, packet); - LOG_DATA(count); - uint32_t addr_type = READ_PTR(); - uint32_t type = addr_type & 0x3; - uint32_t addr = addr_type & ~0x3; - uint32_t start_size = READ_PTR(); - uint32_t start = start_size >> 16; - uint32_t size = start_size & 0xFFFF; // dwords - XEASSERT(start == 0); - driver_->SetShader( - (XE_GPU_SHADER_TYPE)type, - GpuToCpu(packet_ptr, addr), - start, - size * 4); - } - break; - case PM4_IM_LOAD_IMMEDIATE: - // load sequencer instruction memory (code embedded in packet) - { - XETRACERB("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", - packet_ptr, packet); - LOG_DATA(count); - uint32_t type = READ_PTR(); - uint32_t start_size = READ_PTR(); - uint32_t start = start_size >> 16; - uint32_t size = start_size & 0xFFFF; // dwords - XEASSERT(start == 0); - // TODO(benvanik): figure out if this could wrap. - XEASSERT(args.ptr + size * 4 < args.max_address); - driver_->SetShader( - (XE_GPU_SHADER_TYPE)type, - args.ptr, - start, - size * 4); - ADVANCE_PTR(size); - } - break; - - case PM4_INVALIDATE_STATE: - // selective invalidation of state pointers - { - XETRACERB("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE", - packet_ptr, packet); - LOG_DATA(count); - uint32_t mask = READ_PTR(); - driver_->InvalidateState(mask); - } - break; - - case PM4_SET_BIN_MASK_LO: - { - uint32_t value = READ_PTR(); - XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X", - packet_ptr, packet, value); - } - break; - case PM4_SET_BIN_MASK_HI: - { - uint32_t value = READ_PTR(); - XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X", - packet_ptr, packet, value); - } - break; - case PM4_SET_BIN_SELECT_LO: - { - uint32_t value = READ_PTR(); - XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X", - packet_ptr, packet, value); - } - break; - case PM4_SET_BIN_SELECT_HI: - { - uint32_t value = READ_PTR(); - XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X", - packet_ptr, packet, value); - } - break; - - // Ignored packets - useful if breaking on the default handler below. - case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000 - XETRACERB("[%.8X] Packet(%.8X): unknown!", - packet_ptr, packet); - LOG_DATA(count); - ADVANCE_PTR(count); - break; - - default: - XETRACERB("[%.8X] Packet(%.8X): unknown!", - packet_ptr, packet); - LOG_DATA(count); - ADVANCE_PTR(count); - break; - } - - return 1 + count; - } - break; - } - - return 0; -} - -void RingBufferWorker::WriteRegister( - uint32_t packet_ptr, uint32_t index, uint32_t value) { - RegisterFile* regs = driver_->register_file(); - XEASSERT(index < kXEGpuRegisterCount); - regs->values[index].u32 = value; - - // Scratch register writeback. - if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { - uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; - if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) { - // Enabled - write to address. - uint8_t* p = memory_->membase(); - uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32; - uint32_t mem_addr = scratch_addr + (scratch_reg * 4); - XESETUINT32BE(p + GpuToCpu(primary_buffer_ptr_, mem_addr), value); - } - } -} diff --git a/src/xenia/gpu/ring_buffer_worker.h b/src/xenia/gpu/ring_buffer_worker.h deleted file mode 100644 index 889625d68..000000000 --- a/src/xenia/gpu/ring_buffer_worker.h +++ /dev/null @@ -1,81 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_RING_BUFFER_WORKER_H_ -#define XENIA_GPU_RING_BUFFER_WORKER_H_ - -#include - -#include - - -namespace xe { -namespace gpu { - -class GraphicsDriver; -class GraphicsSystem; - -class RingBufferWorker { -public: - RingBufferWorker(GraphicsSystem* graphics_system, Memory* memory); - virtual ~RingBufferWorker(); - - Memory* memory() const { return memory_; } - - uint64_t QueryTime(); - uint32_t counter() const { return counter_; } - void increment_counter() { counter_++; } - - void Initialize(GraphicsDriver* driver, - uint32_t ptr, uint32_t page_count); - void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); - - void UpdateWritePointer(uint32_t value); - - void Pump(); - -private: - typedef struct { - uint32_t ptr; - uint32_t base_ptr; - uint32_t max_address; - uint32_t ptr_mask; - } PacketArgs; - void AdvancePtr(PacketArgs& args, uint32_t n); - void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index); - void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length); - uint32_t ExecutePacket(PacketArgs& args); - void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value); - -protected: - Memory* memory_; - GraphicsSystem* graphics_system_; - GraphicsDriver* driver_; - - uint64_t time_base_; - uint32_t counter_; - - uint32_t primary_buffer_ptr_; - uint32_t primary_buffer_size_; - - uint32_t read_ptr_index_; - uint32_t read_ptr_update_freq_; - uint32_t read_ptr_writeback_ptr_; - - HANDLE write_ptr_index_event_; - volatile uint32_t write_ptr_index_; - volatile uint32_t write_ptr_max_index_; -}; - - -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_RING_BUFFER_WORKER_H_ diff --git a/src/xenia/gpu/sampler_state_resource.cc b/src/xenia/gpu/sampler_state_resource.cc new file mode 100644 index 000000000..5865a6920 --- /dev/null +++ b/src/xenia/gpu/sampler_state_resource.cc @@ -0,0 +1,32 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + + +using namespace std; +using namespace xe; +using namespace xe::gpu; +using namespace xe::gpu::xenos; + + +bool SamplerStateResource::Info::Prepare( + const xe_gpu_texture_fetch_t& fetch, const instr_fetch_tex_t& fetch_instr, + Info& out_info) { + out_info.min_filter = static_cast( + fetch_instr.min_filter == 3 ? fetch.min_filter : fetch_instr.min_filter); + out_info.mag_filter = static_cast( + fetch_instr.mag_filter == 3 ? fetch.mag_filter : fetch_instr.mag_filter); + out_info.mip_filter = static_cast( + fetch_instr.mip_filter == 3 ? fetch.mip_filter : fetch_instr.mip_filter); + out_info.clamp_u = fetch.clamp_x; + out_info.clamp_v = fetch.clamp_y; + out_info.clamp_w = fetch.clamp_z; + return true; +} diff --git a/src/xenia/gpu/sampler_state_resource.h b/src/xenia/gpu/sampler_state_resource.h new file mode 100644 index 000000000..c0a3c4ab3 --- /dev/null +++ b/src/xenia/gpu/sampler_state_resource.h @@ -0,0 +1,67 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_SAMPLER_STATE_RESOURCE_H_ +#define XENIA_GPU_SAMPLER_STATE_RESOURCE_H_ + +#include +#include +#include + + +namespace xe { +namespace gpu { + + +class SamplerStateResource : public StaticResource { +public: + struct Info { + xenos::instr_tex_filter_t min_filter; + xenos::instr_tex_filter_t mag_filter; + xenos::instr_tex_filter_t mip_filter; + uint32_t clamp_u; + uint32_t clamp_v; + uint32_t clamp_w; + + uint64_t hash() const { + return hash_combine(0, + min_filter, mag_filter, mip_filter, + clamp_u, clamp_v, clamp_w); + } + bool Equals(const Info& other) const { + return min_filter == other.min_filter && + mag_filter == other.mag_filter && + mip_filter == other.mip_filter && + clamp_u == other.clamp_u && + clamp_v == other.clamp_v && + clamp_w == other.clamp_w; + } + + static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch, + const xenos::instr_fetch_tex_t& fetch_instr, + Info& out_info); + }; + + SamplerStateResource(const Info& info) : info_(info) {} + virtual ~SamplerStateResource() = default; + + const Info& info() const { return info_; } + + virtual int Prepare() = 0; + +protected: + Info info_; +}; + + +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_SAMPLER_STATE_RESOURCE_H_ diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc deleted file mode 100644 index 69b083a60..000000000 --- a/src/xenia/gpu/shader.cc +++ /dev/null @@ -1,266 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::xenos; - - -Shader::Shader( - XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length, - uint64_t hash) : - type_(type), hash_(hash), is_prepared_(false), disasm_src_(NULL) { - xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_)); - xe_zero_struct(&vtx_buffer_inputs_, sizeof(vtx_buffer_inputs_)); - xe_zero_struct(&tex_buffer_inputs_, sizeof(tex_buffer_inputs_)); - - // Verify. - dword_count_ = length / 4; - XEASSERT(dword_count_ <= 512); - - // Copy bytes and swap. - size_t byte_size = dword_count_ * sizeof(uint32_t); - dwords_ = (uint32_t*)xe_malloc(byte_size); - for (uint32_t n = 0; n < dword_count_; n++) { - dwords_[n] = XEGETUINT32BE(src_ptr + n * 4); - } - - // Gather input/output registers/etc. - GatherIO(); - - // Disassemble, for debugging. - disasm_src_ = DisassembleShader(type_, dwords_, dword_count_); -} - -Shader::~Shader() { - if (disasm_src_) { - xe_free(disasm_src_); - } - xe_free(dwords_); -} - -void Shader::GatherIO() { - // Process all execution blocks. - instr_cf_t cfa; - instr_cf_t cfb; - for (int idx = 0; idx < dword_count_; idx += 3) { - uint32_t dword_0 = dwords_[idx + 0]; - uint32_t dword_1 = dwords_[idx + 1]; - uint32_t dword_2 = dwords_[idx + 2]; - cfa.dword_0 = dword_0; - cfa.dword_1 = dword_1 & 0xFFFF; - cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); - cfb.dword_1 = dword_2 >> 16; - if (cfa.opc == ALLOC) { - GatherAlloc(&cfa.alloc); - } else if (cfa.is_exec()) { - GatherExec(&cfa.exec); - } - if (cfb.opc == ALLOC) { - GatherAlloc(&cfb.alloc); - } else if (cfb.is_exec()) { - GatherExec(&cfb.exec); - } - if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { - break; - } - } -} - -void Shader::GatherAlloc(const instr_cf_alloc_t* cf) { - allocs_.push_back(*cf); - - switch (cf->buffer_select) { - case SQ_POSITION: - // Position (SV_POSITION). - alloc_counts_.positions += cf->size + 1; - break; - case SQ_PARAMETER_PIXEL: - // Output to PS (if VS), or frag output (if PS). - alloc_counts_.params += cf->size + 1; - break; - case SQ_MEMORY: - // MEMEXPORT? - alloc_counts_.memories += cf->size + 1; - break; - } -} - -void Shader::GatherExec(const instr_cf_exec_t* cf) { - execs_.push_back(*cf); - - uint32_t sequence = cf->serialize; - for (uint32_t i = 0; i < cf->count; i++) { - uint32_t alu_off = (cf->address + i); - int sync = sequence & 0x2; - if (sequence & 0x1) { - const instr_fetch_t* fetch = - (const instr_fetch_t*)(dwords_ + alu_off * 3); - switch (fetch->opc) { - case VTX_FETCH: - GatherVertexFetch(&fetch->vtx); - break; - case TEX_FETCH: - GatherTextureFetch(&fetch->tex); - break; - case TEX_GET_BORDER_COLOR_FRAC: - case TEX_GET_COMP_TEX_LOD: - case TEX_GET_GRADIENTS: - case TEX_GET_WEIGHTS: - case TEX_SET_TEX_LOD: - case TEX_SET_GRADIENTS_H: - case TEX_SET_GRADIENTS_V: - default: - XEASSERTALWAYS(); - break; - } - } else { - // TODO(benvanik): gather registers used, predicate bits used, etc. - const instr_alu_t* alu = - (const instr_alu_t*)(dwords_ + alu_off * 3); - if (alu->vector_write_mask) { - if (alu->export_data && alu->vector_dest == 63) { - alloc_counts_.point_size = true; - } - } - if (alu->scalar_write_mask || !alu->vector_write_mask) { - if (alu->export_data && alu->scalar_dest == 63) { - alloc_counts_.point_size = true; - } - } - } - sequence >>= 2; - } -} - -void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) { - // dst_reg/dst_swiz - // src_reg/src_swiz - // format = a2xx_sq_surfaceformat - // format_comp_all ? signed : unsigned - // num_format_all ? normalized - // stride - // offset - // const_index/const_index_sel -- fetch constant register - // num_format_all ? integer : fraction - // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default - - // Sometimes games have fetches that just produce constants. We can - // ignore those. - uint32_t dst_swiz = vtx->dst_swiz; - bool fetches_any_data = false; - for (int i = 0; i < 4; i++) { - if ((dst_swiz & 0x7) == 4) { - // 0.0 - } else if ((dst_swiz & 0x7) == 5) { - // 1.0 - } else if ((dst_swiz & 0x7) == 6) { - // ? - } else if ((dst_swiz & 0x7) == 7) { - // Previous register value. - } else { - fetches_any_data = true; - break; - } - dst_swiz >>= 3; - } - if (!fetches_any_data) { - return; - } - - uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; - auto& inputs = vtx_buffer_inputs_; - vtx_buffer_element_t* el = NULL; - for (size_t n = 0; n < inputs.count; n++) { - auto& input = inputs.descs[n]; - if (input.fetch_slot == fetch_slot) { - XEASSERT(input.element_count + 1 < XECOUNT(input.elements)); - // It may not hold that all strides are equal, but I hope it does. - XEASSERT(!vtx->stride || input.stride_words == vtx->stride); - el = &input.elements[input.element_count++]; - break; - } - } - if (!el) { - XEASSERTNOTZERO(vtx->stride); - XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs)); - auto& input = inputs.descs[inputs.count++]; - input.input_index = inputs.count - 1; - input.fetch_slot = fetch_slot; - input.stride_words = vtx->stride; - el = &input.elements[input.element_count++]; - } - - el->vtx_fetch = *vtx; - el->format = vtx->format; - el->offset_words = vtx->offset; - el->size_words = 0; - switch (el->format) { - case FMT_8_8_8_8: - case FMT_2_10_10_10: - case FMT_10_11_11: - case FMT_11_11_10: - el->size_words = 1; - break; - case FMT_16_16: - case FMT_16_16_FLOAT: - el->size_words = 1; - break; - case FMT_16_16_16_16: - case FMT_16_16_16_16_FLOAT: - el->size_words = 2; - break; - case FMT_32: - case FMT_32_FLOAT: - el->size_words = 1; - break; - case FMT_32_32: - case FMT_32_32_FLOAT: - el->size_words = 2; - break; - case FMT_32_32_32_FLOAT: - el->size_words = 3; - break; - case FMT_32_32_32_32: - case FMT_32_32_32_32_FLOAT: - el->size_words = 4; - break; - default: - XELOGE("Unknown vertex format: %d", el->format); - XEASSERTALWAYS(); - break; - } -} - -const Shader::vtx_buffer_inputs_t* Shader::GetVertexBufferInputs() { - return &vtx_buffer_inputs_; -} - -void Shader::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) { - // TODO(benvanik): check dest_swiz to see if we are writing anything. - - auto& inputs = tex_buffer_inputs_; - XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs)); - auto& input = inputs.descs[inputs.count++]; - input.input_index = inputs.count - 1; - input.fetch_slot = tex->const_idx & 0xF; // ? - input.tex_fetch = *tex; - - // Format mangling, size estimation, etc. -} - -const Shader::tex_buffer_inputs_t* Shader::GetTextureBufferInputs() { - return &tex_buffer_inputs_; -} diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h deleted file mode 100644 index 1dd26b2b4..000000000 --- a/src/xenia/gpu/shader.h +++ /dev/null @@ -1,104 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SHADER_H_ -#define XENIA_GPU_SHADER_H_ - -#include -#include -#include - - -namespace xe { -namespace gpu { - - -class Shader { -public: - Shader(xenos::XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length, - uint64_t hash); - virtual ~Shader(); - - xenos::XE_GPU_SHADER_TYPE type() const { return type_; } - const uint32_t* dwords() const { return dwords_; } - size_t dword_count() const { return dword_count_; } - uint64_t hash() const { return hash_; } - bool is_prepared() const { return is_prepared_; } - - const char* disasm_src() const { return disasm_src_; } - - typedef struct { - xenos::instr_fetch_vtx_t vtx_fetch; - uint32_t format; - uint32_t offset_words; - uint32_t size_words; - } vtx_buffer_element_t; - typedef struct { - uint32_t input_index; - uint32_t fetch_slot; - uint32_t stride_words; - uint32_t element_count; - vtx_buffer_element_t elements[16]; - } vtx_buffer_desc_t; - typedef struct { - uint32_t count; - vtx_buffer_desc_t descs[16]; - } vtx_buffer_inputs_t; - const vtx_buffer_inputs_t* GetVertexBufferInputs(); - - typedef struct { - uint32_t input_index; - uint32_t fetch_slot; - xenos::instr_fetch_tex_t tex_fetch; - uint32_t format; - } tex_buffer_desc_t; - typedef struct { - uint32_t count; - tex_buffer_desc_t descs[32]; - } tex_buffer_inputs_t; - const tex_buffer_inputs_t* GetTextureBufferInputs(); - - typedef struct { - uint32_t positions; - uint32_t params; - uint32_t memories; - bool point_size; - } alloc_counts_t; - const alloc_counts_t& alloc_counts() const { return alloc_counts_; } - -private: - void GatherIO(); - void GatherAlloc(const xenos::instr_cf_alloc_t* cf); - void GatherExec(const xenos::instr_cf_exec_t* cf); - void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx); - void GatherTextureFetch(const xenos::instr_fetch_tex_t* tex); - -protected: - xenos::XE_GPU_SHADER_TYPE type_; - uint32_t* dwords_; - size_t dword_count_; - uint64_t hash_; - bool is_prepared_; - - char* disasm_src_; - - alloc_counts_t alloc_counts_; - std::vector execs_; - std::vector allocs_; - vtx_buffer_inputs_t vtx_buffer_inputs_; - tex_buffer_inputs_t tex_buffer_inputs_; -}; - - -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_SHADER_H_ diff --git a/src/xenia/gpu/shader_cache.cc b/src/xenia/gpu/shader_cache.cc deleted file mode 100644 index 2c5e84294..000000000 --- a/src/xenia/gpu/shader_cache.cc +++ /dev/null @@ -1,80 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include - - -using namespace std; -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::xenos; - - -ShaderCache::ShaderCache() { -} - -ShaderCache::~ShaderCache() { - Clear(); -} - -Shader* ShaderCache::Create( - XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length) { - uint64_t hash = Hash(src_ptr, length); - Shader* shader = CreateCore(type, src_ptr, length, hash); - map_.insert({ hash, shader }); - return shader; -} - -Shader* ShaderCache::CreateCore( - XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length, - uint64_t hash) { - return new Shader(type, src_ptr, length, hash); -} - -Shader* ShaderCache::Find( - XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length) { - uint64_t hash = Hash(src_ptr, length); - auto it = map_.find(hash); - if (it != map_.end()) { - return it->second; - } - return NULL; -} - -Shader* ShaderCache::FindOrCreate( - XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length) { - SCOPE_profile_cpu_f("gpu"); - - uint64_t hash = Hash(src_ptr, length); - auto it = map_.find(hash); - if (it != map_.end()) { - return it->second; - } - Shader* shader = CreateCore(type, src_ptr, length, hash); - map_.insert({ hash, shader }); - return shader; -} - -void ShaderCache::Clear() { - for (auto it = map_.begin(); it != map_.end(); ++it) { - Shader* shader = it->second; - delete shader; - } - map_.clear(); -} - -uint64_t ShaderCache::Hash(const uint8_t* src_ptr, size_t length) { - return xe_hash64(src_ptr, length, 0); -} diff --git a/src/xenia/gpu/shader_cache.h b/src/xenia/gpu/shader_cache.h deleted file mode 100644 index 97edc382f..000000000 --- a/src/xenia/gpu/shader_cache.h +++ /dev/null @@ -1,56 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_SHADER_CACHE_H_ -#define XENIA_GPU_SHADER_CACHE_H_ - -#include -#include -#include - - -namespace xe { -namespace gpu { - - -class ShaderCache { -public: - ShaderCache(); - virtual ~ShaderCache(); - - Shader* Create( - xenos::XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length); - Shader* Find( - xenos::XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length); - Shader* FindOrCreate( - xenos::XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length); - - void Clear(); - -private: - uint64_t Hash(const uint8_t* src_ptr, size_t length); - - std::unordered_map map_; - -protected: - virtual Shader* CreateCore( - xenos::XE_GPU_SHADER_TYPE type, - const uint8_t* src_ptr, size_t length, - uint64_t hash); -}; - - -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_SHADER_CACHE_H_ diff --git a/src/xenia/gpu/shader_resource.cc b/src/xenia/gpu/shader_resource.cc index e2520db62..9fbcbf2bb 100644 --- a/src/xenia/gpu/shader_resource.cc +++ b/src/xenia/gpu/shader_resource.cc @@ -9,9 +9,267 @@ #include +#include + using namespace std; using namespace xe; using namespace xe::gpu; using namespace xe::gpu::xenos; + +ShaderResource::ShaderResource(const MemoryRange& memory_range, + const Info& info, + xenos::XE_GPU_SHADER_TYPE type) + : HashedResource(memory_range), + info_(info), type_(type), is_prepared_(false), disasm_src_(nullptr) { + xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_)); + xe_zero_struct(&buffer_inputs_, sizeof(buffer_inputs_)); + xe_zero_struct(&sampler_inputs_, sizeof(sampler_inputs_)); + + // Verify. + dword_count_ = memory_range.length / 4; + XEASSERT(dword_count_ <= 512); + + // Copy bytes and swap. + size_t byte_size = dword_count_ * sizeof(uint32_t); + dwords_ = (uint32_t*)xe_malloc(byte_size); + for (uint32_t n = 0; n < dword_count_; n++) { + dwords_[n] = XEGETUINT32BE(memory_range.host_base + n * 4); + } + + // Disassemble, for debugging. + disasm_src_ = DisassembleShader(type_, dwords_, dword_count_); + + // Gather input/output registers/etc. + GatherIO(); +} + +ShaderResource::~ShaderResource() { + xe_free(disasm_src_); + xe_free(dwords_); +} + +void ShaderResource::GatherIO() { + // Process all execution blocks. + instr_cf_t cfa; + instr_cf_t cfb; + for (int idx = 0; idx < dword_count_; idx += 3) { + uint32_t dword_0 = dwords_[idx + 0]; + uint32_t dword_1 = dwords_[idx + 1]; + uint32_t dword_2 = dwords_[idx + 2]; + cfa.dword_0 = dword_0; + cfa.dword_1 = dword_1 & 0xFFFF; + cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); + cfb.dword_1 = dword_2 >> 16; + if (cfa.opc == ALLOC) { + GatherAlloc(&cfa.alloc); + } else if (cfa.is_exec()) { + GatherExec(&cfa.exec); + } + if (cfb.opc == ALLOC) { + GatherAlloc(&cfb.alloc); + } else if (cfb.is_exec()) { + GatherExec(&cfb.exec); + } + if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { + break; + } + } +} + +void ShaderResource::GatherAlloc(const instr_cf_alloc_t* cf) { + allocs_.push_back(*cf); + + switch (cf->buffer_select) { + case SQ_POSITION: + // Position (SV_POSITION). + alloc_counts_.positions += cf->size + 1; + break; + case SQ_PARAMETER_PIXEL: + // Output to PS (if VS), or frag output (if PS). + alloc_counts_.params += cf->size + 1; + break; + case SQ_MEMORY: + // MEMEXPORT? + alloc_counts_.memories += cf->size + 1; + break; + } +} + +void ShaderResource::GatherExec(const instr_cf_exec_t* cf) { + execs_.push_back(*cf); + + uint32_t sequence = cf->serialize; + for (uint32_t i = 0; i < cf->count; i++) { + uint32_t alu_off = (cf->address + i); + int sync = sequence & 0x2; + if (sequence & 0x1) { + const instr_fetch_t* fetch = + (const instr_fetch_t*)(dwords_ + alu_off * 3); + switch (fetch->opc) { + case VTX_FETCH: + GatherVertexFetch(&fetch->vtx); + break; + case TEX_FETCH: + GatherTextureFetch(&fetch->tex); + break; + case TEX_GET_BORDER_COLOR_FRAC: + case TEX_GET_COMP_TEX_LOD: + case TEX_GET_GRADIENTS: + case TEX_GET_WEIGHTS: + case TEX_SET_TEX_LOD: + case TEX_SET_GRADIENTS_H: + case TEX_SET_GRADIENTS_V: + default: + XEASSERTALWAYS(); + break; + } + } else { + // TODO(benvanik): gather registers used, predicate bits used, etc. + const instr_alu_t* alu = + (const instr_alu_t*)(dwords_ + alu_off * 3); + if (alu->vector_write_mask) { + if (alu->export_data && alu->vector_dest == 63) { + alloc_counts_.point_size = true; + } + } + if (alu->scalar_write_mask || !alu->vector_write_mask) { + if (alu->export_data && alu->scalar_dest == 63) { + alloc_counts_.point_size = true; + } + } + } + sequence >>= 2; + } +} + +void ShaderResource::GatherVertexFetch(const instr_fetch_vtx_t* vtx) { + XEASSERT(type_ == XE_GPU_SHADER_TYPE_VERTEX); + + // dst_reg/dst_swiz + // src_reg/src_swiz + // format = a2xx_sq_surfaceformat + // format_comp_all ? signed : unsigned + // num_format_all ? normalized + // stride + // offset + // const_index/const_index_sel -- fetch constant register + // num_format_all ? integer : fraction + // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default + + // Sometimes games have fetches that just produce constants. We can + // ignore those. + uint32_t dst_swiz = vtx->dst_swiz; + bool fetches_any_data = false; + for (int i = 0; i < 4; i++) { + if ((dst_swiz & 0x7) == 4) { + // 0.0 + } else if ((dst_swiz & 0x7) == 5) { + // 1.0 + } else if ((dst_swiz & 0x7) == 6) { + // ? + } else if ((dst_swiz & 0x7) == 7) { + // Previous register value. + } else { + fetches_any_data = true; + break; + } + dst_swiz >>= 3; + } + if (!fetches_any_data) { + return; + } + + uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; + auto& inputs = buffer_inputs_; + VertexBufferResource::DeclElement* el = nullptr; + for (size_t n = 0; n < inputs.count; n++) { + auto& desc = inputs.descs[n]; + auto& info = desc.info; + if (desc.fetch_slot == fetch_slot) { + XEASSERT(info.element_count + 1 < XECOUNT(info.elements)); + // It may not hold that all strides are equal, but I hope it does. + XEASSERT(!vtx->stride || info.stride_words == vtx->stride); + el = &info.elements[info.element_count++]; + break; + } + } + if (!el) { + XEASSERTNOTZERO(vtx->stride); + XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs)); + auto& desc = inputs.descs[inputs.count++]; + desc.input_index = inputs.count - 1; + desc.fetch_slot = fetch_slot; + desc.info.stride_words = vtx->stride; + el = &desc.info.elements[desc.info.element_count++]; + } + + el->vtx_fetch = *vtx; + el->format = vtx->format; + el->is_normalized = vtx->num_format_all == 0; + el->is_signed = vtx->format_comp_all == 1; + el->offset_words = vtx->offset; + el->size_words = 0; + switch (el->format) { + case FMT_8_8_8_8: + case FMT_2_10_10_10: + case FMT_10_11_11: + case FMT_11_11_10: + el->size_words = 1; + break; + case FMT_16_16: + case FMT_16_16_FLOAT: + el->size_words = 1; + break; + case FMT_16_16_16_16: + case FMT_16_16_16_16_FLOAT: + el->size_words = 2; + break; + case FMT_32: + case FMT_32_FLOAT: + el->size_words = 1; + break; + case FMT_32_32: + case FMT_32_32_FLOAT: + el->size_words = 2; + break; + case FMT_32_32_32_FLOAT: + el->size_words = 3; + break; + case FMT_32_32_32_32: + case FMT_32_32_32_32_FLOAT: + el->size_words = 4; + break; + default: + XELOGE("Unknown vertex format: %d", el->format); + XEASSERTALWAYS(); + break; + } +} + +void ShaderResource::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) { + // TODO(benvanik): check dest_swiz to see if we are writing anything. + + XEASSERT(sampler_inputs_.count + 1 < XECOUNT(sampler_inputs_.descs)); + auto& input = sampler_inputs_.descs[sampler_inputs_.count++]; + input.input_index = sampler_inputs_.count - 1; + input.fetch_slot = tex->const_idx & 0xF; // ? + input.tex_fetch = *tex; + + // Format mangling, size estimation, etc. +} + +VertexShaderResource::VertexShaderResource( + const MemoryRange& memory_range, const Info& info) + : ShaderResource(memory_range, info, XE_GPU_SHADER_TYPE_VERTEX) { +} + +VertexShaderResource::~VertexShaderResource() = default; + +PixelShaderResource::PixelShaderResource( + const MemoryRange& memory_range, const Info& info) + : ShaderResource(memory_range, info, XE_GPU_SHADER_TYPE_PIXEL) { +} + +PixelShaderResource::~PixelShaderResource() = default; diff --git a/src/xenia/gpu/shader_resource.h b/src/xenia/gpu/shader_resource.h index 24b787ec4..b591bfaf2 100644 --- a/src/xenia/gpu/shader_resource.h +++ b/src/xenia/gpu/shader_resource.h @@ -10,7 +10,9 @@ #ifndef XENIA_GPU_SHADER_RESOURCE_H_ #define XENIA_GPU_SHADER_RESOURCE_H_ -#include +#include +#include +#include #include @@ -18,8 +20,104 @@ namespace xe { namespace gpu { -class ShaderResource : public Resource { +class ShaderResource : public HashedResource { public: + struct Info { + // type, etc? + }; + + ~ShaderResource() override; + + const Info& info() const { return info_; } + xenos::XE_GPU_SHADER_TYPE type() const { return type_; } + const uint32_t* dwords() const { return dwords_; } + const size_t dword_count() const { return dword_count_; } + + bool is_prepared() const { return is_prepared_; } + const char* disasm_src() const { return disasm_src_; } + + struct BufferDesc { + uint32_t input_index; + uint32_t fetch_slot; + VertexBufferResource::Info info; + // xenos::instr_fetch_vtx_t vtx_fetch; for each el + }; + struct BufferInputs { + uint32_t count; + BufferDesc descs[32]; + }; + const BufferInputs& buffer_inputs() { return buffer_inputs_; } + + struct SamplerDesc { + uint32_t input_index; + uint32_t fetch_slot; + uint32_t format; + xenos::instr_fetch_tex_t tex_fetch; + }; + struct SamplerInputs { + uint32_t count; + SamplerDesc descs[32]; + }; + const SamplerInputs& sampler_inputs() { return sampler_inputs_; } + + struct AllocCounts { + uint32_t positions; + uint32_t params; + uint32_t memories; + bool point_size; + }; + const AllocCounts& alloc_counts() const { return alloc_counts_; } + const std::vector& execs() const { return execs_; } + const std::vector& allocs() const { return allocs_; } + +private: + void GatherIO(); + void GatherAlloc(const xenos::instr_cf_alloc_t* cf); + void GatherExec(const xenos::instr_cf_exec_t* cf); + void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx); + void GatherTextureFetch(const xenos::instr_fetch_tex_t* tex); + +protected: + ShaderResource(const MemoryRange& memory_range, + const Info& info, + xenos::XE_GPU_SHADER_TYPE type); + + Info info_; + xenos::XE_GPU_SHADER_TYPE type_; + size_t dword_count_; + uint32_t* dwords_; + char* disasm_src_; + + AllocCounts alloc_counts_; + std::vector execs_; + std::vector allocs_; + BufferInputs buffer_inputs_; + SamplerInputs sampler_inputs_; + + bool is_prepared_; +}; + + +class VertexShaderResource : public ShaderResource { +public: + VertexShaderResource(const MemoryRange& memory_range, + const Info& info); + ~VertexShaderResource() override; + + // buffer_inputs() matching VertexBufferResource::Info + + virtual int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl) = 0; +}; + + +class PixelShaderResource : public ShaderResource { +public: + PixelShaderResource(const MemoryRange& memory_range, + const Info& info); + ~PixelShaderResource() override; + + virtual int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl, + VertexShaderResource* vertex_shader) = 0; }; diff --git a/src/xenia/gpu/sources.gypi b/src/xenia/gpu/sources.gypi index 3d4462fd1..b01f7a33b 100644 --- a/src/xenia/gpu/sources.gypi +++ b/src/xenia/gpu/sources.gypi @@ -5,6 +5,8 @@ 'buffer_resource.h', 'command_processor.cc', 'command_processor.h', + 'draw_command.cc', + 'draw_command.h', 'gpu-private.h', 'gpu.cc', 'gpu.h', @@ -18,6 +20,8 @@ 'resource.h', 'resource_cache.cc', 'resource_cache.h', + 'sampler_state_resource.cc', + 'sampler_state_resource.h', 'shader_resource.cc', 'shader_resource.h', 'texture_resource.cc', diff --git a/src/xenia/gpu/texture.cc b/src/xenia/gpu/texture.cc deleted file mode 100644 index d624d82ce..000000000 --- a/src/xenia/gpu/texture.cc +++ /dev/null @@ -1,369 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include -#include - -// TODO(benvanik): replace DXGI constants with xenia constants. -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::xenos; - - -Texture::Texture(uint32_t address, const uint8_t* host_address) - : address_(address), host_address_(host_address) { -} - -Texture::~Texture() { - for (auto it = views_.begin(); it != views_.end(); ++it) { - auto view = *it; - delete view; - } - views_.clear(); -} - -TextureView* Texture::Fetch( - const xenos::xe_gpu_texture_fetch_t& fetch) { - // TODO(benvanik): compute length for hash check. - size_t length = 0; - switch (fetch.dimension) { - case DIMENSION_1D: - break; - case DIMENSION_2D: - break; - case DIMENSION_3D: - break; - case DIMENSION_CUBE: - break; - } - uint64_t hash = xe_hash64(host_address_, length); - - for (auto it = views_.begin(); it != views_.end(); ++it) { - auto view = *it; - if (memcmp(&view->fetch, &fetch, sizeof(fetch))) { - continue; - } - bool dirty = hash != view->hash; - if (dirty) { - return FetchDirty(view, fetch) ? view : nullptr; - } else { - return view; - } - } - - auto new_view = FetchNew(fetch); - if (!new_view) { - return nullptr; - } - new_view->hash = hash; - views_.push_back(new_view); - return new_view; -} - -bool Texture::FillViewInfo(TextureView* view, - const xenos::xe_gpu_texture_fetch_t& fetch) { - // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx - // a2xx_sq_surfaceformat - - view->texture = this; - view->fetch = fetch; - - view->dimensions = fetch.dimension; - switch (fetch.dimension) { - case DIMENSION_1D: - view->width = fetch.size_1d.width; - break; - case DIMENSION_2D: - view->width = fetch.size_2d.width; - view->height = fetch.size_2d.height; - break; - case DIMENSION_3D: - view->width = fetch.size_3d.width; - view->height = fetch.size_3d.height; - view->depth = fetch.size_3d.depth; - break; - case DIMENSION_CUBE: - view->width = fetch.size_stack.width; - view->height = fetch.size_stack.height; - view->depth = fetch.size_stack.depth; - break; - } - view->format = DXGI_FORMAT_UNKNOWN; - view->block_size = 0; - view->texel_pitch = 0; - view->is_compressed = false; - switch (fetch.format) { - case FMT_8: - switch (fetch.swizzle) { - case XE_GPU_SWIZZLE_RRR1: - view->format = DXGI_FORMAT_R8_UNORM; - break; - case XE_GPU_SWIZZLE_000R: - view->format = DXGI_FORMAT_A8_UNORM; - break; - default: - XELOGW("D3D11: unhandled swizzle for FMT_8"); - view->format = DXGI_FORMAT_A8_UNORM; - break; - } - view->block_size = 1; - view->texel_pitch = 1; - break; - case FMT_1_5_5_5: - switch (fetch.swizzle) { - case XE_GPU_SWIZZLE_BGRA: - view->format = DXGI_FORMAT_B5G5R5A1_UNORM; - break; - default: - XELOGW("D3D11: unhandled swizzle for FMT_1_5_5_5"); - view->format = DXGI_FORMAT_B5G5R5A1_UNORM; - break; - } - view->block_size = 1; - view->texel_pitch = 2; - break; - case FMT_8_8_8_8: - switch (fetch.swizzle) { - case XE_GPU_SWIZZLE_RGBA: - view->format = DXGI_FORMAT_R8G8B8A8_UNORM; - break; - case XE_GPU_SWIZZLE_BGRA: - view->format = DXGI_FORMAT_B8G8R8A8_UNORM; - break; - case XE_GPU_SWIZZLE_RGB1: - view->format = DXGI_FORMAT_R8G8B8A8_UNORM; // ? - break; - case XE_GPU_SWIZZLE_BGR1: - view->format = DXGI_FORMAT_B8G8R8X8_UNORM; - break; - default: - XELOGW("D3D11: unhandled swizzle for FMT_8_8_8_8"); - view->format = DXGI_FORMAT_R8G8B8A8_UNORM; - break; - } - view->block_size = 1; - view->texel_pitch = 4; - break; - case FMT_4_4_4_4: - switch (fetch.swizzle) { - case XE_GPU_SWIZZLE_BGRA: - view->format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+ - break; - default: - XELOGW("D3D11: unhandled swizzle for FMT_4_4_4_4"); - view->format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+ - break; - } - view->block_size = 1; - view->texel_pitch = 2; - break; - case FMT_16_16_16_16_FLOAT: - switch (fetch.swizzle) { - case XE_GPU_SWIZZLE_RGBA: - view->format = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - default: - XELOGW("D3D11: unhandled swizzle for FMT_16_16_16_16_FLOAT"); - view->format = DXGI_FORMAT_R16G16B16A16_FLOAT; - break; - } - view->block_size = 1; - view->texel_pitch = 8; - break; - case FMT_32_FLOAT: - switch (fetch.swizzle) { - case XE_GPU_SWIZZLE_R111: - view->format = DXGI_FORMAT_R32_FLOAT; - break; - default: - XELOGW("D3D11: unhandled swizzle for FMT_32_FLOAT"); - view->format = DXGI_FORMAT_R32_FLOAT; - break; - } - view->block_size = 1; - view->texel_pitch = 4; - break; - case FMT_DXT1: - view->format = DXGI_FORMAT_BC1_UNORM; - view->block_size = 4; - view->texel_pitch = 8; - view->is_compressed = true; - break; - case FMT_DXT2_3: - case FMT_DXT4_5: - view->format = (fetch.format == FMT_DXT4_5 ? DXGI_FORMAT_BC3_UNORM : DXGI_FORMAT_BC2_UNORM); - view->block_size = 4; - view->texel_pitch = 16; - view->is_compressed = true; - break; - case FMT_1_REVERSE: - case FMT_1: - case FMT_5_6_5: - case FMT_6_5_5: - case FMT_2_10_10_10: - case FMT_8_A: - case FMT_8_B: - case FMT_8_8: - case FMT_Cr_Y1_Cb_Y0: - case FMT_Y1_Cr_Y0_Cb: - case FMT_5_5_5_1: - case FMT_8_8_8_8_A: - case FMT_10_11_11: - case FMT_11_11_10: - case FMT_24_8: - case FMT_24_8_FLOAT: - case FMT_16: - case FMT_16_16: - case FMT_16_16_16_16: - case FMT_16_EXPAND: - case FMT_16_16_EXPAND: - case FMT_16_16_16_16_EXPAND: - case FMT_16_FLOAT: - case FMT_16_16_FLOAT: - case FMT_32: - case FMT_32_32: - case FMT_32_32_32_32: - case FMT_32_32_FLOAT: - case FMT_32_32_32_32_FLOAT: - case FMT_32_AS_8: - case FMT_32_AS_8_8: - case FMT_16_MPEG: - case FMT_16_16_MPEG: - case FMT_8_INTERLACED: - case FMT_32_AS_8_INTERLACED: - case FMT_32_AS_8_8_INTERLACED: - case FMT_16_INTERLACED: - case FMT_16_MPEG_INTERLACED: - case FMT_16_16_MPEG_INTERLACED: - case FMT_DXN: - case FMT_8_8_8_8_AS_16_16_16_16: - case FMT_DXT1_AS_16_16_16_16: - case FMT_DXT2_3_AS_16_16_16_16: - case FMT_DXT4_5_AS_16_16_16_16: - case FMT_2_10_10_10_AS_16_16_16_16: - case FMT_10_11_11_AS_16_16_16_16: - case FMT_11_11_10_AS_16_16_16_16: - case FMT_32_32_32_FLOAT: - case FMT_DXT3A: - case FMT_DXT5A: - case FMT_CTX1: - case FMT_DXT3A_AS_1_1_1_1: - view->format = DXGI_FORMAT_UNKNOWN; - break; - } - - if (view->format == DXGI_FORMAT_UNKNOWN) { - return false; - } - - switch (fetch.dimension) { - case DIMENSION_1D: - break; - case DIMENSION_2D: - view->sizes_2d = GetTextureSizes2D(view); - break; - case DIMENSION_3D: - break; - case DIMENSION_CUBE: - break; - } - return true; -} - -const TextureSizes2D Texture::GetTextureSizes2D(TextureView* view) { - TextureSizes2D sizes; - - sizes.logical_width = 1 + view->fetch.size_2d.width; - sizes.logical_height = 1 + view->fetch.size_2d.height; - - sizes.block_width = sizes.logical_width / view->block_size; - sizes.block_height = sizes.logical_height / view->block_size; - - if (!view->is_compressed) { - // must be 32x32, but also must have a pitch that is a multiple of 256 bytes - uint32_t bytes_per_block = view->block_size * view->block_size * - view->texel_pitch; - uint32_t width_multiple = 32; - if (bytes_per_block) { - uint32_t minimum_multiple = 256 / bytes_per_block; - if (width_multiple < minimum_multiple) { - width_multiple = minimum_multiple; - } - } - sizes.input_width = XEROUNDUP(sizes.logical_width, width_multiple); - sizes.input_height = XEROUNDUP(sizes.logical_height, 32); - sizes.output_width = sizes.logical_width; - sizes.output_height = sizes.logical_height; - } else { - // must be 128x128 - sizes.input_width = XEROUNDUP(sizes.logical_width, 128); - sizes.input_height = XEROUNDUP(sizes.logical_height, 128); - sizes.output_width = XENEXTPOW2(sizes.logical_width); - sizes.output_height = XENEXTPOW2(sizes.logical_height); - } - - sizes.logical_pitch = - (sizes.logical_width / view->block_size) * view->texel_pitch; - sizes.input_pitch = - (sizes.input_width / view->block_size) * view->texel_pitch; - - return sizes; -} - -void Texture::TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch, - XE_GPU_ENDIAN endianness) { - switch (endianness) { - case XE_GPU_ENDIAN_8IN16: - for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) { - *(uint16_t*)dest = XESWAP16(*(uint16_t*)src); - } - break; - case XE_GPU_ENDIAN_8IN32: // Swap bytes. - for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) { - *(uint32_t*)dest = XESWAP32(*(uint32_t*)src); - } - break; - case XE_GPU_ENDIAN_16IN32: // Swap half words. - for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) { - uint32_t value = *(uint32_t*)src; - *(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16); - } - break; - default: - case XE_GPU_ENDIAN_NONE: - memcpy(dest, src, pitch); - break; - } -} - -// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104 -uint32_t Texture::TiledOffset2DOuter(uint32_t y, uint32_t width, - uint32_t log_bpp) { - uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7); - uint32_t micro = ((y & 6) << 2) << log_bpp; - return macro + - ((micro & ~15) << 1) + - (micro & 15) + - ((y & 8) << (3 + log_bpp)) + - ((y & 1) << 4); -} - -uint32_t Texture::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp, - uint32_t base_offset) { - uint32_t macro = (x >> 5) << (bpp + 7); - uint32_t micro = (x & 7) << bpp; - uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15)); - return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) + - ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); -} diff --git a/src/xenia/gpu/texture.h b/src/xenia/gpu/texture.h deleted file mode 100644 index 9b919a5d9..000000000 --- a/src/xenia/gpu/texture.h +++ /dev/null @@ -1,110 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_TEXTURE_H_ -#define XENIA_GPU_TEXTURE_H_ - -#include -#include - -// TODO(benvanik): replace DXGI constants with xenia constants. -#include - - -namespace xe { -namespace gpu { - - -class Texture; - -struct TextureSizes1D {}; -struct TextureSizes2D { - uint32_t logical_width; - uint32_t logical_height; - uint32_t block_width; - uint32_t block_height; - uint32_t input_width; - uint32_t input_height; - uint32_t output_width; - uint32_t output_height; - uint32_t logical_pitch; - uint32_t input_pitch; -}; -struct TextureSizes3D {}; -struct TextureSizesCube {}; - -struct TextureView { - Texture* texture; - xenos::xe_gpu_texture_fetch_t fetch; - uint64_t hash; - - union { - TextureSizes1D sizes_1d; - TextureSizes2D sizes_2d; - TextureSizes3D sizes_3d; - TextureSizesCube sizes_cube; - }; - - int dimensions; - uint32_t width; - uint32_t height; - uint32_t depth; - uint32_t block_size; - uint32_t texel_pitch; - bool is_compressed; - DXGI_FORMAT format; - - TextureView() - : texture(nullptr), - dimensions(0), - width(0), height(0), depth(0), - block_size(0), texel_pitch(0), - is_compressed(false), format(DXGI_FORMAT_UNKNOWN) {} -}; - - -class Texture { -public: - Texture(uint32_t address, const uint8_t* host_address); - virtual ~Texture(); - - TextureView* Fetch( - const xenos::xe_gpu_texture_fetch_t& fetch); - -protected: - bool FillViewInfo(TextureView* view, - const xenos::xe_gpu_texture_fetch_t& fetch); - - virtual TextureView* FetchNew( - const xenos::xe_gpu_texture_fetch_t& fetch) = 0; - virtual bool FetchDirty( - TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) = 0; - - const TextureSizes2D GetTextureSizes2D(TextureView* view); - - static void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch, - xenos::XE_GPU_ENDIAN endianness); - static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width, - uint32_t log_bpp); - static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp, - uint32_t base_offset); - - uint32_t address_; - const uint8_t* host_address_; - - // TODO(benvanik): replace with LRU keyed list. - std::vector views_; -}; - - -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_TEXTURE_H_ diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc deleted file mode 100644 index 1f0a4a5ac..000000000 --- a/src/xenia/gpu/texture_cache.cc +++ /dev/null @@ -1,50 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include - - -using namespace xe; -using namespace xe::gpu; -using namespace xe::gpu::xenos; - - -// https://github.com/ivmai/bdwgc/blob/master/os_dep.c - -TextureCache::TextureCache(Memory* memory) - : memory_(memory) { -} - -TextureCache::~TextureCache() { - for (auto it = textures_.begin(); it != textures_.end(); ++it) { - auto texture = it->second; - delete texture; - } - textures_.clear(); -} - -TextureView* TextureCache::FetchTexture( - uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch) { - auto it = textures_.find(address); - if (it == textures_.end()) { - // Texture not found. - const uint8_t* host_address = memory_->Translate(address); - auto texture = CreateTexture(address, host_address, fetch); - if (!texture) { - return nullptr; - } - textures_.insert({ address, texture }); - return texture->Fetch(fetch); - } else { - // Texture found. - return it->second->Fetch(fetch); - } -} diff --git a/src/xenia/gpu/texture_cache.h b/src/xenia/gpu/texture_cache.h deleted file mode 100644 index 285ffe1d7..000000000 --- a/src/xenia/gpu/texture_cache.h +++ /dev/null @@ -1,50 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_TEXTURE_CACHE_H_ -#define XENIA_GPU_TEXTURE_CACHE_H_ - -#include -#include -#include - - -namespace xe { -namespace gpu { - - -// TODO(benvanik): overlapping textures. -// TODO(benvanik): multiple textures (differing formats/etc) per address. -class TextureCache { -public: - TextureCache(Memory* memory); - virtual ~TextureCache(); - - Memory* memory() const { return memory_; } - - TextureView* FetchTexture( - uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch); - -protected: - virtual Texture* CreateTexture( - uint32_t address, const uint8_t* host_address, - const xenos::xe_gpu_texture_fetch_t& fetch) = 0; - - Memory* memory_; - - // Mapped by guest address. - std::unordered_map textures_; -}; - - -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_TEXTURE_CACHE_H_ diff --git a/src/xenia/gpu/texture_resource.cc b/src/xenia/gpu/texture_resource.cc index 5875e76f3..d063acc56 100644 --- a/src/xenia/gpu/texture_resource.cc +++ b/src/xenia/gpu/texture_resource.cc @@ -9,9 +9,342 @@ #include +#include +#include + using namespace std; using namespace xe; using namespace xe::gpu; using namespace xe::gpu::xenos; + +bool TextureResource::Info::Prepare(const xe_gpu_texture_fetch_t& fetch, + Info& info) { + // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx + // a2xx_sq_surfaceformat + + info.dimension = (TextureDimension)fetch.dimension; + switch (info.dimension) { + case TEXTURE_DIMENSION_1D: + info.width = fetch.size_1d.width; + break; + case TEXTURE_DIMENSION_2D: + info.width = fetch.size_2d.width; + info.height = fetch.size_2d.height; + break; + case TEXTURE_DIMENSION_3D: + case TEXTURE_DIMENSION_CUBE: + info.width = fetch.size_3d.width; + info.height = fetch.size_3d.height; + info.depth = fetch.size_3d.depth; + break; + } + info.block_size = 0; + info.texel_pitch = 0; + info.endianness = (XE_GPU_ENDIAN)fetch.endianness; + info.is_tiled = fetch.tiled; + info.is_compressed = false; + info.input_length = 0; + info.format = DXGI_FORMAT_UNKNOWN; + switch (fetch.format) { + case FMT_8: + switch (fetch.swizzle) { + case XE_GPU_SWIZZLE_RRR1: + info.format = DXGI_FORMAT_R8_UNORM; + break; + case XE_GPU_SWIZZLE_000R: + info.format = DXGI_FORMAT_A8_UNORM; + break; + default: + XELOGW("D3D11: unhandled swizzle for FMT_8"); + info.format = DXGI_FORMAT_A8_UNORM; + break; + } + info.block_size = 1; + info.texel_pitch = 1; + break; + case FMT_1_5_5_5: + switch (fetch.swizzle) { + case XE_GPU_SWIZZLE_BGRA: + info.format = DXGI_FORMAT_B5G5R5A1_UNORM; + break; + default: + XELOGW("D3D11: unhandled swizzle for FMT_1_5_5_5"); + info.format = DXGI_FORMAT_B5G5R5A1_UNORM; + break; + } + info.block_size = 1; + info.texel_pitch = 2; + break; + case FMT_8_8_8_8: + switch (fetch.swizzle) { + case XE_GPU_SWIZZLE_RGBA: + info.format = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case XE_GPU_SWIZZLE_BGRA: + info.format = DXGI_FORMAT_B8G8R8A8_UNORM; + break; + case XE_GPU_SWIZZLE_RGB1: + info.format = DXGI_FORMAT_R8G8B8A8_UNORM; // ? + break; + case XE_GPU_SWIZZLE_BGR1: + info.format = DXGI_FORMAT_B8G8R8X8_UNORM; + break; + default: + XELOGW("D3D11: unhandled swizzle for FMT_8_8_8_8"); + info.format = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + } + info.block_size = 1; + info.texel_pitch = 4; + break; + case FMT_4_4_4_4: + switch (fetch.swizzle) { + case XE_GPU_SWIZZLE_BGRA: + info.format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+ + break; + default: + XELOGW("D3D11: unhandled swizzle for FMT_4_4_4_4"); + info.format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+ + break; + } + info.block_size = 1; + info.texel_pitch = 2; + break; + case FMT_16_16_16_16_FLOAT: + switch (fetch.swizzle) { + case XE_GPU_SWIZZLE_RGBA: + info.format = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + default: + XELOGW("D3D11: unhandled swizzle for FMT_16_16_16_16_FLOAT"); + info.format = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + } + info.block_size = 1; + info.texel_pitch = 8; + break; + case FMT_32_FLOAT: + switch (fetch.swizzle) { + case XE_GPU_SWIZZLE_R111: + info.format = DXGI_FORMAT_R32_FLOAT; + break; + default: + XELOGW("D3D11: unhandled swizzle for FMT_32_FLOAT"); + info.format = DXGI_FORMAT_R32_FLOAT; + break; + } + info.block_size = 1; + info.texel_pitch = 4; + break; + case FMT_DXT1: + info.format = DXGI_FORMAT_BC1_UNORM; + info.block_size = 4; + info.texel_pitch = 8; + info.is_compressed = true; + break; + case FMT_DXT2_3: + case FMT_DXT4_5: + info.format = (fetch.format == FMT_DXT4_5 ? DXGI_FORMAT_BC3_UNORM : DXGI_FORMAT_BC2_UNORM); + info.block_size = 4; + info.texel_pitch = 16; + info.is_compressed = true; + break; + case FMT_1_REVERSE: + case FMT_1: + case FMT_5_6_5: + case FMT_6_5_5: + case FMT_2_10_10_10: + case FMT_8_A: + case FMT_8_B: + case FMT_8_8: + case FMT_Cr_Y1_Cb_Y0: + case FMT_Y1_Cr_Y0_Cb: + case FMT_5_5_5_1: + case FMT_8_8_8_8_A: + case FMT_10_11_11: + case FMT_11_11_10: + case FMT_24_8: + case FMT_24_8_FLOAT: + case FMT_16: + case FMT_16_16: + case FMT_16_16_16_16: + case FMT_16_EXPAND: + case FMT_16_16_EXPAND: + case FMT_16_16_16_16_EXPAND: + case FMT_16_FLOAT: + case FMT_16_16_FLOAT: + case FMT_32: + case FMT_32_32: + case FMT_32_32_32_32: + case FMT_32_32_FLOAT: + case FMT_32_32_32_32_FLOAT: + case FMT_32_AS_8: + case FMT_32_AS_8_8: + case FMT_16_MPEG: + case FMT_16_16_MPEG: + case FMT_8_INTERLACED: + case FMT_32_AS_8_INTERLACED: + case FMT_32_AS_8_8_INTERLACED: + case FMT_16_INTERLACED: + case FMT_16_MPEG_INTERLACED: + case FMT_16_16_MPEG_INTERLACED: + case FMT_DXN: + case FMT_8_8_8_8_AS_16_16_16_16: + case FMT_DXT1_AS_16_16_16_16: + case FMT_DXT2_3_AS_16_16_16_16: + case FMT_DXT4_5_AS_16_16_16_16: + case FMT_2_10_10_10_AS_16_16_16_16: + case FMT_10_11_11_AS_16_16_16_16: + case FMT_11_11_10_AS_16_16_16_16: + case FMT_32_32_32_FLOAT: + case FMT_DXT3A: + case FMT_DXT5A: + case FMT_CTX1: + case FMT_DXT3A_AS_1_1_1_1: + info.format = DXGI_FORMAT_UNKNOWN; + break; + } + + if (info.format == DXGI_FORMAT_UNKNOWN) { + return false; + } + + // Must be called here when we know the format. + switch (info.dimension) { + case TEXTURE_DIMENSION_1D: + info.CalculateTextureSizes1D(fetch); + break; + case TEXTURE_DIMENSION_2D: + info.CalculateTextureSizes2D(fetch); + break; + case TEXTURE_DIMENSION_3D: + // TODO(benvanik): calculate size. + return false; + case TEXTURE_DIMENSION_CUBE: + // TODO(benvanik): calculate size. + return false; + } + return true; +} + +void TextureResource::Info::CalculateTextureSizes1D( + const xe_gpu_texture_fetch_t& fetch) { + // ? + size_1d.width = fetch.size_1d.width; +} + +void TextureResource::Info::CalculateTextureSizes2D( + const xe_gpu_texture_fetch_t& fetch) { + size_2d.logical_width = 1 + fetch.size_2d.width; + size_2d.logical_height = 1 + fetch.size_2d.height; + + size_2d.block_width = size_2d.logical_width / block_size; + size_2d.block_height = size_2d.logical_height / block_size; + + if (!is_compressed) { + // must be 32x32 but also must have a pitch that is a multiple of 256 bytes + uint32_t bytes_per_block = block_size * block_size * texel_pitch; + uint32_t width_multiple = 32; + if (bytes_per_block) { + uint32_t minimum_multiple = 256 / bytes_per_block; + if (width_multiple < minimum_multiple) { + width_multiple = minimum_multiple; + } + } + size_2d.input_width = XEROUNDUP(size_2d.logical_width, width_multiple); + size_2d.input_height = XEROUNDUP(size_2d.logical_height, 32); + size_2d.output_width = size_2d.logical_width; + size_2d.output_height = size_2d.logical_height; + } else { + // must be 128x128 + size_2d.input_width = XEROUNDUP(size_2d.logical_width, 128); + size_2d.input_height = XEROUNDUP(size_2d.logical_height, 128); + size_2d.output_width = XENEXTPOW2(size_2d.logical_width); + size_2d.output_height = XENEXTPOW2(size_2d.logical_height); + } + + size_2d.logical_pitch = (size_2d.logical_width / block_size) * texel_pitch; + size_2d.input_pitch = (size_2d.input_width / block_size) * texel_pitch; + + if (!is_tiled) { + input_length = size_2d.block_height * size_2d.logical_pitch; + } else { + input_length = size_2d.block_height * size_2d.logical_pitch; // ? + } +} + +TextureResource::TextureResource(const MemoryRange& memory_range, + const Info& info) + : PagedResource(memory_range), + info_(info) { +} + +TextureResource::~TextureResource() { +} + +int TextureResource::Prepare() { + if (!handle()) { + if (CreateHandle()) { + XELOGE("Unable to create texture handle"); + return 1; + } + } + + if (!dirtied_) { + return 0; + } + dirtied_ = false; + + // pass dirty regions? + return InvalidateRegion(memory_range_); +} + +void TextureResource::TextureSwap(uint8_t* dest, const uint8_t* src, + uint32_t pitch) const { + // TODO(benvanik): optimize swapping paths. + switch (info_.endianness) { + case XE_GPU_ENDIAN_8IN16: + for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) { + *(uint16_t*)dest = XESWAP16(*(uint16_t*)src); + } + break; + case XE_GPU_ENDIAN_8IN32: // Swap bytes. + for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) { + *(uint32_t*)dest = XESWAP32(*(uint32_t*)src); + } + break; + case XE_GPU_ENDIAN_16IN32: // Swap half words. + for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) { + uint32_t value = *(uint32_t*)src; + *(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16); + } + break; + default: + case XE_GPU_ENDIAN_NONE: + memcpy(dest, src, pitch); + break; + } +} + +// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104 +uint32_t TextureResource::TiledOffset2DOuter(uint32_t y, uint32_t width, + uint32_t log_bpp) const { + uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7); + uint32_t micro = ((y & 6) << 2) << log_bpp; + return macro + + ((micro & ~15) << 1) + + (micro & 15) + + ((y & 8) << (3 + log_bpp)) + + ((y & 1) << 4); +} + +uint32_t TextureResource::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp, + uint32_t base_offset) const { + uint32_t macro = (x >> 5) << (bpp + 7); + uint32_t micro = (x & 7) << bpp; + uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15)); + return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) + + ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); +} diff --git a/src/xenia/gpu/texture_resource.h b/src/xenia/gpu/texture_resource.h index 35f83bcda..57dc63422 100644 --- a/src/xenia/gpu/texture_resource.h +++ b/src/xenia/gpu/texture_resource.h @@ -10,7 +10,7 @@ #ifndef XENIA_GPU_TEXTURE_RESOURCE_H_ #define XENIA_GPU_TEXTURE_RESOURCE_H_ -#include +#include #include // TODO(benvanik): replace DXGI constants with xenia constants. @@ -21,8 +21,85 @@ namespace xe { namespace gpu { -class TextureResource : public Resource { +enum TextureDimension { + TEXTURE_DIMENSION_1D = 0, + TEXTURE_DIMENSION_2D = 1, + TEXTURE_DIMENSION_3D = 2, + TEXTURE_DIMENSION_CUBE = 3, +}; + + +class TextureResource : public PagedResource { public: + struct Info { + TextureDimension dimension; + uint32_t width; + uint32_t height; + uint32_t depth; + uint32_t block_size; + uint32_t texel_pitch; + xenos::XE_GPU_ENDIAN endianness; + bool is_tiled; + bool is_compressed; + uint32_t input_length; + + // TODO(benvanik): replace with our own constants. + DXGI_FORMAT format; + + union { + struct { + uint32_t width; + } size_1d; + struct { + uint32_t logical_width; + uint32_t logical_height; + uint32_t block_width; + uint32_t block_height; + uint32_t input_width; + uint32_t input_height; + uint32_t output_width; + uint32_t output_height; + uint32_t logical_pitch; + uint32_t input_pitch; + } size_2d; + struct { + } size_3d; + struct { + } size_cube; + }; + + static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch, + Info& out_info); + + private: + void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch); + void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch); + }; + + TextureResource(const MemoryRange& memory_range, + const Info& info); + ~TextureResource() override; + + const Info& info() const { return info_; } + + bool Equals(const void* info_ptr, size_t info_length) override { + return info_length == sizeof(Info) && + memcmp(info_ptr, &info_, info_length) == 0; + } + + virtual int Prepare(); + +protected: + virtual int CreateHandle() = 0; + virtual int InvalidateRegion(const MemoryRange& memory_range) = 0; + + void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch) const; + uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width, + uint32_t log_bpp) const; + uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp, + uint32_t base_offset) const; + + Info info_; }; diff --git a/src/xenia/gpu/xenos/registers.h b/src/xenia/gpu/xenos/registers.h deleted file mode 100644 index 39a0d43db..000000000 --- a/src/xenia/gpu/xenos/registers.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_XENOS_REGISTERS_H_ -#define XENIA_GPU_XENOS_REGISTERS_H_ - -#include - - -namespace xe { -namespace gpu { -namespace xenos { - - -static const uint32_t kXEGpuRegisterCount = 0x5003; - - -enum Registers { -#define XE_GPU_REGISTER(index, type, name) \ - XE_GPU_REG_##name = index, -#include -#undef XE_GPU_REGISTER -}; - - -const char* GetRegisterName(uint32_t index); - - -union RegisterValue { - uint32_t u32; - float f32; -}; - - -struct RegisterFile { - RegisterValue values[kXEGpuRegisterCount]; -}; - - -} // namespace xenos -} // namespace gpu -} // namespace xe - - -#endif // XENIA_GPU_XENOS_REGISTERS_H_ diff --git a/src/xenia/gpu/xenos/sources.gypi b/src/xenia/gpu/xenos/sources.gypi index c1f677682..998444938 100644 --- a/src/xenia/gpu/xenos/sources.gypi +++ b/src/xenia/gpu/xenos/sources.gypi @@ -3,8 +3,6 @@ 'sources': [ 'packets.h', 'register_table.inc', - 'registers.cc', - 'registers.h', 'ucode.h', 'ucode_disassembler.cc', 'ucode_disassembler.h',