Initial refactoring of gpu/.

It now runs too fast, and there are ring buffer wrapping issues.
This commit is contained in:
Ben Vanik 2014-06-08 11:23:55 -07:00
parent 4072640a64
commit 295910c3d8
70 changed files with 5923 additions and 6095 deletions

View File

@ -1,42 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/buffer.h>
#include <xenia/gpu/xenos/ucode_disassembler.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Records the location and size of the source data for this buffer. The
// pointer is stored as-is; the data is not copied here.
// hash_ starts at 0 so hash() returns a defined value until a subclass stores
// a real content hash in FetchNew()/FetchDirty().
Buffer::Buffer(const uint8_t* src_ptr, size_t length)
    : src_(src_ptr), length_(length), hash_(0) {
}
// The base class releases nothing; the source pointer is left untouched.
Buffer::~Buffer() = default;
// Forwards the source range to Buffer and keeps a copy of the index
// description in info_.
IndexBuffer::IndexBuffer(
    const IndexBufferInfo& info, const uint8_t* src_ptr, size_t length)
    : Buffer(src_ptr, length), info_(info) {}
// No resources are held at this level.
IndexBuffer::~IndexBuffer() = default;
// Forwards the source range to Buffer and keeps a copy of the vertex
// description in info_.
VertexBuffer::VertexBuffer(
    const VertexBufferInfo& info, const uint8_t* src_ptr, size_t length)
    : Buffer(src_ptr, length), info_(info) {}
// No resources are held at this level.
VertexBuffer::~VertexBuffer() = default;

View File

@ -1,91 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_BUFFER_H_
#define XENIA_GPU_BUFFER_H_
#include <xenia/core.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
// Abstract base for cached GPU buffers. It remembers where the source data
// lives (pointer + length) and a content hash; subclasses implement the
// actual upload in FetchNew()/FetchDirty().
class Buffer {
public:
// Stores |src_ptr| and |length|; the data itself is not copied.
Buffer(const uint8_t* src_ptr, size_t length);
virtual ~Buffer();
// Source pointer passed at construction.
const uint8_t* src() const { return src_; }
// Length passed at construction.
size_t length() const { return length_; }
// Value of hash_; BufferCache compares it against a fresh hash of the source
// data to decide whether the buffer must be refreshed.
uint64_t hash() const { return hash_; }
// Called by BufferCache for a newly created buffer; |hash| is the hash of the
// source data. Returns false on failure.
virtual bool FetchNew(uint64_t hash) = 0;
// Called by BufferCache when a cached buffer's hash no longer matches |hash|.
// Returns false on failure.
virtual bool FetchDirty(uint64_t hash) = 0;
protected:
const uint8_t* src_;
size_t length_;
// Expected to be assigned by subclasses in FetchNew()/FetchDirty().
uint64_t hash_;
};
// Describes an index buffer. All four fields feed the BufferCache lookup key.
struct IndexBufferInfo {
// True when indices are 32 bits wide; otherwise they are treated as 16-bit.
bool index_32bit;
// Number of indices converted on upload.
uint32_t index_count;
// Used by the D3D11 backend as the buffer's ByteWidth.
uint32_t index_size;
// Guest endianness code; the D3D11 backend currently asserts this is 0x2.
uint32_t endianness;
};
// Buffer specialization that carries an IndexBufferInfo. FetchNew()/
// FetchDirty() remain pure virtual, so this class is still abstract.
class IndexBuffer : public Buffer {
public:
IndexBuffer(const IndexBufferInfo& info,
const uint8_t* src_ptr, size_t length);
virtual ~IndexBuffer();
protected:
// Copy of the description passed at construction.
IndexBufferInfo info_;
};
// Layout description for a vertex buffer with up to 16 elements.
// NOTE(review): the *_words fields are presumably counted in 32-bit words -
// confirm against the code that fills this struct.
struct VertexBufferLayout {
uint32_t stride_words;
// Presumably the number of valid entries in elements[] - TODO confirm.
uint32_t element_count;
struct {
uint32_t format;
uint32_t offset_words;
uint32_t size_words;
} elements[16];
};
// Description handed to BufferCache::FetchVertexBuffer(); currently just
// wraps the layout.
struct VertexBufferInfo {
VertexBufferLayout layout;
};
// Buffer specialization that carries a VertexBufferInfo. FetchNew()/
// FetchDirty() remain pure virtual, so this class is still abstract.
class VertexBuffer : public Buffer {
public:
VertexBuffer(const VertexBufferInfo& info,
const uint8_t* src_ptr, size_t length);
virtual ~VertexBuffer();
protected:
// Copy of the description passed at construction.
VertexBufferInfo info_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_BUFFER_H_

View File

@ -1,79 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/buffer_cache.h>
#include <xenia/gpu/buffer.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Both lookup maps start out empty; nothing else to set up.
BufferCache::BufferCache() = default;
// Releases cached buffers through Clear() on destruction.
BufferCache::~BufferCache() { Clear(); }
// Returns the cached index buffer for |info|, creating or refreshing it as
// needed.
//
// The lookup key is derived from |info| only. The data at |src_ptr| is hashed
// separately to decide whether a cached buffer must be re-uploaded:
//   - hit with the same content hash: the cached buffer is returned as-is.
//   - hit with a different hash: FetchDirty() is asked to refresh the buffer.
//   - miss: a buffer is created and FetchNew() performs the first upload.
//
// Returns nullptr if the buffer could not be created or uploaded. Buffers
// stored in the map are deleted by Clear(), so callers must not delete the
// returned pointer.
// NOTE(review): the key ignores |src_ptr|, so different source buffers with an
// identical |info| share one cache entry - confirm this is intended.
IndexBuffer* BufferCache::FetchIndexBuffer(
    const IndexBufferInfo& info,
    const uint8_t* src_ptr, size_t length) {
  size_t key = hash_combine(
      info.endianness, info.index_32bit, info.index_count, info.index_size);
  uint64_t hash = xe_hash64(src_ptr, length);
  auto it = index_buffer_map_.find(key);
  if (it != index_buffer_map_.end()) {
    IndexBuffer* cached = it->second;
    if (hash == cached->hash()) {
      return cached;
    }
    return cached->FetchDirty(hash) ? cached : nullptr;
  }

  IndexBuffer* buffer = CreateIndexBuffer(info, src_ptr, length);
  if (!buffer) {
    // The backend could not create a buffer; there is nothing to cache.
    return nullptr;
  }
  if (!buffer->FetchNew(hash)) {
    // Never cache a buffer whose first upload failed: a later lookup with the
    // same content hash would otherwise hand it out as if it were valid.
    delete buffer;
    return nullptr;
  }
  index_buffer_map_.insert({ key, buffer });
  return buffer;
}
// Returns the cached vertex buffer for the data at |src_ptr|, creating or
// refreshing it as needed.
//
// The lookup key is the source address itself. The data is hashed separately
// to decide whether a cached buffer must be re-uploaded:
//   - hit with the same content hash: the cached buffer is returned as-is.
//   - hit with a different hash: FetchDirty() is asked to refresh the buffer.
//   - miss: a buffer is created and FetchNew() performs the first upload.
//
// Returns nullptr if the buffer could not be created or uploaded. The cache
// keeps the pointer in its map; callers must not delete it.
// NOTE(review): |info| is not part of the key, so the same address fetched
// with a different layout reuses the cached buffer - confirm this is intended.
VertexBuffer* BufferCache::FetchVertexBuffer(
    const VertexBufferInfo& info,
    const uint8_t* src_ptr, size_t length) {
  size_t key = reinterpret_cast<size_t>(src_ptr);
  uint64_t hash = xe_hash64(src_ptr, length);
  auto it = vertex_buffer_map_.find(key);
  if (it != vertex_buffer_map_.end()) {
    VertexBuffer* cached = it->second;
    if (hash == cached->hash()) {
      return cached;
    }
    return cached->FetchDirty(hash) ? cached : nullptr;
  }

  VertexBuffer* buffer = CreateVertexBuffer(info, src_ptr, length);
  if (!buffer) {
    // The backend could not create a buffer; there is nothing to cache.
    return nullptr;
  }
  if (!buffer->FetchNew(hash)) {
    // Never cache a buffer whose first upload failed: a later lookup with the
    // same content hash would otherwise hand it out as if it were valid.
    delete buffer;
    return nullptr;
  }
  vertex_buffer_map_.insert({ key, buffer });
  return buffer;
}
void BufferCache::Clear() {
for (auto it = index_buffer_map_.begin();
it != index_buffer_map_.end(); ++it) {
auto buffer = it->second;
delete buffer;
}
index_buffer_map_.clear();
}

View File

@ -1,55 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_BUFFER_CACHE_H_
#define XENIA_GPU_BUFFER_CACHE_H_
#include <xenia/core.h>
#include <xenia/gpu/buffer.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
// Cache of index and vertex buffers. Backends derive from this class and
// implement the Create*Buffer() factory hooks.
class BufferCache {
public:
BufferCache();
// Calls Clear().
virtual ~BufferCache();
// Looks up (or creates) the index buffer for |info|; the data at |src_ptr| is
// hashed to detect content changes. Returns nullptr on failure.
IndexBuffer* FetchIndexBuffer(
const IndexBufferInfo& info,
const uint8_t* src_ptr, size_t length);
// Looks up (or creates) the vertex buffer keyed by |src_ptr|; the data is
// hashed to detect content changes. Returns nullptr on failure.
VertexBuffer* FetchVertexBuffer(
const VertexBufferInfo& info,
const uint8_t* src_ptr, size_t length);
// Drops cached buffers; see the definition for exactly what is released.
void Clear();
protected:
// Factory hooks invoked on a cache miss; the result is stored in the maps
// below as a raw pointer.
virtual IndexBuffer* CreateIndexBuffer(
const IndexBufferInfo& info,
const uint8_t* src_ptr, size_t length) = 0;
virtual VertexBuffer* CreateVertexBuffer(
const VertexBufferInfo& info,
const uint8_t* src_ptr, size_t length) = 0;
private:
// Keyed by a hash of the IndexBufferInfo fields.
std::unordered_map<uint64_t, IndexBuffer*> index_buffer_map_;
// Keyed by the source pointer value.
std::unordered_map<uint64_t, VertexBuffer*> vertex_buffer_map_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_BUFFER_CACHE_H_

View File

@ -15,3 +15,42 @@ using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Passes the covered memory range on to the PagedResource base.
BufferResource::BufferResource(const MemoryRange& memory_range)
    : PagedResource(memory_range) {}

// Nothing extra to release at this level.
BufferResource::~BufferResource() = default;
// Gets the buffer ready for use: creates the handle on first use and, when
// the resource has been marked dirty, invalidates its whole memory range.
// Returns 1 if the handle could not be created, 0 if there was nothing to
// refresh, and otherwise whatever InvalidateRegion() returns.
int BufferResource::Prepare() {
  // A nonzero result from CreateHandle() is treated as failure.
  if (!handle() && CreateHandle()) {
    XELOGE("Unable to create buffer handle");
    return 1;
  }

  if (dirtied_) {
    // The dirty flag is cleared before the refresh is attempted.
    dirtied_ = false;
    // pass dirty regions?
    return InvalidateRegion(memory_range_);
  }
  return 0;
}
// Stores a copy of |info| and hands the memory range to BufferResource.
IndexBufferResource::IndexBufferResource(
    const MemoryRange& memory_range, const Info& info)
    : BufferResource(memory_range), info_(info) {}

// Nothing extra to release at this level.
IndexBufferResource::~IndexBufferResource() = default;
// Stores a copy of |info| and hands the memory range to BufferResource.
VertexBufferResource::VertexBufferResource(
    const MemoryRange& memory_range, const Info& info)
    : BufferResource(memory_range), info_(info) {}

// Nothing extra to release at this level.
VertexBufferResource::~VertexBufferResource() = default;

View File

@ -10,7 +10,8 @@
#ifndef XENIA_GPU_BUFFER_RESOURCE_H_
#define XENIA_GPU_BUFFER_RESOURCE_H_
#include <xenia/core.h>
#include <xenia/gpu/resource.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
@ -18,8 +19,76 @@ namespace xe {
namespace gpu {
class BufferResource : public Resource {
class BufferResource : public PagedResource {
public:
BufferResource(const MemoryRange& memory_range);
~BufferResource() override;
virtual int Prepare();
protected:
virtual int CreateHandle() = 0;
virtual int InvalidateRegion(const MemoryRange& memory_range) = 0;
};
// Width of the indices in an index buffer. The command processor picks one of
// these from the index-size bit of a draw packet.
enum IndexFormat {
INDEX_FORMAT_16BIT = 0,
INDEX_FORMAT_32BIT = 1,
};
// Buffer resource holding index data, together with the format and endianness
// it was created with.
class IndexBufferResource : public BufferResource {
public:
  // Creation parameters; also what Equals() compares against.
  struct Info {
    IndexFormat format;
    xenos::XE_GPU_ENDIAN endianness;
  };

  IndexBufferResource(const MemoryRange& memory_range,
                      const Info& info);
  ~IndexBufferResource() override;

  const Info& info() const { return info_; }

  // True when |info_ptr| points at exactly sizeof(Info) bytes that are
  // bytewise identical to this resource's Info.
  bool Equals(const void* info_ptr, size_t info_length) override {
    if (info_length != sizeof(Info)) {
      return false;
    }
    return memcmp(info_ptr, &info_, info_length) == 0;
  }

protected:
  Info info_;
};
// Buffer resource holding vertex data, together with the element declaration
// it was created with.
class VertexBufferResource : public BufferResource {
public:
  // One vertex element: the fetch instruction it came from plus its decoded
  // format, position and size.
  struct DeclElement {
    xenos::instr_fetch_vtx_t vtx_fetch;
    uint32_t format;
    uint32_t offset_words;
    uint32_t size_words;
    bool is_signed;
    bool is_normalized;
  };

  // Creation parameters; also what Equals() compares against.
  struct Info {
    uint32_t stride_words;
    uint32_t element_count;
    DeclElement elements[16];
  };

  VertexBufferResource(const MemoryRange& memory_range,
                       const Info& info);
  ~VertexBufferResource() override;

  const Info& info() const { return info_; }

  // True when |info_ptr| points at exactly sizeof(Info) bytes that are
  // bytewise identical to this resource's Info.
  // NOTE(review): a bytewise compare also covers any padding bytes inside
  // Info/DeclElement; presumably callers zero-fill the struct before
  // populating it - confirm.
  bool Equals(const void* info_ptr, size_t info_length) override {
    if (info_length != sizeof(Info)) {
      return false;
    }
    return memcmp(info_ptr, &info_, info_length) == 0;
  }

protected:
  Info info_;
};

View File

@ -9,9 +9,782 @@
#include <xenia/gpu/command_processor.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/graphics_driver.h>
#include <xenia/gpu/graphics_system.h>
#include <xenia/gpu/xenos/packets.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Command processor trace logging: forwards to XELOGGPU only when
// FLAGS_trace_ring_buffer is set.
// NOTE: this expands to an unbraced `if`, so do not use it as the sole
// statement directly before an `else`.
#define XETRACECP(fmt, ...) if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__)
// Sets up an idle command processor: no driver attached, no ring buffer
// configured, all ring indices at zero. Also creates the auto-reset event
// that UpdateWritePointer() signals and captures the performance counter
// value used as the base for QueryTime().
CommandProcessor::CommandProcessor(
    GraphicsSystem* graphics_system, Memory* memory)
    : memory_(memory),
      graphics_system_(graphics_system),
      driver_(0),
      counter_(0),
      primary_buffer_ptr_(0),
      primary_buffer_size_(0),
      read_ptr_index_(0),
      read_ptr_update_freq_(0),
      read_ptr_writeback_ptr_(0),
      write_ptr_index_(0),
      write_ptr_max_index_(0) {
  write_ptr_index_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);

  LARGE_INTEGER now;
  QueryPerformanceCounter(&now);
  time_base_ = now.QuadPart;
}

// Signals the write pointer event one last time and then closes it.
CommandProcessor::~CommandProcessor() {
  SetEvent(write_ptr_index_event_);
  CloseHandle(write_ptr_index_event_);
}
uint64_t CommandProcessor::QueryTime() {
LARGE_INTEGER perf_counter;
QueryPerformanceCounter(&perf_counter);
return perf_counter.QuadPart - time_base_;
}
// Attaches |driver| and configures the primary ring buffer located at |ptr|.
// The read index is reset to the start of the ring, and the driver is told
// which high address bits (above the low 29) to use for address translation.
void CommandProcessor::Initialize(GraphicsDriver* driver,
                                  uint32_t ptr, uint32_t page_count) {
  driver_ = driver;
  primary_buffer_ptr_ = ptr;
  read_ptr_index_ = 0;

  // Not sure this is correct, but it's a way to take the page_count back to
  // the number of bytes allocated by the physical alloc.
  primary_buffer_size_ = 1 << (0x1C - page_count - 1);

  // Tell the driver what to use for translation.
  const uint32_t translation_base = primary_buffer_ptr_ & ~0x1FFFFFFF;
  driver_->set_address_translation(translation_base);
}
// Enables write-back of the ring read pointer to memory.
//
// |ptr| is RB_RPTR_ADDR (CP_RB_RPTR_ADDR, 0x70C): the address the read
// pointer is written back to, combined here with the high address bits of the
// primary buffer.
// |block_size| is RB_BLKSZ (CP_RB_CNTL, 0x704): log2 of the number of
// quadwords read between updates of the read pointer. The stored update
// frequency is 2^block_size / 4.
void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
                                                  uint32_t block_size) {
  read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr;

  // 2^block_size / 4 == 2^(block_size - 2), which truncates to 0 for
  // block_size < 2. Integer shifts avoid the floating-point pow() round trip,
  // whose conversion back to uint32_t is undefined once the power no longer
  // fits in 32 bits.
  if (block_size < 2) {
    read_ptr_update_freq_ = 0;
  } else {
    const uint32_t shift = block_size - 2;
    // Saturate rather than shift out of range for absurdly large values.
    read_ptr_update_freq_ = shift < 32 ? (1u << shift) : 0xFFFFFFFFu;
  }
}
// Publishes a new ring write index and wakes Pump() through the write
// pointer event. The highest index seen so far is tracked alongside it.
void CommandProcessor::UpdateWritePointer(uint32_t value) {
  const uint32_t highest_index = MAX(write_ptr_max_index_, value);
  write_ptr_max_index_ = highest_index;
  write_ptr_index_ = value;
  SetEvent(write_ptr_index_event_);
}
void CommandProcessor::Pump() {
uint8_t* p = memory_->membase();
while (write_ptr_index_ == 0xBAADF00D ||
read_ptr_index_ == write_ptr_index_) {
// Check if the pointer has moved.
// We wait a short bit here to yield time. Since we are also running the
// main window display we don't want to pause too long, though.
// YieldProcessor();
const int wait_time_ms = 1;
if (WaitForSingleObject(write_ptr_index_event_,
wait_time_ms) == WAIT_TIMEOUT) {
return;
}
}
// Bring local so we don't have to worry about them changing out from under
// us.
uint32_t write_ptr_index = write_ptr_index_;
uint32_t write_ptr_max_index = write_ptr_max_index_;
if (read_ptr_index_ == write_ptr_index) {
return;
}
// Process the new commands.
XETRACECP("Command processor thread work");
// Execute. Note that we handle wraparound transparently.
ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
read_ptr_index_ = write_ptr_index;
// TODO(benvanik): use read_ptr_update_freq_ and only issue after moving
// that many indices.
if (read_ptr_writeback_ptr_) {
XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_);
}
}
// Executes the packets in the primary ring buffer from word index
// |start_index| up to (not including) |end_index|.
// Packet execution wraps within the ring through the pointer mask, so the
// loop runs until the running pointer lands exactly on the end address.
void CommandProcessor::ExecutePrimaryBuffer(
    uint32_t start_index, uint32_t end_index) {
  SCOPE_profile_cpu_f("gpu");

  // Adjust pointer base: keep the high bits of the ring base and only let the
  // low 29 bits vary with the index.
  const uint32_t high_bits = primary_buffer_ptr_ & ~0x1FFFFFFF;
  const uint32_t start_ptr =
      high_bits | ((primary_buffer_ptr_ + start_index * 4) & 0x1FFFFFFF);
  const uint32_t end_ptr =
      high_bits | ((primary_buffer_ptr_ + end_index * 4) & 0x1FFFFFFF);

  XETRACECP("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)",
            start_ptr, start_index, end_index);

  // Execute commands!
  PacketArgs args;
  args.ptr = start_ptr;
  args.base_ptr = primary_buffer_ptr_;
  args.max_address = primary_buffer_ptr_ + primary_buffer_size_;
  args.ptr_mask = (primary_buffer_size_ / 4) - 1;

  uint32_t words_executed = 0;
  while (args.ptr != end_ptr) {
    words_executed += ExecutePacket(args);
    XEASSERT(args.ptr < args.max_address);
  }
  // The word count can only be cross-checked when the range did not wrap.
  if (end_index > start_index) {
    XEASSERT(words_executed == (end_index - start_index));
  }

  XETRACECP(" ExecutePrimaryBuffer End");
}
// Executes a linear (non-wrapping) command list of |length| words starting at
// |ptr|. The pointer mask is zero, so packet reads never wrap.
void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
  XETRACECP("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length);

  // Execute commands!
  PacketArgs args;
  args.ptr = ptr;
  args.base_ptr = ptr;
  args.max_address = ptr + length * 4;
  args.ptr_mask = 0;

  uint32_t words_executed = 0;
  while (words_executed < length) {
    words_executed += ExecutePacket(args);
    XEASSERT(words_executed <= length);
  }

  XETRACECP(" ExecuteIndirectBuffer End");
}
// Traces |count| payload words that follow the packet header. Relies on
// |packet_ptr| and |packet_base| being defined in the calling scope.
// The words are read linearly from |packet_base|; no ring wraparound is
// applied here.
#define LOG_DATA(count) \
for (uint32_t __m = 0; __m < count; __m++) { \
XETRACECP("[%.8X] %.8X", \
packet_ptr + (1 + __m) * 4, \
XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
}
// Moves |args.ptr| forward by |n| 32-bit words. When a pointer mask is set
// (ring buffer), the word offset from |args.base_ptr| is wrapped with the
// mask; with a zero mask the pointer simply advances linearly.
void CommandProcessor::AdvancePtr(PacketArgs& args, uint32_t n) {
  args.ptr += n * 4;
  if (!args.ptr_mask) {
    return;
  }
  const uint32_t word_offset = (args.ptr - args.base_ptr) / 4;
  args.ptr = args.base_ptr + (word_offset & args.ptr_mask) * 4;
}
// Helpers for walking the packet stream. They rely on |args| (PacketArgs) and
// |p| (memory base pointer) being defined in the calling scope.
// ADVANCE_PTR: skips |n| words, wrapping if the args carry a pointer mask.
#define ADVANCE_PTR(n) AdvancePtr(args, n)
// PEEK_PTR: reads the big-endian word at the current pointer without moving.
#define PEEK_PTR() \
XEGETUINT32BE(p + args.ptr)
// READ_PTR: reads the current word and then advances by one word.
// NOTE: expands to two statements, so it is only safe as the full right-hand
// side of a declaration/assignment statement (e.g. `uint32_t x = READ_PTR();`),
// never inside a larger expression or as the body of an unbraced if/loop.
#define READ_PTR() \
XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1);
// Decodes and executes the single packet at |args.ptr|, advancing |args.ptr|
// past the words it consumes (wrapping when |args.ptr_mask| is set).
//
// The top two bits of the header word select the packet type:
//   0 - write a run of registers,
//   1 - write two registers,
//   2 - padding / no-op,
//   3 - opcode-based command carrying |count| payload words.
// An all-zero header word is treated as a one-word no-op.
//
// Returns the packet length in 32-bit words, header included. Callers use the
// returned length for bookkeeping while the actual stream position lives in
// |args.ptr|, so every path must consume exactly the words it reports.
uint32_t CommandProcessor::ExecutePacket(PacketArgs& args) {
  uint8_t* p = memory_->membase();
  RegisterFile* regs = driver_->register_file();

  uint32_t packet_ptr = args.ptr;
  const uint8_t* packet_base = p + packet_ptr;
  const uint32_t packet = PEEK_PTR();
  ADVANCE_PTR(1);
  const uint32_t packet_type = packet >> 30;
  if (packet == 0) {
    XETRACECP("[%.8X] Packet(%.8X): 0?",
              packet_ptr, packet);
    return 1;
  }

  switch (packet_type) {
  case 0x00:
    {
      // Type-0 packet.
      // Write count registers in sequence to the registers starting at
      // (base_index << 2).
      XETRACECP("[%.8X] Packet(%.8X): set registers:",
                packet_ptr, packet);
      uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
      uint32_t base_index = (packet & 0x7FFF);
      // When set, every data word is written to the same register.
      uint32_t write_one_reg = (packet >> 15) & 0x1;
      for (uint32_t m = 0; m < count; m++) {
        uint32_t reg_data = PEEK_PTR();
        uint32_t target_index = write_one_reg ? base_index : base_index + m;
        const char* reg_name = regs->GetRegisterName(target_index);
        XETRACECP("[%.8X] %.8X -> %.4X %s",
                  args.ptr,
                  reg_data, target_index, reg_name ? reg_name : "");
        ADVANCE_PTR(1);
        WriteRegister(packet_ptr, target_index, reg_data);
      }
      return 1 + count;
    }
    break;
  case 0x01:
    {
      // Type-1 packet.
      // Contains two registers of data. Type-0 should be more common.
      XETRACECP("[%.8X] Packet(%.8X): set registers:",
                packet_ptr, packet);
      uint32_t reg_index_1 = packet & 0x7FF;
      uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
      uint32_t reg_ptr_1 = args.ptr;
      uint32_t reg_data_1 = READ_PTR();
      uint32_t reg_ptr_2 = args.ptr;
      uint32_t reg_data_2 = READ_PTR();
      const char* reg_name_1 = regs->GetRegisterName(reg_index_1);
      const char* reg_name_2 = regs->GetRegisterName(reg_index_2);
      XETRACECP("[%.8X] %.8X -> %.4X %s",
                reg_ptr_1,
                reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
      XETRACECP("[%.8X] %.8X -> %.4X %s",
                reg_ptr_2,
                reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
      WriteRegister(packet_ptr, reg_index_1, reg_data_1);
      WriteRegister(packet_ptr, reg_index_2, reg_data_2);
      return 1 + 2;
    }
    break;
  case 0x02:
    // Type-2 packet.
    // No-op. Do nothing.
    XETRACECP("[%.8X] Packet(%.8X): padding",
              packet_ptr, packet);
    return 1;
  case 0x03:
    {
      // Type-3 packet.
      uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
      uint32_t opcode = (packet >> 8) & 0x7F;
      // & 1 == predicate, maybe?

      switch (opcode) {
      case PM4_ME_INIT:
        // initialize CP's micro-engine
        XETRACECP("[%.8X] Packet(%.8X): PM4_ME_INIT",
                  packet_ptr, packet);
        LOG_DATA(count);
        ADVANCE_PTR(count);
        break;

      case PM4_NOP:
        // skip N 32-bit words to get to the next packet
        // No-op, ignore some data.
        XETRACECP("[%.8X] Packet(%.8X): PM4_NOP",
                  packet_ptr, packet);
        LOG_DATA(count);
        ADVANCE_PTR(count);
        break;

      case PM4_INTERRUPT:
        // generate interrupt from the command stream
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t cpu_mask = READ_PTR();
          // One callback per CPU bit set in the mask (bits 0-5).
          for (int n = 0; n < 6; n++) {
            if (cpu_mask & (1 << n)) {
              graphics_system_->DispatchInterruptCallback(1, n);
            }
          }
        }
        break;

      case PM4_INDIRECT_BUFFER:
        // indirect buffer dispatch
        {
          uint32_t list_ptr = READ_PTR();
          uint32_t list_length = READ_PTR();
          XETRACECP("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
                    packet_ptr, packet, list_ptr, list_length);
          ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length);
        }
        break;

      case PM4_WAIT_REG_MEM:
        // wait until a register or memory location is a specific value
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t wait_info = READ_PTR();
          uint32_t poll_reg_addr = READ_PTR();
          uint32_t ref = READ_PTR();
          uint32_t mask = READ_PTR();
          uint32_t wait = READ_PTR();
          bool matched = false;
          // Poll until the comparison selected by the low bits of wait_info
          // succeeds.
          do {
            uint32_t value;
            if (wait_info & 0x10) {
              // Memory. The low two address bits carry the endianness.
              XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
              poll_reg_addr &= ~0x3;
              value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
              value = GpuSwap(value, endianness);
            } else {
              // Register.
              XEASSERT(poll_reg_addr < RegisterFile::kRegisterCount);
              if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
                // Waiting for coherency. We should have all the info we need
                // now (base+size+mode), so kick it off.
                MakeCoherent();
              }
              value = regs->values[poll_reg_addr].u32;
            }
            switch (wait_info & 0x7) {
            case 0x0:  // Never.
              matched = false;
              break;
            case 0x1:  // Less than reference.
              matched = (value & mask) < ref;
              break;
            case 0x2:  // Less than or equal to reference.
              matched = (value & mask) <= ref;
              break;
            case 0x3:  // Equal to reference.
              matched = (value & mask) == ref;
              break;
            case 0x4:  // Not equal to reference.
              matched = (value & mask) != ref;
              break;
            case 0x5:  // Greater than or equal to reference.
              matched = (value & mask) >= ref;
              break;
            case 0x6:  // Greater than reference.
              matched = (value & mask) > ref;
              break;
            case 0x7:  // Always
              matched = true;
              break;
            }
            if (!matched) {
              // Wait.
              if (wait >= 0x100) {
                Sleep(wait / 0x100);
              } else {
                SwitchToThread();
              }
            }
          } while (!matched);
        }
        break;

      case PM4_REG_RMW:
        // register read/modify/write
        // ? (used during shader upload and edram setup)
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_REG_RMW",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t rmw_info = READ_PTR();
          uint32_t and_mask = READ_PTR();
          uint32_t or_mask = READ_PTR();
          uint32_t value = regs->values[rmw_info & 0x1FFF].u32;
          // Bits 30/31 of rmw_info select whether each operand is an
          // immediate or the index of a register to read it from.
          if ((rmw_info >> 30) & 0x1) {
            // | reg
            value |= regs->values[or_mask & 0x1FFF].u32;
          } else {
            // | imm
            value |= or_mask;
          }
          if ((rmw_info >> 31) & 0x1) {
            // & reg
            value &= regs->values[and_mask & 0x1FFF].u32;
          } else {
            // & imm
            value &= and_mask;
          }
          WriteRegister(packet_ptr, rmw_info & 0x1FFF, value);
        }
        break;

      case PM4_COND_WRITE:
        // conditional write to memory or register
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_COND_WRITE",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t wait_info = READ_PTR();
          uint32_t poll_reg_addr = READ_PTR();
          uint32_t ref = READ_PTR();
          uint32_t mask = READ_PTR();
          uint32_t write_reg_addr = READ_PTR();
          uint32_t write_data = READ_PTR();
          uint32_t value;
          if (wait_info & 0x10) {
            // Memory. The low two address bits carry the endianness.
            XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
            poll_reg_addr &= ~0x3;
            value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
            value = GpuSwap(value, endianness);
          } else {
            // Register.
            XEASSERT(poll_reg_addr < RegisterFile::kRegisterCount);
            value = regs->values[poll_reg_addr].u32;
          }
          bool matched = false;
          switch (wait_info & 0x7) {
          case 0x0:  // Never.
            matched = false;
            break;
          case 0x1:  // Less than reference.
            matched = (value & mask) < ref;
            break;
          case 0x2:  // Less than or equal to reference.
            matched = (value & mask) <= ref;
            break;
          case 0x3:  // Equal to reference.
            matched = (value & mask) == ref;
            break;
          case 0x4:  // Not equal to reference.
            matched = (value & mask) != ref;
            break;
          case 0x5:  // Greater than or equal to reference.
            matched = (value & mask) >= ref;
            break;
          case 0x6:  // Greater than reference.
            matched = (value & mask) > ref;
            break;
          case 0x7:  // Always
            matched = true;
            break;
          }
          if (matched) {
            // Write.
            if (wait_info & 0x100) {
              // Memory.
              XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3);
              write_reg_addr &= ~0x3;
              write_data = GpuSwap(write_data, endianness);
              XESETUINT32LE(p + GpuToCpu(packet_ptr, write_reg_addr),
                            write_data);
            } else {
              // Register.
              WriteRegister(packet_ptr, write_reg_addr, write_data);
            }
          }
        }
        break;

      case PM4_EVENT_WRITE:
        // generate an event that creates a write to memory when completed
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t initiator = READ_PTR();
          if (count == 1) {
            // Just an event flag? Where does this write?
          } else {
            // Write to an address.
            XEASSERTALWAYS();
            ADVANCE_PTR(count - 1);
          }
        }
        break;

      case PM4_EVENT_WRITE_SHD:
        // generate a VS|PS_done event
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t initiator = READ_PTR();
          uint32_t address = READ_PTR();
          uint32_t value = READ_PTR();
          // Writeback initiator.
          WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR,
                        initiator & 0x1F);
          uint32_t data_value;
          if ((initiator >> 31) & 0x1) {
            // Write counter (GPU vblank counter?).
            data_value = counter_;
          } else {
            // Write value.
            data_value = value;
          }
          // The low two address bits carry the endianness.
          XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3);
          address &= ~0x3;
          data_value = GpuSwap(data_value, endianness);
          XESETUINT32LE(p + GpuToCpu(address), data_value);
        }
        break;

      case PM4_DRAW_INDX:
        // initiate fetch of index buffer and draw
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
                    packet_ptr, packet);
          LOG_DATA(count);
          // d0 = viz query info
          uint32_t d0 = READ_PTR();
          uint32_t d1 = READ_PTR();
          uint32_t index_count = d1 >> 16;
          uint32_t prim_type = d1 & 0x3F;
          uint32_t src_sel = (d1 >> 6) & 0x3;
          // PrepareDraw() returning zero means the draw can go ahead.
          if (!driver_->PrepareDraw(draw_command_)) {
            draw_command_.prim_type = (XE_GPU_PRIMITIVE_TYPE)prim_type;
            draw_command_.start_index = 0;
            draw_command_.index_count = index_count;
            draw_command_.base_vertex = 0;
            if (src_sel == 0x0) {
              // Indexed draw.
              // TODO(benvanik): detect subregions of larger index buffers!
              uint32_t index_base = READ_PTR();
              uint32_t index_size = READ_PTR();
              uint32_t endianness = index_size >> 29;
              index_size &= 0x00FFFFFF;
              bool index_32bit = (d1 >> 11) & 0x1;
              // Convert the index count into a byte size.
              index_size *= index_32bit ? 4 : 2;
              driver_->PrepareDrawIndexBuffer(
                  draw_command_,
                  index_base, index_size,
                  (XE_GPU_ENDIAN)endianness,
                  index_32bit ? INDEX_FORMAT_32BIT : INDEX_FORMAT_16BIT);
            } else if (src_sel == 0x2) {
              // Auto draw.
              draw_command_.index_buffer = nullptr;
            } else {
              // Unknown source select.
              XEASSERTALWAYS();
            }
            driver_->Draw(draw_command_);
          } else {
            // Draw skipped; still consume the index base/size words.
            if (src_sel == 0x0) {
              ADVANCE_PTR(2);  // skip
            }
          }
        }
        break;

      case PM4_DRAW_INDX_2:
        // draw using supplied indices in packet
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t d0 = READ_PTR();
          uint32_t index_count = d0 >> 16;
          uint32_t prim_type = d0 & 0x3F;
          uint32_t src_sel = (d0 >> 6) & 0x3;
          XEASSERT(src_sel == 0x2);  // 'SrcSel=AutoIndex'
          if (!driver_->PrepareDraw(draw_command_)) {
            draw_command_.prim_type = (XE_GPU_PRIMITIVE_TYPE)prim_type;
            draw_command_.start_index = 0;
            draw_command_.index_count = index_count;
            draw_command_.base_vertex = 0;
            draw_command_.index_buffer = nullptr;
            driver_->Draw(draw_command_);
          }
        }
        break;

      case PM4_SET_CONSTANT:
        // load constant into chip and to memory
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_CONSTANT",
                    packet_ptr, packet);
          // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
          //                                     reg - 0x2000
          uint32_t offset_type = READ_PTR();
          uint32_t index = offset_type & 0x7FF;
          uint32_t type = (offset_type >> 16) & 0xFF;
          switch (type) {
          case 0x4:  // REGISTER
            index += 0x2000;  // registers
            for (uint32_t n = 0; n < count - 1; n++, index++) {
              uint32_t data = READ_PTR();
              const char* reg_name = regs->GetRegisterName(index);
              XETRACECP("[%.8X] %.8X -> %.4X %s",
                        packet_ptr + (1 + n) * 4,
                        data, index, reg_name ? reg_name : "");
              WriteRegister(packet_ptr, index, data);
            }
            break;
          default:
            XEASSERTALWAYS();
            // Unknown constant type: still consume the remaining payload so
            // the stream position matches the 1 + count words reported below
            // (offset_type was the first of the count payload words).
            ADVANCE_PTR(count - 1);
            break;
          }
        }
        break;

      case PM4_LOAD_ALU_CONSTANT:
        // load constants from memory
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT",
                    packet_ptr, packet);
          uint32_t address = READ_PTR();
          address &= 0x3FFFFFFF;
          uint32_t offset_type = READ_PTR();
          uint32_t index = offset_type & 0x7FF;
          uint32_t size = READ_PTR();
          size &= 0xFFF;
          index += 0x4000;  // alu constants
          for (uint32_t n = 0; n < size; n++, index++) {
            uint32_t data = XEGETUINT32BE(
                p + GpuToCpu(packet_ptr, address + n * 4));
            const char* reg_name = regs->GetRegisterName(index);
            XETRACECP("[%.8X] %.8X -> %.4X %s",
                      packet_ptr,
                      data, index, reg_name ? reg_name : "");
            WriteRegister(packet_ptr, index, data);
          }
        }
        break;

      case PM4_IM_LOAD:
        // load sequencer instruction memory (pointer-based)
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t addr_type = READ_PTR();
          // The low two bits select the shader type; the rest is the address.
          uint32_t type = addr_type & 0x3;
          uint32_t addr = addr_type & ~0x3;
          uint32_t start_size = READ_PTR();
          uint32_t start = start_size >> 16;
          uint32_t size = start_size & 0xFFFF;  // dwords
          XEASSERT(start == 0);
          driver_->LoadShader((XE_GPU_SHADER_TYPE)type,
                              GpuToCpu(packet_ptr, addr), size * 4, start);
        }
        break;

      case PM4_IM_LOAD_IMMEDIATE:
        // load sequencer instruction memory (code embedded in packet)
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t type = READ_PTR();
          uint32_t start_size = READ_PTR();
          uint32_t start = start_size >> 16;
          uint32_t size = start_size & 0xFFFF;  // dwords
          XEASSERT(start == 0);
          // TODO(benvanik): figure out if this could wrap.
          XEASSERT(args.ptr + size * 4 < args.max_address);
          driver_->LoadShader((XE_GPU_SHADER_TYPE)type,
                              args.ptr, size * 4, start);
          ADVANCE_PTR(size);
        }
        break;

      case PM4_INVALIDATE_STATE:
        // selective invalidation of state pointers
        {
          XETRACECP("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
                    packet_ptr, packet);
          LOG_DATA(count);
          uint32_t mask = READ_PTR();
          //driver_->InvalidateState(mask);
        }
        break;

      case PM4_SET_BIN_MASK_LO:
        {
          uint32_t value = READ_PTR();
          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X",
                    packet_ptr, packet, value);
        }
        break;
      case PM4_SET_BIN_MASK_HI:
        {
          uint32_t value = READ_PTR();
          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X",
                    packet_ptr, packet, value);
        }
        break;
      case PM4_SET_BIN_SELECT_LO:
        {
          uint32_t value = READ_PTR();
          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X",
                    packet_ptr, packet, value);
        }
        break;
      case PM4_SET_BIN_SELECT_HI:
        {
          uint32_t value = READ_PTR();
          XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X",
                    packet_ptr, packet, value);
        }
        break;

      // Ignored packets - useful if breaking on the default handler below.
      case 0x50:  // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000
        XETRACECP("[%.8X] Packet(%.8X): unknown!",
                  packet_ptr, packet);
        LOG_DATA(count);
        ADVANCE_PTR(count);
        break;

      default:
        XETRACECP("[%.8X] Packet(%.8X): unknown!",
                  packet_ptr, packet);
        LOG_DATA(count);
        ADVANCE_PTR(count);
        break;
      }

      return 1 + count;
    }
    break;
  }

  // packet_type is a two-bit value, so every case above returns.
  return 0;
}
// Stores |value| into register |index| and applies two side effects:
//   - writes to COHER_STATUS_HOST also set the top (dirty) bit;
//   - writes to a scratch register are mirrored to memory, big-endian, when
//     the matching bit in SCRATCH_UMSK is set.
void CommandProcessor::WriteRegister(
    uint32_t packet_ptr, uint32_t index, uint32_t value) {
  RegisterFile* regs = driver_->register_file();
  XEASSERT(index < RegisterFile::kRegisterCount);
  regs->values[index].u32 = value;

  // If this is a COHER register, set the dirty flag.
  // This will block the command processor the next time it WAIT_MEM_REGs and
  // allow us to synchronize the memory.
  if (index == XE_GPU_REG_COHER_STATUS_HOST) {
    regs->values[index].u32 |= 0x80000000ul;
  }

  // Scratch register writeback.
  const bool is_scratch_reg =
      index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7;
  if (!is_scratch_reg) {
    return;
  }
  const uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
  const bool writeback_enabled =
      ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) != 0;
  if (!writeback_enabled) {
    return;
  }

  // Enabled - write to address.
  uint8_t* membase = memory_->membase();
  const uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
  const uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
  XESETUINT32BE(membase + GpuToCpu(primary_buffer_ptr_, mem_addr), value);
}
// Synchronizes the memory range described by the COHER_BASE_HOST /
// COHER_SIZE_HOST registers with the resource cache, then clears the top
// (dirty) bit of COHER_STATUS_HOST.
void CommandProcessor::MakeCoherent() {
  RegisterFile* regs = driver_->register_file();
  auto coher_status = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32;
  auto coher_base = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
  auto coher_size = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;

  // Status host often has 0x01000000 or 0x03000000.
  // This is likely toggling VC (vertex cache) or TC (texture cache).
  // Or, it also has a direction in here maybe - there is probably
  // some way to check for dest coherency (what all the COHER_DEST_BASE_*
  // registers are for).

  // TODO(benvanik): notify resource cache of base->size and type.
  XETRACECP("Make %.8X -> %.8X (%db) coherent",
            coher_base, coher_base + coher_size, coher_size);
  driver_->resource_cache()->SyncRange(coher_base, coher_size);

  // Mark coherent. The status value was sampled before the sync above.
  regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 =
      coher_status & ~0x80000000ul;
}

View File

@ -11,15 +11,70 @@
#define XENIA_GPU_COMMAND_PROCESSOR_H_
#include <xenia/core.h>
#include <xenia/gpu/draw_command.h>
#include <xenia/gpu/register_file.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
class GraphicsDriver;
class GraphicsSystem;
// Reads packets from the guest ring buffer (and indirect buffers), applies
// register writes and forwards draws/shader loads to the GraphicsDriver.
class CommandProcessor {
public:
CommandProcessor(GraphicsSystem* graphics_system, Memory* memory);
virtual ~CommandProcessor();
Memory* memory() const { return memory_; }
// Performance counter ticks elapsed since construction.
uint64_t QueryTime();
// Counter written back by EVENT_WRITE_SHD packets that request it.
uint32_t counter() const { return counter_; }
void increment_counter() { counter_++; }
// Attaches the driver and configures the primary ring buffer.
void Initialize(GraphicsDriver* driver, uint32_t ptr, uint32_t page_count);
// Enables write-back of the read index to memory at |ptr|.
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
// Publishes a new write index and signals the event Pump() waits on.
void UpdateWritePointer(uint32_t value);
// Executes any packets between the read and write indices; returns after a
// short wait if there is nothing to do.
void Pump();
private:
// Cursor state for walking a packet stream.
typedef struct {
// Current read address.
uint32_t ptr;
// Start address of the buffer being walked.
uint32_t base_ptr;
// One past the last address of the buffer.
uint32_t max_address;
// Word-index mask used to wrap within a ring buffer; 0 disables wrapping.
uint32_t ptr_mask;
} PacketArgs;
void AdvancePtr(PacketArgs& args, uint32_t n);
void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
// Executes one packet; returns its length in words including the header.
uint32_t ExecutePacket(PacketArgs& args);
void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
void MakeCoherent();
Memory* memory_;
GraphicsSystem* graphics_system_;
GraphicsDriver* driver_;
// Performance counter value captured at construction.
uint64_t time_base_;
uint32_t counter_;
uint32_t primary_buffer_ptr_;
uint32_t primary_buffer_size_;
uint32_t read_ptr_index_;
uint32_t read_ptr_update_freq_;
// Address the read index is written back to; 0 means write-back is disabled.
uint32_t read_ptr_writeback_ptr_;
// Signalled by UpdateWritePointer(), waited on by Pump().
HANDLE write_ptr_index_event_;
// Written by UpdateWritePointer() and read by Pump().
// NOTE(review): volatile alone does not synchronize threads - confirm the
// intended threading model for these two fields.
volatile uint32_t write_ptr_index_;
volatile uint32_t write_ptr_max_index_;
// Reused for every draw packet.
DrawCommand draw_command_;
};

View File

@ -1,150 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_buffer.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_buffer_cache.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
// Forwards the index buffer description and source bytes to IndexBuffer and
// remembers the owning cache. No D3D11 buffer exists yet: handle_ starts null
// and is filled in by FetchNew().
D3D11IndexBuffer::D3D11IndexBuffer(D3D11BufferCache* buffer_cache,
                                   const IndexBufferInfo& info,
                                   const uint8_t* src_ptr,
                                   size_t length)
    : IndexBuffer(info, src_ptr, length),
      buffer_cache_(buffer_cache),
      handle_(nullptr) {}
// Drops this object's reference to the D3D11 buffer in handle_.
// NOTE(review): XESAFERELEASE is defined elsewhere; handle_ may still be null
// here if FetchNew() was never called, so it presumably tolerates null -
// confirm.
D3D11IndexBuffer::~D3D11IndexBuffer() {
XESAFERELEASE(handle_);
}
bool D3D11IndexBuffer::FetchNew(uint64_t hash) {
hash_ = hash;
D3D11_BUFFER_DESC buffer_desc;
xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
buffer_desc.ByteWidth = info_.index_size;
buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
HRESULT hr = buffer_cache_->device()->CreateBuffer(&buffer_desc, NULL, &handle_);
if (FAILED(hr)) {
XELOGW("D3D11: failed to create index buffer");
return false;
}
return FetchDirty(hash);
}
bool D3D11IndexBuffer::FetchDirty(uint64_t hash) {
hash_ = hash;
// All that's done so far:
XEASSERT(info_.endianness == 0x2);
D3D11_MAPPED_SUBRESOURCE res;
HRESULT hr = buffer_cache_->context()->Map(
handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
if (FAILED(hr)) {
XELOGE("D3D11: unable to map index buffer");
return false;
}
if (info_.index_32bit) {
const uint32_t* src = reinterpret_cast<const uint32_t*>(src_);
uint32_t* dest = reinterpret_cast<uint32_t*>(res.pData);
for (uint32_t n = 0; n < info_.index_count; n++) {
uint32_t d = { XESWAP32(src[n]) };
dest[n] = d;
}
} else {
const uint16_t* src = reinterpret_cast<const uint16_t*>(src_);
uint16_t* dest = reinterpret_cast<uint16_t*>(res.pData);
for (uint32_t n = 0; n < info_.index_count; n++) {
uint16_t d = XESWAP16(src[n]);
dest[n] = d;
}
}
buffer_cache_->context()->Unmap(handle_, 0);
return true;
}
// Forwards the vertex buffer description and source bytes to VertexBuffer and
// remembers the owning cache. handle_ starts null; FetchNew() creates the
// actual D3D11 buffer.
D3D11VertexBuffer::D3D11VertexBuffer(D3D11BufferCache* buffer_cache,
                                     const VertexBufferInfo& info,
                                     const uint8_t* src_ptr,
                                     size_t length)
    : VertexBuffer(info, src_ptr, length),
      buffer_cache_(buffer_cache),
      handle_(nullptr) {}
// Drops this object's reference to the D3D11 buffer in handle_.
// NOTE(review): XESAFERELEASE is defined elsewhere; handle_ may still be null
// here if FetchNew() was never called, so it presumably tolerates null -
// confirm.
D3D11VertexBuffer::~D3D11VertexBuffer() {
XESAFERELEASE(handle_);
}
// Creates the dynamic, CPU-writable D3D11 vertex buffer (length_ bytes) and
// then performs the first upload through FetchDirty().
//
// hash: value recorded in hash_.
// Returns false if CreateBuffer fails, otherwise the result of FetchDirty().
bool D3D11VertexBuffer::FetchNew(uint64_t hash) {
  hash_ = hash;

  D3D11_BUFFER_DESC buffer_desc;
  xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
  buffer_desc.ByteWidth = static_cast<UINT>(length_);
  buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
  buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
  buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;

  HRESULT hr = buffer_cache_->device()->CreateBuffer(
      &buffer_desc, nullptr, &handle_);
  if (FAILED(hr)) {
    // This is the vertex buffer path; the message used to say "index buffer",
    // which sent anyone reading the log to the wrong resource.
    XELOGW("D3D11: failed to create vertex buffer");
    return false;
  }

  return FetchDirty(hash);
}
// Re-uploads the vertex data from src_ into the D3D11 buffer. For every
// element in the layout, each 32-bit word of that element is byte-swapped and
// written at the same word offset in the destination, once per vertex.
//
// hash: value recorded in hash_.
// Returns false if the layout stride is zero or the buffer cannot be mapped,
// true otherwise.
bool D3D11VertexBuffer::FetchDirty(uint64_t hash) {
  hash_ = hash;

  // The vertex count below is derived by dividing by the stride; a zero stride
  // would be an integer division by zero, so reject it before mapping.
  const uint32_t stride = info_.layout.stride_words;
  if (stride == 0) {
    XELOGE("D3D11: vertex buffer has a zero stride");
    return false;
  }

  D3D11_MAPPED_SUBRESOURCE res;
  HRESULT hr = buffer_cache_->context()->Map(
      handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
  if (FAILED(hr)) {
    XELOGE("D3D11: unable to map vertex buffer");
    return false;
  }
  uint8_t* dest = reinterpret_cast<uint8_t*>(res.pData);

  // TODO(benvanik): rewrite to be faster/special case common/etc
  // Number of whole vertices: total 32-bit words divided by words per vertex.
  const size_t count = (length_ / 4) / stride;
  for (size_t n = 0; n < info_.layout.element_count; n++) {
    const auto& el = info_.layout.elements[n];
    const uint32_t* src_ptr =
        reinterpret_cast<const uint32_t*>(src_ + el.offset_words * 4);
    uint32_t* dest_ptr =
        reinterpret_cast<uint32_t*>(dest + el.offset_words * 4);
    // Word offset of the current vertex relative to the element start.
    uint32_t o = 0;
    for (size_t i = 0; i < count; i++) {
      for (uint32_t j = 0; j < el.size_words; j++) {
        dest_ptr[o + j] = XESWAP32(src_ptr[o + j]);
      }
      o += stride;
    }
  }

  buffer_cache_->context()->Unmap(handle_, 0);
  return true;
}

View File

@ -1,69 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_H_
#define XENIA_GPU_D3D11_D3D11_BUFFER_H_
#include <xenia/core.h>
#include <xenia/gpu/buffer.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
class D3D11BufferCache;
// D3D11 implementation of IndexBuffer. Holds a pointer to the cache that
// created it and the ID3D11Buffer it uploads into.
class D3D11IndexBuffer : public IndexBuffer {
public:
D3D11IndexBuffer(D3D11BufferCache* buffer_cache,
const IndexBufferInfo& info,
const uint8_t* src_ptr, size_t length);
virtual ~D3D11IndexBuffer();
// Returns the D3D11 buffer; null until FetchNew() has created it.
ID3D11Buffer* handle() const { return handle_; }
// Creates the D3D11 buffer, then uploads via FetchDirty().
bool FetchNew(uint64_t hash) override;
// Maps the buffer and rewrites all indices with byte-swapping applied.
bool FetchDirty(uint64_t hash) override;
private:
// Supplies the D3D11 device and context used by the Fetch* methods.
D3D11BufferCache* buffer_cache_;
// Released in the destructor.
ID3D11Buffer* handle_;
};
// D3D11 implementation of VertexBuffer. Holds a pointer to the cache that
// created it and the ID3D11Buffer it uploads into.
class D3D11VertexBuffer : public VertexBuffer {
public:
D3D11VertexBuffer(D3D11BufferCache* buffer_cache,
const VertexBufferInfo& info,
const uint8_t* src_ptr, size_t length);
virtual ~D3D11VertexBuffer();
// Returns the D3D11 buffer; null until FetchNew() has created it.
ID3D11Buffer* handle() const { return handle_; }
// Creates the D3D11 buffer, then uploads via FetchDirty().
bool FetchNew(uint64_t hash) override;
// Maps the buffer and rewrites the vertex words with byte-swapping applied.
bool FetchDirty(uint64_t hash) override;
private:
// Supplies the D3D11 device and context used by the Fetch* methods.
D3D11BufferCache* buffer_cache_;
// Released in the destructor.
ID3D11Buffer* handle_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_BUFFER_H_

View File

@ -1,44 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_buffer_cache.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_buffer.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
// Stores the device context and device and takes a COM reference on each;
// the destructor gives both references back.
D3D11BufferCache::D3D11BufferCache(ID3D11DeviceContext* context,
                                   ID3D11Device* device)
    : context_(context),
      device_(device) {
  context_->AddRef();
  device_->AddRef();
}
// Gives back the references taken in the constructor: device first, then
// context. XESAFERELEASE is defined elsewhere.
D3D11BufferCache::~D3D11BufferCache() {
XESAFERELEASE(device_);
XESAFERELEASE(context_);
}
// Factory hook: heap-allocates a D3D11IndexBuffer bound to this cache and
// returns it as its IndexBuffer base. The caller receives the raw pointer.
IndexBuffer* D3D11BufferCache::CreateIndexBuffer(const IndexBufferInfo& info,
                                                 const uint8_t* src_ptr,
                                                 size_t length) {
  auto* buffer = new D3D11IndexBuffer(this, info, src_ptr, length);
  return buffer;
}
// Factory hook: heap-allocates a D3D11VertexBuffer bound to this cache and
// returns it as its VertexBuffer base. The caller receives the raw pointer.
VertexBuffer* D3D11BufferCache::CreateVertexBuffer(const VertexBufferInfo& info,
                                                   const uint8_t* src_ptr,
                                                   size_t length) {
  auto* buffer = new D3D11VertexBuffer(this, info, src_ptr, length);
  return buffer;
}

View File

@ -1,53 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_
#define XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_
#include <xenia/core.h>
#include <xenia/gpu/buffer_cache.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
// D3D11 implementation of BufferCache. Holds referenced pointers to the device
// and device context and builds D3D11 index/vertex buffer objects on request.
class D3D11BufferCache : public BufferCache {
public:
// Takes a reference on both context and device (released in the destructor).
D3D11BufferCache(ID3D11DeviceContext* context, ID3D11Device* device);
virtual ~D3D11BufferCache();
// Accessors used by the buffers this cache creates.
ID3D11DeviceContext* context() const { return context_; }
ID3D11Device* device() const { return device_; }
protected:
// Return newly allocated D3D11IndexBuffer / D3D11VertexBuffer instances.
IndexBuffer* CreateIndexBuffer(
const IndexBufferInfo& info,
const uint8_t* src_ptr, size_t length) override;
VertexBuffer* CreateVertexBuffer(
const VertexBufferInfo& info,
const uint8_t* src_ptr, size_t length) override;
protected:
ID3D11DeviceContext* context_;
ID3D11Device* device_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_

View File

@ -0,0 +1,149 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_buffer_resource.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
// Forwards the memory range and info to IndexBufferResource and remembers the
// owning resource cache. handle_ starts null; CreateHandle() fills it in.
D3D11IndexBufferResource::D3D11IndexBufferResource(
    D3D11ResourceCache* resource_cache,
    const MemoryRange& memory_range,
    const Info& info)
    : IndexBufferResource(memory_range, info),
      resource_cache_(resource_cache),
      handle_(nullptr) {}
// Drops this object's reference to the D3D11 buffer in handle_.
// NOTE(review): XESAFERELEASE is defined elsewhere; handle_ is null until
// CreateHandle() succeeds, so it presumably tolerates null - confirm.
D3D11IndexBufferResource::~D3D11IndexBufferResource() {
XESAFERELEASE(handle_);
}
int D3D11IndexBufferResource::CreateHandle() {
D3D11_BUFFER_DESC buffer_desc;
xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
buffer_desc.ByteWidth = static_cast<UINT>(memory_range_.length);
buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
HRESULT hr = resource_cache_->device()->CreateBuffer(
&buffer_desc, nullptr, &handle_);
if (FAILED(hr)) {
XELOGW("D3D11: failed to create index buffer");
return 1;
}
return 0;
}
int D3D11IndexBufferResource::InvalidateRegion(
const MemoryRange& memory_range) {
SCOPE_profile_cpu_f("gpu");
// All that's done so far:
XEASSERT(info_.endianness == 0x2);
D3D11_MAPPED_SUBRESOURCE res;
HRESULT hr = resource_cache_->context()->Map(
handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
if (FAILED(hr)) {
XELOGE("D3D11: unable to map index buffer");
return 1;
}
if (info_.format == INDEX_FORMAT_32BIT) {
uint32_t index_count = memory_range_.length / 4;
const uint32_t* src = reinterpret_cast<const uint32_t*>(
memory_range_.host_base);
uint32_t* dest = reinterpret_cast<uint32_t*>(res.pData);
for (uint32_t n = 0; n < index_count; n++) {
dest[n] = XESWAP32(src[n]);
}
} else {
uint32_t index_count = memory_range_.length / 2;
const uint16_t* src = reinterpret_cast<const uint16_t*>(
memory_range_.host_base);
uint16_t* dest = reinterpret_cast<uint16_t*>(res.pData);
for (uint32_t n = 0; n < index_count; n++) {
dest[n] = XESWAP16(src[n]);
}
}
resource_cache_->context()->Unmap(handle_, 0);
return 0;
}
// Forwards the memory range and info to VertexBufferResource and remembers the
// owning resource cache. handle_ starts null; CreateHandle() fills it in.
D3D11VertexBufferResource::D3D11VertexBufferResource(
    D3D11ResourceCache* resource_cache,
    const MemoryRange& memory_range,
    const Info& info)
    : VertexBufferResource(memory_range, info),
      resource_cache_(resource_cache),
      handle_(nullptr) {}
// Drops this object's reference to the D3D11 buffer in handle_.
// NOTE(review): XESAFERELEASE is defined elsewhere; handle_ is null until
// CreateHandle() succeeds, so it presumably tolerates null - confirm.
D3D11VertexBufferResource::~D3D11VertexBufferResource() {
XESAFERELEASE(handle_);
}
int D3D11VertexBufferResource::CreateHandle() {
D3D11_BUFFER_DESC buffer_desc;
xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
buffer_desc.ByteWidth = static_cast<UINT>(memory_range_.length);
buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
HRESULT hr = resource_cache_->device()->CreateBuffer(
&buffer_desc, nullptr, &handle_);
if (FAILED(hr)) {
XELOGW("D3D11: failed to create vertex buffer");
return 1;
}
return 0;
}
// Re-uploads the vertex data from memory_range_.host_base into the D3D11
// buffer. For every element in info_, each 32-bit word of that element is
// byte-swapped and written at the same word offset in the destination, once
// per vertex.
//
// memory_range: not read by this implementation; the member memory_range_ is
//               always uploaded in full.
// Returns 0 on success, 1 if the stride is zero or the buffer cannot be
// mapped.
int D3D11VertexBufferResource::InvalidateRegion(
    const MemoryRange& memory_range) {
  SCOPE_profile_cpu_f("gpu");

  // The vertex count below is derived by dividing by the stride; a zero stride
  // would be an integer division by zero, so reject it before mapping.
  const uint32_t stride = info_.stride_words;
  if (stride == 0) {
    XELOGE("D3D11: vertex buffer has a zero stride");
    return 1;
  }

  D3D11_MAPPED_SUBRESOURCE res;
  HRESULT hr = resource_cache_->context()->Map(
      handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
  if (FAILED(hr)) {
    XELOGE("D3D11: unable to map vertex buffer");
    return 1;
  }
  uint8_t* dest = reinterpret_cast<uint8_t*>(res.pData);

  // TODO(benvanik): rewrite to be faster/special case common/etc
  // Number of whole vertices: total 32-bit words divided by words per vertex.
  const size_t count = (memory_range_.length / 4) / stride;
  for (size_t n = 0; n < info_.element_count; n++) {
    const auto& el = info_.elements[n];
    const uint32_t* src_ptr = reinterpret_cast<const uint32_t*>(
        memory_range_.host_base + el.offset_words * 4);
    uint32_t* dest_ptr =
        reinterpret_cast<uint32_t*>(dest + el.offset_words * 4);
    // Word offset of the current vertex relative to the element start.
    uint32_t o = 0;
    for (size_t i = 0; i < count; i++) {
      for (uint32_t j = 0; j < el.size_words; j++) {
        dest_ptr[o + j] = XESWAP32(src_ptr[o + j]);
      }
      o += stride;
    }
  }

  resource_cache_->context()->Unmap(handle_, 0);
  return 0;
}

View File

@ -0,0 +1,69 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_
#define XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_
#include <xenia/gpu/buffer_resource.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
class D3D11ResourceCache;
// D3D11 implementation of IndexBufferResource. Holds a pointer to the resource
// cache that created it and the ID3D11Buffer it uploads into.
class D3D11IndexBufferResource : public IndexBufferResource {
public:
D3D11IndexBufferResource(D3D11ResourceCache* resource_cache,
const MemoryRange& memory_range,
const Info& info);
~D3D11IndexBufferResource() override;
// Returns the ID3D11Buffer as an untyped pointer; null until CreateHandle()
// has succeeded.
void* handle() const override { return handle_; }
protected:
// Creates the D3D11 buffer; returns 0 on success, 1 on failure.
int CreateHandle() override;
// Maps the buffer and rewrites all indices with byte-swapping applied;
// returns 0 on success, 1 on failure.
int InvalidateRegion(const MemoryRange& memory_range) override;
private:
// Supplies the D3D11 device and context.
D3D11ResourceCache* resource_cache_;
// Released in the destructor.
ID3D11Buffer* handle_;
};
// D3D11 implementation of VertexBufferResource. Holds a pointer to the
// resource cache that created it and the ID3D11Buffer it uploads into.
class D3D11VertexBufferResource : public VertexBufferResource {
public:
D3D11VertexBufferResource(D3D11ResourceCache* resource_cache,
const MemoryRange& memory_range,
const Info& info);
~D3D11VertexBufferResource() override;
// Returns the ID3D11Buffer as an untyped pointer; null until CreateHandle()
// has succeeded.
void* handle() const override { return handle_; }
protected:
// Creates the D3D11 buffer; returns 0 on success, 1 on failure.
int CreateHandle() override;
// Maps the buffer and rewrites the vertex words with byte-swapping applied;
// returns 0 on success, 1 on failure.
int InvalidateRegion(const MemoryRange& memory_range) override;
private:
// Supplies the D3D11 device and context.
D3D11ResourceCache* resource_cache_;
// Released in the destructor.
ID3D11Buffer* handle_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_

View File

@ -10,7 +10,8 @@
#include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_shader.h>
#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
#include <xenia/gpu/d3d11/d3d11_shader_translator.h>
#include <xenia/gpu/xenos/ucode.h>
#include <d3dcompiler.h>
@ -22,8 +23,8 @@ using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device, uint64_t hash) :
hash_(hash), handle_(NULL) {
D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device)
: handle_(nullptr) {
device_ = device;
device_->AddRef();
}
@ -33,7 +34,7 @@ D3D11GeometryShader::~D3D11GeometryShader() {
XESAFERELEASE(device_);
}
int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
int D3D11GeometryShader::Prepare(D3D11VertexShaderResource* vertex_shader) {
SCOPE_profile_cpu_f("gpu");
if (handle_) {
@ -94,11 +95,12 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
if (FLAGS_dump_shaders.size()) {
base_path = FLAGS_dump_shaders.c_str();
}
uint64_t hash = xe_hash64(shader_source, xestrlena(shader_source)); // ?
char file_name[XE_MAX_PATH];
xesnprintfa(file_name, XECOUNT(file_name),
"%s/gen_%.16llX.gs",
base_path,
hash_);
hash);
if (FLAGS_dump_shaders.size()) {
FILE* f = fopen(file_name, "w");
@ -128,7 +130,7 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
return shader_blob;
}
int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
int D3D11GeometryShader::Generate(D3D11VertexShaderResource* vertex_shader,
alloy::StringBuffer* output) {
output->Append(
"struct VERTEX {\n"
@ -138,7 +140,7 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
// TODO(benvanik): only add used ones?
output->Append(
" float4 o[%d] : XE_O;\n",
D3D11Shader::MAX_INTERPOLATORS);
D3D11ShaderTranslator::kMaxInterpolators);
}
if (alloc_counts.point_size) {
output->Append(
@ -156,14 +158,14 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
D3D11PointSpriteGeometryShader::D3D11PointSpriteGeometryShader(
ID3D11Device* device, uint64_t hash) :
D3D11GeometryShader(device, hash) {
ID3D11Device* device) : D3D11GeometryShader(device) {
}
D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() {
}
int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
int D3D11PointSpriteGeometryShader::Generate(
D3D11VertexShaderResource* vertex_shader,
alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
@ -211,14 +213,14 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
D3D11RectListGeometryShader::D3D11RectListGeometryShader(
ID3D11Device* device, uint64_t hash) :
D3D11GeometryShader(device, hash) {
ID3D11Device* device) : D3D11GeometryShader(device) {
}
D3D11RectListGeometryShader::~D3D11RectListGeometryShader() {
}
int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
int D3D11RectListGeometryShader::Generate(
D3D11VertexShaderResource* vertex_shader,
alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
@ -256,14 +258,14 @@ int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
D3D11QuadListGeometryShader::D3D11QuadListGeometryShader(
ID3D11Device* device, uint64_t hash) :
D3D11GeometryShader(device, hash) {
ID3D11Device* device) : D3D11GeometryShader(device) {
}
D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() {
}
int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
int D3D11QuadListGeometryShader::Generate(
D3D11VertexShaderResource* vertex_shader,
alloy::StringBuffer* output) {
SCOPE_profile_cpu_f("gpu");
if (D3D11GeometryShader::Generate(vertex_shader, output)) {

View File

@ -21,7 +21,7 @@ namespace xe {
namespace gpu {
namespace d3d11 {
class D3D11VertexShader;
class D3D11VertexShaderResource;
class D3D11GeometryShader {
@ -30,53 +30,52 @@ public:
ID3D11GeometryShader* handle() const { return handle_; }
int Prepare(D3D11VertexShader* vertex_shader);
int Prepare(D3D11VertexShaderResource* vertex_shader);
protected:
D3D11GeometryShader(ID3D11Device* device, uint64_t hash);
D3D11GeometryShader(ID3D11Device* device);
ID3D10Blob* Compile(const char* shader_source);
virtual int Generate(D3D11VertexShader* vertex_shader,
virtual int Generate(D3D11VertexShaderResource* vertex_shader,
alloy::StringBuffer* output);
protected:
ID3D11Device* device_;
uint64_t hash_;
ID3D11GeometryShader* handle_;
};
class D3D11PointSpriteGeometryShader : public D3D11GeometryShader {
public:
D3D11PointSpriteGeometryShader(ID3D11Device* device, uint64_t hash);
virtual ~D3D11PointSpriteGeometryShader();
D3D11PointSpriteGeometryShader(ID3D11Device* device);
~D3D11PointSpriteGeometryShader() override;
protected:
virtual int Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output);
int Generate(D3D11VertexShaderResource* vertex_shader,
alloy::StringBuffer* output) override;
};
class D3D11RectListGeometryShader : public D3D11GeometryShader {
public:
D3D11RectListGeometryShader(ID3D11Device* device, uint64_t hash);
virtual ~D3D11RectListGeometryShader();
D3D11RectListGeometryShader(ID3D11Device* device);
~D3D11RectListGeometryShader() override;
protected:
virtual int Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output);
int Generate(D3D11VertexShaderResource* vertex_shader,
alloy::StringBuffer* output) override;
};
class D3D11QuadListGeometryShader : public D3D11GeometryShader {
public:
D3D11QuadListGeometryShader(ID3D11Device* device, uint64_t hash);
virtual ~D3D11QuadListGeometryShader();
D3D11QuadListGeometryShader(ID3D11Device* device);
~D3D11QuadListGeometryShader() override;
protected:
virtual int Generate(D3D11VertexShader* vertex_shader,
alloy::StringBuffer* output);
int Generate(D3D11VertexShaderResource* vertex_shader,
alloy::StringBuffer* output) override;
};

File diff suppressed because it is too large Load Diff

View File

@ -13,8 +13,8 @@
#include <xenia/core.h>
#include <xenia/gpu/graphics_driver.h>
#include <xenia/gpu/shader.h>
#include <xenia/gpu/d3d11/d3d11_gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
@ -24,13 +24,6 @@ namespace xe {
namespace gpu {
namespace d3d11 {
class D3D11BufferCache;
class D3D11PixelShader;
class D3D11ShaderCache;
class D3D11TextureCache;
struct D3D11TextureView;
class D3D11VertexShader;
class D3D11GraphicsDriver : public GraphicsDriver {
public:
@ -38,48 +31,32 @@ public:
Memory* memory, IDXGISwapChain* swap_chain, ID3D11Device* device);
virtual ~D3D11GraphicsDriver();
virtual void Initialize();
ResourceCache* resource_cache() const override { return resource_cache_; }
virtual void InvalidateState(
uint32_t mask);
virtual void SetShader(
xenos::XE_GPU_SHADER_TYPE type,
uint32_t address,
uint32_t start,
uint32_t length);
virtual void DrawIndexBuffer(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness);
virtual void DrawIndexAuto(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count);
int Initialize() override;
int Draw(const DrawCommand& command) override;
// TODO(benvanik): figure this out.
virtual int Resolve();
int Resolve() override;
private:
int SetupDraw(xenos::XE_GPU_PRIMITIVE_TYPE prim_type);
void InitializeInvalidTexture();
int UpdateState(const DrawCommand& command);
int SetupConstantBuffers(const DrawCommand& command);
int SetupShaders(const DrawCommand& command);
int SetupInputAssembly(const DrawCommand& command);
int SetupSamplers(const DrawCommand& command);
int RebuildRenderTargets(uint32_t width, uint32_t height);
int UpdateState(uint32_t state_overrides = 0);
int UpdateConstantBuffers();
int BindShaders();
int PrepareFetchers();
int PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc);
int PrepareTextureFetchers();
int PrepareTextureSampler(xenos::XE_GPU_SHADER_TYPE shader_type,
Shader::tex_buffer_desc_t& desc);
int PrepareIndexBuffer(
bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness);
private:
IDXGISwapChain* swap_chain_;
ID3D11Device* device_;
ID3D11DeviceContext* context_;
D3D11BufferCache* buffer_cache_;
D3D11ShaderCache* shader_cache_;
D3D11TextureCache* texture_cache_;
D3D11ResourceCache* resource_cache_;
ID3D11ShaderResourceView* invalid_texture_view_;
ID3D11SamplerState* invalid_texture_sampler_state_;
@ -97,9 +74,6 @@ private:
} render_targets_;
struct {
D3D11VertexShader* vertex_shader;
D3D11PixelShader* pixel_shader;
struct {
ID3D11Buffer* float_constants;
ID3D11Buffer* bool_constants;
@ -107,17 +81,7 @@ private:
ID3D11Buffer* vs_consts;
ID3D11Buffer* gs_consts;
} constant_buffers;
struct {
bool enabled;
xenos::xe_gpu_texture_fetch_t fetch;
D3D11TextureView* view;
} texture_fetchers[32];
} state_;
enum StateOverrides {
STATE_OVERRIDE_DISABLE_CULLING = (1 << 0),
};
};

View File

@ -146,12 +146,18 @@ void D3D11GraphicsSystem::Initialize() {
XEASSERTNULL(driver_);
driver_ = new D3D11GraphicsDriver(
memory_, window_->swap_chain(), device_);
if (driver_->Initialize()) {
XELOGE("Unable to initialize D3D11 driver");
return;
}
// Initial vsync kick.
DispatchInterruptCallback(0);
}
void D3D11GraphicsSystem::Pump() {
SCOPE_profile_cpu_f("gpu");
if (swap_pending_) {
swap_pending_ = false;

View File

@ -0,0 +1,71 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_buffer_resource.h>
#include <xenia/gpu/d3d11/d3d11_sampler_state_resource.h>
#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
#include <xenia/gpu/d3d11/d3d11_texture_resource.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
// Passes memory to the ResourceCache base, stores the device and device
// context, and takes a COM reference on each; the destructor gives both back.
D3D11ResourceCache::D3D11ResourceCache(Memory* memory,
                                       ID3D11Device* device,
                                       ID3D11DeviceContext* context)
    : ResourceCache(memory),
      device_(device),
      context_(context) {
  device_->AddRef();
  context_->AddRef();
}
// Gives back the references taken in the constructor: device first, then
// context. XESAFERELEASE is defined elsewhere.
D3D11ResourceCache::~D3D11ResourceCache() {
XESAFERELEASE(device_);
XESAFERELEASE(context_);
}
// Factory hook: heap-allocates a D3D11VertexShaderResource bound to this cache
// and returns it as its VertexShaderResource base.
VertexShaderResource* D3D11ResourceCache::CreateVertexShader(
    const MemoryRange& memory_range,
    const VertexShaderResource::Info& info) {
  auto* resource = new D3D11VertexShaderResource(this, memory_range, info);
  return resource;
}
// Factory hook: heap-allocates a D3D11PixelShaderResource bound to this cache
// and returns it as its PixelShaderResource base.
PixelShaderResource* D3D11ResourceCache::CreatePixelShader(
    const MemoryRange& memory_range,
    const PixelShaderResource::Info& info) {
  auto* resource = new D3D11PixelShaderResource(this, memory_range, info);
  return resource;
}
// Factory hook: heap-allocates a D3D11TextureResource bound to this cache and
// returns it as its TextureResource base.
TextureResource* D3D11ResourceCache::CreateTexture(
    const MemoryRange& memory_range,
    const TextureResource::Info& info) {
  auto* resource = new D3D11TextureResource(this, memory_range, info);
  return resource;
}
// Factory hook: heap-allocates a D3D11SamplerStateResource bound to this cache
// and returns it as its SamplerStateResource base. Unlike the other factories
// this one takes no memory range.
SamplerStateResource* D3D11ResourceCache::CreateSamplerState(
    const SamplerStateResource::Info& info) {
  auto* resource = new D3D11SamplerStateResource(this, info);
  return resource;
}
// Factory hook: heap-allocates a D3D11IndexBufferResource bound to this cache
// and returns it as its IndexBufferResource base.
IndexBufferResource* D3D11ResourceCache::CreateIndexBuffer(
    const MemoryRange& memory_range,
    const IndexBufferResource::Info& info) {
  auto* resource = new D3D11IndexBufferResource(this, memory_range, info);
  return resource;
}
// Factory hook: heap-allocates a D3D11VertexBufferResource bound to this cache
// and returns it as its VertexBufferResource base.
VertexBufferResource* D3D11ResourceCache::CreateVertexBuffer(
    const MemoryRange& memory_range,
    const VertexBufferResource::Info& info) {
  auto* resource = new D3D11VertexBufferResource(this, memory_range, info);
  return resource;
}

View File

@ -0,0 +1,64 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_
#define XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_
#include <xenia/core.h>
#include <xenia/gpu/resource_cache.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
// D3D11 implementation of ResourceCache. Holds referenced pointers to the
// device and device context and overrides the Create* hooks to build the
// D3D11-specific resource types.
class D3D11ResourceCache : public ResourceCache {
public:
// Takes a reference on both device and context (released in the destructor).
D3D11ResourceCache(Memory* memory,
ID3D11Device* device, ID3D11DeviceContext* context);
virtual ~D3D11ResourceCache();
// Accessors used by the resources this cache creates.
ID3D11Device* device() const { return device_; }
ID3D11DeviceContext* context() const { return context_; }
protected:
// Each hook returns a newly allocated D3D11*Resource of the matching kind,
// constructed with this cache as its first argument.
VertexShaderResource* CreateVertexShader(
const MemoryRange& memory_range,
const VertexShaderResource::Info& info) override;
PixelShaderResource* CreatePixelShader(
const MemoryRange& memory_range,
const PixelShaderResource::Info& info) override;
TextureResource* CreateTexture(
const MemoryRange& memory_range,
const TextureResource::Info& info) override;
SamplerStateResource* CreateSamplerState(
const SamplerStateResource::Info& info) override;
IndexBufferResource* CreateIndexBuffer(
const MemoryRange& memory_range,
const IndexBufferResource::Info& info) override;
VertexBufferResource* CreateVertexBuffer(
const MemoryRange& memory_range,
const VertexBufferResource::Info& info) override;
private:
ID3D11Device* device_;
ID3D11DeviceContext* context_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_

View File

@ -7,53 +7,36 @@
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_texture_cache.h>
#include <xenia/gpu/d3d11/d3d11_sampler_state_resource.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
D3D11TextureCache::D3D11TextureCache(
Memory* memory,
ID3D11DeviceContext* context, ID3D11Device* device)
: TextureCache(memory),
context_(context), device_(device) {
context_->AddRef();
device_->AddRef();
D3D11SamplerStateResource::D3D11SamplerStateResource(
D3D11ResourceCache* resource_cache, const Info& info)
: SamplerStateResource(info),
resource_cache_(resource_cache),
handle_(nullptr) {
}
D3D11TextureCache::~D3D11TextureCache() {
for (auto it = samplers_.begin(); it != samplers_.end(); ++it) {
auto& cached_state = it->second;
XESAFERELEASE(cached_state.state);
}
samplers_.clear();
XESAFERELEASE(device_);
XESAFERELEASE(context_);
D3D11SamplerStateResource::~D3D11SamplerStateResource() {
XESAFERELEASE(handle_);
}
Texture* D3D11TextureCache::CreateTexture(
uint32_t address, const uint8_t* host_address,
const xenos::xe_gpu_texture_fetch_t& fetch) {
return new D3D11Texture(this, address, host_address);
int D3D11SamplerStateResource::Prepare() {
if (handle_) {
return 0;
}
ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
const xenos::xe_gpu_texture_fetch_t& fetch,
const Shader::tex_buffer_desc_t& desc) {
D3D11_SAMPLER_DESC sampler_desc;
xe_zero_struct(&sampler_desc, sizeof(sampler_desc));
uint32_t min_filter = desc.tex_fetch.min_filter == 3 ?
fetch.min_filter : desc.tex_fetch.min_filter;
uint32_t mag_filter = desc.tex_fetch.mag_filter == 3 ?
fetch.mag_filter : desc.tex_fetch.mag_filter;
uint32_t mip_filter = desc.tex_fetch.mip_filter == 3 ?
fetch.mip_filter : desc.tex_fetch.mip_filter;
// MIN, MAG, MIP
static const D3D11_FILTER filter_matrix[2][2][3] = {
{
@ -87,7 +70,8 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
},
},
};
sampler_desc.Filter = filter_matrix[min_filter][mag_filter][mip_filter];
sampler_desc.Filter =
filter_matrix[info_.min_filter][info_.mag_filter][info_.mip_filter];
static const D3D11_TEXTURE_ADDRESS_MODE mode_map[] = {
D3D11_TEXTURE_ADDRESS_WRAP,
D3D11_TEXTURE_ADDRESS_MIRROR,
@ -98,9 +82,9 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
D3D11_TEXTURE_ADDRESS_BORDER, // ?
D3D11_TEXTURE_ADDRESS_MIRROR, // ?
};
sampler_desc.AddressU = mode_map[fetch.clamp_x];
sampler_desc.AddressV = mode_map[fetch.clamp_y];
sampler_desc.AddressW = mode_map[fetch.clamp_z];
sampler_desc.AddressU = mode_map[info_.clamp_u];
sampler_desc.AddressV = mode_map[info_.clamp_v];
sampler_desc.AddressW = mode_map[info_.clamp_w];
sampler_desc.MipLODBias;
sampler_desc.MaxAnisotropy = 1;
sampler_desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
@ -111,29 +95,12 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
sampler_desc.MinLOD;
sampler_desc.MaxLOD;
// TODO(benvanik): do this earlier without having to setup the whole struct?
size_t hash = hash_combine(
sampler_desc.Filter,
sampler_desc.AddressU,
sampler_desc.AddressV,
sampler_desc.AddressW);
auto range = samplers_.equal_range(hash);
for (auto it = range.first; it != range.second; ++it) {
const auto& cached_state = it->second;
// TODO(benvanik): faster compare?
if (memcmp(&sampler_desc, &cached_state.desc, sizeof(sampler_desc)) == 0) {
return cached_state.state;
}
}
ID3D11SamplerState* sampler_state = NULL;
HRESULT hr = device_->CreateSamplerState(&sampler_desc, &sampler_state);
HRESULT hr = resource_cache_->device()->CreateSamplerState(
&sampler_desc, &handle_);
if (FAILED(hr)) {
XELOGE("D3D11: unable to create sampler state");
return nullptr;
return 1;
}
samplers_.insert({ hash, { sampler_desc, sampler_state } });
return sampler_state;
return 0;
}

View File

@ -0,0 +1,48 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_
#define XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_
#include <xenia/gpu/sampler_state_resource.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
class D3D11ResourceCache;
// Direct3D 11 implementation of SamplerStateResource.
// Keeps a pointer to the resource cache it was created with and the
// ID3D11SamplerState handle exposed through handle().
class D3D11SamplerStateResource : public SamplerStateResource {
 public:
  D3D11SamplerStateResource(D3D11ResourceCache* resource_cache,
                            const Info& info);
  ~D3D11SamplerStateResource() override;

  // Returns the native sampler state pointer, type-erased as void*.
  void* handle() const override { return handle_; }

  // Prepares the resource for use; returns an int status code.
  // NOTE(review): presumably 0 means success, matching the other Prepare()
  // methods in this backend -- confirm against the implementation.
  int Prepare() override;

 protected:
  D3D11ResourceCache* resource_cache_;
  ID3D11SamplerState* handle_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_

File diff suppressed because it is too large Load Diff

View File

@ -1,125 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_SHADER_H_
#define XENIA_GPU_D3D11_D3D11_SHADER_H_
#include <xenia/core.h>
#include <xenia/gpu/shader.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
struct Output;
// State handed to D3D11Shader::TranslateExec while translating a shader.
typedef struct {
  Output* output;                   // Destination for the generated text.
  xenos::XE_GPU_SHADER_TYPE type;   // Kind of shader being translated.
  uint32_t tex_fetch_index;         // Running texture fetch counter (presumably; confirm in the .cc).
} xe_gpu_translate_ctx_t;
class D3D11GeometryShader;
// Common base for the D3D11 vertex and pixel shader wrappers.
// Holds the device pointer and the translated source text; construction is
// restricted to subclasses.
class D3D11Shader : public Shader {
 public:
  virtual ~D3D11Shader();

  static const uint32_t MAX_INTERPOLATORS = 16;

 protected:
  D3D11Shader(ID3D11Device* device,
              xenos::XE_GPU_SHADER_TYPE type,
              const uint8_t* src_ptr, size_t length,
              uint64_t hash);

  // Accessors for the translated source string.
  const char* translated_src() const { return translated_src_; }
  void set_translated_src(char* value);

  // Translation helpers shared by the vertex and pixel translators.
  void AppendTextureHeader(Output* output);
  int TranslateExec(xe_gpu_translate_ctx_t& ctx,
                    const xenos::instr_cf_exec_t& cf);

  // Compiles the given source text and returns the resulting blob.
  ID3D10Blob* Compile(const char* shader_source);

  ID3D11Device* device_;
  char* translated_src_;
};
// D3D11 vertex shader wrapper: owns the native vertex shader handle, its
// input layout and a table of geometry shaders indexed by GeometryShaderType.
class D3D11VertexShader : public D3D11Shader {
 public:
  D3D11VertexShader(ID3D11Device* device,
                    const uint8_t* src_ptr, size_t length,
                    uint64_t hash);
  virtual ~D3D11VertexShader();

  ID3D11VertexShader* handle() const { return handle_; }
  ID3D11InputLayout* input_layout() const { return input_layout_; }

  // Prepares the shader for use with the given program control word.
  int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl);

  // Kinds of geometry shader that can be requested for this vertex shader.
  enum GeometryShaderType {
    POINT_SPRITE_SHADER,
    RECT_LIST_SHADER,
    QUAD_LIST_SHADER,
    MAX_GEOMETRY_SHADER_TYPE,
  };

  // Hands back the geometry shader of the requested type via out_shader.
  int DemandGeometryShader(GeometryShaderType type,
                           D3D11GeometryShader** out_shader);

 private:
  const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl);

  ID3D11VertexShader* handle_;
  ID3D11InputLayout* input_layout_;
  D3D11GeometryShader* geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE];
};
// D3D11 pixel shader wrapper owning the native pixel shader handle.
class D3D11PixelShader : public D3D11Shader {
 public:
  D3D11PixelShader(ID3D11Device* device,
                   const uint8_t* src_ptr, size_t length,
                   uint64_t hash);
  virtual ~D3D11PixelShader();

  ID3D11PixelShader* handle() const { return handle_; }

  // Prepares the shader for use; the vertex shader feeding this pixel shader
  // is supplied as input_shader.
  int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl,
              D3D11VertexShader* input_shader);

 private:
  const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl,
                        D3D11VertexShader* input_shader);

  ID3D11PixelShader* handle_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_SHADER_H_

View File

@ -1,45 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_shader_cache.h>
#include <xenia/gpu/d3d11/d3d11_shader.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
// Stores the device and takes a reference on it; the matching Release()
// happens in the destructor.
D3D11ShaderCache::D3D11ShaderCache(ID3D11Device* device)
    : device_(device) {
  device_->AddRef();
}
// Drops the device reference taken by the constructor.
D3D11ShaderCache::~D3D11ShaderCache() {
  device_->Release();
}
// Factory hook: builds the D3D11 shader wrapper matching |type|.
// Returns a newly allocated vertex or pixel shader, or NULL (after asserting)
// for any other shader type.
Shader* D3D11ShaderCache::CreateCore(
    xenos::XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length,
    uint64_t hash) {
  if (type == XE_GPU_SHADER_TYPE_VERTEX) {
    return new D3D11VertexShader(device_, src_ptr, length, hash);
  }
  if (type == XE_GPU_SHADER_TYPE_PIXEL) {
    return new D3D11PixelShader(device_, src_ptr, length, hash);
  }
  // Unknown shader type.
  XEASSERTALWAYS();
  return NULL;
}

View File

@ -1,46 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_
#define XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_
#include <xenia/core.h>
#include <xenia/gpu/shader_cache.h>
#include <D3D11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
// ShaderCache specialization that produces D3D11 shader objects.
class D3D11ShaderCache : public ShaderCache {
 public:
  D3D11ShaderCache(ID3D11Device* device);
  virtual ~D3D11ShaderCache();

 protected:
  // Creates the backend shader object for the given type and microcode.
  Shader* CreateCore(xenos::XE_GPU_SHADER_TYPE type,
                     const uint8_t* src_ptr, size_t length,
                     uint64_t hash) override;

  ID3D11Device* device_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_

View File

@ -0,0 +1,381 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
#include <xenia/gpu/d3d11/d3d11_shader_translator.h>
#include <xenia/gpu/xenos/ucode.h>
#include <d3dcompiler.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
namespace {
// Compiles translated HLSL into a bytecode blob with D3DCompile.
//
// type           selects the vs_5_0 (vertex) or ps_5_0 (anything else) target.
// shader_source  NUL-terminated HLSL text to compile.
// disasm_source  NUL-terminated microcode disassembly; hashed to build the
//                file name and appended (as a comment) to the dump file.
//
// When FLAGS_dump_shaders is non-empty the source and disassembly are also
// written to "<dump dir>/gen_<hash>.<vs|ps>".
//
// Returns the compiled blob (caller releases it) or nullptr on failure.
ID3D10Blob* D3D11ShaderCompile(XE_GPU_SHADER_TYPE type,
                               const char* shader_source,
                               const char* disasm_source) {
  SCOPE_profile_cpu_f("gpu");

  // TODO(benvanik): pick shared runtime mode defines.
  D3D10_SHADER_MACRO defines[] = {
    "TEST_DEFINE", "1",
    0, 0,
  };

  uint32_t flags1 = 0;
  flags1 |= D3D10_SHADER_DEBUG;
  flags1 |= D3D10_SHADER_ENABLE_STRICTNESS;
  uint32_t flags2 = 0;

  // Create a name.
  const bool dump_enabled = FLAGS_dump_shaders.size() != 0;
  const char* base_path = dump_enabled ? FLAGS_dump_shaders.c_str() : "";
  const uint64_t hash =
      xe_hash64(disasm_source, xestrlena(disasm_source)); // ?
  char file_name[XE_MAX_PATH];
  // The cast keeps the argument in sync with the %llX conversion regardless
  // of how wide size_t/uint64_t are on the target.
  xesnprintfa(file_name, XECOUNT(file_name),
              "%s/gen_%.16llX.%s",
              base_path,
              (unsigned long long)hash,
              type == XE_GPU_SHADER_TYPE_VERTEX ? "vs" : "ps");

  if (dump_enabled) {
    FILE* f = fopen(file_name, "w");
    if (f) {
      // fputs rather than fprintf: shader text may contain '%' characters
      // and must never be interpreted as a format string.
      fputs(shader_source, f);
      fputs("\n\n", f);
      fputs("/*\n", f);
      fputs(disasm_source, f);
      fputs(" */\n", f);
      fclose(f);
    } else {
      // Dumping is best-effort; compilation still proceeds.
      XELOGW("D3D11: unable to open shader dump file %s", file_name);
    }
  }

  // Compile shader to bytecode blob.
  ID3D10Blob* shader_blob = 0;
  ID3D10Blob* error_blob = 0;
  HRESULT hr = D3DCompile(
      shader_source, strlen(shader_source),
      file_name,
      defines, nullptr,
      "main",
      type == XE_GPU_SHADER_TYPE_VERTEX ? "vs_5_0" : "ps_5_0",
      flags1, flags2,
      &shader_blob, &error_blob);
  if (error_blob) {
    char* msg = (char*)error_blob->GetBufferPointer();
    XELOGE("D3D11: shader compile failed with %s", msg);
  }
  XESAFERELEASE(error_blob);
  if (FAILED(hr)) {
    // Do not leak a blob that may have been returned alongside the failure.
    XESAFERELEASE(shader_blob);
    return nullptr;
  }
  return shader_blob;
}
} // namespace
// Starts with no native handles and no translated source; the geometry
// shader table is zeroed so entries can be created on demand.
D3D11VertexShaderResource::D3D11VertexShaderResource(
    D3D11ResourceCache* resource_cache,
    const MemoryRange& memory_range,
    const Info& info)
    : VertexShaderResource(memory_range, info),
      resource_cache_(resource_cache),
      handle_(nullptr),
      input_layout_(nullptr),
      translated_src_(nullptr) {
  xe_zero_struct(geometry_shaders_, sizeof(geometry_shaders_));
}
// Releases the native shader and input layout, deletes every geometry shader
// in the table and frees the translated source copy.
D3D11VertexShaderResource::~D3D11VertexShaderResource() {
  XESAFERELEASE(handle_);
  XESAFERELEASE(input_layout_);
  for (D3D11GeometryShader* geometry_shader : geometry_shaders_) {
    delete geometry_shader;
  }
  xe_free(translated_src_);
}
// Translates, compiles and creates the native vertex shader plus its input
// layout. A no-op returning 0 once the shader has been prepared.
//
// program_cntl  program control word forwarded to the translator.
//
// Returns 0 on success, the translator's error code if translation fails, or
// 1 if compilation, shader creation or input layout creation fails. On
// failure no native shader is retained, so a later call retries from scratch.
int D3D11VertexShaderResource::Prepare(
    const xe_gpu_program_cntl_t& program_cntl) {
  SCOPE_profile_cpu_f("gpu");
  if (is_prepared_ || handle_) {
    return 0;
  }

  // TODO(benvanik): look in file based on hash/etc.

  // Translate and compile source.
  D3D11ShaderTranslator translator;
  int ret = translator.TranslateVertexShader(this, program_cntl);
  if (ret) {
    XELOGE("D3D11: failed to translate vertex shader");
    return ret;
  }
  // An earlier attempt that failed after translation leaves its copy behind;
  // free it so retries do not leak.
  if (translated_src_) {
    xe_free(translated_src_);
    translated_src_ = nullptr;
  }
  translated_src_ = xestrdupa(translator.translated_src());

  ID3D10Blob* shader_blob = D3D11ShaderCompile(
      XE_GPU_SHADER_TYPE_VERTEX, translated_src_, disasm_src());
  if (!shader_blob) {
    return 1;
  }
  // Use the blob's storage directly; it stays alive until released below.
  const void* byte_code = shader_blob->GetBufferPointer();
  const size_t byte_code_length = shader_blob->GetBufferSize();

  // Create shader.
  HRESULT hr = resource_cache_->device()->CreateVertexShader(
      byte_code, byte_code_length,
      nullptr,
      &handle_);
  if (FAILED(hr)) {
    XELOGE("D3D11: failed to create vertex shader");
    XESAFERELEASE(shader_blob);
    return 1;
  }

  // Create input layout.
  ret = CreateInputLayout(byte_code, byte_code_length);
  XESAFERELEASE(shader_blob);
  if (ret) {
    // Drop the shader: with handle_ left set the early-out above would make
    // the next call report success without an input layout.
    XESAFERELEASE(handle_);
    handle_ = nullptr;
    return 1;
  }
  is_prepared_ = true;
  return 0;
}
// Builds the ID3D11InputLayout describing every vertex element fetched by
// this shader.
//
// byte_code / byte_code_length  compiled vertex shader bytecode, passed to
//                               ID3D11Device::CreateInputLayout.
//
// Each element of each buffer input becomes one D3D11_INPUT_ELEMENT_DESC with
// semantic "XE_VF" and a semantic index equal to its running position.
//
// Returns 0 on success (including the zero-input case, where input_layout_
// is set to NULL) and 1 if an element uses an unsupported format or the
// device rejects the layout.
int D3D11VertexShaderResource::CreateInputLayout(const void* byte_code,
                                                 size_t byte_code_length) {
  size_t element_count = 0;
  const auto& inputs = buffer_inputs();
  for (uint32_t n = 0; n < inputs.count; n++) {
    element_count += inputs.descs[n].info.element_count;
  }
  if (!element_count) {
    XELOGW("D3D11: vertex shader with zero inputs -- retaining previous values?");
    input_layout_ = NULL;
    return 0;
  }

  D3D11_INPUT_ELEMENT_DESC* element_descs =
      (D3D11_INPUT_ELEMENT_DESC*)xe_alloca(
          sizeof(D3D11_INPUT_ELEMENT_DESC) * element_count);
  uint32_t el_index = 0;
  for (uint32_t n = 0; n < inputs.count; n++) {
    const auto& input = inputs.descs[n];
    for (uint32_t m = 0; m < input.info.element_count; m++) {
      const auto& el = input.info.elements[m];
      uint32_t vb_slot = input.input_index;
      // Start from UNKNOWN so an unhandled format can never leave this
      // uninitialized.
      DXGI_FORMAT vtx_format = DXGI_FORMAT_UNKNOWN;
      switch (el.format) {
        case FMT_8_8_8_8:
          if (el.is_normalized) {
            vtx_format = el.is_signed ?
                DXGI_FORMAT_R8G8B8A8_SNORM : DXGI_FORMAT_R8G8B8A8_UNORM;
          } else {
            vtx_format = el.is_signed ?
                DXGI_FORMAT_R8G8B8A8_SINT : DXGI_FORMAT_R8G8B8A8_UINT;
          }
          break;
        case FMT_2_10_10_10:
          if (el.is_normalized) {
            vtx_format = DXGI_FORMAT_R10G10B10A2_UNORM;
          } else {
            vtx_format = DXGI_FORMAT_R10G10B10A2_UINT;
          }
          break;
        // DXGI_FORMAT_R11G11B10_FLOAT?
        case FMT_16_16:
          if (el.is_normalized) {
            vtx_format = el.is_signed ?
                DXGI_FORMAT_R16G16_SNORM : DXGI_FORMAT_R16G16_UNORM;
          } else {
            vtx_format = el.is_signed ?
                DXGI_FORMAT_R16G16_SINT : DXGI_FORMAT_R16G16_UINT;
          }
          break;
        case FMT_16_16_16_16:
          if (el.is_normalized) {
            vtx_format = el.is_signed ?
                DXGI_FORMAT_R16G16B16A16_SNORM : DXGI_FORMAT_R16G16B16A16_UNORM;
          } else {
            vtx_format = el.is_signed ?
                DXGI_FORMAT_R16G16B16A16_SINT : DXGI_FORMAT_R16G16B16A16_UINT;
          }
          break;
        case FMT_16_16_FLOAT:
          vtx_format = DXGI_FORMAT_R16G16_FLOAT;
          break;
        case FMT_16_16_16_16_FLOAT:
          vtx_format = DXGI_FORMAT_R16G16B16A16_FLOAT;
          break;
        case FMT_32:
          vtx_format = el.is_signed ?
              DXGI_FORMAT_R32_SINT : DXGI_FORMAT_R32_UINT;
          break;
        case FMT_32_32:
          vtx_format = el.is_signed ?
              DXGI_FORMAT_R32G32_SINT : DXGI_FORMAT_R32G32_UINT;
          break;
        case FMT_32_32_32_32:
          vtx_format = el.is_signed ?
              DXGI_FORMAT_R32G32B32A32_SINT : DXGI_FORMAT_R32G32B32A32_UINT;
          break;
        case FMT_32_FLOAT:
          vtx_format = DXGI_FORMAT_R32_FLOAT;
          break;
        case FMT_32_32_FLOAT:
          vtx_format = DXGI_FORMAT_R32G32_FLOAT;
          break;
        case FMT_32_32_32_FLOAT:
          vtx_format = DXGI_FORMAT_R32G32B32_FLOAT;
          break;
        case FMT_32_32_32_32_FLOAT:
          vtx_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
          break;
        default:
          // Unhandled vertex format: fail explicitly instead of feeding an
          // indeterminate value to the device when asserts are compiled out.
          XEASSERTALWAYS();
          XELOGE("D3D11: unsupported vertex element format");
          return 1;
      }
      element_descs[el_index].SemanticName = "XE_VF";
      element_descs[el_index].SemanticIndex = el_index;
      element_descs[el_index].Format = vtx_format;
      element_descs[el_index].InputSlot = vb_slot;
      // Element offsets are stored in 32-bit words; D3D wants bytes.
      element_descs[el_index].AlignedByteOffset = el.offset_words * 4;
      element_descs[el_index].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
      element_descs[el_index].InstanceDataStepRate = 0;
      el_index++;
    }
  }

  HRESULT hr = resource_cache_->device()->CreateInputLayout(
      element_descs,
      (UINT)element_count,
      byte_code, byte_code_length,
      &input_layout_);
  if (FAILED(hr)) {
    XELOGE("D3D11: failed to create vertex shader input layout");
    return 1;
  }
  return 0;
}
// Returns (creating and caching on first use) the geometry shader of the
// requested type for this vertex shader.
//
// type        which geometry shader to fetch; must be below
//             MAX_GEOMETRY_SHADER_TYPE.
// out_shader  receives the shader on success; left untouched on failure.
//
// Returns 0 on success, 1 if the type is invalid or the shader could not be
// created/prepared.
int D3D11VertexShaderResource::DemandGeometryShader(
    GeometryShaderType type, D3D11GeometryShader** out_shader) {
  // Validate before touching the table: an out-of-range value would
  // otherwise index past the end of geometry_shaders_.
  if (static_cast<size_t>(type) >= XECOUNT(geometry_shaders_)) {
    XEASSERTALWAYS();
    return 1;
  }

  if (geometry_shaders_[type]) {
    *out_shader = geometry_shaders_[type];
    return 0;
  }

  // Demand generate.
  auto device = resource_cache_->device();
  D3D11GeometryShader* shader = nullptr;
  switch (type) {
    case POINT_SPRITE_SHADER:
      shader = new D3D11PointSpriteGeometryShader(device);
      break;
    case RECT_LIST_SHADER:
      shader = new D3D11RectListGeometryShader(device);
      break;
    case QUAD_LIST_SHADER:
      shader = new D3D11QuadListGeometryShader(device);
      break;
    default:
      XEASSERTALWAYS();
      return 1;
  }
  if (!shader) {
    return 1;
  }

  if (shader->Prepare(this)) {
    // Not cached, so a later request will try again.
    delete shader;
    return 1;
  }

  geometry_shaders_[type] = shader;
  *out_shader = geometry_shaders_[type];
  return 0;
}
// Starts with no native shader handle and no translated source.
D3D11PixelShaderResource::D3D11PixelShaderResource(
    D3D11ResourceCache* resource_cache,
    const MemoryRange& memory_range,
    const Info& info)
    : PixelShaderResource(memory_range, info),
      resource_cache_(resource_cache),
      handle_(nullptr),
      translated_src_(nullptr) {}
// Releases the native pixel shader and frees the translated source copy.
D3D11PixelShaderResource::~D3D11PixelShaderResource() {
  XESAFERELEASE(handle_);
  xe_free(translated_src_);
}
// Translates, compiles and creates the native pixel shader. A no-op
// returning 0 once the shader has been prepared.
//
// program_cntl  program control word forwarded to the translator.
// input_shader  vertex shader feeding this pixel shader; its alloc_counts()
//               are passed to the translator. Must not be null.
//
// Returns 0 on success, the translator's error code if translation fails, or
// 1 if input_shader is null or compilation/shader creation fails.
int D3D11PixelShaderResource::Prepare(const xe_gpu_program_cntl_t& program_cntl,
                                      VertexShaderResource* input_shader) {
  SCOPE_profile_cpu_f("gpu");
  if (is_prepared_ || handle_) {
    return 0;
  }
  if (!input_shader) {
    XELOGE("D3D11: pixel shader prepared without a vertex shader");
    return 1;
  }

  // TODO(benvanik): look in file based on hash/etc.

  // Translate and compile source.
  D3D11ShaderTranslator translator;
  int ret = translator.TranslatePixelShader(this,
                                            program_cntl,
                                            input_shader->alloc_counts());
  if (ret) {
    XELOGE("D3D11: failed to translate pixel shader");
    return ret;
  }
  // An earlier attempt that failed after translation leaves its copy behind;
  // free it so retries do not leak.
  if (translated_src_) {
    xe_free(translated_src_);
    translated_src_ = nullptr;
  }
  translated_src_ = xestrdupa(translator.translated_src());

  ID3D10Blob* shader_blob = D3D11ShaderCompile(
      XE_GPU_SHADER_TYPE_PIXEL, translated_src_, disasm_src());
  if (!shader_blob) {
    return 1;
  }

  // Create shader straight from the blob's storage; no intermediate copy.
  HRESULT hr = resource_cache_->device()->CreatePixelShader(
      shader_blob->GetBufferPointer(), shader_blob->GetBufferSize(),
      nullptr,
      &handle_);
  XESAFERELEASE(shader_blob);
  if (FAILED(hr)) {
    XELOGE("D3D11: failed to create pixel shader");
    return 1;
  }
  is_prepared_ = true;
  return 0;
}

View File

@ -0,0 +1,91 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_
#define XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_
#include <xenia/gpu/shader_resource.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
class D3D11GeometryShader;
class D3D11ResourceCache;
struct Output;
// Shader translation context record.
// NOTE(review): nothing else in this header refers to it -- possibly a
// leftover from the previous shader header; confirm before removing.
typedef struct {
  Output* output;
  xenos::XE_GPU_SHADER_TYPE type;
  uint32_t tex_fetch_index;
} xe_gpu_translate_ctx_t;
// Direct3D 11 vertex shader resource.
// Owns the native vertex shader, its input layout, a copy of the translated
// source and a table of geometry shaders created on demand.
class D3D11VertexShaderResource : public VertexShaderResource {
 public:
  D3D11VertexShaderResource(D3D11ResourceCache* resource_cache,
                            const MemoryRange& memory_range,
                            const Info& info);
  ~D3D11VertexShaderResource() override;

  // Native ID3D11VertexShader pointer, type-erased as void*.
  void* handle() const override { return handle_; }
  ID3D11InputLayout* input_layout() const { return input_layout_; }
  const char* translated_src() const { return translated_src_; }

  // Translates/compiles the shader and creates the native objects.
  // Returns 0 on success, non-zero on failure.
  int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl) override;

  // Geometry shaders that can be requested for this vertex shader.
  enum GeometryShaderType {
    POINT_SPRITE_SHADER,
    RECT_LIST_SHADER,
    QUAD_LIST_SHADER,
    MAX_GEOMETRY_SHADER_TYPE,  // keep at the end
  };

  // Fetches (creating on first use) the geometry shader of the given type.
  // Returns 0 on success and stores the shader in out_shader.
  int DemandGeometryShader(GeometryShaderType type,
                           D3D11GeometryShader** out_shader);

 private:
  int CreateInputLayout(const void* byte_code, size_t byte_code_length);

  D3D11ResourceCache* resource_cache_;
  ID3D11VertexShader* handle_;
  ID3D11InputLayout* input_layout_;
  D3D11GeometryShader* geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE];
  char* translated_src_;
};
// Direct3D 11 pixel shader resource.
// Owns the native pixel shader and a copy of the translated source.
class D3D11PixelShaderResource : public PixelShaderResource {
 public:
  D3D11PixelShaderResource(D3D11ResourceCache* resource_cache,
                           const MemoryRange& memory_range,
                           const Info& info);
  ~D3D11PixelShaderResource() override;

  // Native ID3D11PixelShader pointer, type-erased as void*.
  void* handle() const override { return handle_; }
  const char* translated_src() const { return translated_src_; }

  // Translates/compiles the shader against the given vertex shader and
  // creates the native object. Returns 0 on success, non-zero on failure.
  int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl,
              VertexShaderResource* vertex_shader) override;

 private:
  D3D11ResourceCache* resource_cache_;
  ID3D11PixelShader* handle_;
  char* translated_src_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,125 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_
#define XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_
#include <xenia/gpu/shader_resource.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
// Translates Xenos shader microcode into source text for the D3D11 backend.
// Output is accumulated in a fixed-size internal buffer and exposed through
// translated_src() after a Translate*Shader call.
class D3D11ShaderTranslator {
 public:
  const static uint32_t kMaxInterpolators = 16;

  D3D11ShaderTranslator();

  // Translates the given vertex shader. Returns 0 on success.
  int TranslateVertexShader(VertexShaderResource* vertex_shader,
                            const xenos::xe_gpu_program_cntl_t& program_cntl);
  // Translates the given pixel shader using the allocation counts of the
  // vertex shader that feeds it. Returns 0 on success.
  int TranslatePixelShader(
      PixelShaderResource* pixel_shader,
      const xenos::xe_gpu_program_cntl_t& program_cntl,
      const VertexShaderResource::AllocCounts& alloc_counts);

  // NUL-terminated translated source accumulated so far.
  const char* translated_src() const { return buffer_; }

 private:
  xenos::XE_GPU_SHADER_TYPE type_;
  uint32_t tex_fetch_index_;
  const uint32_t* dwords_;

  static const int kCapacity = 64 * 1024;
  char buffer_[kCapacity];
  size_t capacity_;
  size_t offset_;

  // printf-style append to the output buffer.
  // Output that does not fit is truncated: offset_ never moves past the last
  // byte of the buffer and the contents always stay NUL-terminated.
  void append(const char* format, ...) {
    if (offset_ + 1 >= capacity_) {
      // Buffer already full (or has no room for anything but the NUL).
      return;
    }
    const size_t remaining = capacity_ - offset_;
    va_list args;
    va_start(args, format);
    int len = xevsnprintfa(buffer_ + offset_, remaining, format, args);
    va_end(args);
    // vsnprintf-style functions report truncation either with a negative
    // value or with the length that would have been written; neither may be
    // added to offset_ as-is.
    if (len < 0 || static_cast<size_t>(len) >= remaining) {
      offset_ = capacity_ - 1;
    } else {
      offset_ += static_cast<size_t>(len);
    }
    buffer_[offset_] = 0;
  }

  void AppendTextureHeader(
      const ShaderResource::SamplerInputs& sampler_inputs);

  // Register operand emitters.
  void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
                    uint32_t abs);
  void AppendDestRegName(uint32_t num, uint32_t dst_exp);
  void AppendDestReg(uint32_t num, uint32_t mask, uint32_t dst_exp);
  void AppendDestRegPost(uint32_t num, uint32_t mask, uint32_t dst_exp);
  void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
                   uint32_t abs);
  void PrintDstReg(uint32_t num, uint32_t mask, uint32_t dst_exp);
  void PrintExportComment(uint32_t num);

  // ALU instruction translation; one handler per vector (..v) opcode.
  int TranslateALU(const xenos::instr_alu_t* alu, int sync);
  int TranslateALU_ADDv(const xenos::instr_alu_t& alu);
  int TranslateALU_MULv(const xenos::instr_alu_t& alu);
  int TranslateALU_MAXv(const xenos::instr_alu_t& alu);
  int TranslateALU_MINv(const xenos::instr_alu_t& alu);
  int TranslateALU_SETXXv(const xenos::instr_alu_t& alu, const char* op);
  int TranslateALU_SETEv(const xenos::instr_alu_t& alu);
  int TranslateALU_SETGTv(const xenos::instr_alu_t& alu);
  int TranslateALU_SETGTEv(const xenos::instr_alu_t& alu);
  int TranslateALU_SETNEv(const xenos::instr_alu_t& alu);
  int TranslateALU_FRACv(const xenos::instr_alu_t& alu);
  int TranslateALU_TRUNCv(const xenos::instr_alu_t& alu);
  int TranslateALU_FLOORv(const xenos::instr_alu_t& alu);
  int TranslateALU_MULADDv(const xenos::instr_alu_t& alu);
  int TranslateALU_CNDXXv(const xenos::instr_alu_t& alu, const char* op);
  int TranslateALU_CNDEv(const xenos::instr_alu_t& alu);
  int TranslateALU_CNDGTEv(const xenos::instr_alu_t& alu);
  int TranslateALU_CNDGTv(const xenos::instr_alu_t& alu);
  int TranslateALU_DOT4v(const xenos::instr_alu_t& alu);
  int TranslateALU_DOT3v(const xenos::instr_alu_t& alu);
  int TranslateALU_DOT2ADDv(const xenos::instr_alu_t& alu);
  // CUBEv
  int TranslateALU_MAX4v(const xenos::instr_alu_t& alu);
  // ...
  // Scalar (..s) opcode handlers.
  int TranslateALU_MAXs(const xenos::instr_alu_t& alu);
  int TranslateALU_MINs(const xenos::instr_alu_t& alu);
  int TranslateALU_SETXXs(const xenos::instr_alu_t& alu, const char* op);
  int TranslateALU_SETEs(const xenos::instr_alu_t& alu);
  int TranslateALU_SETGTs(const xenos::instr_alu_t& alu);
  int TranslateALU_SETGTEs(const xenos::instr_alu_t& alu);
  int TranslateALU_SETNEs(const xenos::instr_alu_t& alu);
  int TranslateALU_RECIP_IEEE(const xenos::instr_alu_t& alu);
  int TranslateALU_MUL_CONST_0(const xenos::instr_alu_t& alu);
  int TranslateALU_MUL_CONST_1(const xenos::instr_alu_t& alu);
  int TranslateALU_ADD_CONST_0(const xenos::instr_alu_t& alu);
  int TranslateALU_ADD_CONST_1(const xenos::instr_alu_t& alu);
  int TranslateALU_SUB_CONST_0(const xenos::instr_alu_t& alu);
  int TranslateALU_SUB_CONST_1(const xenos::instr_alu_t& alu);

  // Fetch instruction helpers.
  void PrintDestFecth(uint32_t dst_reg, uint32_t dst_swiz);
  void AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz);
  int GetFormatComponentCount(uint32_t format);

  // Control flow / fetch translation.
  int TranslateExec(const xenos::instr_cf_exec_t& cf);
  int TranslateVertexFetch(const xenos::instr_fetch_vtx_t* vtx, int sync);
  int TranslateTextureFetch(const xenos::instr_fetch_tex_t* tex, int sync);
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_

View File

@ -1,264 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_texture.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_texture_cache.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
// Forwards the guest/host addresses to the base Texture and remembers the
// owning cache.
D3D11Texture::D3D11Texture(D3D11TextureCache* cache, uint32_t address,
                           const uint8_t* host_address)
    : Texture(address, host_address), cache_(cache) {}
// Nothing to release here.
D3D11Texture::~D3D11Texture() {}
// Creates a new view of this texture for the given fetch constant: fills in
// the view info, creates and uploads the D3D texture matching the view's
// dimensionality and finally creates the shader resource view.
//
// Returns the new view (ownership passes to the caller) or nullptr on any
// failure. On failure the partially built view is destroyed, which also
// releases any D3D objects it already holds.
TextureView* D3D11Texture::FetchNew(
    const xenos::xe_gpu_texture_fetch_t& fetch) {
  D3D11TextureView* view = new D3D11TextureView();
  if (!FillViewInfo(view, fetch)) {
    delete view;
    return nullptr;
  }

  D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc;
  xe_zero_struct(&srv_desc, sizeof(srv_desc));
  // TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?)
  srv_desc.Format = view->format;
  switch (view->dimensions) {
    case DIMENSION_1D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
      srv_desc.Texture1D.MipLevels = 1;
      srv_desc.Texture1D.MostDetailedMip = 0;
      if (!CreateTexture1D(view, fetch)) {
        XELOGE("D3D11: failed to fetch Texture1D");
        delete view;
        return nullptr;
      }
      break;
    case DIMENSION_2D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
      srv_desc.Texture2D.MipLevels = 1;
      srv_desc.Texture2D.MostDetailedMip = 0;
      if (!CreateTexture2D(view, fetch)) {
        XELOGE("D3D11: failed to fetch Texture2D");
        delete view;
        return nullptr;
      }
      break;
    case DIMENSION_3D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
      srv_desc.Texture3D.MipLevels = 1;
      srv_desc.Texture3D.MostDetailedMip = 0;
      if (!CreateTexture3D(view, fetch)) {
        XELOGE("D3D11: failed to fetch Texture3D");
        delete view;
        return nullptr;
      }
      break;
    case DIMENSION_CUBE:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE;
      srv_desc.TextureCube.MipLevels = 1;
      srv_desc.TextureCube.MostDetailedMip = 0;
      if (!CreateTextureCube(view, fetch)) {
        XELOGE("D3D11: failed to fetch TextureCube");
        delete view;
        return nullptr;
      }
      break;
  }

  HRESULT hr = cache_->device()->CreateShaderResourceView(
      view->resource, &srv_desc, &view->srv);
  if (FAILED(hr)) {
    XELOGE("D3D11: unable to create texture resource view");
    delete view;
    return nullptr;
  }

  return view;
}
// Re-uploads the contents of an existing view by dispatching to the fetch
// routine for the view's dimensionality. Returns that routine's result, or
// false when the dimensionality is not one of the handled values.
bool D3D11Texture::FetchDirty(
    TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  D3D11TextureView* native_view = static_cast<D3D11TextureView*>(view);
  bool fetched = false;
  switch (view->dimensions) {
    case DIMENSION_1D:
      fetched = FetchTexture1D(native_view, fetch);
      break;
    case DIMENSION_2D:
      fetched = FetchTexture2D(native_view, fetch);
      break;
    case DIMENSION_3D:
      fetched = FetchTexture3D(native_view, fetch);
      break;
    case DIMENSION_CUBE:
      fetched = FetchTextureCube(native_view, fetch);
      break;
  }
  return fetched;
}
// Creates a dynamic, CPU-writable 1D texture for the view (single mip,
// single array slice) and then uploads its contents via FetchTexture1D.
// Returns false if the device fails to create the texture, otherwise the
// result of the upload.
bool D3D11Texture::CreateTexture1D(
    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  D3D11_TEXTURE1D_DESC desc;
  xe_zero_struct(&desc, sizeof(desc));
  // The fetch constant stores width - 1.
  desc.Width = 1 + fetch.size_1d.width;
  desc.MipLevels = 1;
  desc.ArraySize = 1;
  desc.Format = view->format;
  desc.Usage = D3D11_USAGE_DYNAMIC;
  desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
  desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?

  ID3D11Device* device = cache_->device();
  HRESULT hr = device->CreateTexture1D(
      &desc, NULL, (ID3D11Texture1D**)&view->resource);
  if (FAILED(hr)) {
    return false;
  }
  return FetchTexture1D(view, fetch);
}
// 1D texture upload. Not implemented: logs an error and reports failure.
bool D3D11Texture::FetchTexture1D(
    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
  SCOPE_profile_cpu_f("gpu");

  // TODO(benvanik): upload!
  XELOGE("D3D11: FetchTexture1D not yet implemented");
  return false;
}
// Creates a dynamic, CPU-writable 2D texture sized from the view's output
// dimensions (single mip, single slice, no multisampling) and then uploads
// its contents via FetchTexture2D.
// Returns false if the device fails to create the texture, otherwise the
// result of the upload.
bool D3D11Texture::CreateTexture2D(
    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  XEASSERTTRUE(fetch.dimension == 1);

  D3D11_TEXTURE2D_DESC desc;
  xe_zero_struct(&desc, sizeof(desc));
  desc.Width = view->sizes_2d.output_width;
  desc.Height = view->sizes_2d.output_height;
  desc.MipLevels = 1;
  desc.ArraySize = 1;
  desc.Format = view->format;
  desc.SampleDesc.Count = 1;
  desc.SampleDesc.Quality = 0;
  desc.Usage = D3D11_USAGE_DYNAMIC;
  desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
  desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?

  ID3D11Device* device = cache_->device();
  HRESULT hr = device->CreateTexture2D(
      &desc, NULL, (ID3D11Texture2D**)&view->resource);
  if (FAILED(hr)) {
    return false;
  }
  return FetchTexture2D(view, fetch);
}
// Uploads the guest texture data for a 2D view into its D3D resource.
// Maps subresource 0 of view->resource with WRITE_DISCARD, copies the texels
// from guest memory (endian-swapping each one through TextureSwap, and
// un-tiling when fetch.tiled is set), then unmaps.
// Returns false if the map fails, true otherwise.
bool D3D11Texture::FetchTexture2D(
D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
SCOPE_profile_cpu_f("gpu");
XEASSERTTRUE(fetch.dimension == 1);
auto sizes = GetTextureSizes2D(view);
// TODO(benvanik): all mip levels.
D3D11_MAPPED_SUBRESOURCE res;
HRESULT hr = cache_->context()->Map(view->resource, 0,
D3D11_MAP_WRITE_DISCARD, 0, &res);
if (FAILED(hr)) {
XELOGE("D3D11: failed to map texture");
return false;
}
// Source is the guest address of this texture translated through the
// cache's memory object.
const uint8_t* src = cache_->memory()->Translate(address_);
uint8_t* dest = (uint8_t*)res.pData;
//memset(dest, 0, output_pitch * (output_height / view->block_size)); // TODO(gibbed): remove me later
// Destination rows are spaced by the pitch D3D reports for the mapping.
uint32_t output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch;
if (!fetch.tiled) {
// Linear layout: walk block rows, swapping logical_pitch bytes per row in
// texel_pitch-sized units; source and destination advance by their own pitch.
dest = (uint8_t*)res.pData;
for (uint32_t y = 0; y < sizes.block_height; y++) {
for (uint32_t x = 0; x < sizes.logical_pitch; x += view->texel_pitch) {
TextureSwap(dest + x, src + x, view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness);
}
src += sizes.input_pitch;
dest += output_pitch;
}
} else {
// Tiled layout: every block is looked up individually through the
// TiledOffset2DOuter/TiledOffset2DInner helpers.
// This expression evaluates to log2(texel_pitch) for pitches of 1, 2, 4, 8
// and 16 bytes.
auto bpp = (view->texel_pitch >> 2) + ((view->texel_pitch >> 1) >> (view->texel_pitch >> 2));
for (uint32_t y = 0, output_base_offset = 0;
y < sizes.block_height;
y++, output_base_offset += output_pitch) {
auto input_base_offset = TiledOffset2DOuter(y, (sizes.input_width / view->block_size), bpp);
for (uint32_t x = 0, output_offset = output_base_offset;
x < sizes.block_width;
x++, output_offset += view->texel_pitch) {
// The inner offset is shifted down by bpp and then scaled by texel_pitch
// to address the source block.
auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp;
TextureSwap(dest + output_offset,
src + input_offset * view->texel_pitch,
view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness);
}
}
}
cache_->context()->Unmap(view->resource, 0);
return true;
}
// 3D texture creation. Not implemented: logs, asserts and reports failure.
bool D3D11Texture::CreateTexture3D(
    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  XELOGE("D3D11: CreateTexture3D not yet implemented");
  XEASSERTALWAYS();
  return false;
}
// 3D texture upload. Not implemented: logs, asserts and reports failure.
bool D3D11Texture::FetchTexture3D(
    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
  SCOPE_profile_cpu_f("gpu");

  XELOGE("D3D11: FetchTexture3D not yet implemented");
  XEASSERTALWAYS();
  return false;

  // Disabled sketch of the eventual implementation, kept for reference:
  //D3D11_TEXTURE3D_DESC texture_desc;
  //xe_zero_struct(&texture_desc, sizeof(texture_desc));
  //texture_desc.Width;
  //texture_desc.Height;
  //texture_desc.Depth;
  //texture_desc.MipLevels;
  //texture_desc.Format;
  //texture_desc.Usage;
  //texture_desc.BindFlags;
  //texture_desc.CPUAccessFlags;
  //texture_desc.MiscFlags;
  //hr = device_->CreateTexture3D(
  //    &texture_desc, &initial_data, (ID3D11Texture3D**)&view->resource);
}
// Cube texture creation. Not implemented: logs, asserts and reports failure.
bool D3D11Texture::CreateTextureCube(
    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  XELOGE("D3D11: CreateTextureCube not yet implemented");
  XEASSERTALWAYS();
  return false;
}
// Cube texture upload. Not implemented: logs, asserts and reports failure.
bool D3D11Texture::FetchTextureCube(
    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
  SCOPE_profile_cpu_f("gpu");

  XELOGE("D3D11: FetchTextureCube not yet implemented");
  XEASSERTALWAYS();
  return false;
}

View File

@ -1,78 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_H_
#define XENIA_GPU_D3D11_D3D11_TEXTURE_H_
#include <xenia/core.h>
#include <xenia/gpu/texture.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
class D3D11TextureCache;
// TextureView carrying the D3D11 objects backing it. Both pointers start
// null and are released when the view is destroyed (shader resource view
// first, then the underlying resource).
struct D3D11TextureView : TextureView {
  ID3D11Resource* resource;
  ID3D11ShaderResourceView* srv;

  D3D11TextureView() : resource(nullptr), srv(nullptr) {}

  virtual ~D3D11TextureView() {
    XESAFERELEASE(srv);
    XESAFERELEASE(resource);
  }
};
// Direct3D 11 implementation of Texture. Creates D3D11TextureView objects
// for fetch constants and (re)uploads their contents from guest memory.
class D3D11Texture : public Texture {
 public:
  D3D11Texture(D3D11TextureCache* cache, uint32_t address,
               const uint8_t* host_address);
  virtual ~D3D11Texture();

 protected:
  // Builds a brand new view for the fetch constant; nullptr on failure.
  TextureView* FetchNew(
      const xenos::xe_gpu_texture_fetch_t& fetch) override;
  // Refreshes the contents of an existing view.
  bool FetchDirty(
      TextureView* view,
      const xenos::xe_gpu_texture_fetch_t& fetch) override;

  // Per-dimensionality helpers: Create* allocates the D3D texture for a
  // view, Fetch* uploads the texel data into it.
  bool CreateTexture1D(
      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
  bool FetchTexture1D(
      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
  bool CreateTexture2D(
      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
  bool FetchTexture2D(
      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
  bool CreateTexture3D(
      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
  bool FetchTexture3D(
      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
  bool CreateTextureCube(
      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
  bool FetchTextureCube(
      D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);

  D3D11TextureCache* cache_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_H_

View File

@ -1,61 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_
#define XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_
#include <xenia/core.h>
#include <xenia/gpu/texture_cache.h>
#include <xenia/gpu/shader.h>
#include <xenia/gpu/d3d11/d3d11_texture.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
// D3D11 implementation of TextureCache. Exposes the device and context it
// was built with and keeps a table of sampler states.
class D3D11TextureCache : public TextureCache {
public:
D3D11TextureCache(Memory* memory,
ID3D11DeviceContext* context, ID3D11Device* device);
virtual ~D3D11TextureCache();
// Accessors for the D3D11 objects passed to the constructor.
ID3D11DeviceContext* context() const { return context_; }
ID3D11Device* device() const { return device_; }
// Returns a sampler state for the given fetch constant and shader texture
// descriptor. NOTE(review): presumably served from samplers_ when a
// matching entry exists; confirm against the .cc.
ID3D11SamplerState* GetSamplerState(
const xenos::xe_gpu_texture_fetch_t& fetch,
const Shader::tex_buffer_desc_t& desc);
protected:
// TextureCache hook that creates the backend texture object.
Texture* CreateTexture(uint32_t address, const uint8_t* host_address,
const xenos::xe_gpu_texture_fetch_t& fetch) override;
private:
ID3D11DeviceContext* context_;
ID3D11Device* device_;
// A sampler description paired with the state object created for it.
struct CachedSamplerState {
D3D11_SAMPLER_DESC desc;
ID3D11SamplerState* state;
};
// Multimap keyed by a size_t (presumably a hash of the description; a
// multimap allows several entries per key).
std::unordered_multimap<size_t, CachedSamplerState> samplers_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_

View File

@ -0,0 +1,219 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/d3d11/d3d11_texture_resource.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::d3d11;
using namespace xe::gpu::xenos;
// Stores the owning cache and starts with no D3D11 objects; texture_ and
// handle_ are filled in later by CreateHandle(). |memory_range| and |info|
// are forwarded to the TextureResource base.
D3D11TextureResource::D3D11TextureResource(
D3D11ResourceCache* resource_cache,
const MemoryRange& memory_range,
const Info& info)
: TextureResource(memory_range, info),
resource_cache_(resource_cache),
texture_(nullptr),
handle_(nullptr) {
}
// Hands the texture and then the shader resource view to XESAFERELEASE.
D3D11TextureResource::~D3D11TextureResource() {
XESAFERELEASE(texture_);
XESAFERELEASE(handle_);
}
// Creates the D3D11 texture that matches info_.dimension (via the
// CreateHandle1D/2D/3D/Cube helpers, which fill texture_) and then creates a
// single-mip shader resource view over it, stored in handle_.
//
// Returns 0 on success and 1 on any failure (helper failure, unsupported
// dimension, or CreateShaderResourceView failure).
int D3D11TextureResource::CreateHandle() {
  SCOPE_profile_cpu_f("gpu");

  D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc;
  xe_zero_struct(&srv_desc, sizeof(srv_desc));
  // TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?)
  srv_desc.Format = info_.format;

  switch (info_.dimension) {
    case TEXTURE_DIMENSION_1D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
      srv_desc.Texture1D.MipLevels = 1;
      srv_desc.Texture1D.MostDetailedMip = 0;
      if (CreateHandle1D()) {
        XELOGE("D3D11: failed to create Texture1D");
        return 1;
      }
      break;
    case TEXTURE_DIMENSION_2D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
      srv_desc.Texture2D.MipLevels = 1;
      srv_desc.Texture2D.MostDetailedMip = 0;
      if (CreateHandle2D()) {
        XELOGE("D3D11: failed to create Texture2D");
        return 1;
      }
      break;
    case TEXTURE_DIMENSION_3D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
      srv_desc.Texture3D.MipLevels = 1;
      srv_desc.Texture3D.MostDetailedMip = 0;
      if (CreateHandle3D()) {
        XELOGE("D3D11: failed to create Texture3D");
        return 1;
      }
      break;
    case TEXTURE_DIMENSION_CUBE:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE;
      srv_desc.TextureCube.MipLevels = 1;
      srv_desc.TextureCube.MostDetailedMip = 0;
      if (CreateHandleCube()) {
        XELOGE("D3D11: failed to create TextureCube");
        return 1;
      }
      break;
    default:
      // No helper ran, so texture_ is still null and the view description
      // has no dimension; bail out instead of handing that to D3D11.
      XELOGE("D3D11: unsupported texture dimension %d",
             static_cast<int>(info_.dimension));
      return 1;
  }

  HRESULT hr = resource_cache_->device()->CreateShaderResourceView(
      texture_, &srv_desc, &handle_);
  if (FAILED(hr)) {
    XELOGE("D3D11: unable to create texture resource view");
    return 1;
  }
  return 0;
}
int D3D11TextureResource::CreateHandle1D() {
uint32_t width = 1 + info_.size_1d.width;
D3D11_TEXTURE1D_DESC texture_desc;
xe_zero_struct(&texture_desc, sizeof(texture_desc));
texture_desc.Width = width;
texture_desc.MipLevels = 1;
texture_desc.ArraySize = 1;
texture_desc.Format = info_.format;
texture_desc.Usage = D3D11_USAGE_DYNAMIC;
texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
HRESULT hr = resource_cache_->device()->CreateTexture1D(
&texture_desc, NULL, (ID3D11Texture1D**)&texture_);
if (FAILED(hr)) {
return 1;
}
return 0;
}
int D3D11TextureResource::CreateHandle2D() {
D3D11_TEXTURE2D_DESC texture_desc;
xe_zero_struct(&texture_desc, sizeof(texture_desc));
texture_desc.Width = info_.size_2d.output_width;
texture_desc.Height = info_.size_2d.output_height;
texture_desc.MipLevels = 1;
texture_desc.ArraySize = 1;
texture_desc.Format = info_.format;
texture_desc.SampleDesc.Count = 1;
texture_desc.SampleDesc.Quality = 0;
texture_desc.Usage = D3D11_USAGE_DYNAMIC;
texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
HRESULT hr = resource_cache_->device()->CreateTexture2D(
&texture_desc, NULL, (ID3D11Texture2D**)&texture_);
if (FAILED(hr)) {
return 1;
}
return 0;
}
// 3D texture creation is not implemented: logs, fires XEASSERTALWAYS, and
// returns 1 (failure). texture_ is left untouched.
int D3D11TextureResource::CreateHandle3D() {
XELOGE("D3D11: CreateTexture3D not yet implemented");
XEASSERTALWAYS();
return 1;
}
// Cube texture creation is not implemented: logs, fires XEASSERTALWAYS, and
// returns 1 (failure). texture_ is left untouched.
int D3D11TextureResource::CreateHandleCube() {
XELOGE("D3D11: CreateTextureCube not yet implemented");
XEASSERTALWAYS();
return 1;
}
// Dispatches a region invalidation to the helper for info_.dimension and
// returns that helper's result. A dimension value that matches none of the
// known cases yields 1 (failure).
int D3D11TextureResource::InvalidateRegion(const MemoryRange& memory_range) {
  SCOPE_profile_cpu_f("gpu");

  int status = 1;
  switch (info_.dimension) {
    case TEXTURE_DIMENSION_1D:
      status = InvalidateRegion1D(memory_range);
      break;
    case TEXTURE_DIMENSION_2D:
      status = InvalidateRegion2D(memory_range);
      break;
    case TEXTURE_DIMENSION_3D:
      status = InvalidateRegion3D(memory_range);
      break;
    case TEXTURE_DIMENSION_CUBE:
      status = InvalidateRegionCube(memory_range);
      break;
  }
  return status;
}
// 1D upload is not implemented; always returns 1 (failure) and ignores
// |memory_range|.
int D3D11TextureResource::InvalidateRegion1D(const MemoryRange& memory_range) {
return 1;
}
// Re-uploads the 2D texture contents: maps subresource 0 of texture_ with
// WRITE_DISCARD, copies every texel block from guest memory through
// TextureSwap, then unmaps. Returns 0 on success, 1 if the map fails.
//
// NOTE(review): the |memory_range| parameter is not read; the whole texture
// is copied from the member memory_range_.host_base.
int D3D11TextureResource::InvalidateRegion2D(const MemoryRange& memory_range) {
// TODO(benvanik): all mip levels.
D3D11_MAPPED_SUBRESOURCE res;
HRESULT hr = resource_cache_->context()->Map(
texture_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
if (FAILED(hr)) {
XELOGE("D3D11: failed to map texture");
return 1;
}
const uint8_t* src = memory_range_.host_base;
uint8_t* dest = (uint8_t*)res.pData;
// Rows in the mapped destination are spaced by the driver-reported pitch.
uint32_t output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch;
if (!info_.is_tiled) {
// Linear source: copy row by row, one texel_pitch-sized element at a time.
// (This re-assignment of dest repeats the initialization above.)
dest = (uint8_t*)res.pData;
for (uint32_t y = 0; y < info_.size_2d.block_height; y++) {
for (uint32_t x = 0; x < info_.size_2d.logical_pitch; x += info_.texel_pitch) {
TextureSwap(dest + x, src + x, info_.texel_pitch);
}
// Source and destination rows advance by different pitches.
src += info_.size_2d.input_pitch;
dest += output_pitch;
}
} else {
// Tiled source: the input offset of each block is computed with the
// TiledOffset2D helpers. For texel_pitch of 1/2/4/8/16 this expression
// evaluates to 0/1/2/3/4, i.e. log2(texel_pitch).
auto bpp = (info_.texel_pitch >> 2) + ((info_.texel_pitch >> 1) >> (info_.texel_pitch >> 2));
for (uint32_t y = 0, output_base_offset = 0;
y < info_.size_2d.block_height;
y++, output_base_offset += output_pitch) {
// Row-dependent part of the tiled address, computed once per row.
auto input_base_offset = TiledOffset2DOuter(y, (info_.size_2d.input_width / info_.block_size), bpp);
for (uint32_t x = 0, output_offset = output_base_offset;
x < info_.size_2d.block_width;
x++, output_offset += info_.texel_pitch) {
// Shifting by bpp turns the helper's result into an element index, which
// is scaled back to bytes by texel_pitch below.
auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp;
TextureSwap(dest + output_offset,
src + input_offset * info_.texel_pitch,
info_.texel_pitch);
}
}
}
resource_cache_->context()->Unmap(texture_, 0);
return 0;
}
// 3D upload is not implemented; always returns 1 (failure) and ignores
// |memory_range|.
int D3D11TextureResource::InvalidateRegion3D(const MemoryRange& memory_range) {
return 1;
}
// Cube upload is not implemented; always returns 1 (failure) and ignores
// |memory_range|.
int D3D11TextureResource::InvalidateRegionCube(
const MemoryRange& memory_range) {
return 1;
}

View File

@ -0,0 +1,60 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_
#define XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_
#include <xenia/gpu/texture_resource.h>
#include <xenia/gpu/xenos/xenos.h>
#include <d3d11.h>
namespace xe {
namespace gpu {
namespace d3d11 {
class D3D11ResourceCache;
// D3D11 implementation of TextureResource. Holds the D3D11 texture object
// and the shader resource view created over it; the view is what handle()
// returns.
class D3D11TextureResource : public TextureResource {
public:
D3D11TextureResource(D3D11ResourceCache* resource_cache,
const MemoryRange& memory_range,
const Info& info);
~D3D11TextureResource() override;
// Returns the ID3D11ShaderResourceView* as an opaque pointer (null until
// CreateHandle() succeeds).
void* handle() const override { return handle_; }
protected:
// All functions below return 0 on success and non-zero on failure.
// CreateHandle dispatches on the texture dimension to the helpers.
int CreateHandle() override;
int CreateHandle1D();
int CreateHandle2D();
int CreateHandle3D();
int CreateHandleCube();
// InvalidateRegion dispatches on the texture dimension to the helpers.
int InvalidateRegion(const MemoryRange& memory_range) override;
int InvalidateRegion1D(const MemoryRange& memory_range);
int InvalidateRegion2D(const MemoryRange& memory_range);
int InvalidateRegion3D(const MemoryRange& memory_range);
int InvalidateRegionCube(const MemoryRange& memory_range);
private:
// Cache that supplies the D3D11 device and context.
D3D11ResourceCache* resource_cache_;
// Underlying texture and the view over it; both released in the destructor.
ID3D11Resource* texture_;
ID3D11ShaderResourceView* handle_;
};
} // namespace d3d11
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_

View File

@ -1,10 +1,8 @@
# Copyright 2013 Ben Vanik. All Rights Reserved.
{
'sources': [
'd3d11_buffer.cc',
'd3d11_buffer.h',
'd3d11_buffer_cache.cc',
'd3d11_buffer_cache.h',
'd3d11_buffer_resource.cc',
'd3d11_buffer_resource.h',
'd3d11_geometry_shader.cc',
'd3d11_geometry_shader.h',
'd3d11_gpu-private.h',
@ -16,14 +14,16 @@
'd3d11_graphics_system.h',
'd3d11_profiler_display.cc',
'd3d11_profiler_display.h',
'd3d11_shader.cc',
'd3d11_shader.h',
'd3d11_shader_cache.cc',
'd3d11_shader_cache.h',
'd3d11_texture.cc',
'd3d11_texture.h',
'd3d11_texture_cache.cc',
'd3d11_texture_cache.h',
'd3d11_resource_cache.cc',
'd3d11_resource_cache.h',
'd3d11_sampler_state_resource.cc',
'd3d11_sampler_state_resource.h',
'd3d11_shader_resource.cc',
'd3d11_shader_resource.h',
'd3d11_shader_translator.cc',
'd3d11_shader_translator.h',
'd3d11_texture_resource.cc',
'd3d11_texture_resource.h',
'd3d11_window.cc',
'd3d11_window.h',
],

View File

@ -2,26 +2,16 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/xenos/registers.h>
#include <xenia/gpu/draw_command.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Returns the name of the register with the given index, or NULL when the
// index has no entry in register_table.inc.
const char* xe::gpu::xenos::GetRegisterName(uint32_t index) {
switch (index) {
// Each XE_GPU_REGISTER entry in the included table expands to one case
// label that returns the stringified register name.
#define XE_GPU_REGISTER(index, type, name) \
case index: return #name;
#include <xenia/gpu/xenos/register_table.inc>
#undef XE_GPU_REGISTER
default:
return NULL;
}
}

View File

@ -0,0 +1,78 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_DRAW_COMMAND_H_
#define XENIA_GPU_DRAW_COMMAND_H_
#include <xenia/core.h>
#include <xenia/gpu/buffer_resource.h>
#include <xenia/gpu/sampler_state_resource.h>
#include <xenia/gpu/shader_resource.h>
#include <xenia/gpu/texture_resource.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
// TODO(benvanik): move more of the enums in here?
// Everything a graphics driver needs for one draw call. Filled in by
// GraphicsDriver::PrepareDraw / PrepareDrawIndexBuffer and passed to
// GraphicsDriver::Draw. All pointers are non-owning.
struct DrawCommand {
// Primitive topology and index range of the draw.
xenos::XE_GPU_PRIMITIVE_TYPE prim_type;
uint32_t start_index;
uint32_t index_count;
uint32_t base_vertex;
// Shaders bound for the draw.
VertexShaderResource* vertex_shader;
PixelShaderResource* pixel_shader;
// TODO(benvanik): dirty tracking/max ranges/etc.
// Shader constants. Each |values| pointer is set by
// GraphicsDriver::PopulateConstantBuffers to point into the register file,
// with |count| set alongside it.
struct {
float* values;
size_t count;
} float4_constants;
struct {
uint32_t* values;
size_t count;
} loop_constants;
struct {
uint32_t* values;
size_t count;
} bool_constants;
// Index buffer, if present. If index_count > 0 then auto draw.
IndexBufferResource* index_buffer;
// Vertex buffers.
// Only the first vertex_buffer_count entries are valid.
struct {
uint32_t input_index;
VertexBufferResource* buffer;
uint32_t stride;
uint32_t offset;
} vertex_buffers[96];
size_t vertex_buffer_count;
// Texture samplers.
// |texture| and |sampler_state| may be null when the fetch constant could
// not be resolved (see GraphicsDriver::PopulateSamplerSet).
struct SamplerInput {
uint32_t input_index;
TextureResource* texture;
SamplerStateResource* sampler_state;
};
// Only the first *_sampler_count entries of each array are valid.
SamplerInput vertex_shader_samplers[32];
size_t vertex_shader_sampler_count;
SamplerInput pixel_shader_samplers[32];
size_t pixel_shader_sampler_count;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_DRAW_COMMAND_H_

View File

@ -12,12 +12,300 @@
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Binds the driver to guest memory and starts from a clean state: no address
// translation, a zeroed register file, and no shaders loaded.
//
// vertex_shader_ and pixel_shader_ must start out null because
// PopulateShaders() uses a null check to detect that no shader has been
// loaded yet; leaving them uninitialized made that check read garbage.
GraphicsDriver::GraphicsDriver(Memory* memory)
    : memory_(memory),
      address_translation_(0),
      vertex_shader_(nullptr),
      pixel_shader_(nullptr) {
  memset(&register_file_, 0, sizeof(register_file_));
}
// Nothing to release: the driver does not free any of its pointers here.
GraphicsDriver::~GraphicsDriver() = default;
// Fetches the shader located at guest |address| (|length| bytes) from the
// resource cache and makes it the current vertex or pixel shader, depending
// on |type|. Any type other than XE_GPU_SHADER_TYPE_VERTEX is treated as a
// pixel shader. If the shader has not been prepared yet, its disassembly is
// logged. |start| is currently unused.
//
// Returns 0 on success, 1 if the cache could not provide the shader.
int GraphicsDriver::LoadShader(XE_GPU_SHADER_TYPE type,
                               uint32_t address, uint32_t length,
                               uint32_t start) {
  MemoryRange shader_range(memory_->Translate(address), address, length);

  ShaderResource* loaded = nullptr;
  if (type == XE_GPU_SHADER_TYPE_VERTEX) {
    VertexShaderResource::Info vs_info;
    vertex_shader_ =
        resource_cache()->FetchVertexShader(shader_range, vs_info);
    if (!vertex_shader_) {
      XELOGE("Unable to fetch vertex shader");
      return 1;
    }
    loaded = vertex_shader_;
  } else {
    PixelShaderResource::Info ps_info;
    pixel_shader_ = resource_cache()->FetchPixelShader(shader_range, ps_info);
    if (!pixel_shader_) {
      XELOGE("Unable to fetch pixel shader");
      return 1;
    }
    loaded = pixel_shader_;
  }

  // Already-prepared shaders are not logged again.
  if (loaded->is_prepared()) {
    return 0;
  }

  // Disassemble.
  const char* disasm = loaded->disasm_src();
  if (!disasm) {
    disasm = "<failed to disassemble>";
  }
  XELOGGPU("Set shader %d at %0.8X (%db):\n%s",
           type, address, length, disasm);
  return 0;
}
// Fills |command| from the current register state: resets the fields this
// function does not derive, then populates state, constants, shaders, vertex
// inputs, and samplers in that order.
//
// Returns 0 on success. Returns 1 when the draw is skipped because
// RB_MODECONTROL's low three bits are not 4, or the non-zero code of the
// first populate step that fails.
int GraphicsDriver::PrepareDraw(DrawCommand& command) {
  SCOPE_profile_cpu_f("gpu");

  // Ignore copies for now.
  const uint32_t enable_mode =
      register_file_[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7;
  if (enable_mode != 4) {
    XELOGW("GPU: ignoring draw with enable mode %d", enable_mode);
    return 1;
  }

  // Reset the things we don't modify so that we have clean state.
  command.prim_type = XE_GPU_PRIMITIVE_TYPE_POINT_LIST;
  command.index_count = 0;
  command.index_buffer = nullptr;

  // Generic stuff.
  command.start_index = register_file_[XE_GPU_REG_VGT_INDX_OFFSET].u32;
  command.base_vertex = 0;

  // Each step reports failure with a non-zero code, which is passed through.
  if (int status = PopulateState(command)) {
    XELOGE("Unable to prepare draw state");
    return status;
  }
  if (int status = PopulateConstantBuffers(command)) {
    XELOGE("Unable to prepare draw constant buffers");
    return status;
  }
  if (int status = PopulateShaders(command)) {
    XELOGE("Unable to prepare draw shaders");
    return status;
  }
  if (int status = PopulateInputAssembly(command)) {
    XELOGE("Unable to prepare draw input assembly");
    return status;
  }
  if (int status = PopulateSamplers(command)) {
    XELOGE("Unable to prepare draw samplers");
    return status;
  }
  return 0;
}
// Fetches the index buffer at guest |address| (offset by the configured
// address translation) from the resource cache and stores it in
// |command.index_buffer|.
//
// Returns 0 on success, 1 if the cache returned no buffer.
int GraphicsDriver::PrepareDrawIndexBuffer(
    DrawCommand& command,
    uint32_t address, uint32_t length,
    xenos::XE_GPU_ENDIAN endianness,
    IndexFormat format) {
  SCOPE_profile_cpu_f("gpu");

  const uint32_t translated_address = address + address_translation_;
  MemoryRange index_range(memory_->Translate(translated_address),
                          translated_address, length);

  IndexBufferResource::Info index_info;
  index_info.endianness = endianness;
  index_info.format = format;

  command.index_buffer =
      resource_cache()->FetchIndexBuffer(index_range, index_info);
  return command.index_buffer ? 0 : 1;
}
// Currently a no-op: leaves |command| untouched and reports success (0).
int GraphicsDriver::PopulateState(DrawCommand& command) {
return 0;
}
// Points the command's constant blocks directly at the register file (no
// copy is made) and records fixed element counts: 512 for float4 constants,
// 32 for loop constants, and 8 for bool constants. Always returns 0.
int GraphicsDriver::PopulateConstantBuffers(DrawCommand& command) {
  auto& float4 = command.float4_constants;
  float4.values = &register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X].f32;
  float4.count = 512;

  auto& loops = command.loop_constants;
  loops.values = &register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32;
  loops.count = 32;

  auto& bools = command.bool_constants;
  bools.values = &register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32;
  bools.count = 8;

  return 0;
}
// Ensures both currently loaded shaders are prepared against the
// SQ_PROGRAM_CNTL register and stores them in |command|. The pixel shader is
// prepared with the vertex shader as an extra argument.
//
// Returns 0 on success, 1 if either shader is missing or fails to prepare.
int GraphicsDriver::PopulateShaders(DrawCommand& command) {
  SCOPE_profile_cpu_f("gpu");

  if (!vertex_shader_) {
    XELOGE("No vertex shader bound; ignoring");
    return 1;
  }
  if (!pixel_shader_) {
    XELOGE("No pixel shader bound; ignoring");
    return 1;
  }

  xe_gpu_program_cntl_t program_cntl;
  program_cntl.dword_0 = register_file_[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;

  // Prepare() is only invoked for shaders that are not prepared yet.
  if (!vertex_shader_->is_prepared() &&
      vertex_shader_->Prepare(program_cntl)) {
    XELOGE("Unable to prepare vertex shader");
    return 1;
  }
  if (!pixel_shader_->is_prepared() &&
      pixel_shader_->Prepare(program_cntl, vertex_shader_)) {
    XELOGE("Unable to prepare pixel shader");
    return 1;
  }

  command.vertex_shader = vertex_shader_;
  command.pixel_shader = pixel_shader_;
  return 0;
}
// Resolves every vertex buffer input of the bound vertex shader: reads the
// matching vertex fetch constant from the register file, fetches the buffer
// for that memory range from the resource cache, and records it in
// |command.vertex_buffers|.
//
// Returns 0 on success, 1 if a buffer could not be fetched.
int GraphicsDriver::PopulateInputAssembly(DrawCommand& command) {
  SCOPE_profile_cpu_f("gpu");

  const auto& inputs = command.vertex_shader->buffer_inputs();
  command.vertex_buffer_count = inputs.count;

  for (size_t i = 0; i < inputs.count; ++i) {
    const auto& input = inputs.descs[i];

    // Three vertex fetch constants share one six-register fetch group.
    int group_reg =
        XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (input.fetch_slot / 3) * 6;
    auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(
        &register_file_.values[group_reg]);
    const auto slot_in_group = input.fetch_slot % 3;
    xe_gpu_vertex_fetch_t* fetch = nullptr;
    if (slot_in_group == 0) {
      fetch = &group->vertex_fetch_0;
    } else if (slot_in_group == 1) {
      fetch = &group->vertex_fetch_1;
    } else if (slot_in_group == 2) {
      fetch = &group->vertex_fetch_2;
    }
    XEASSERTNOTNULL(fetch);
    // If this assert doesn't hold, maybe we just abort?
    XEASSERT(fetch->type == 0x3);
    XEASSERTNOTZERO(fetch->size);

    // The fetch constant holds a dword address and a dword count.
    const uint32_t guest_base = (fetch->address << 2) + address_translation_;
    MemoryRange buffer_range(memory_->Translate(guest_base), guest_base,
                             fetch->size * 4);

    // TODO(benvanik): if the memory range is within the command buffer, we
    //     should use a cached transient buffer.
    auto vertex_buffer =
        resource_cache()->FetchVertexBuffer(buffer_range, input.info);
    if (!vertex_buffer) {
      XELOGE("Unable to create vertex fetch buffer");
      return 1;
    }

    auto& binding = command.vertex_buffers[i];
    binding.input_index = input.input_index;
    binding.buffer = vertex_buffer;
    binding.stride = input.info.stride_words * 4;
    binding.offset = 0;
  }
  return 0;
}
// Resolves the texture and sampler state for every sampler input of the
// bound vertex shader, then of the bound pixel shader, writing the results
// into |command|. Returns 0 on success, 1 as soon as PopulateSamplerSet
// reports a failure.
int GraphicsDriver::PopulateSamplers(DrawCommand& command) {
  SCOPE_profile_cpu_f("gpu");

  // Vertex texture samplers.
  const auto& vs_inputs = command.vertex_shader->sampler_inputs();
  command.vertex_shader_sampler_count = vs_inputs.count;
  for (size_t slot = 0; slot < command.vertex_shader_sampler_count; ++slot) {
    const int failed = PopulateSamplerSet(
        vs_inputs.descs[slot], command.vertex_shader_samplers[slot]);
    if (failed) {
      return 1;
    }
  }

  // Pixel shader texture sampler.
  const auto& ps_inputs = command.pixel_shader->sampler_inputs();
  command.pixel_shader_sampler_count = ps_inputs.count;
  for (size_t slot = 0; slot < command.pixel_shader_sampler_count; ++slot) {
    const int failed = PopulateSamplerSet(
        ps_inputs.descs[slot], command.pixel_shader_samplers[slot]);
    if (failed) {
      return 1;
    }
  }

  return 0;
}
// Resolves one shader sampler input into a texture + sampler state pair.
//
// |src_input| names the fetch slot and shader input index; |dst_input| is the
// draw command entry to fill. The entry is always reset first (input index
// set, texture and sampler state null) so that every early-out below leaves a
// well-defined slot rather than whatever an earlier draw or uninitialized
// memory left there. The original reset it only after the fetch type check.
//
// Always returns 0: an unusable texture is reported through null pointers in
// |dst_input|, not through the return code.
int GraphicsDriver::PopulateSamplerSet(
    const ShaderResource::SamplerDesc& src_input,
    DrawCommand::SamplerInput& dst_input) {
  dst_input.input_index = src_input.input_index;
  dst_input.texture = nullptr;
  dst_input.sampler_state = nullptr;

  // Each texture fetch constant occupies one six-register fetch group.
  int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + src_input.fetch_slot * 6;
  const auto group = (const xe_gpu_fetch_group_t*)&register_file_.values[r];
  const xenos::xe_gpu_texture_fetch_t& fetch = group->texture_fetch;
  if (fetch.type != 0x2) {
    // Not a texture fetch constant; leave the slot empty.
    return 0;
  }

  TextureResource::Info info;
  if (!TextureResource::Info::Prepare(fetch, info)) {
    XELOGE("D3D11: unable to parse texture fetcher info");
    return 0;  // invalid texture used
  }
  if (info.format == DXGI_FORMAT_UNKNOWN) {
    XELOGW("D3D11: unknown texture format %d", info.format);
    return 0;  // invalid texture used
  }

  // TODO(benvanik): quick validate without refetching intraframe.
  // Fetch texture from the cache.
  MemoryRange memory_range;
  memory_range.guest_base = (fetch.address << 12) + address_translation_;
  memory_range.host_base = memory_->Translate(memory_range.guest_base);
  memory_range.length = info.input_length;

  auto texture = resource_cache()->FetchTexture(memory_range, info);
  if (!texture) {
    XELOGW("D3D11: unable to fetch texture");
    return 0;  // invalid texture used
  }

  SamplerStateResource::Info sampler_info;
  if (!SamplerStateResource::Info::Prepare(fetch,
                                           src_input.tex_fetch,
                                           sampler_info)) {
    XELOGW("D3D11: unable to parse sampler info");
    return 0;  // invalid texture used
  }
  auto sampler_state = resource_cache()->FetchSamplerState(sampler_info);
  if (!sampler_state) {
    XELOGW("D3D11: unable to fetch sampler");
    return 0;  // invalid texture used
  }

  // Only publish the pair once both halves resolved.
  dst_input.texture = texture;
  dst_input.sampler_state = sampler_state;
  return 0;
}

View File

@ -11,7 +11,9 @@
#define XENIA_GPU_GRAPHICS_DRIVER_H_
#include <xenia/core.h>
#include <xenia/gpu/xenos/registers.h>
#include <xenia/gpu/draw_command.h>
#include <xenia/gpu/register_file.h>
#include <xenia/gpu/resource_cache.h>
#include <xenia/gpu/xenos/xenos.h>
@ -24,38 +26,45 @@ public:
virtual ~GraphicsDriver();
Memory* memory() const { return memory_; }
xenos::RegisterFile* register_file() { return &register_file_; };
virtual ResourceCache* resource_cache() const = 0;
RegisterFile* register_file() { return &register_file_; };
void set_address_translation(uint32_t value) {
address_translation_ = value;
}
virtual void Initialize() = 0;
virtual int Initialize() = 0;
virtual void InvalidateState(
uint32_t mask) = 0;
virtual void SetShader(
xenos::XE_GPU_SHADER_TYPE type,
uint32_t address,
uint32_t start,
uint32_t length) = 0;
virtual void DrawIndexBuffer(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness) = 0;
//virtual void DrawIndexImmediate();
virtual void DrawIndexAuto(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count) = 0;
int LoadShader(xenos::XE_GPU_SHADER_TYPE type,
uint32_t address, uint32_t length,
uint32_t start);
int PrepareDraw(DrawCommand& command);
int PrepareDrawIndexBuffer(DrawCommand& command,
uint32_t address, uint32_t length,
xenos::XE_GPU_ENDIAN endianness,
IndexFormat format);
virtual int Draw(const DrawCommand& command) = 0;
virtual int Resolve() = 0;
private:
int PopulateState(DrawCommand& command);
int PopulateConstantBuffers(DrawCommand& command);
int PopulateShaders(DrawCommand& command);
int PopulateInputAssembly(DrawCommand& command);
int PopulateSamplers(DrawCommand& command);
int PopulateSamplerSet(const ShaderResource::SamplerDesc& src_input,
DrawCommand::SamplerInput& dst_input);
protected:
GraphicsDriver(Memory* memory);
Memory* memory_;
xenos::RegisterFile register_file_;
RegisterFile register_file_;
uint32_t address_translation_;
VertexShaderResource* vertex_shader_;
PixelShaderResource* pixel_shader_;
};

View File

@ -11,9 +11,10 @@
#include <xenia/emulator.h>
#include <xenia/cpu/processor.h>
#include <xenia/gpu/command_processor.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/graphics_driver.h>
#include <xenia/gpu/ring_buffer_worker.h>
#include <xenia/gpu/xenos/registers.h>
#include <xenia/gpu/register_file.h>
using namespace xe;
@ -24,10 +25,10 @@ using namespace xe::gpu::xenos;
GraphicsSystem::GraphicsSystem(Emulator* emulator) :
emulator_(emulator), memory_(emulator->memory()),
thread_(0), running_(false), driver_(0), worker_(0),
thread_(nullptr), running_(false), driver_(nullptr),
command_processor_(nullptr),
interrupt_callback_(0), interrupt_callback_data_(0),
last_interrupt_time_(0), swap_pending_(false),
thread_wait_(NULL) {
last_interrupt_time_(0), swap_pending_(false), thread_wait_(nullptr) {
// Create the run loop used for any windows/etc.
// This must be done on the thread we create the driver.
run_loop_ = xe_run_loop_create();
@ -42,7 +43,7 @@ X_STATUS GraphicsSystem::Setup() {
processor_ = emulator_->processor();
// Create worker.
worker_ = new RingBufferWorker(this, memory_);
command_processor_ = new CommandProcessor(this, memory_);
// Let the processor know we want register access callbacks.
emulator_->memory()->AddMappedRange(
@ -77,15 +78,18 @@ void GraphicsSystem::ThreadStart() {
// Main run loop.
while (running_) {
// Peek main run loop.
{
SCOPE_profile_cpu_i("gpu", "GraphicsSystemRunLoopPump");
if (xe_run_loop_pump(run_loop)) {
break;
}
}
if (!running_) {
break;
}
// Pump worker.
worker_->Pump();
command_processor_->Pump();
if (!running_) {
break;
@ -107,7 +111,7 @@ void GraphicsSystem::Shutdown() {
xe_thread_join(thread_);
xe_thread_release(thread_);
delete worker_;
delete command_processor_;
xe_run_loop_release(run_loop_);
}
@ -125,17 +129,19 @@ void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) {
Sleep(0);
}
XEASSERTNOTNULL(driver_);
worker_->Initialize(driver_, ptr, page_count);
command_processor_->Initialize(driver_, ptr, page_count);
}
void GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr,
uint32_t block_size) {
worker_->EnableReadPointerWriteBack(ptr, block_size);
command_processor_->EnableReadPointerWriteBack(ptr, block_size);
}
uint64_t GraphicsSystem::ReadRegister(uint64_t addr) {
uint32_t r = addr & 0xFFFF;
if (FLAGS_trace_ring_buffer) {
XELOGGPU("ReadRegister(%.4X)", r);
}
RegisterFile* regs = driver_->register_file();
@ -148,31 +154,33 @@ uint64_t GraphicsSystem::ReadRegister(uint64_t addr) {
return 1;
}
XEASSERT(r >= 0 && r < kXEGpuRegisterCount);
XEASSERT(r >= 0 && r < RegisterFile::kRegisterCount);
return regs->values[r].u32;
}
void GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) {
uint32_t r = addr & 0xFFFF;
if (FLAGS_trace_ring_buffer) {
XELOGGPU("WriteRegister(%.4X, %.8X)", r, value);
}
RegisterFile* regs = driver_->register_file();
switch (r) {
case 0x0714: // CP_RB_WPTR
worker_->UpdateWritePointer((uint32_t)value);
command_processor_->UpdateWritePointer((uint32_t)value);
break;
default:
XELOGW("Unknown GPU register %.4X write: %.8X", r, value);
break;
}
XEASSERT(r >= 0 && r < kXEGpuRegisterCount);
XEASSERT(r >= 0 && r < RegisterFile::kRegisterCount);
regs->values[r].u32 = (uint32_t)value;
}
void GraphicsSystem::MarkVblank() {
worker_->increment_counter();
command_processor_->increment_counter();
}
void GraphicsSystem::DispatchInterruptCallback(

View File

@ -21,8 +21,8 @@ XEDECLARECLASS2(xe, cpu, Processor);
namespace xe {
namespace gpu {
class CommandProcessor;
class GraphicsDriver;
class RingBufferWorker;
class GraphicsSystem {
@ -78,7 +78,7 @@ protected:
bool running_;
GraphicsDriver* driver_;
RingBufferWorker* worker_;
CommandProcessor* command_processor_;
uint32_t interrupt_callback_;
uint32_t interrupt_callback_data_;

View File

@ -10,7 +10,6 @@
#include <xenia/gpu/nop/nop_graphics_driver.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/shader_cache.h>
using namespace xe;
@ -19,69 +18,19 @@ using namespace xe::gpu::nop;
using namespace xe::gpu::xenos;
NopGraphicsDriver::NopGraphicsDriver(Memory* memory) :
GraphicsDriver(memory) {
shader_cache_ = new ShaderCache();
NopGraphicsDriver::NopGraphicsDriver(Memory* memory)
: GraphicsDriver(memory), resource_cache_(nullptr) {
}
NopGraphicsDriver::~NopGraphicsDriver() {
delete shader_cache_;
}
void NopGraphicsDriver::Initialize() {
int NopGraphicsDriver::Initialize() {
return 0;
}
// Logs which parts of |mask| were invalidated; performs no other work.
// The "all" message is only logged when |mask| equals
// XE_GPU_INVALIDATE_MASK_ALL exactly; the shader messages are logged when
// their bit is set.
void NopGraphicsDriver::InvalidateState(
uint32_t mask) {
if (mask == XE_GPU_INVALIDATE_MASK_ALL) {
XELOGGPU("NOP: (invalidate all)");
}
if (mask & XE_GPU_INVALIDATE_MASK_VERTEX_SHADER) {
XELOGGPU("NOP: invalidate vertex shader");
}
if (mask & XE_GPU_INVALIDATE_MASK_PIXEL_SHADER) {
XELOGGPU("NOP: invalidate pixel shader");
}
}
// Looks up (or creates) the shader at guest |address| in the shader cache
// and logs its disassembly. |start| is not used.
// NOTE(review): the result of FindOrCreate is dereferenced without a null
// check; confirm it cannot return null.
void NopGraphicsDriver::SetShader(
XE_GPU_SHADER_TYPE type,
uint32_t address,
uint32_t start,
uint32_t length) {
// Find or create shader in the cache.
uint8_t* p = memory_->Translate(address);
Shader* shader = shader_cache_->FindOrCreate(
type, p, length);
// Disassemble.
const char* source = shader->disasm_src();
if (!source) {
source = "<failed to disassemble>";
}
XELOGGPU("NOP: set shader %d at %0.8X (%db):\n%s",
type, address, length, source);
}
// Logs that an indexed draw was requested; none of the arguments are read.
void NopGraphicsDriver::DrawIndexBuffer(
XE_GPU_PRIMITIVE_TYPE prim_type,
bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness) {
XELOGGPU("NOP: draw index buffer");
}
void NopGraphicsDriver::DrawIndexAuto(
XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count) {
XELOGGPU("NOP: draw indexed %d (%d indicies)",
prim_type, index_count);
// TODO(benvanik):
// program control
// context misc
// interpolator control
// shader constants / bools / integers
// fetch constants
int NopGraphicsDriver::Draw(const DrawCommand& command) {
return 0;
}
int NopGraphicsDriver::Resolve() {

View File

@ -19,9 +19,6 @@
namespace xe {
namespace gpu {
class ShaderCache;
namespace nop {
@ -30,27 +27,16 @@ public:
NopGraphicsDriver(Memory* memory);
virtual ~NopGraphicsDriver();
virtual void Initialize();
ResourceCache* resource_cache() const override { return resource_cache_; }
virtual void InvalidateState(
uint32_t mask);
virtual void SetShader(
xenos::XE_GPU_SHADER_TYPE type,
uint32_t address,
uint32_t start,
uint32_t length);
virtual void DrawIndexBuffer(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
bool index_32bit, uint32_t index_count,
uint32_t index_base, uint32_t index_size, uint32_t endianness);
virtual void DrawIndexAuto(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count);
int Initialize() override;
virtual int Resolve();
int Draw(const DrawCommand& command) override;
int Resolve() override;
protected:
ShaderCache* shader_cache_;
ResourceCache* resource_cache_;
};

View File

@ -10,8 +10,21 @@
#include <xenia/gpu/register_file.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Clears the whole register array via xe_zero_struct.
RegisterFile::RegisterFile() {
xe_zero_struct(values, sizeof(values));
}
// Maps a register index to its name as listed in register_table.inc.
// Returns a null pointer for indices that have no table entry.
const char* RegisterFile::GetRegisterName(uint32_t index) {
  switch (index) {
// Every table entry becomes one case label returning the stringified name.
#define XE_GPU_REGISTER(index, type, name) \
    case index:                            \
      return #name;
#include <xenia/gpu/xenos/register_table.inc>
#undef XE_GPU_REGISTER
    default:
      return nullptr;
  }
}

View File

@ -11,15 +11,36 @@
#define XENIA_GPU_REGISTER_FILE_H_
#include <xenia/core.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
// One enumerator per entry of register_table.inc, named XE_GPU_REG_<name>
// and valued with the register's index.
enum Register {
#define XE_GPU_REGISTER(index, type, name) \
XE_GPU_REG_##name = index,
#include <xenia/gpu/xenos/register_table.inc>
#undef XE_GPU_REGISTER
};
// Flat array of GPU register values, indexable by Register enumerator.
class RegisterFile {
public:
// Zeroes every register.
RegisterFile();
// Returns the register's name, or null when the index has no table entry.
const char* GetRegisterName(uint32_t index);
// Number of entries in |values|.
static const size_t kRegisterCount = 0x5003;
// A register can be viewed as raw bits or as a float.
union RegisterValue {
uint32_t u32;
float f32;
};
RegisterValue values[kRegisterCount];
// Unchecked access: |reg| is used directly as an index into |values|.
RegisterValue& operator[](Register reg) {
return values[reg];
}
};

View File

@ -15,3 +15,23 @@ using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Stores a copy of |memory_range| in memory_range_.
HashedResource::HashedResource(const MemoryRange& memory_range)
: memory_range_(memory_range) {
}
// Out-of-line defaulted destructor.
HashedResource::~HashedResource() = default;
// Stores a copy of |memory_range|; a new resource starts out dirty.
PagedResource::PagedResource(const MemoryRange& memory_range)
: memory_range_(memory_range), dirtied_(true) {
}
// Out-of-line defaulted destructor.
PagedResource::~PagedResource() = default;
// Flags the resource as dirty. The address bounds are currently unused: only
// the single dirtied_ flag is set, no per-page tracking happens here.
void PagedResource::MarkDirty(uint32_t lo_address, uint32_t hi_address) {
dirtied_ = true;
}
// Out-of-line defaulted constructor and destructor; StaticResource adds no
// state of its own.
StaticResource::StaticResource() = default;
StaticResource::~StaticResource() = default;

View File

@ -18,8 +18,82 @@ namespace xe {
namespace gpu {
// Plain value type pairing a host pointer with a guest address and a length.
// ResourceCache hashes `length` bytes starting at `host_base` and keys paged
// resources by `guest_base` modulo 0x20000000.
//
// Copying is left to the compiler-generated members (Rule of Zero): the
// previous hand-written copy constructor only repeated the memberwise copy
// while making the type non-trivially-copyable and deprecating the implicit
// copy assignment.
struct MemoryRange {
  uint8_t* host_base;
  uint32_t guest_base;
  uint32_t length;

  // Empty range: null host pointer, guest address 0, zero length.
  MemoryRange() : host_base(nullptr), guest_base(0), length(0) {}

  // Range with every field given explicitly.
  MemoryRange(uint8_t* _host_base, uint32_t _guest_base, uint32_t _length)
      : host_base(_host_base), guest_base(_guest_base), length(_length) {}
};
// Abstract root of the resource hierarchy. A concrete resource exposes an
// untyped handle; callers that know the real type use handle_as<T>().
class Resource {
 public:
  virtual ~Resource() = default;

  // Untyped handle supplied by the concrete resource.
  virtual void* handle() const = 0;

  // Reinterprets handle() as a T*. No type checking is performed.
  template <typename T>
  T* handle_as() {
    void* untyped = handle();
    return reinterpret_cast<T*>(untyped);
  }

 protected:
  // Only subclasses may construct a Resource.
  Resource() = default;

  // last use/LRU stuff
};
// Resource that remembers the memory range it was created from.
// ResourceCache keys these by a hash of the range's bytes.
class HashedResource : public Resource {
public:
~HashedResource() override;
// Range this resource was constructed with.
const MemoryRange& memory_range() const { return memory_range_; }
protected:
// Copies |memory_range| into memory_range_.
HashedResource(const MemoryRange& memory_range);
MemoryRange memory_range_;
// key
};
// Resource backed by a memory range that carries a dirty flag. The flag is
// set at construction and by MarkDirty().
class PagedResource : public Resource {
public:
~PagedResource() override;
// Range this resource was constructed with.
const MemoryRange& memory_range() const { return memory_range_; }
// Typed convenience wrapper: forwards the address and size of |info| to the
// virtual Equals() below.
template <typename T>
bool Equals(const T& info) {
return Equals(&info, sizeof(info));
}
// Compares against an opaque info blob of |info_length| bytes; the
// comparison itself is defined by the subclass.
virtual bool Equals(const void* info_ptr, size_t info_length) = 0;
// True once the resource has been flagged dirty (new resources start dirty).
bool is_dirty() const { return dirtied_; }
// Flags the resource dirty; see the definition for how the bounds are used.
void MarkDirty(uint32_t lo_address, uint32_t hi_address);
protected:
// Copies |memory_range| and starts in the dirty state.
PagedResource(const MemoryRange& memory_range);
MemoryRange memory_range_;
bool dirtied_;
// dirtied pages list
};
// Resource that carries no memory range of its own. ResourceCache stores
// these in a map keyed by a hash of their creation info.
class StaticResource : public Resource {
public:
~StaticResource() override;
protected:
// Only subclasses may construct a StaticResource.
StaticResource();
};

View File

@ -15,3 +15,140 @@ using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Stores |memory| for later page-table lookups (see SyncRange); the pointer
// is kept as-is and not owned here.
ResourceCache::ResourceCache(Memory* memory)
: memory_(memory) {
}
// Destroys every resource recorded in resources_ and empties the list. The
// lookup maps hold the same pointers and are not touched here.
ResourceCache::~ResourceCache() {
  for (Resource* owned : resources_) {
    delete owned;
  }
  resources_.clear();
}
// Looks up, or creates through CreateVertexShader, the vertex shader for the
// bytes in |memory_range| using the hashed-resource cache.
VertexShaderResource* ResourceCache::FetchVertexShader(
    const MemoryRange& memory_range,
    const VertexShaderResource::Info& info) {
  auto create_fn = &ResourceCache::CreateVertexShader;
  return FetchHashedResource<VertexShaderResource>(
      memory_range, info, create_fn);
}
// Looks up, or creates through CreatePixelShader, the pixel shader for the
// bytes in |memory_range| using the hashed-resource cache.
PixelShaderResource* ResourceCache::FetchPixelShader(
    const MemoryRange& memory_range,
    const PixelShaderResource::Info& info) {
  auto create_fn = &ResourceCache::CreatePixelShader;
  return FetchHashedResource<PixelShaderResource>(
      memory_range, info, create_fn);
}
// Fetches (creating on first use) the texture for |memory_range| / |info| and
// prepares it. Returns nullptr when the lookup yields nothing or when
// Prepare() reports failure with a non-zero result.
TextureResource* ResourceCache::FetchTexture(
    const MemoryRange& memory_range,
    const TextureResource::Info& info) {
  TextureResource* texture = FetchPagedResource<TextureResource>(
      memory_range, info, &ResourceCache::CreateTexture);
  if (texture == nullptr) {
    return nullptr;
  }
  const bool prepare_failed = texture->Prepare() ? true : false;
  if (prepare_failed) {
    XELOGE("Unable to prepare texture");
    return nullptr;
  }
  return texture;
}
// Returns the sampler state matching |info|, creating and preparing it on
// first use. Sampler states are cached by info.hash().
//
// Returns nullptr when the factory yields no resource or when Prepare()
// reports failure (non-zero). A resource that fails to prepare has not been
// added to any cache or to resources_, so it is destroyed here instead of
// being leaked.
SamplerStateResource* ResourceCache::FetchSamplerState(
    const SamplerStateResource::Info& info) {
  auto key = info.hash();
  auto it = static_resources_.find(key);
  if (it != static_resources_.end()) {
    return static_cast<SamplerStateResource*>(it->second);
  }

  auto resource = CreateSamplerState(info);
  if (!resource) {
    // Same contract as the other Fetch* methods: creation may fail.
    return nullptr;
  }
  if (resource->Prepare()) {
    XELOGE("Unable to prepare sampler state");
    // Nothing else references the resource yet; free it the same way the
    // destructor frees tracked resources.
    delete resource;
    return nullptr;
  }

  static_resources_.insert({ key, resource });
  resources_.push_back(resource);
  return resource;
}
// Fetches (creating on first use) the index buffer for |memory_range| /
// |info| and prepares it. Returns nullptr when the lookup yields nothing or
// when Prepare() returns non-zero.
IndexBufferResource* ResourceCache::FetchIndexBuffer(
    const MemoryRange& memory_range,
    const IndexBufferResource::Info& info) {
  IndexBufferResource* index_buffer =
      FetchPagedResource<IndexBufferResource>(
          memory_range, info, &ResourceCache::CreateIndexBuffer);
  if (index_buffer) {
    if (index_buffer->Prepare()) {
      XELOGE("Unable to prepare index buffer");
      index_buffer = nullptr;
    }
  }
  return index_buffer;
}
// Fetches (creating on first use) the vertex buffer for |memory_range| /
// |info| and prepares it. Returns nullptr when the lookup yields nothing or
// when Prepare() returns non-zero.
VertexBufferResource* ResourceCache::FetchVertexBuffer(
    const MemoryRange& memory_range,
    const VertexBufferResource::Info& info) {
  VertexBufferResource* vertex_buffer =
      FetchPagedResource<VertexBufferResource>(
          memory_range, info, &ResourceCache::CreateVertexBuffer);
  if (vertex_buffer) {
    if (vertex_buffer->Prepare()) {
      XELOGE("Unable to prepare vertex buffer");
      vertex_buffer = nullptr;
    }
  }
  return vertex_buffer;
}
// Returns the xe_hash64 of the range's host memory: |memory_range.length| is
// passed as the size starting at |memory_range.host_base|. The guest address
// is not part of the hash.
uint64_t ResourceCache::HashRange(const MemoryRange& memory_range) {
// We could do something smarter here to potentially early exit.
return xe_hash64(memory_range.host_base, memory_range.length);
}
// Marks paged resources dirty when the page table reports writes inside
// [address, address + length), then clears the scanned page-table entries.
//
// |address| is reduced modulo 0x20000000 before use; |length| is in bytes.
void ResourceCache::SyncRange(uint32_t address, int length) {
  // Scan the page table in sync with our resource list. This means
  // we have O(n) complexity for updates, though we could definitely
  // make this faster/cleaner.
  // TODO(benvanik): actually do this right.
  // For now we assume the page table in the range of our resources
  // will not be changing, which allows us to do a foreach(res) and reload
  // and then clear the table.

  // total bytes = (512 * 1024 * 1024) / (16 * 1024) = 32768
  // each byte = 1 page
  // Walk as qwords so we can clear things up faster.
  uint64_t* page_table = reinterpret_cast<uint64_t*>(
      memory_->Translate(memory_->page_table()));
  const int page_size = 16 * 1024;  // 16KB pages
  // Index of the last qword in the table (32768 page bytes / 8 - 1). Ranges
  // that reach the top of the 512MB window would otherwise index past it.
  const int last_qword = (0x20000000 / page_size) / 8 - 1;

  uint32_t lo_address = address % 0x20000000;
  uint32_t hi_address = lo_address + length;
  hi_address = (hi_address / page_size) * page_size + page_size;
  int start_page = lo_address / page_size;
  int end_page = hi_address / page_size;

  // lower_bound (not upper_bound) so a resource that begins exactly at
  // lo_address is part of the scan instead of being skipped.
  auto it = paged_resources_.lower_bound(lo_address);
  auto end_it = paged_resources_.lower_bound(hi_address);
  while (it != end_it) {
    const auto& memory_range = it->second->memory_range();
    int lo_page = (memory_range.guest_base % 0x20000000) / page_size;
    int hi_page = lo_page + (memory_range.length / page_size);
    int hi_qword = hi_page / 8;
    if (hi_qword > last_qword) {
      hi_qword = last_qword;
    }
    for (int i = lo_page / 8; i <= hi_qword; ++i) {
      uint64_t page_flags = page_table[i];
      if (page_flags) {
        // Dirty!
        it->second->MarkDirty(i * 8 * page_size, (i * 8 + 7) * page_size);
      }
    }
    ++it;
  }

  // Reset page table.
  int reset_end = end_page / 8;
  if (reset_end > last_qword) {
    reset_end = last_qword;
  }
  for (int i = start_page / 8; i <= reset_end; ++i) {
    page_table[i] = 0;
  }
}

View File

@ -10,7 +10,14 @@
#ifndef XENIA_GPU_RESOURCE_CACHE_H_
#define XENIA_GPU_RESOURCE_CACHE_H_
#include <map>
#include <xenia/core.h>
#include <xenia/gpu/buffer_resource.h>
#include <xenia/gpu/resource.h>
#include <xenia/gpu/sampler_state_resource.h>
#include <xenia/gpu/shader_resource.h>
#include <xenia/gpu/texture_resource.h>
#include <xenia/gpu/xenos/xenos.h>
@ -20,6 +27,96 @@ namespace gpu {
// Creates, caches and owns GPU resources. Backends subclass it and implement
// the Create* factories; callers go through the Fetch* methods, which return
// an existing resource when one matches and create one otherwise.
//
// Every created resource is recorded in resources_ and destroyed by the
// destructor; the lookup maps hold non-owning aliases of the same pointers.
class ResourceCache {
 public:
  virtual ~ResourceCache();

  VertexShaderResource* FetchVertexShader(
      const MemoryRange& memory_range,
      const VertexShaderResource::Info& info);
  PixelShaderResource* FetchPixelShader(
      const MemoryRange& memory_range,
      const PixelShaderResource::Info& info);

  TextureResource* FetchTexture(
      const MemoryRange& memory_range,
      const TextureResource::Info& info);
  SamplerStateResource* FetchSamplerState(
      const SamplerStateResource::Info& info);

  IndexBufferResource* FetchIndexBuffer(
      const MemoryRange& memory_range,
      const IndexBufferResource::Info& info);
  VertexBufferResource* FetchVertexBuffer(
      const MemoryRange& memory_range,
      const VertexBufferResource::Info& info);

  // Hashes the host bytes described by |memory_range|.
  uint64_t HashRange(const MemoryRange& memory_range);

  // Dirties paged resources whose pages were written in the given range.
  void SyncRange(uint32_t address, int length);

 protected:
  ResourceCache(Memory* memory);

  // Returns the cached resource whose key equals the hash of the bytes in
  // |memory_range|, or creates one through |factory| (a pointer to one of the
  // Create* members). |info| is only forwarded to the factory; it does not
  // take part in the lookup. Returns nullptr, and caches nothing, when the
  // factory fails.
  template <typename T, typename V>
  T* FetchHashedResource(const MemoryRange& memory_range,
                         const typename T::Info& info,
                         const V& factory) {
    // TODO(benvanik): if there's no way it's changed and it's been checked,
    //     just lookup. This way we don't rehash 100x a frame.
    auto key = HashRange(memory_range);
    auto it = hashed_resources_.find(key);
    if (it != hashed_resources_.end()) {
      return static_cast<T*>(it->second);
    }
    auto resource = (this->*factory)(memory_range, info);
    if (!resource) {
      // Do not cache failures: a null entry would be returned as a permanent
      // "hit" for this key.
      return nullptr;
    }
    hashed_resources_.insert({ key, resource });
    resources_.push_back(resource);
    return resource;
  }

  // Returns the cached paged resource that starts at the same guest address
  // (modulo 0x20000000), has the same length and reports Equals(info), or
  // creates one through |factory|. Returns nullptr, and caches nothing, when
  // the factory fails.
  template <typename T, typename V>
  T* FetchPagedResource(const MemoryRange& memory_range,
                        const typename T::Info& info,
                        const V& factory) {
    uint32_t lo_address = memory_range.guest_base % 0x20000000;
    auto key = uint64_t(lo_address);
    auto range = paged_resources_.equal_range(key);
    for (auto it = range.first; it != range.second; ++it) {
      if (it->second->memory_range().length == memory_range.length &&
          it->second->Equals(info)) {
        return static_cast<T*>(it->second);
      }
    }
    auto resource = (this->*factory)(memory_range, info);
    if (!resource) {
      // A null entry in paged_resources_ would be dereferenced by the loop
      // above on the next fetch and by SyncRange.
      return nullptr;
    }
    paged_resources_.insert({ key, resource });
    resources_.push_back(resource);
    return resource;
  }

  // Backend factories, called on a cache miss by the Fetch* methods.
  virtual VertexShaderResource* CreateVertexShader(
      const MemoryRange& memory_range,
      const VertexShaderResource::Info& info) = 0;
  virtual PixelShaderResource* CreatePixelShader(
      const MemoryRange& memory_range,
      const PixelShaderResource::Info& info) = 0;
  virtual TextureResource* CreateTexture(
      const MemoryRange& memory_range,
      const TextureResource::Info& info) = 0;
  virtual SamplerStateResource* CreateSamplerState(
      const SamplerStateResource::Info& info) = 0;
  virtual IndexBufferResource* CreateIndexBuffer(
      const MemoryRange& memory_range,
      const IndexBufferResource::Info& info) = 0;
  virtual VertexBufferResource* CreateVertexBuffer(
      const MemoryRange& memory_range,
      const VertexBufferResource::Info& info) = 0;

 private:
  Memory* memory_;

  // Every resource created by this cache; deleted in the destructor.
  std::vector<Resource*> resources_;
  // Keyed by HashRange() of the source bytes.
  std::unordered_map<uint64_t, HashedResource*> hashed_resources_;
  // Keyed by the creation info's hash().
  std::unordered_map<uint64_t, StaticResource*> static_resources_;
  // Keyed by guest base address modulo 0x20000000; several resources may
  // share one address.
  std::multimap<uint64_t, PagedResource*> paged_resources_;
};

View File

@ -1,741 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/ring_buffer_worker.h>
#include <xenia/gpu/gpu-private.h>
#include <xenia/gpu/graphics_driver.h>
#include <xenia/gpu/graphics_system.h>
#include <xenia/gpu/xenos/packets.h>
#include <xenia/gpu/xenos/registers.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
#define XETRACERB(fmt, ...) if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__)
// Builds an idle worker: no driver attached, all ring buffer state zeroed, an
// auto-reset unsignaled event for write pointer updates, and the time base
// captured from the performance counter.
RingBufferWorker::RingBufferWorker(
    GraphicsSystem* graphics_system, Memory* memory)
    : graphics_system_(graphics_system), memory_(memory), driver_(0) {
  // Ring buffer location and read/write cursors all start at zero.
  primary_buffer_ptr_ = 0;
  primary_buffer_size_ = 0;
  read_ptr_index_ = 0;
  read_ptr_update_freq_ = 0;
  read_ptr_writeback_ptr_ = 0;
  write_ptr_index_ = 0;
  write_ptr_max_index_ = 0;
  counter_ = 0;

  // Signaled by UpdateWritePointer(), waited on by Pump().
  write_ptr_index_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);

  // QueryTime() reports counter ticks relative to this moment.
  LARGE_INTEGER now;
  QueryPerformanceCounter(&now);
  time_base_ = now.QuadPart;
}
// Signals the write pointer event one last time and then closes its handle.
// NOTE(review): presumably the SetEvent is meant to release a Pump() call
// that is still waiting - confirm nothing waits on the handle after close.
RingBufferWorker::~RingBufferWorker() {
SetEvent(write_ptr_index_event_);
CloseHandle(write_ptr_index_event_);
}
// Returns performance counter ticks elapsed since the worker was constructed.
// The value is in raw counter units, not converted to time.
uint64_t RingBufferWorker::QueryTime() {
  LARGE_INTEGER now;
  QueryPerformanceCounter(&now);
  const uint64_t elapsed_ticks = now.QuadPart - time_base_;
  return elapsed_ticks;
}
// Attaches |driver| and records where the primary ring buffer lives.
// |ptr| is the buffer address; |page_count| is turned into a byte size with
// the shift below. The read index is reset to the start of the buffer.
void RingBufferWorker::Initialize(GraphicsDriver* driver,
                                  uint32_t ptr, uint32_t page_count) {
  driver_ = driver;
  primary_buffer_ptr_ = ptr;
  read_ptr_index_ = 0;

  // Not sure this is correct, but it's a way to take the page_count back to
  // the number of bytes allocated by the physical alloc.
  primary_buffer_size_ = 1 << (0x1C - page_count - 1);

  // Tell the driver what to use for translation.
  driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF);
}
// Records where the read pointer should be written back to and how often.
// |ptr| is combined with the top three address bits of the primary buffer;
// |block_size| is a log2 value turned into an update frequency.
void RingBufferWorker::EnableReadPointerWriteBack(uint32_t ptr,
                                                  uint32_t block_size) {
  // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C
  // ptr = RB_RPTR_ADDR, pointer to write back the address to.
  const uint32_t translation_base = primary_buffer_ptr_ & ~0x1FFFFFFF;
  read_ptr_writeback_ptr_ = translation_base + ptr;

  // CP_RB_CNTL Ring Buffer Control 0x704
  // block_size = RB_BLKSZ, number of quadwords read between updates of the
  //              read pointer.
  const double block_units = pow(2.0, static_cast<double>(block_size));
  read_ptr_update_freq_ = static_cast<uint32_t>(block_units) / 4;
}
// Publishes a new write index and wakes Pump(). The running maximum is
// updated first, then the index itself, then the event is signaled.
// NOTE(review): the indices are plain volatile fields, not atomics - confirm
// which threads call this versus Pump().
void RingBufferWorker::UpdateWritePointer(uint32_t value) {
write_ptr_max_index_ = MAX(write_ptr_max_index_, value);
write_ptr_index_ = value;
SetEvent(write_ptr_index_event_);
}
// Runs one iteration of command processing: waits briefly for new commands if
// there are none, executes everything between the read and write indices,
// then advances the read index and optionally writes it back to memory.
void RingBufferWorker::Pump() {
uint8_t* p = memory_->membase();
// Nothing to do yet (0xBAADF00D is presumably an "unset" sentinel written by
// the guest - TODO confirm) or the reader has caught up with the writer.
if (write_ptr_index_ == 0xBAADF00D ||
read_ptr_index_ == write_ptr_index_) {
// Check if the pointer has moved.
// We wait a short bit here to yield time. Since we are also running the
// main window display we don't want to pause too long, though.
const int wait_time_ms = 1;
if (WaitForSingleObject(write_ptr_index_event_,
wait_time_ms) == WAIT_TIMEOUT) {
return;
}
}
// Bring local so we don't have to worry about them changing out from under
// us.
uint32_t write_ptr_index = write_ptr_index_;
// NOTE(review): write_ptr_max_index is captured but not used below.
uint32_t write_ptr_max_index = write_ptr_max_index_;
// The event may have fired without the index actually moving.
if (read_ptr_index_ == write_ptr_index) {
return;
}
// Process the new commands.
XETRACERB("Ring buffer thread work");
// Execute. Note that we handle wraparound transparently.
ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
read_ptr_index_ = write_ptr_index;
// TODO(benvanik): use read_ptr_update_freq_ and only issue after moving
// that many indices.
// Write the new read index back big-endian, if write-back was enabled.
if (read_ptr_writeback_ptr_) {
XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_);
}
}
// Executes the packets in the primary ring buffer between two word indices.
// |start_index| and |end_index| are in 32-bit words relative to the start of
// the buffer; execution stops when the cursor equals the end address.
void RingBufferWorker::ExecutePrimaryBuffer(
uint32_t start_index, uint32_t end_index) {
SCOPE_profile_cpu_f("gpu");
// Adjust pointer base.
// Both addresses keep the buffer's top three bits and take the low 29 bits
// from base + index * 4.
uint32_t ptr = primary_buffer_ptr_ + start_index * 4;
ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);
uint32_t end_ptr = primary_buffer_ptr_ + end_index * 4;
end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF);
XETRACERB("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)",
ptr, start_index, end_index);
// Execute commands!
PacketArgs args;
args.ptr = ptr;
args.base_ptr = primary_buffer_ptr_;
// NOTE(review): max_address multiplies the size by 4 while ptr_mask divides
// it by 4, so the two lines treat primary_buffer_size_ as different units -
// confirm which is intended.
args.max_address = primary_buffer_ptr_ + primary_buffer_size_ * 4;
args.ptr_mask = (primary_buffer_size_ / 4) - 1;
uint32_t n = 0;
// The loop only ends when the cursor lands exactly on end_ptr.
while (args.ptr != end_ptr) {
n += ExecutePacket(args);
}
// Word count is only checked when the range did not wrap.
if (end_index > start_index) {
XEASSERT(n == (end_index - start_index));
}
XETRACERB(" ExecutePrimaryBuffer End");
}
// Executes |length| 32-bit words of packets starting at address |ptr|.
// The buffer is linear: ptr_mask is 0, so AdvancePtr never wraps.
void RingBufferWorker::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
XETRACERB("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length);
// Execute commands!
PacketArgs args;
args.ptr = ptr;
args.base_ptr = ptr;
args.max_address = ptr + length * 4;
args.ptr_mask = 0;
// Progress is tracked by the word counts ExecutePacket returns, not by
// comparing args.ptr against max_address.
for (uint32_t n = 0; n < length;) {
n += ExecutePacket(args);
XEASSERT(n <= length);
}
XETRACERB(" ExecuteIndirectBuffer End");
}
// Traces |count| big-endian data words that follow the packet header.
// Expects `packet_ptr` and `packet_base` to be in scope at the use site and
// does not move the read cursor.
#define LOG_DATA(count) \
for (uint32_t __m = 0; __m < count; __m++) { \
XETRACERB("[%.8X] %.8X", \
packet_ptr + (1 + __m) * 4, \
XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
}
// Moves the read cursor forward by |n| 32-bit words. When a pointer mask is
// set, the word offset from base_ptr is wrapped with that mask; with a zero
// mask the cursor simply advances.
void RingBufferWorker::AdvancePtr(PacketArgs& args, uint32_t n) {
  uint32_t next_ptr = args.ptr + n * 4;
  if (args.ptr_mask) {
    const uint32_t word_offset = (next_ptr - args.base_ptr) / 4;
    next_ptr = args.base_ptr + (word_offset & args.ptr_mask) * 4;
  }
  args.ptr = next_ptr;
}
// Cursor helpers for ExecutePacket; all expect `args` (and `p`, the memory
// base) to be in scope at the use site.
// ADVANCE_PTR(n): move the cursor forward by n words.
#define ADVANCE_PTR(n) AdvancePtr(args, n)
// PEEK_PTR(): read the big-endian word at the cursor without moving it.
#define PEEK_PTR() \
XEGETUINT32BE(p + args.ptr)
// READ_PTR(): read the word at the cursor, then advance by one.
// NOTE: expands to two statements with a trailing semicolon, so it only
// works as the full right-hand side of a declaration or assignment statement.
#define READ_PTR() \
XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1);
// Decodes and executes the single packet at args.ptr.
//
// The top two bits of the header word select the packet type (0-3); type 3
// packets carry an opcode in bits 8-14 that is dispatched in the inner
// switch. args.ptr is advanced as data words are consumed.
//
// Returns the packet size in 32-bit words including the header, as computed
// from the header (1 + count for type 0 and type 3). Callers add this to a
// running word count. The value is not derived from how far args.ptr actually
// moved, so handlers must consume exactly `count` data words to stay in sync.
uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
uint8_t* p = memory_->membase();
RegisterFile* regs = driver_->register_file();
uint32_t packet_ptr = args.ptr;
const uint8_t* packet_base = p + packet_ptr;
const uint32_t packet = PEEK_PTR();
ADVANCE_PTR(1);
const uint32_t packet_type = packet >> 30;
// An all-zero header is traced and skipped as a single word.
if (packet == 0) {
XETRACERB("[%.8X] Packet(%.8X): 0?",
packet_ptr, packet);
return 1;
}
switch (packet_type) {
case 0x00:
{
// Type-0 packet.
// Write count registers in sequence to the registers starting at
// (base_index << 2).
XETRACERB("[%.8X] Packet(%.8X): set registers:",
packet_ptr, packet);
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
uint32_t base_index = (packet & 0x7FFF);
// Bit 15 set: every data word goes to base_index instead of to
// consecutive registers.
uint32_t write_one_reg = (packet >> 15) & 0x1;
for (uint32_t m = 0; m < count; m++) {
uint32_t reg_data = PEEK_PTR();
uint32_t target_index = write_one_reg ? base_index : base_index + m;
const char* reg_name = xenos::GetRegisterName(target_index);
XETRACERB("[%.8X] %.8X -> %.4X %s",
args.ptr,
reg_data, target_index, reg_name ? reg_name : "");
ADVANCE_PTR(1);
WriteRegister(packet_ptr, target_index, reg_data);
}
return 1 + count;
}
break;
case 0x01:
{
// Type-1 packet.
// Contains two registers of data. Type-0 should be more common.
XETRACERB("[%.8X] Packet(%.8X): set registers:",
packet_ptr, packet);
uint32_t reg_index_1 = packet & 0x7FF;
uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
uint32_t reg_ptr_1 = args.ptr;
uint32_t reg_data_1 = READ_PTR();
uint32_t reg_ptr_2 = args.ptr;
uint32_t reg_data_2 = READ_PTR();
const char* reg_name_1 = xenos::GetRegisterName(reg_index_1);
const char* reg_name_2 = xenos::GetRegisterName(reg_index_2);
XETRACERB("[%.8X] %.8X -> %.4X %s",
reg_ptr_1,
reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
XETRACERB("[%.8X] %.8X -> %.4X %s",
reg_ptr_2,
reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
WriteRegister(packet_ptr, reg_index_1, reg_data_1);
WriteRegister(packet_ptr, reg_index_2, reg_data_2);
return 1 + 2;
}
break;
case 0x02:
// Type-2 packet.
// No-op. Do nothing.
XETRACERB("[%.8X] Packet(%.8X): padding",
packet_ptr, packet);
return 1;
case 0x03:
{
// Type-3 packet.
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
uint32_t opcode = (packet >> 8) & 0x7F;
// & 1 == predicate, maybe?
switch (opcode) {
case PM4_ME_INIT:
// initialize CP's micro-engine
XETRACERB("[%.8X] Packet(%.8X): PM4_ME_INIT",
packet_ptr, packet);
LOG_DATA(count);
ADVANCE_PTR(count);
break;
case PM4_NOP:
// skip N 32-bit words to get to the next packet
// No-op, ignore some data.
XETRACERB("[%.8X] Packet(%.8X): PM4_NOP",
packet_ptr, packet);
LOG_DATA(count);
ADVANCE_PTR(count);
break;
case PM4_INTERRUPT:
// generate interrupt from the command stream
{
XETRACERB("[%.8X] Packet(%.8X): PM4_INTERRUPT",
packet_ptr, packet);
LOG_DATA(count);
// NOTE(review): exactly one data word is consumed regardless of count.
uint32_t cpu_mask = READ_PTR();
// One callback per set bit among the low six bits of the mask.
for (int n = 0; n < 6; n++) {
if (cpu_mask & (1 << n)) {
graphics_system_->DispatchInterruptCallback(1, n);
}
}
}
break;
case PM4_INDIRECT_BUFFER:
// indirect buffer dispatch
{
uint32_t list_ptr = READ_PTR();
uint32_t list_length = READ_PTR();
XETRACERB("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
packet_ptr, packet, list_ptr, list_length);
// Runs the nested buffer to completion before continuing here.
ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length);
}
break;
case PM4_WAIT_REG_MEM:
// wait until a register or memory location is a specific value
{
XETRACERB("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
packet_ptr, packet);
LOG_DATA(count);
uint32_t wait_info = READ_PTR();
uint32_t poll_reg_addr = READ_PTR();
uint32_t ref = READ_PTR();
uint32_t mask = READ_PTR();
uint32_t wait = READ_PTR();
bool matched = false;
// Blocks this thread, re-reading the value until the comparison selected
// by the low three bits of wait_info holds.
do {
uint32_t value;
if (wait_info & 0x10) {
// Memory.
// The low two address bits carry the endianness selector.
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
poll_reg_addr &= ~0x3;
value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
value = GpuSwap(value, endianness);
} else {
// Register.
XEASSERT(poll_reg_addr < kXEGpuRegisterCount);
value = regs->values[poll_reg_addr].u32;
}
switch (wait_info & 0x7) {
case 0x0: // Never.
matched = false;
break;
case 0x1: // Less than reference.
matched = (value & mask) < ref;
break;
case 0x2: // Less than or equal to reference.
matched = (value & mask) <= ref;
break;
case 0x3: // Equal to reference.
matched = (value & mask) == ref;
break;
case 0x4: // Not equal to reference.
matched = (value & mask) != ref;
break;
case 0x5: // Greater than or equal to reference.
matched = (value & mask) >= ref;
break;
case 0x6: // Greater than reference.
matched = (value & mask) > ref;
break;
case 0x7: // Always
matched = true;
break;
}
if (!matched) {
// Wait.
if (wait >= 0x100) {
Sleep(wait / 0x100);
} else {
SwitchToThread();
}
}
} while (!matched);
}
break;
case PM4_REG_RMW:
// register read/modify/write
// ? (used during shader upload and edram setup)
{
XETRACERB("[%.8X] Packet(%.8X): PM4_REG_RMW",
packet_ptr, packet);
LOG_DATA(count);
uint32_t rmw_info = READ_PTR();
uint32_t and_mask = READ_PTR();
uint32_t or_mask = READ_PTR();
uint32_t value = regs->values[rmw_info & 0x1FFF].u32;
// The OR is applied first, then the AND; bits 30 and 31 of rmw_info choose
// between a register operand and the immediate.
if ((rmw_info >> 30) & 0x1) {
// | reg
value |= regs->values[or_mask & 0x1FFF].u32;
} else {
// | imm
value |= or_mask;
}
if ((rmw_info >> 31) & 0x1) {
// & reg
value &= regs->values[and_mask & 0x1FFF].u32;
} else {
// & imm
value &= and_mask;
}
WriteRegister(packet_ptr, rmw_info & 0x1FFF, value);
}
break;
case PM4_COND_WRITE:
// conditional write to memory or register
{
XETRACERB("[%.8X] Packet(%.8X): PM4_COND_WRITE",
packet_ptr, packet);
LOG_DATA(count);
uint32_t wait_info = READ_PTR();
uint32_t poll_reg_addr = READ_PTR();
uint32_t ref = READ_PTR();
uint32_t mask = READ_PTR();
uint32_t write_reg_addr = READ_PTR();
uint32_t write_data = READ_PTR();
uint32_t value;
if (wait_info & 0x10) {
// Memory.
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
poll_reg_addr &= ~0x3;
value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
value = GpuSwap(value, endianness);
} else {
// Register.
XEASSERT(poll_reg_addr < kXEGpuRegisterCount);
value = regs->values[poll_reg_addr].u32;
}
// Same comparison table as PM4_WAIT_REG_MEM, evaluated once.
bool matched = false;
switch (wait_info & 0x7) {
case 0x0: // Never.
matched = false;
break;
case 0x1: // Less than reference.
matched = (value & mask) < ref;
break;
case 0x2: // Less than or equal to reference.
matched = (value & mask) <= ref;
break;
case 0x3: // Equal to reference.
matched = (value & mask) == ref;
break;
case 0x4: // Not equal to reference.
matched = (value & mask) != ref;
break;
case 0x5: // Greater than or equal to reference.
matched = (value & mask) >= ref;
break;
case 0x6: // Greater than reference.
matched = (value & mask) > ref;
break;
case 0x7: // Always
matched = true;
break;
}
if (matched) {
// Write.
if (wait_info & 0x100) {
// Memory.
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3);
write_reg_addr &= ~0x3;
write_data = GpuSwap(write_data, endianness);
XESETUINT32LE(p + GpuToCpu(packet_ptr, write_reg_addr),
write_data);
} else {
// Register.
WriteRegister(packet_ptr, write_reg_addr, write_data);
}
}
}
break;
case PM4_EVENT_WRITE:
// generate an event that creates a write to memory when completed
{
XETRACERB("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)",
packet_ptr, packet);
LOG_DATA(count);
// The initiator is read to keep the cursor in sync but is not acted on.
uint32_t initiator = READ_PTR();
if (count == 1) {
// Just an event flag? Where does this write?
} else {
// Write to an address.
XEASSERTALWAYS();
ADVANCE_PTR(count - 1);
}
}
break;
case PM4_EVENT_WRITE_SHD:
// generate a VS|PS_done event
{
XETRACERB("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
packet_ptr, packet);
LOG_DATA(count);
uint32_t initiator = READ_PTR();
uint32_t address = READ_PTR();
uint32_t value = READ_PTR();
// Writeback initiator.
WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR,
initiator & 0x1F);
uint32_t data_value;
if ((initiator >> 31) & 0x1) {
// Write counter (GPU vblank counter?).
data_value = counter_;
} else {
// Write value.
data_value = value;
}
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3);
address &= ~0x3;
data_value = GpuSwap(data_value, endianness);
XESETUINT32LE(p + GpuToCpu(address), data_value);
}
break;
case PM4_DRAW_INDX:
// initiate fetch of index buffer and draw
{
XETRACERB("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
packet_ptr, packet);
LOG_DATA(count);
// d0 = viz query info
uint32_t d0 = READ_PTR();
uint32_t d1 = READ_PTR();
uint32_t index_count = d1 >> 16;
uint32_t prim_type = d1 & 0x3F;
uint32_t src_sel = (d1 >> 6) & 0x3;
if (src_sel == 0x0) {
// Indices come from memory: two more words give base and size.
uint32_t index_base = READ_PTR();
uint32_t index_size = READ_PTR();
uint32_t endianness = index_size >> 29;
index_size &= 0x00FFFFFF;
bool index_32bit = (d1 >> 11) & 0x1;
// Convert the index count into a byte size.
index_size *= index_32bit ? 4 : 2;
driver_->DrawIndexBuffer(
(XE_GPU_PRIMITIVE_TYPE)prim_type,
index_32bit, index_count, index_base, index_size, endianness);
} else if (src_sel == 0x2) {
driver_->DrawIndexAuto(
(XE_GPU_PRIMITIVE_TYPE)prim_type,
index_count);
} else {
// Unknown source select.
XEASSERTALWAYS();
}
}
break;
case PM4_DRAW_INDX_2:
// draw using supplied indices in packet
{
XETRACERB("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
packet_ptr, packet);
LOG_DATA(count);
// NOTE(review): only the first data word is consumed here; any further
// words in the packet are left under the cursor.
uint32_t d0 = READ_PTR();
uint32_t index_count = d0 >> 16;
uint32_t prim_type = d0 & 0x3F;
uint32_t src_sel = (d0 >> 6) & 0x3;
XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex'
driver_->DrawIndexAuto(
(XE_GPU_PRIMITIVE_TYPE)prim_type,
index_count);
}
break;
case PM4_SET_CONSTANT:
// load constant into chip and to memory
{
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_CONSTANT",
packet_ptr, packet);
// PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
// reg - 0x2000
uint32_t offset_type = READ_PTR();
uint32_t index = offset_type & 0x7FF;
uint32_t type = (offset_type >> 16) & 0xFF;
switch (type) {
case 0x4: // REGISTER
index += 0x2000; // registers
// The remaining count - 1 words are written to consecutive registers.
for (uint32_t n = 0; n < count - 1; n++, index++) {
uint32_t data = READ_PTR();
const char* reg_name = xenos::GetRegisterName(index);
XETRACERB("[%.8X] %.8X -> %.4X %s",
packet_ptr + (1 + n) * 4,
data, index, reg_name ? reg_name : "");
WriteRegister(packet_ptr, index, data);
}
break;
default:
// NOTE(review): the remaining data words are not skipped on this path.
XEASSERTALWAYS();
break;
}
}
break;
case PM4_LOAD_ALU_CONSTANT:
// load constants from memory
{
XETRACERB("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT",
packet_ptr, packet);
uint32_t address = READ_PTR();
address &= 0x3FFFFFFF;
uint32_t offset_type = READ_PTR();
uint32_t index = offset_type & 0x7FF;
uint32_t size = READ_PTR();
size &= 0xFFF;
index += 0x4000; // alu constants
// Copies `size` big-endian words from memory into consecutive registers.
for (uint32_t n = 0; n < size; n++, index++) {
uint32_t data = XEGETUINT32BE(
p + GpuToCpu(packet_ptr, address + n * 4));
const char* reg_name = xenos::GetRegisterName(index);
XETRACERB("[%.8X] %.8X -> %.4X %s",
packet_ptr,
data, index, reg_name ? reg_name : "");
WriteRegister(packet_ptr, index, data);
}
}
break;
case PM4_IM_LOAD:
// load sequencer instruction memory (pointer-based)
{
XETRACERB("[%.8X] Packet(%.8X): PM4_IM_LOAD",
packet_ptr, packet);
LOG_DATA(count);
uint32_t addr_type = READ_PTR();
// Low two bits: shader type; remaining bits: shader address.
uint32_t type = addr_type & 0x3;
uint32_t addr = addr_type & ~0x3;
uint32_t start_size = READ_PTR();
uint32_t start = start_size >> 16;
uint32_t size = start_size & 0xFFFF; // dwords
XEASSERT(start == 0);
driver_->SetShader(
(XE_GPU_SHADER_TYPE)type,
GpuToCpu(packet_ptr, addr),
start,
size * 4);
}
break;
case PM4_IM_LOAD_IMMEDIATE:
// load sequencer instruction memory (code embedded in packet)
{
XETRACERB("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
packet_ptr, packet);
LOG_DATA(count);
uint32_t type = READ_PTR();
uint32_t start_size = READ_PTR();
uint32_t start = start_size >> 16;
uint32_t size = start_size & 0xFFFF; // dwords
XEASSERT(start == 0);
// TODO(benvanik): figure out if this could wrap.
XEASSERT(args.ptr + size * 4 < args.max_address);
// The shader code sits right at the cursor; hand that address over and
// then skip past it.
driver_->SetShader(
(XE_GPU_SHADER_TYPE)type,
args.ptr,
start,
size * 4);
ADVANCE_PTR(size);
}
break;
case PM4_INVALIDATE_STATE:
// selective invalidation of state pointers
{
XETRACERB("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
packet_ptr, packet);
LOG_DATA(count);
uint32_t mask = READ_PTR();
driver_->InvalidateState(mask);
}
break;
// The four bin mask/select packets below read one word and only trace it.
case PM4_SET_BIN_MASK_LO:
{
uint32_t value = READ_PTR();
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X",
packet_ptr, packet, value);
}
break;
case PM4_SET_BIN_MASK_HI:
{
uint32_t value = READ_PTR();
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X",
packet_ptr, packet, value);
}
break;
case PM4_SET_BIN_SELECT_LO:
{
uint32_t value = READ_PTR();
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X",
packet_ptr, packet, value);
}
break;
case PM4_SET_BIN_SELECT_HI:
{
uint32_t value = READ_PTR();
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X",
packet_ptr, packet, value);
}
break;
// Ignored packets - useful if breaking on the default handler below.
case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000
XETRACERB("[%.8X] Packet(%.8X): unknown!",
packet_ptr, packet);
LOG_DATA(count);
ADVANCE_PTR(count);
break;
default:
// Unknown opcodes are traced and skipped using the header's count.
XETRACERB("[%.8X] Packet(%.8X): unknown!",
packet_ptr, packet);
LOG_DATA(count);
ADVANCE_PTR(count);
break;
}
return 1 + count;
}
break;
}
// Not reachable for packet_type 0-3, since every case above returns.
return 0;
}
// Stores |value| into register |index| of the driver's register file.
// Writes to SCRATCH_REG0..SCRATCH_REG7 are additionally mirrored big-endian
// to memory at SCRATCH_ADDR + 4 * n when bit n of SCRATCH_UMSK is set.
// |packet_ptr| is not used by this function.
void RingBufferWorker::WriteRegister(
    uint32_t packet_ptr, uint32_t index, uint32_t value) {
  RegisterFile* regs = driver_->register_file();
  XEASSERT(index < kXEGpuRegisterCount);
  regs->values[index].u32 = value;

  // Scratch register writeback.
  if (!(index >= XE_GPU_REG_SCRATCH_REG0 &&
        index <= XE_GPU_REG_SCRATCH_REG7)) {
    return;
  }
  uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
  if (!((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32)) {
    // Writeback is disabled for this scratch register.
    return;
  }

  // Enabled - write to address.
  uint8_t* membase = memory_->membase();
  uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
  uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
  XESETUINT32BE(membase + GpuToCpu(primary_buffer_ptr_, mem_addr), value);
}

View File

@ -1,81 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_RING_BUFFER_WORKER_H_
#define XENIA_GPU_RING_BUFFER_WORKER_H_
#include <xenia/core.h>
#include <xenia/gpu/xenos/registers.h>
namespace xe {
namespace gpu {
class GraphicsDriver;
class GraphicsSystem;
// Consumes the GPU command ring buffer: tracks the read/write indices,
// decodes packets from guest memory and forwards the resulting work to the
// attached GraphicsDriver.
class RingBufferWorker {
public:
RingBufferWorker(GraphicsSystem* graphics_system, Memory* memory);
virtual ~RingBufferWorker();
Memory* memory() const { return memory_; }
// Performance counter ticks since construction.
uint64_t QueryTime();
// Counter that PM4_EVENT_WRITE_SHD can write out instead of its value word.
uint32_t counter() const { return counter_; }
void increment_counter() { counter_++; }
// Attaches the driver and sets the primary buffer address and size.
void Initialize(GraphicsDriver* driver,
uint32_t ptr, uint32_t page_count);
// Enables writing the read index back to memory after each Pump().
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
// Publishes a new write index and wakes Pump().
void UpdateWritePointer(uint32_t value);
// Processes any commands between the read and write indices.
void Pump();
private:
// Read cursor state threaded through packet execution.
typedef struct {
// Address of the next word to read.
uint32_t ptr;
// Start of the buffer being executed; wrapping is relative to this.
uint32_t base_ptr;
// Upper address bound, used only in an assertion.
uint32_t max_address;
// Word-offset mask used for wrapping; 0 disables wrapping.
uint32_t ptr_mask;
} PacketArgs;
void AdvancePtr(PacketArgs& args, uint32_t n);
void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
uint32_t ExecutePacket(PacketArgs& args);
void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
protected:
Memory* memory_;
GraphicsSystem* graphics_system_;
GraphicsDriver* driver_;
// Performance counter value captured at construction.
uint64_t time_base_;
uint32_t counter_;
uint32_t primary_buffer_ptr_;
uint32_t primary_buffer_size_;
// Index (in words) of the next command to execute.
uint32_t read_ptr_index_;
uint32_t read_ptr_update_freq_;
// Address for read index write-back; 0 means disabled.
uint32_t read_ptr_writeback_ptr_;
// Event signaled by UpdateWritePointer() and waited on in Pump().
HANDLE write_ptr_index_event_;
// NOTE(review): volatile does not provide atomicity or ordering; if these
// are shared between threads they rely on the event for synchronization.
volatile uint32_t write_ptr_index_;
volatile uint32_t write_ptr_max_index_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_RING_BUFFER_WORKER_H_

View File

@ -0,0 +1,32 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/sampler_state_resource.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Fills |out_info| from a texture fetch constant and a fetch instruction.
// For each of the min/mag/mip filters the instruction's value is used unless
// it equals 3, in which case the fetch constant's value is taken instead.
// The clamp modes always come from the fetch constant (x/y/z -> u/v/w).
// Always returns true.
bool SamplerStateResource::Info::Prepare(
    const xe_gpu_texture_fetch_t& fetch, const instr_fetch_tex_t& fetch_instr,
    Info& out_info) {
  // Filters: 3 in the instruction means "take it from the fetch constant".
  const bool min_from_fetch = fetch_instr.min_filter == 3;
  const bool mag_from_fetch = fetch_instr.mag_filter == 3;
  const bool mip_from_fetch = fetch_instr.mip_filter == 3;
  out_info.min_filter = static_cast<instr_tex_filter_t>(
      min_from_fetch ? fetch.min_filter : fetch_instr.min_filter);
  out_info.mag_filter = static_cast<instr_tex_filter_t>(
      mag_from_fetch ? fetch.mag_filter : fetch_instr.mag_filter);
  out_info.mip_filter = static_cast<instr_tex_filter_t>(
      mip_from_fetch ? fetch.mip_filter : fetch_instr.mip_filter);

  // Clamp modes.
  out_info.clamp_w = fetch.clamp_z;
  out_info.clamp_v = fetch.clamp_y;
  out_info.clamp_u = fetch.clamp_x;
  return true;
}

View File

@ -0,0 +1,67 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
#define XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
#include <xenia/gpu/resource.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
// Abstract resource wrapping a sampler state. The state is described by the
// nested Info struct; concrete subclasses implement Prepare().
class SamplerStateResource : public StaticResource {
public:
// Value-type description of a sampler: three filters and three clamp modes.
struct Info {
xenos::instr_tex_filter_t min_filter;
xenos::instr_tex_filter_t mag_filter;
xenos::instr_tex_filter_t mip_filter;
uint32_t clamp_u;
uint32_t clamp_v;
uint32_t clamp_w;
// Combines all six fields into one hash value (seed 0).
uint64_t hash() const {
return hash_combine(0,
min_filter, mag_filter, mip_filter,
clamp_u, clamp_v, clamp_w);
}
// Field-by-field equality over the same six fields used by hash().
bool Equals(const Info& other) const {
return min_filter == other.min_filter &&
mag_filter == other.mag_filter &&
mip_filter == other.mip_filter &&
clamp_u == other.clamp_u &&
clamp_v == other.clamp_v &&
clamp_w == other.clamp_w;
}
// Fills |out_info| from a texture fetch constant and a fetch instruction.
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
const xenos::instr_fetch_tex_t& fetch_instr,
Info& out_info);
};
// Stores a copy of |info|.
// NOTE(review): single-argument constructor is not explicit, so an Info can
// convert implicitly — confirm whether any caller relies on that.
SamplerStateResource(const Info& info) : info_(info) {}
virtual ~SamplerStateResource() = default;
const Info& info() const { return info_; }
// Implemented by subclasses. NOTE(review): meaning of the int result is not
// visible here — presumably 0 on success; confirm.
virtual int Prepare() = 0;
protected:
Info info_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SAMPLER_STATE_RESOURCE_H_

View File

@ -1,266 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/shader.h>
#include <xenia/gpu/xenos/ucode_disassembler.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Builds a shader from |length| bytes of big-endian microcode at |src_ptr|.
// The words are byte-swapped into a private copy, the I/O tables are gathered
// and a disassembly string is produced for debugging.
Shader::Shader(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length,
    uint64_t hash)
    : type_(type), hash_(hash), is_prepared_(false), disasm_src_(NULL) {
  // Gathered tables start out empty.
  xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_));
  xe_zero_struct(&vtx_buffer_inputs_, sizeof(vtx_buffer_inputs_));
  xe_zero_struct(&tex_buffer_inputs_, sizeof(tex_buffer_inputs_));

  // Only whole dwords are considered; any trailing bytes are dropped.
  dword_count_ = length / 4;
  XEASSERT(dword_count_ <= 512);

  // Make a host-endian copy of the program.
  dwords_ = (uint32_t*)xe_malloc(dword_count_ * sizeof(uint32_t));
  for (uint32_t word = 0; word < dword_count_; word++) {
    const uint8_t* word_ptr = src_ptr + word * 4;
    dwords_[word] = XEGETUINT32BE(word_ptr);
  }

  // Collect inputs/outputs first, then disassemble for debugging.
  GatherIO();
  disasm_src_ = DisassembleShader(type_, dwords_, dword_count_);
}
// Frees the disassembly string (when one exists) and the byte-swapped dword
// copy allocated by the constructor.
Shader::~Shader() {
if (disasm_src_) {
xe_free(disasm_src_);
}
xe_free(dwords_);
}
// Walks the control-flow program at the start of the shader and hands every
// ALLOC and exec-class instruction to GatherAlloc()/GatherExec(). Stops at the
// first group that contains an EXEC_END.
void Shader::GatherIO() {
  // Each group of three dwords packs two control-flow instructions: cfa uses
  // dword 0 plus the low 16 bits of dword 1, cfb uses the remaining bits.
  instr_cf_t cfa;
  instr_cf_t cfb;
  // Only decode complete three-dword groups. The previous bound
  // (idx < dword_count_) read up to two dwords past the end of dwords_ when
  // the program length was not a multiple of three.
  for (size_t idx = 0; idx + 2 < dword_count_; idx += 3) {
    const uint32_t dword_0 = dwords_[idx + 0];
    const uint32_t dword_1 = dwords_[idx + 1];
    const uint32_t dword_2 = dwords_[idx + 2];
    cfa.dword_0 = dword_0;
    cfa.dword_1 = dword_1 & 0xFFFF;
    cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
    cfb.dword_1 = dword_2 >> 16;
    if (cfa.opc == ALLOC) {
      GatherAlloc(&cfa.alloc);
    } else if (cfa.is_exec()) {
      GatherExec(&cfa.exec);
    }
    if (cfb.opc == ALLOC) {
      GatherAlloc(&cfb.alloc);
    } else if (cfb.is_exec()) {
      GatherExec(&cfb.exec);
    }
    if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
      break;
    }
  }
}
// Records an ALLOC control-flow instruction and adds its size to the matching
// per-buffer counter. Buffer selects other than the three below are recorded
// in allocs_ but not counted.
void Shader::GatherAlloc(const instr_cf_alloc_t* cf) {
  allocs_.push_back(*cf);

  // The encoded size is biased by one.
  const uint32_t alloc_size = cf->size + 1;
  if (cf->buffer_select == SQ_POSITION) {
    // Position (SV_POSITION).
    alloc_counts_.positions += alloc_size;
  } else if (cf->buffer_select == SQ_PARAMETER_PIXEL) {
    // Output to PS (if VS), or frag output (if PS).
    alloc_counts_.params += alloc_size;
  } else if (cf->buffer_select == SQ_MEMORY) {
    // MEMEXPORT?
    alloc_counts_.memories += alloc_size;
  }
}
void Shader::GatherExec(const instr_cf_exec_t* cf) {
execs_.push_back(*cf);
uint32_t sequence = cf->serialize;
for (uint32_t i = 0; i < cf->count; i++) {
uint32_t alu_off = (cf->address + i);
int sync = sequence & 0x2;
if (sequence & 0x1) {
const instr_fetch_t* fetch =
(const instr_fetch_t*)(dwords_ + alu_off * 3);
switch (fetch->opc) {
case VTX_FETCH:
GatherVertexFetch(&fetch->vtx);
break;
case TEX_FETCH:
GatherTextureFetch(&fetch->tex);
break;
case TEX_GET_BORDER_COLOR_FRAC:
case TEX_GET_COMP_TEX_LOD:
case TEX_GET_GRADIENTS:
case TEX_GET_WEIGHTS:
case TEX_SET_TEX_LOD:
case TEX_SET_GRADIENTS_H:
case TEX_SET_GRADIENTS_V:
default:
XEASSERTALWAYS();
break;
}
} else {
// TODO(benvanik): gather registers used, predicate bits used, etc.
const instr_alu_t* alu =
(const instr_alu_t*)(dwords_ + alu_off * 3);
if (alu->vector_write_mask) {
if (alu->export_data && alu->vector_dest == 63) {
alloc_counts_.point_size = true;
}
}
if (alu->scalar_write_mask || !alu->vector_write_mask) {
if (alu->export_data && alu->scalar_dest == 63) {
alloc_counts_.point_size = true;
}
}
}
sequence >>= 2;
}
}
// Records one vertex fetch instruction as an element of the vertex buffer
// input that uses the same fetch slot, creating that input on first use.
// Fetches whose destination swizzle only produces constants or keeps the
// previous value are ignored.
void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
  // dst_reg/dst_swiz
  // src_reg/src_swiz
  // format = a2xx_sq_surfaceformat
  // format_comp_all ? signed : unsigned
  // num_format_all ? normalized
  // stride
  // offset
  // const_index/const_index_sel -- fetch constant register
  // num_format_all ? integer : fraction
  // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default

  // Sometimes games have fetches that just produce constants. We can
  // ignore those. Each of the four components has a 3-bit selector:
  // 4 = 0.0, 5 = 1.0, 6 = ?, 7 = previous register value; anything below 4
  // reads fetched data.
  uint32_t dst_swiz = vtx->dst_swiz;
  bool fetches_any_data = false;
  for (int i = 0; i < 4; i++, dst_swiz >>= 3) {
    if ((dst_swiz & 0x7) < 4) {
      fetches_any_data = true;
      break;
    }
  }
  if (!fetches_any_data) {
    return;
  }

  const uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
  auto& inputs = vtx_buffer_inputs_;
  vtx_buffer_element_t* el = NULL;
  for (size_t n = 0; n < inputs.count; n++) {
    auto& input = inputs.descs[n];
    if (input.fetch_slot != fetch_slot) {
      continue;
    }
    XEASSERT(input.element_count + 1 < XECOUNT(input.elements));
    // It may not hold that all strides are equal, but I hope it does.
    XEASSERT(!vtx->stride || input.stride_words == vtx->stride);
    // Hard guard: the assertion above may not be active in every build, and
    // writing past elements[] would corrupt the neighbouring descriptors.
    if (input.element_count >= XECOUNT(input.elements)) {
      XELOGE("Too many vertex elements for fetch slot %u", fetch_slot);
      return;
    }
    el = &input.elements[input.element_count++];
    break;
  }
  if (!el) {
    XEASSERTNOTZERO(vtx->stride);
    XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs));
    // Same reasoning as above for the descriptor table itself.
    if (inputs.count >= XECOUNT(inputs.descs)) {
      XELOGE("Too many vertex buffer inputs; dropping fetch slot %u",
             fetch_slot);
      return;
    }
    auto& input = inputs.descs[inputs.count++];
    input.input_index = inputs.count - 1;
    input.fetch_slot = fetch_slot;
    input.stride_words = vtx->stride;
    // Descriptors are zeroed by the constructor, so this is element 0.
    el = &input.elements[input.element_count++];
  }

  el->vtx_fetch = *vtx;
  el->format = vtx->format;
  el->offset_words = vtx->offset;
  // Element size in 32-bit words, by format. Unknown formats keep size 0.
  el->size_words = 0;
  switch (el->format) {
    case FMT_8_8_8_8:
    case FMT_2_10_10_10:
    case FMT_10_11_11:
    case FMT_11_11_10:
    case FMT_16_16:
    case FMT_16_16_FLOAT:
    case FMT_32:
    case FMT_32_FLOAT:
      el->size_words = 1;
      break;
    case FMT_16_16_16_16:
    case FMT_16_16_16_16_FLOAT:
    case FMT_32_32:
    case FMT_32_32_FLOAT:
      el->size_words = 2;
      break;
    case FMT_32_32_32_FLOAT:
      el->size_words = 3;
      break;
    case FMT_32_32_32_32:
    case FMT_32_32_32_32_FLOAT:
      el->size_words = 4;
      break;
    default:
      XELOGE("Unknown vertex format: %d", el->format);
      XEASSERTALWAYS();
      break;
  }
}
// Returns a pointer to the vertex buffer input table filled in by
// GatherVertexFetch(). The pointer refers to a member of this shader.
const Shader::vtx_buffer_inputs_t* Shader::GetVertexBufferInputs() {
return &vtx_buffer_inputs_;
}
// Appends one texture fetch instruction to the texture input table. Every
// fetch gets its own entry; entries are not merged by fetch slot.
void Shader::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) {
  // TODO(benvanik): check dest_swiz to see if we are writing anything.
  auto& inputs = tex_buffer_inputs_;
  XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs));
  // Hard guard: the assertion above may not be active in every build, and an
  // extra fetch must not write past the fixed-size descs[] array.
  if (inputs.count >= XECOUNT(inputs.descs)) {
    XELOGE("Too many texture fetches in shader; ignoring fetch");
    return;
  }
  auto& input = inputs.descs[inputs.count++];
  input.input_index = inputs.count - 1;
  input.fetch_slot = tex->const_idx & 0xF; // ?
  input.tex_fetch = *tex;
  // Format mangling, size estimation, etc.
}
// Returns a pointer to the texture input table filled in by
// GatherTextureFetch(). The pointer refers to a member of this shader.
const Shader::tex_buffer_inputs_t* Shader::GetTextureBufferInputs() {
return &tex_buffer_inputs_;
}

View File

@ -1,104 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SHADER_H_
#define XENIA_GPU_SHADER_H_
#include <xenia/core.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
// A shader program plus the metadata gathered from it at construction time:
// a byte-swapped copy of the microcode, its disassembly, the exec/alloc
// control-flow instructions and the vertex/texture inputs it fetches from.
class Shader {
public:
// |src_ptr|/|length| describe the big-endian microcode; |hash| is stored
// as-is and returned by hash().
Shader(xenos::XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length,
uint64_t hash);
virtual ~Shader();
xenos::XE_GPU_SHADER_TYPE type() const { return type_; }
// Host-endian copy of the program and its length in 32-bit words.
const uint32_t* dwords() const { return dwords_; }
size_t dword_count() const { return dword_count_; }
uint64_t hash() const { return hash_; }
bool is_prepared() const { return is_prepared_; }
// Disassembly text generated in the constructor, for debugging.
const char* disasm_src() const { return disasm_src_; }
// One vertex fetch within a vertex buffer input. Offsets and sizes are in
// 32-bit words.
typedef struct {
xenos::instr_fetch_vtx_t vtx_fetch;
uint32_t format;
uint32_t offset_words;
uint32_t size_words;
} vtx_buffer_element_t;
// All vertex fetches that share one fetch slot.
typedef struct {
uint32_t input_index;
uint32_t fetch_slot;
uint32_t stride_words;
uint32_t element_count;
vtx_buffer_element_t elements[16];
} vtx_buffer_desc_t;
typedef struct {
uint32_t count;
vtx_buffer_desc_t descs[16];
} vtx_buffer_inputs_t;
const vtx_buffer_inputs_t* GetVertexBufferInputs();
// One texture fetch instruction found in the program.
typedef struct {
uint32_t input_index;
uint32_t fetch_slot;
xenos::instr_fetch_tex_t tex_fetch;
uint32_t format;
} tex_buffer_desc_t;
typedef struct {
uint32_t count;
tex_buffer_desc_t descs[32];
} tex_buffer_inputs_t;
const tex_buffer_inputs_t* GetTextureBufferInputs();
// Totals accumulated from ALLOC instructions, plus whether any ALU
// instruction exports to destination 63 (point_size).
typedef struct {
uint32_t positions;
uint32_t params;
uint32_t memories;
bool point_size;
} alloc_counts_t;
const alloc_counts_t& alloc_counts() const { return alloc_counts_; }
private:
// Helpers run once from the constructor to fill the tables above.
void GatherIO();
void GatherAlloc(const xenos::instr_cf_alloc_t* cf);
void GatherExec(const xenos::instr_cf_exec_t* cf);
void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx);
void GatherTextureFetch(const xenos::instr_fetch_tex_t* tex);
protected:
xenos::XE_GPU_SHADER_TYPE type_;
// Owned; allocated in the constructor and freed in the destructor.
uint32_t* dwords_;
size_t dword_count_;
uint64_t hash_;
bool is_prepared_;
// Owned; freed in the destructor when non-null.
char* disasm_src_;
alloc_counts_t alloc_counts_;
std::vector<xenos::instr_cf_exec_t> execs_;
std::vector<xenos::instr_cf_alloc_t> allocs_;
vtx_buffer_inputs_t vtx_buffer_inputs_;
tex_buffer_inputs_t tex_buffer_inputs_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SHADER_H_

View File

@ -1,80 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/shader_cache.h>
#include <xenia/gpu/shader.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Creates an empty cache.
ShaderCache::ShaderCache() {
}
// Deletes every cached shader via Clear().
ShaderCache::~ShaderCache() {
Clear();
}
// Returns a cache-owned shader for the given microcode, creating and
// registering one when no entry with the same content hash exists yet.
//
// The previous version always built a new Shader and then called
// map_.insert(), which does nothing when the key is already present. In that
// case the new Shader was returned but never owned by the cache, so Clear()
// could not delete it. Reusing the existing entry keeps ownership consistent.
Shader* ShaderCache::Create(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length) {
  const uint64_t hash = Hash(src_ptr, length);
  auto it = map_.find(hash);
  if (it != map_.end()) {
    return it->second;
  }
  Shader* shader = CreateCore(type, src_ptr, length, hash);
  map_.insert({ hash, shader });
  return shader;
}
// Default factory: allocates a plain Shader with new. Declared virtual in the
// header so subclasses can return their own Shader type. The caller takes
// ownership of the returned pointer.
Shader* ShaderCache::CreateCore(
XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length,
uint64_t hash) {
return new Shader(type, src_ptr, length, hash);
}
// Looks up a cached shader by the hash of its microcode. Returns NULL when no
// entry exists. |type| does not take part in the lookup.
Shader* ShaderCache::Find(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length) {
  const auto entry = map_.find(Hash(src_ptr, length));
  if (entry == map_.end()) {
    return NULL;
  }
  return entry->second;
}
// Returns the cached shader whose microcode hash matches, or creates one with
// CreateCore(), registers it under that hash and returns it.
Shader* ShaderCache::FindOrCreate(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length) {
  SCOPE_profile_cpu_f("gpu");

  const uint64_t key = Hash(src_ptr, length);
  const auto existing = map_.find(key);
  if (existing == map_.end()) {
    // First time this program is seen: build it and remember it.
    Shader* created = CreateCore(type, src_ptr, length, key);
    map_.insert({ key, created });
    return created;
  }
  return existing->second;
}
// Deletes every shader owned by the cache and empties the map. Any Shader
// pointer previously handed out by the cache is invalid afterwards.
void ShaderCache::Clear() {
  for (auto& entry : map_) {
    delete entry.second;
  }
  map_.clear();
}
// Computes the cache key for a block of microcode: xe_hash64 over the raw
// bytes with a seed of 0.
uint64_t ShaderCache::Hash(const uint8_t* src_ptr, size_t length) {
return xe_hash64(src_ptr, length, 0);
}

View File

@ -1,56 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SHADER_CACHE_H_
#define XENIA_GPU_SHADER_CACHE_H_
#include <xenia/core.h>
#include <xenia/gpu/shader.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
// Owns Shader objects keyed by a 64-bit hash of their microcode. Shaders
// registered in the cache are deleted by Clear() and by the destructor.
class ShaderCache {
public:
ShaderCache();
virtual ~ShaderCache();
// Builds a shader for the given microcode and registers it under its hash.
Shader* Create(
xenos::XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length);
// Returns the cached shader for the given microcode, or NULL if none.
Shader* Find(
xenos::XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length);
// Find(), falling back to creating and registering a new shader.
Shader* FindOrCreate(
xenos::XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length);
// Deletes all cached shaders.
void Clear();
private:
uint64_t Hash(const uint8_t* src_ptr, size_t length);
// Hash -> owned shader.
std::unordered_map<uint64_t, Shader*> map_;
protected:
// Factory hook; the base implementation allocates a plain Shader.
virtual Shader* CreateCore(
xenos::XE_GPU_SHADER_TYPE type,
const uint8_t* src_ptr, size_t length,
uint64_t hash);
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SHADER_CACHE_H_

View File

@ -9,9 +9,267 @@
#include <xenia/gpu/shader_resource.h>
#include <xenia/gpu/xenos/ucode_disassembler.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Builds a shader resource from the big-endian microcode in |memory_range|.
// The words are byte-swapped into a private copy, disassembled for debugging
// and then scanned to fill the alloc/buffer/sampler tables.
ShaderResource::ShaderResource(const MemoryRange& memory_range,
                               const Info& info,
                               xenos::XE_GPU_SHADER_TYPE type)
    : HashedResource(memory_range),
      info_(info), type_(type), is_prepared_(false), disasm_src_(nullptr) {
  // Gathered tables start out empty.
  xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_));
  xe_zero_struct(&buffer_inputs_, sizeof(buffer_inputs_));
  xe_zero_struct(&sampler_inputs_, sizeof(sampler_inputs_));

  // Only whole dwords are considered; any trailing bytes are dropped.
  dword_count_ = memory_range.length / 4;
  XEASSERT(dword_count_ <= 512);

  // Make a host-endian copy of the program.
  dwords_ = (uint32_t*)xe_malloc(dword_count_ * sizeof(uint32_t));
  for (uint32_t word = 0; word < dword_count_; word++) {
    dwords_[word] = XEGETUINT32BE(memory_range.host_base + word * 4);
  }

  // Disassemble first (debugging aid), then collect inputs/outputs.
  disasm_src_ = DisassembleShader(type_, dwords_, dword_count_);
  GatherIO();
}
// Frees the disassembly string and the byte-swapped dword copy allocated by
// the constructor.
ShaderResource::~ShaderResource() {
xe_free(disasm_src_);
xe_free(dwords_);
}
// Walks the control-flow program at the start of the shader and hands every
// ALLOC and exec-class instruction to GatherAlloc()/GatherExec(). Stops at the
// first group that contains an EXEC_END.
void ShaderResource::GatherIO() {
  // Each group of three dwords packs two control-flow instructions: cfa uses
  // dword 0 plus the low 16 bits of dword 1, cfb uses the remaining bits.
  instr_cf_t cfa;
  instr_cf_t cfb;
  // Only decode complete three-dword groups. The previous bound
  // (idx < dword_count_) read up to two dwords past the end of dwords_ when
  // the program length was not a multiple of three.
  for (size_t idx = 0; idx + 2 < dword_count_; idx += 3) {
    const uint32_t dword_0 = dwords_[idx + 0];
    const uint32_t dword_1 = dwords_[idx + 1];
    const uint32_t dword_2 = dwords_[idx + 2];
    cfa.dword_0 = dword_0;
    cfa.dword_1 = dword_1 & 0xFFFF;
    cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
    cfb.dword_1 = dword_2 >> 16;
    if (cfa.opc == ALLOC) {
      GatherAlloc(&cfa.alloc);
    } else if (cfa.is_exec()) {
      GatherExec(&cfa.exec);
    }
    if (cfb.opc == ALLOC) {
      GatherAlloc(&cfb.alloc);
    } else if (cfb.is_exec()) {
      GatherExec(&cfb.exec);
    }
    if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
      break;
    }
  }
}
// Records an ALLOC control-flow instruction and adds its size to the matching
// per-buffer counter. Buffer selects other than the three below are recorded
// in allocs_ but not counted.
void ShaderResource::GatherAlloc(const instr_cf_alloc_t* cf) {
  allocs_.push_back(*cf);

  // The encoded size is biased by one.
  const uint32_t alloc_size = cf->size + 1;
  if (cf->buffer_select == SQ_POSITION) {
    // Position (SV_POSITION).
    alloc_counts_.positions += alloc_size;
  } else if (cf->buffer_select == SQ_PARAMETER_PIXEL) {
    // Output to PS (if VS), or frag output (if PS).
    alloc_counts_.params += alloc_size;
  } else if (cf->buffer_select == SQ_MEMORY) {
    // MEMEXPORT?
    alloc_counts_.memories += alloc_size;
  }
}
void ShaderResource::GatherExec(const instr_cf_exec_t* cf) {
execs_.push_back(*cf);
uint32_t sequence = cf->serialize;
for (uint32_t i = 0; i < cf->count; i++) {
uint32_t alu_off = (cf->address + i);
int sync = sequence & 0x2;
if (sequence & 0x1) {
const instr_fetch_t* fetch =
(const instr_fetch_t*)(dwords_ + alu_off * 3);
switch (fetch->opc) {
case VTX_FETCH:
GatherVertexFetch(&fetch->vtx);
break;
case TEX_FETCH:
GatherTextureFetch(&fetch->tex);
break;
case TEX_GET_BORDER_COLOR_FRAC:
case TEX_GET_COMP_TEX_LOD:
case TEX_GET_GRADIENTS:
case TEX_GET_WEIGHTS:
case TEX_SET_TEX_LOD:
case TEX_SET_GRADIENTS_H:
case TEX_SET_GRADIENTS_V:
default:
XEASSERTALWAYS();
break;
}
} else {
// TODO(benvanik): gather registers used, predicate bits used, etc.
const instr_alu_t* alu =
(const instr_alu_t*)(dwords_ + alu_off * 3);
if (alu->vector_write_mask) {
if (alu->export_data && alu->vector_dest == 63) {
alloc_counts_.point_size = true;
}
}
if (alu->scalar_write_mask || !alu->vector_write_mask) {
if (alu->export_data && alu->scalar_dest == 63) {
alloc_counts_.point_size = true;
}
}
}
sequence >>= 2;
}
}
// Records one vertex fetch instruction as an element of the buffer input that
// uses the same fetch slot, creating that input on first use. Fetches whose
// destination swizzle only produces constants or keeps the previous value are
// ignored. Only expected to be called for vertex shaders.
void ShaderResource::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
  XEASSERT(type_ == XE_GPU_SHADER_TYPE_VERTEX);

  // dst_reg/dst_swiz
  // src_reg/src_swiz
  // format = a2xx_sq_surfaceformat
  // format_comp_all ? signed : unsigned
  // num_format_all ? normalized
  // stride
  // offset
  // const_index/const_index_sel -- fetch constant register
  // num_format_all ? integer : fraction
  // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default

  // Sometimes games have fetches that just produce constants. We can
  // ignore those. Each of the four components has a 3-bit selector:
  // 4 = 0.0, 5 = 1.0, 6 = ?, 7 = previous register value; anything below 4
  // reads fetched data.
  uint32_t dst_swiz = vtx->dst_swiz;
  bool fetches_any_data = false;
  for (int i = 0; i < 4; i++, dst_swiz >>= 3) {
    if ((dst_swiz & 0x7) < 4) {
      fetches_any_data = true;
      break;
    }
  }
  if (!fetches_any_data) {
    return;
  }

  const uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
  auto& inputs = buffer_inputs_;
  VertexBufferResource::DeclElement* el = nullptr;
  for (size_t n = 0; n < inputs.count; n++) {
    auto& desc = inputs.descs[n];
    if (desc.fetch_slot != fetch_slot) {
      continue;
    }
    auto& info = desc.info;
    XEASSERT(info.element_count + 1 < XECOUNT(info.elements));
    // It may not hold that all strides are equal, but I hope it does.
    XEASSERT(!vtx->stride || info.stride_words == vtx->stride);
    // Hard guard: the assertion above may not be active in every build, and
    // writing past elements[] would corrupt the neighbouring descriptors.
    if (info.element_count >= XECOUNT(info.elements)) {
      XELOGE("Too many vertex elements for fetch slot %u", fetch_slot);
      return;
    }
    el = &info.elements[info.element_count++];
    break;
  }
  if (!el) {
    XEASSERTNOTZERO(vtx->stride);
    XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs));
    // Same reasoning as above for the descriptor table itself.
    if (inputs.count >= XECOUNT(inputs.descs)) {
      XELOGE("Too many vertex buffer inputs; dropping fetch slot %u",
             fetch_slot);
      return;
    }
    auto& desc = inputs.descs[inputs.count++];
    desc.input_index = inputs.count - 1;
    desc.fetch_slot = fetch_slot;
    desc.info.stride_words = vtx->stride;
    // Descriptors are zeroed by the constructor, so this is element 0.
    el = &desc.info.elements[desc.info.element_count++];
  }

  el->vtx_fetch = *vtx;
  el->format = vtx->format;
  el->is_normalized = vtx->num_format_all == 0;
  el->is_signed = vtx->format_comp_all == 1;
  el->offset_words = vtx->offset;
  // Element size in 32-bit words, by format. Unknown formats keep size 0.
  el->size_words = 0;
  switch (el->format) {
    case FMT_8_8_8_8:
    case FMT_2_10_10_10:
    case FMT_10_11_11:
    case FMT_11_11_10:
    case FMT_16_16:
    case FMT_16_16_FLOAT:
    case FMT_32:
    case FMT_32_FLOAT:
      el->size_words = 1;
      break;
    case FMT_16_16_16_16:
    case FMT_16_16_16_16_FLOAT:
    case FMT_32_32:
    case FMT_32_32_FLOAT:
      el->size_words = 2;
      break;
    case FMT_32_32_32_FLOAT:
      el->size_words = 3;
      break;
    case FMT_32_32_32_32:
    case FMT_32_32_32_32_FLOAT:
      el->size_words = 4;
      break;
    default:
      XELOGE("Unknown vertex format: %d", el->format);
      XEASSERTALWAYS();
      break;
  }
}
// Appends one texture fetch instruction to the sampler input table. Every
// fetch gets its own entry; entries are not merged by fetch slot.
void ShaderResource::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) {
  // TODO(benvanik): check dest_swiz to see if we are writing anything.
  XEASSERT(sampler_inputs_.count + 1 < XECOUNT(sampler_inputs_.descs));
  // Hard guard: the assertion above may not be active in every build, and an
  // extra fetch must not write past the fixed-size descs[] array.
  if (sampler_inputs_.count >= XECOUNT(sampler_inputs_.descs)) {
    XELOGE("Too many texture fetches in shader; ignoring fetch");
    return;
  }
  auto& input = sampler_inputs_.descs[sampler_inputs_.count++];
  input.input_index = sampler_inputs_.count - 1;
  input.fetch_slot = tex->const_idx & 0xF; // ?
  input.tex_fetch = *tex;
  // Format mangling, size estimation, etc.
}
// Forwards to ShaderResource with the shader type fixed to
// XE_GPU_SHADER_TYPE_VERTEX.
VertexShaderResource::VertexShaderResource(
const MemoryRange& memory_range, const Info& info)
: ShaderResource(memory_range, info, XE_GPU_SHADER_TYPE_VERTEX) {
}
// Nothing extra to release; the base class frees the program copy.
VertexShaderResource::~VertexShaderResource() = default;
// Forwards to ShaderResource with the shader type fixed to
// XE_GPU_SHADER_TYPE_PIXEL.
PixelShaderResource::PixelShaderResource(
const MemoryRange& memory_range, const Info& info)
: ShaderResource(memory_range, info, XE_GPU_SHADER_TYPE_PIXEL) {
}
// Nothing extra to release; the base class frees the program copy.
PixelShaderResource::~PixelShaderResource() = default;

View File

@ -10,7 +10,9 @@
#ifndef XENIA_GPU_SHADER_RESOURCE_H_
#define XENIA_GPU_SHADER_RESOURCE_H_
#include <xenia/core.h>
#include <xenia/gpu/buffer_resource.h>
#include <xenia/gpu/resource.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
@ -18,8 +20,104 @@ namespace xe {
namespace gpu {
class ShaderResource : public Resource {
// Base class for shader resources. Holds a byte-swapped copy of the shader
// microcode together with the metadata gathered from it at construction:
// disassembly text, exec/alloc control-flow instructions, and the vertex
// buffer and sampler inputs the program fetches from.
class ShaderResource : public HashedResource {
 public:
  struct Info {
    // type, etc?
  };

  ~ShaderResource() override;

  const Info& info() const { return info_; }
  xenos::XE_GPU_SHADER_TYPE type() const { return type_; }
  // Host-endian copy of the program and its length in 32-bit words.
  const uint32_t* dwords() const { return dwords_; }
  // Returned by value; the former top-level const on the return type had no
  // effect and has been dropped.
  size_t dword_count() const { return dword_count_; }

  bool is_prepared() const { return is_prepared_; }
  // Disassembly text generated in the constructor, for debugging.
  const char* disasm_src() const { return disasm_src_; }

  // All vertex fetches that share one fetch slot.
  struct BufferDesc {
    uint32_t input_index;
    uint32_t fetch_slot;
    VertexBufferResource::Info info;
    // xenos::instr_fetch_vtx_t vtx_fetch; for each el
  };
  struct BufferInputs {
    uint32_t count;
    BufferDesc descs[32];
  };
  // Const-qualified like the other read-only accessors so it can be called
  // through a const ShaderResource.
  const BufferInputs& buffer_inputs() const { return buffer_inputs_; }

  // One texture fetch instruction found in the program.
  struct SamplerDesc {
    uint32_t input_index;
    uint32_t fetch_slot;
    uint32_t format;
    xenos::instr_fetch_tex_t tex_fetch;
  };
  struct SamplerInputs {
    uint32_t count;
    SamplerDesc descs[32];
  };
  const SamplerInputs& sampler_inputs() const { return sampler_inputs_; }

  // Totals accumulated from ALLOC instructions, plus whether any ALU
  // instruction exports to destination 63 (point_size).
  struct AllocCounts {
    uint32_t positions;
    uint32_t params;
    uint32_t memories;
    bool point_size;
  };
  const AllocCounts& alloc_counts() const { return alloc_counts_; }
  const std::vector<xenos::instr_cf_exec_t>& execs() const { return execs_; }
  const std::vector<xenos::instr_cf_alloc_t>& allocs() const { return allocs_; }

 private:
  // Helpers run once from the constructor to fill the tables above.
  void GatherIO();
  void GatherAlloc(const xenos::instr_cf_alloc_t* cf);
  void GatherExec(const xenos::instr_cf_exec_t* cf);
  void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx);
  void GatherTextureFetch(const xenos::instr_fetch_tex_t* tex);

 protected:
  // Only constructible through the vertex/pixel subclasses.
  ShaderResource(const MemoryRange& memory_range,
                 const Info& info,
                 xenos::XE_GPU_SHADER_TYPE type);

  Info info_;
  xenos::XE_GPU_SHADER_TYPE type_;
  size_t dword_count_;
  // Owned; allocated in the constructor and freed in the destructor.
  uint32_t* dwords_;
  // Owned; freed in the destructor.
  char* disasm_src_;
  AllocCounts alloc_counts_;
  std::vector<xenos::instr_cf_exec_t> execs_;
  std::vector<xenos::instr_cf_alloc_t> allocs_;
  BufferInputs buffer_inputs_;
  SamplerInputs sampler_inputs_;

  bool is_prepared_;
};
// Abstract vertex shader resource. Concrete subclasses implement Prepare().
class VertexShaderResource : public ShaderResource {
public:
VertexShaderResource(const MemoryRange& memory_range,
const Info& info);
~VertexShaderResource() override;
// buffer_inputs() matching VertexBufferResource::Info
// NOTE(review): meaning of the int result is not visible here — presumably
// 0 on success; confirm against implementations.
virtual int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl) = 0;
};
// Abstract pixel shader resource. Concrete subclasses implement Prepare(),
// which also receives the vertex shader it is used with.
class PixelShaderResource : public ShaderResource {
public:
PixelShaderResource(const MemoryRange& memory_range,
const Info& info);
~PixelShaderResource() override;
// NOTE(review): meaning of the int result is not visible here — presumably
// 0 on success; confirm against implementations.
virtual int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl,
VertexShaderResource* vertex_shader) = 0;
};

View File

@ -5,6 +5,8 @@
'buffer_resource.h',
'command_processor.cc',
'command_processor.h',
'draw_command.cc',
'draw_command.h',
'gpu-private.h',
'gpu.cc',
'gpu.h',
@ -18,6 +20,8 @@
'resource.h',
'resource_cache.cc',
'resource_cache.h',
'sampler_state_resource.cc',
'sampler_state_resource.h',
'shader_resource.cc',
'shader_resource.h',
'texture_resource.cc',

View File

@ -1,369 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/texture.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
// TODO(benvanik): replace DXGI constants with xenia constants.
#include <d3d11.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// Stores the texture's address and the matching host pointer. No views are
// created until Fetch() is called.
Texture::Texture(uint32_t address, const uint8_t* host_address)
: address_(address), host_address_(host_address) {
}
// Deletes every view created for this texture.
Texture::~Texture() {
  for (auto view : views_) {
    delete view;
  }
  views_.clear();
}
// Returns the view of this texture that matches |fetch|, creating one when
// none exists. An existing view is matched by a bytewise comparison of the
// fetch constant; if its stored hash differs from the current one it is
// refreshed through FetchDirty(). Returns nullptr when creation or refresh
// fails.
TextureView* Texture::Fetch(
    const xenos::xe_gpu_texture_fetch_t& fetch) {
  // TODO(benvanik): compute length for hash check.
  // Until then the length stays 0 for every dimension.
  size_t length = 0;
  switch (fetch.dimension) {
    case DIMENSION_1D:
    case DIMENSION_2D:
    case DIMENSION_3D:
    case DIMENSION_CUBE:
      break;
  }
  uint64_t hash = xe_hash64(host_address_, length);

  for (auto view : views_) {
    if (memcmp(&view->fetch, &fetch, sizeof(fetch)) != 0) {
      // Different fetch constant; not the view we want.
      continue;
    }
    if (hash == view->hash) {
      return view;
    }
    // Contents changed since the view was built.
    return FetchDirty(view, fetch) ? view : nullptr;
  }

  // No matching view yet.
  auto new_view = FetchNew(fetch);
  if (!new_view) {
    return nullptr;
  }
  new_view->hash = hash;
  views_.push_back(new_view);
  return new_view;
}
// Fills |view| from the texture fetch constant |fetch|: back-pointer, a copy
// of the fetch, raw dimensions, the DXGI format chosen from the guest format
// and swizzle, block size, texel pitch and the compressed flag. For 2D
// textures the size table is computed as well. Returns false when the guest
// format has no DXGI mapping here.
bool Texture::FillViewInfo(TextureView* view,
                           const xenos::xe_gpu_texture_fetch_t& fetch) {
  // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
  // a2xx_sq_surfaceformat

  view->texture = this;
  view->fetch = fetch;
  view->dimensions = fetch.dimension;

  // Copy the size fields that exist for this dimensionality; the others are
  // left untouched.
  if (fetch.dimension == DIMENSION_1D) {
    view->width = fetch.size_1d.width;
  } else if (fetch.dimension == DIMENSION_2D) {
    view->width = fetch.size_2d.width;
    view->height = fetch.size_2d.height;
  } else if (fetch.dimension == DIMENSION_3D) {
    view->width = fetch.size_3d.width;
    view->height = fetch.size_3d.height;
    view->depth = fetch.size_3d.depth;
  } else if (fetch.dimension == DIMENSION_CUBE) {
    view->width = fetch.size_stack.width;
    view->height = fetch.size_stack.height;
    view->depth = fetch.size_stack.depth;
  }

  // Defaults; a format that is not mapped below stays DXGI_FORMAT_UNKNOWN.
  view->format = DXGI_FORMAT_UNKNOWN;
  view->block_size = 0;
  view->texel_pitch = 0;
  view->is_compressed = false;

  switch (fetch.format) {
    case FMT_8:
      if (fetch.swizzle == XE_GPU_SWIZZLE_RRR1) {
        view->format = DXGI_FORMAT_R8_UNORM;
      } else {
        if (fetch.swizzle != XE_GPU_SWIZZLE_000R) {
          XELOGW("D3D11: unhandled swizzle for FMT_8");
        }
        view->format = DXGI_FORMAT_A8_UNORM;
      }
      view->block_size = 1;
      view->texel_pitch = 1;
      break;
    case FMT_1_5_5_5:
      if (fetch.swizzle != XE_GPU_SWIZZLE_BGRA) {
        XELOGW("D3D11: unhandled swizzle for FMT_1_5_5_5");
      }
      view->format = DXGI_FORMAT_B5G5R5A1_UNORM;
      view->block_size = 1;
      view->texel_pitch = 2;
      break;
    case FMT_8_8_8_8:
      if (fetch.swizzle == XE_GPU_SWIZZLE_RGBA) {
        view->format = DXGI_FORMAT_R8G8B8A8_UNORM;
      } else if (fetch.swizzle == XE_GPU_SWIZZLE_BGRA) {
        view->format = DXGI_FORMAT_B8G8R8A8_UNORM;
      } else if (fetch.swizzle == XE_GPU_SWIZZLE_RGB1) {
        view->format = DXGI_FORMAT_R8G8B8A8_UNORM; // ?
      } else if (fetch.swizzle == XE_GPU_SWIZZLE_BGR1) {
        view->format = DXGI_FORMAT_B8G8R8X8_UNORM;
      } else {
        XELOGW("D3D11: unhandled swizzle for FMT_8_8_8_8");
        view->format = DXGI_FORMAT_R8G8B8A8_UNORM;
      }
      view->block_size = 1;
      view->texel_pitch = 4;
      break;
    case FMT_4_4_4_4:
      if (fetch.swizzle != XE_GPU_SWIZZLE_BGRA) {
        XELOGW("D3D11: unhandled swizzle for FMT_4_4_4_4");
      }
      view->format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
      view->block_size = 1;
      view->texel_pitch = 2;
      break;
    case FMT_16_16_16_16_FLOAT:
      if (fetch.swizzle != XE_GPU_SWIZZLE_RGBA) {
        XELOGW("D3D11: unhandled swizzle for FMT_16_16_16_16_FLOAT");
      }
      view->format = DXGI_FORMAT_R16G16B16A16_FLOAT;
      view->block_size = 1;
      view->texel_pitch = 8;
      break;
    case FMT_32_FLOAT:
      if (fetch.swizzle != XE_GPU_SWIZZLE_R111) {
        XELOGW("D3D11: unhandled swizzle for FMT_32_FLOAT");
      }
      view->format = DXGI_FORMAT_R32_FLOAT;
      view->block_size = 1;
      view->texel_pitch = 4;
      break;
    case FMT_DXT1:
      view->format = DXGI_FORMAT_BC1_UNORM;
      view->block_size = 4;
      view->texel_pitch = 8;
      view->is_compressed = true;
      break;
    case FMT_DXT2_3:
      view->format = DXGI_FORMAT_BC2_UNORM;
      view->block_size = 4;
      view->texel_pitch = 16;
      view->is_compressed = true;
      break;
    case FMT_DXT4_5:
      view->format = DXGI_FORMAT_BC3_UNORM;
      view->block_size = 4;
      view->texel_pitch = 16;
      view->is_compressed = true;
      break;
    // Known guest formats without a mapping yet.
    case FMT_1_REVERSE: case FMT_1: case FMT_5_6_5: case FMT_6_5_5:
    case FMT_2_10_10_10: case FMT_8_A: case FMT_8_B: case FMT_8_8:
    case FMT_Cr_Y1_Cb_Y0: case FMT_Y1_Cr_Y0_Cb: case FMT_5_5_5_1:
    case FMT_8_8_8_8_A: case FMT_10_11_11: case FMT_11_11_10:
    case FMT_24_8: case FMT_24_8_FLOAT: case FMT_16: case FMT_16_16:
    case FMT_16_16_16_16: case FMT_16_EXPAND: case FMT_16_16_EXPAND:
    case FMT_16_16_16_16_EXPAND: case FMT_16_FLOAT: case FMT_16_16_FLOAT:
    case FMT_32: case FMT_32_32: case FMT_32_32_32_32:
    case FMT_32_32_FLOAT: case FMT_32_32_32_32_FLOAT:
    case FMT_32_AS_8: case FMT_32_AS_8_8:
    case FMT_16_MPEG: case FMT_16_16_MPEG:
    case FMT_8_INTERLACED: case FMT_32_AS_8_INTERLACED:
    case FMT_32_AS_8_8_INTERLACED: case FMT_16_INTERLACED:
    case FMT_16_MPEG_INTERLACED: case FMT_16_16_MPEG_INTERLACED:
    case FMT_DXN:
    case FMT_8_8_8_8_AS_16_16_16_16:
    case FMT_DXT1_AS_16_16_16_16:
    case FMT_DXT2_3_AS_16_16_16_16:
    case FMT_DXT4_5_AS_16_16_16_16:
    case FMT_2_10_10_10_AS_16_16_16_16:
    case FMT_10_11_11_AS_16_16_16_16:
    case FMT_11_11_10_AS_16_16_16_16:
    case FMT_32_32_32_FLOAT:
    case FMT_DXT3A: case FMT_DXT5A: case FMT_CTX1:
    case FMT_DXT3A_AS_1_1_1_1:
      view->format = DXGI_FORMAT_UNKNOWN;
      break;
  }

  if (view->format == DXGI_FORMAT_UNKNOWN) {
    return false;
  }

  // Only 2D textures get a size table for now.
  if (fetch.dimension == DIMENSION_2D) {
    view->sizes_2d = GetTextureSizes2D(view);
  }
  return true;
}
// Computes the size table for a 2D view: logical size (fetch size + 1),
// size in blocks, padded input size, output size and the two row pitches.
// Relies on view->block_size, texel_pitch and is_compressed already being
// set.
const TextureSizes2D Texture::GetTextureSizes2D(TextureView* view) {
  TextureSizes2D sizes;

  // The fetch constant stores width/height minus one.
  sizes.logical_width = 1 + view->fetch.size_2d.width;
  sizes.logical_height = 1 + view->fetch.size_2d.height;

  sizes.block_width = sizes.logical_width / view->block_size;
  sizes.block_height = sizes.logical_height / view->block_size;

  if (view->is_compressed) {
    // must be 128x128
    sizes.input_width = XEROUNDUP(sizes.logical_width, 128);
    sizes.input_height = XEROUNDUP(sizes.logical_height, 128);
    sizes.output_width = XENEXTPOW2(sizes.logical_width);
    sizes.output_height = XENEXTPOW2(sizes.logical_height);
  } else {
    // must be 32x32, but also must have a pitch that is a multiple of 256 bytes
    uint32_t width_multiple = 32;
    uint32_t bytes_per_block = view->block_size * view->block_size *
                               view->texel_pitch;
    if (bytes_per_block) {
      uint32_t minimum_multiple = 256 / bytes_per_block;
      if (minimum_multiple > width_multiple) {
        width_multiple = minimum_multiple;
      }
    }
    sizes.input_width = XEROUNDUP(sizes.logical_width, width_multiple);
    sizes.input_height = XEROUNDUP(sizes.logical_height, 32);
    sizes.output_width = sizes.logical_width;
    sizes.output_height = sizes.logical_height;
  }

  // Bytes per row of blocks, for the logical and the padded input width.
  sizes.logical_pitch =
      (sizes.logical_width / view->block_size) * view->texel_pitch;
  sizes.input_pitch =
      (sizes.input_width / view->block_size) * view->texel_pitch;
  return sizes;
}
// Copies |pitch| bytes from |src| to |dest|, applying the byte/half-word swap
// selected by |endianness|. Unknown modes and XE_GPU_ENDIAN_NONE are a plain
// copy. The swapping paths step in 2- or 4-byte units, so |pitch| is expected
// to be a multiple of the unit size.
void Texture::TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch,
                          XE_GPU_ENDIAN endianness) {
  if (endianness == XE_GPU_ENDIAN_8IN16) {
    // Swap the two bytes of every 16-bit unit.
    for (uint32_t offset = 0; offset < pitch; offset += 2) {
      *(uint16_t*)(dest + offset) = XESWAP16(*(uint16_t*)(src + offset));
    }
  } else if (endianness == XE_GPU_ENDIAN_8IN32) {
    // Swap bytes.
    for (uint32_t offset = 0; offset < pitch; offset += 4) {
      *(uint32_t*)(dest + offset) = XESWAP32(*(uint32_t*)(src + offset));
    }
  } else if (endianness == XE_GPU_ENDIAN_16IN32) {
    // Swap half words.
    for (uint32_t offset = 0; offset < pitch; offset += 4) {
      const uint32_t word = *(uint32_t*)(src + offset);
      *(uint32_t*)(dest + offset) = ((word >> 16) & 0xFFFF) | (word << 16);
    }
  } else {
    // XE_GPU_ENDIAN_NONE and anything unrecognized.
    memcpy(dest, src, pitch);
  }
}
// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
// Computes the row-dependent (y-only) part of a tiled 2D offset. The result
// is meant to be fed to TiledOffset2DInner as its base_offset.
uint32_t Texture::TiledOffset2DOuter(uint32_t y, uint32_t width,
                                     uint32_t log_bpp) {
  const uint32_t macro_term = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
  const uint32_t micro_term = ((y & 6) << 2) << log_bpp;

  uint32_t result = macro_term;
  result += (micro_term & ~15) << 1;
  result += micro_term & 15;
  result += (y & 8) << (3 + log_bpp);
  result += (y & 1) << 4;
  return result;
}
// Combines the column-dependent terms for (x, y) with |base_offset| (as
// produced by TiledOffset2DOuter) to form the final tiled offset.
uint32_t Texture::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
                                     uint32_t base_offset) {
  const uint32_t macro_term = (x >> 5) << (bpp + 7);
  const uint32_t micro_term = (x & 7) << bpp;
  const uint32_t linear =
      base_offset + (macro_term + ((micro_term & ~15) << 1) + (micro_term & 15));

  // Spread the linear offset's bit groups and mix in the remaining x/y bits.
  const uint32_t high_bits = (linear & ~511) << 3;
  const uint32_t mid_bits = (linear & 448) << 2;
  const uint32_t low_bits = linear & 63;
  const uint32_t y_bit = (y & 16) << 7;
  const uint32_t bank_bits = ((((y & 8) >> 2) + (x >> 3)) & 3) << 6;
  return high_bits + mid_bits + low_bits + y_bit + bank_bits;
}

View File

@ -1,110 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_TEXTURE_H_
#define XENIA_GPU_TEXTURE_H_
#include <xenia/core.h>
#include <xenia/gpu/xenos/xenos.h>
// TODO(benvanik): replace DXGI constants with xenia constants.
#include <d3d11.h>
namespace xe {
namespace gpu {
class Texture;
// Size information for 1D textures (no fields yet).
struct TextureSizes1D {};
// Size information for 2D textures, as computed by
// Texture::GetTextureSizes2D.
struct TextureSizes2D {
// Fetch-constant width/height plus one.
uint32_t logical_width;
uint32_t logical_height;
// Logical width/height divided by the view's block_size.
uint32_t block_width;
uint32_t block_height;
// Logical width/height rounded up to the source alignment (128 for
// compressed formats; 32, or more for width to satisfy the 256-byte pitch
// rule, otherwise).
uint32_t input_width;
uint32_t input_height;
// Logical size for uncompressed formats, next power of two for compressed.
uint32_t output_width;
uint32_t output_height;
// (logical_width / block_size) * texel_pitch.
uint32_t logical_pitch;
// (input_width / block_size) * texel_pitch.
uint32_t input_pitch;
};
// Size information for 3D textures (no fields yet).
struct TextureSizes3D {};
// Size information for cube textures (no fields yet).
struct TextureSizesCube {};
// Describes one view of a Texture: the fetch constant it was created from
// plus the format and size information derived from it.
struct TextureView {
// Texture this view belongs to.
Texture* texture;
// Fetch constant describing the view.
xenos::xe_gpu_texture_fetch_t fetch;
uint64_t hash;
// Dimension-specific sizes; only the member matching the fetch dimension
// is meaningful.
union {
TextureSizes1D sizes_1d;
TextureSizes2D sizes_2d;
TextureSizes3D sizes_3d;
TextureSizesCube sizes_cube;
};
int dimensions;
uint32_t width;
uint32_t height;
uint32_t depth;
// Edge length of a block, in texels; used as a divisor when computing
// block counts and pitches, so it must be non-zero before sizes are derived.
uint32_t block_size;
// Bytes per block; multiplied by blocks-per-row to form pitches.
uint32_t texel_pitch;
bool is_compressed;
// DXGI_FORMAT_UNKNOWN means the guest format is not supported.
DXGI_FORMAT format;
// Note: fetch, hash and the sizes union are not initialized here.
TextureView()
: texture(nullptr),
dimensions(0),
width(0), height(0), depth(0),
block_size(0), texel_pitch(0),
is_compressed(false), format(DXGI_FORMAT_UNKNOWN) {}
};
// Abstract base for a guest texture located at a fixed guest address.
// Backends subclass this and implement FetchNew/FetchDirty.
class Texture {
public:
// |address| is the guest address, |host_address| the corresponding host
// pointer.
Texture(uint32_t address, const uint8_t* host_address);
virtual ~Texture();
// Returns a view of this texture for the given fetch constant.
TextureView* Fetch(
const xenos::xe_gpu_texture_fetch_t& fetch);
protected:
// Fills |view| with format/size info derived from |fetch|. Returns false
// when the format maps to DXGI_FORMAT_UNKNOWN.
bool FillViewInfo(TextureView* view,
const xenos::xe_gpu_texture_fetch_t& fetch);
// Backend hook: creates a new view for |fetch|.
virtual TextureView* FetchNew(
const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
// Backend hook for an existing |view|.
// NOTE(review): presumably refreshes the view after guest memory changed;
// confirm against the implementations.
virtual bool FetchDirty(
TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
// Computes the 2D size/pitch values for |view|.
const TextureSizes2D GetTextureSizes2D(TextureView* view);
// Copies |pitch| bytes from |src| to |dest| applying the swap selected by
// |endianness|.
static void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch,
xenos::XE_GPU_ENDIAN endianness);
// Row-dependent part of a tiled offset; feed the result to
// TiledOffset2DInner as base_offset.
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp);
// Final tiled offset for (x, y) given the outer base offset.
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
uint32_t base_offset);
// Guest address of the texture data.
uint32_t address_;
// Host pointer to the texture data.
const uint8_t* host_address_;
// TODO(benvanik): replace with LRU keyed list.
std::vector<TextureView*> views_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_TEXTURE_H_

View File

@ -1,50 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <xenia/gpu/texture_cache.h>
#include <xenia/gpu/xenos/ucode.h>
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
// https://github.com/ivmai/bdwgc/blob/master/os_dep.c
// Stores the Memory used later to translate guest texture addresses.
TextureCache::TextureCache(Memory* memory) : memory_(memory) {}
// The cache owns every Texture it created; release them all.
TextureCache::~TextureCache() {
  for (auto& entry : textures_) {
    delete entry.second;
  }
  textures_.clear();
}
// Returns a view for the texture at guest |address|, creating and caching the
// Texture on first use. Returns nullptr if the texture cannot be created.
TextureView* TextureCache::FetchTexture(
    uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch) {
  auto existing = textures_.find(address);
  if (existing != textures_.end()) {
    // Texture found.
    return existing->second->Fetch(fetch);
  }

  // Texture not found: create it and remember it by guest address.
  const uint8_t* host_address = memory_->Translate(address);
  auto created = CreateTexture(address, host_address, fetch);
  if (!created) {
    return nullptr;
  }
  textures_.insert({ address, created });
  return created->Fetch(fetch);
}

View File

@ -1,50 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_TEXTURE_CACHE_H_
#define XENIA_GPU_TEXTURE_CACHE_H_
#include <xenia/core.h>
#include <xenia/gpu/texture.h>
#include <xenia/gpu/xenos/xenos.h>
namespace xe {
namespace gpu {
// TODO(benvanik): overlapping textures.
// TODO(benvanik): multiple textures (differing formats/etc) per address.
// Caches Texture objects keyed by guest address. Backends subclass this and
// implement CreateTexture.
class TextureCache {
public:
TextureCache(Memory* memory);
// Deletes every cached Texture.
virtual ~TextureCache();
Memory* memory() const { return memory_; }
// Returns a view for the texture at guest |address|, creating the Texture
// on first use. Returns nullptr if creation fails.
TextureView* FetchTexture(
uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch);
protected:
// Backend hook: creates the Texture for |address|. A null result makes
// FetchTexture return nullptr.
virtual Texture* CreateTexture(
uint32_t address, const uint8_t* host_address,
const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
// Used to translate guest addresses to host pointers.
Memory* memory_;
// Mapped by guest address.
std::unordered_map<uint32_t, Texture*> textures_;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_TEXTURE_CACHE_H_

View File

@ -9,9 +9,342 @@
#include <xenia/gpu/texture_resource.h>
#include <cstring>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h>
using namespace std;
using namespace xe;
using namespace xe::gpu;
using namespace xe::gpu::xenos;
bool TextureResource::Info::Prepare(const xe_gpu_texture_fetch_t& fetch,
Info& info) {
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
// a2xx_sq_surfaceformat
info.dimension = (TextureDimension)fetch.dimension;
switch (info.dimension) {
case TEXTURE_DIMENSION_1D:
info.width = fetch.size_1d.width;
break;
case TEXTURE_DIMENSION_2D:
info.width = fetch.size_2d.width;
info.height = fetch.size_2d.height;
break;
case TEXTURE_DIMENSION_3D:
case TEXTURE_DIMENSION_CUBE:
info.width = fetch.size_3d.width;
info.height = fetch.size_3d.height;
info.depth = fetch.size_3d.depth;
break;
}
info.block_size = 0;
info.texel_pitch = 0;
info.endianness = (XE_GPU_ENDIAN)fetch.endianness;
info.is_tiled = fetch.tiled;
info.is_compressed = false;
info.input_length = 0;
info.format = DXGI_FORMAT_UNKNOWN;
switch (fetch.format) {
case FMT_8:
switch (fetch.swizzle) {
case XE_GPU_SWIZZLE_RRR1:
info.format = DXGI_FORMAT_R8_UNORM;
break;
case XE_GPU_SWIZZLE_000R:
info.format = DXGI_FORMAT_A8_UNORM;
break;
default:
XELOGW("D3D11: unhandled swizzle for FMT_8");
info.format = DXGI_FORMAT_A8_UNORM;
break;
}
info.block_size = 1;
info.texel_pitch = 1;
break;
case FMT_1_5_5_5:
switch (fetch.swizzle) {
case XE_GPU_SWIZZLE_BGRA:
info.format = DXGI_FORMAT_B5G5R5A1_UNORM;
break;
default:
XELOGW("D3D11: unhandled swizzle for FMT_1_5_5_5");
info.format = DXGI_FORMAT_B5G5R5A1_UNORM;
break;
}
info.block_size = 1;
info.texel_pitch = 2;
break;
case FMT_8_8_8_8:
switch (fetch.swizzle) {
case XE_GPU_SWIZZLE_RGBA:
info.format = DXGI_FORMAT_R8G8B8A8_UNORM;
break;
case XE_GPU_SWIZZLE_BGRA:
info.format = DXGI_FORMAT_B8G8R8A8_UNORM;
break;
case XE_GPU_SWIZZLE_RGB1:
info.format = DXGI_FORMAT_R8G8B8A8_UNORM; // ?
break;
case XE_GPU_SWIZZLE_BGR1:
info.format = DXGI_FORMAT_B8G8R8X8_UNORM;
break;
default:
XELOGW("D3D11: unhandled swizzle for FMT_8_8_8_8");
info.format = DXGI_FORMAT_R8G8B8A8_UNORM;
break;
}
info.block_size = 1;
info.texel_pitch = 4;
break;
case FMT_4_4_4_4:
switch (fetch.swizzle) {
case XE_GPU_SWIZZLE_BGRA:
info.format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
break;
default:
XELOGW("D3D11: unhandled swizzle for FMT_4_4_4_4");
info.format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
break;
}
info.block_size = 1;
info.texel_pitch = 2;
break;
case FMT_16_16_16_16_FLOAT:
switch (fetch.swizzle) {
case XE_GPU_SWIZZLE_RGBA:
info.format = DXGI_FORMAT_R16G16B16A16_FLOAT;
break;
default:
XELOGW("D3D11: unhandled swizzle for FMT_16_16_16_16_FLOAT");
info.format = DXGI_FORMAT_R16G16B16A16_FLOAT;
break;
}
info.block_size = 1;
info.texel_pitch = 8;
break;
case FMT_32_FLOAT:
switch (fetch.swizzle) {
case XE_GPU_SWIZZLE_R111:
info.format = DXGI_FORMAT_R32_FLOAT;
break;
default:
XELOGW("D3D11: unhandled swizzle for FMT_32_FLOAT");
info.format = DXGI_FORMAT_R32_FLOAT;
break;
}
info.block_size = 1;
info.texel_pitch = 4;
break;
case FMT_DXT1:
info.format = DXGI_FORMAT_BC1_UNORM;
info.block_size = 4;
info.texel_pitch = 8;
info.is_compressed = true;
break;
case FMT_DXT2_3:
case FMT_DXT4_5:
info.format = (fetch.format == FMT_DXT4_5 ? DXGI_FORMAT_BC3_UNORM : DXGI_FORMAT_BC2_UNORM);
info.block_size = 4;
info.texel_pitch = 16;
info.is_compressed = true;
break;
case FMT_1_REVERSE:
case FMT_1:
case FMT_5_6_5:
case FMT_6_5_5:
case FMT_2_10_10_10:
case FMT_8_A:
case FMT_8_B:
case FMT_8_8:
case FMT_Cr_Y1_Cb_Y0:
case FMT_Y1_Cr_Y0_Cb:
case FMT_5_5_5_1:
case FMT_8_8_8_8_A:
case FMT_10_11_11:
case FMT_11_11_10:
case FMT_24_8:
case FMT_24_8_FLOAT:
case FMT_16:
case FMT_16_16:
case FMT_16_16_16_16:
case FMT_16_EXPAND:
case FMT_16_16_EXPAND:
case FMT_16_16_16_16_EXPAND:
case FMT_16_FLOAT:
case FMT_16_16_FLOAT:
case FMT_32:
case FMT_32_32:
case FMT_32_32_32_32:
case FMT_32_32_FLOAT:
case FMT_32_32_32_32_FLOAT:
case FMT_32_AS_8:
case FMT_32_AS_8_8:
case FMT_16_MPEG:
case FMT_16_16_MPEG:
case FMT_8_INTERLACED:
case FMT_32_AS_8_INTERLACED:
case FMT_32_AS_8_8_INTERLACED:
case FMT_16_INTERLACED:
case FMT_16_MPEG_INTERLACED:
case FMT_16_16_MPEG_INTERLACED:
case FMT_DXN:
case FMT_8_8_8_8_AS_16_16_16_16:
case FMT_DXT1_AS_16_16_16_16:
case FMT_DXT2_3_AS_16_16_16_16:
case FMT_DXT4_5_AS_16_16_16_16:
case FMT_2_10_10_10_AS_16_16_16_16:
case FMT_10_11_11_AS_16_16_16_16:
case FMT_11_11_10_AS_16_16_16_16:
case FMT_32_32_32_FLOAT:
case FMT_DXT3A:
case FMT_DXT5A:
case FMT_CTX1:
case FMT_DXT3A_AS_1_1_1_1:
info.format = DXGI_FORMAT_UNKNOWN;
break;
}
if (info.format == DXGI_FORMAT_UNKNOWN) {
return false;
}
// Must be called here when we know the format.
switch (info.dimension) {
case TEXTURE_DIMENSION_1D:
info.CalculateTextureSizes1D(fetch);
break;
case TEXTURE_DIMENSION_2D:
info.CalculateTextureSizes2D(fetch);
break;
case TEXTURE_DIMENSION_3D:
// TODO(benvanik): calculate size.
return false;
case TEXTURE_DIMENSION_CUBE:
// TODO(benvanik): calculate size.
return false;
}
return true;
}
// Records the 1D size: the fetch constant's width is stored unchanged.
void TextureResource::Info::CalculateTextureSizes1D(
    const xe_gpu_texture_fetch_t& fetch) {
  // ? (original author was unsure whether more is required here)
  const uint32_t fetch_width = fetch.size_1d.width;
  size_1d.width = fetch_width;
}
// Fills size_2d and input_length from |fetch| using the block_size,
// texel_pitch, is_compressed and is_tiled values already set on this Info.
// block_size must be non-zero, as it is used as a divisor.
void TextureResource::Info::CalculateTextureSizes2D(
    const xe_gpu_texture_fetch_t& fetch) {
  auto& dims = size_2d;

  // The logical size is the fetch constant's width/height plus one.
  dims.logical_width = 1 + fetch.size_2d.width;
  dims.logical_height = 1 + fetch.size_2d.height;
  dims.block_width = dims.logical_width / block_size;
  dims.block_height = dims.logical_height / block_size;

  if (is_compressed) {
    // must be 128x128
    dims.input_width = XEROUNDUP(dims.logical_width, 128);
    dims.input_height = XEROUNDUP(dims.logical_height, 128);
    dims.output_width = XENEXTPOW2(dims.logical_width);
    dims.output_height = XENEXTPOW2(dims.logical_height);
  } else {
    // must be 32x32 but also must have a pitch that is a multiple of 256 bytes
    uint32_t width_alignment = 32;
    const uint32_t block_bytes = block_size * block_size * texel_pitch;
    if (block_bytes) {
      const uint32_t pitch_alignment = 256 / block_bytes;
      if (pitch_alignment > width_alignment) {
        width_alignment = pitch_alignment;
      }
    }
    dims.input_width = XEROUNDUP(dims.logical_width, width_alignment);
    dims.input_height = XEROUNDUP(dims.logical_height, 32);
    dims.output_width = dims.logical_width;
    dims.output_height = dims.logical_height;
  }

  dims.logical_pitch = (dims.logical_width / block_size) * texel_pitch;
  dims.input_pitch = (dims.input_width / block_size) * texel_pitch;

  // Tiled and linear textures currently use the same length; the tiled case
  // was marked as uncertain ("?") by the original author.
  input_length = dims.block_height * dims.logical_pitch;
}
// Binds the resource to |memory_range| and keeps a copy of |info|.
TextureResource::TextureResource(const MemoryRange& memory_range,
                                 const Info& info)
    : PagedResource(memory_range), info_(info) {}
// Nothing to release at this level.
TextureResource::~TextureResource() = default;
// Makes the texture ready for use. Creates the backend handle on first use
// and, when the resource has been dirtied, re-uploads the whole memory range.
// Returns 0 on success (or when nothing had to be done), 1 if the handle
// could not be created, otherwise the result of InvalidateRegion.
int TextureResource::Prepare() {
  // CreateHandle reports failure with a non-zero result.
  if (!handle() && CreateHandle()) {
    XELOGE("Unable to create texture handle");
    return 1;
  }

  if (dirtied_) {
    // The flag is cleared before invalidating, regardless of the outcome.
    dirtied_ = false;
    // pass dirty regions?
    return InvalidateRegion(memory_range_);
  }
  return 0;
}
// Copies |pitch| bytes from |src| to |dest|, applying the byte/half-word swap
// selected by info_.endianness. XE_GPU_ENDIAN_NONE and unknown modes are a
// plain copy.
//
// Only whole 2-/4-byte units are swapped; if |pitch| is not a multiple of the
// unit size the trailing bytes are copied unchanged, so the function never
// reads or writes past |pitch| bytes. Loads and stores go through memcpy so
// that |src| and |dest| do not need to be aligned to the unit size.
void TextureResource::TextureSwap(uint8_t* dest, const uint8_t* src,
                                  uint32_t pitch) const {
  // TODO(benvanik): optimize swapping paths.
  uint32_t swapped_length = pitch;
  switch (info_.endianness) {
    case XE_GPU_ENDIAN_8IN16: {
      swapped_length = pitch & ~1u;
      for (uint32_t i = 0; i < swapped_length; i += 2) {
        uint16_t value;
        memcpy(&value, src + i, sizeof(value));
        value = XESWAP16(value);
        memcpy(dest + i, &value, sizeof(value));
      }
      break;
    }
    case XE_GPU_ENDIAN_8IN32: {  // Swap bytes.
      swapped_length = pitch & ~3u;
      for (uint32_t i = 0; i < swapped_length; i += 4) {
        uint32_t value;
        memcpy(&value, src + i, sizeof(value));
        value = XESWAP32(value);
        memcpy(dest + i, &value, sizeof(value));
      }
      break;
    }
    case XE_GPU_ENDIAN_16IN32: {  // Swap half words.
      swapped_length = pitch & ~3u;
      for (uint32_t i = 0; i < swapped_length; i += 4) {
        uint32_t value;
        memcpy(&value, src + i, sizeof(value));
        value = ((value >> 16) & 0xFFFF) | (value << 16);
        memcpy(dest + i, &value, sizeof(value));
      }
      break;
    }
    default:
    case XE_GPU_ENDIAN_NONE:
      memcpy(dest, src, pitch);
      break;
  }
  // Copy any bytes that did not form a whole swap unit.
  if (swapped_length < pitch) {
    memcpy(dest + swapped_length, src + swapped_length,
           pitch - swapped_length);
  }
}
// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
// Computes the row-dependent (y-only) part of a tiled 2D offset. The result
// is meant to be fed to TiledOffset2DInner as its base_offset.
uint32_t TextureResource::TiledOffset2DOuter(uint32_t y, uint32_t width,
                                             uint32_t log_bpp) const {
  const uint32_t macro_term = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
  const uint32_t micro_term = ((y & 6) << 2) << log_bpp;

  uint32_t result = macro_term;
  result += (micro_term & ~15) << 1;
  result += micro_term & 15;
  result += (y & 8) << (3 + log_bpp);
  result += (y & 1) << 4;
  return result;
}
// Combines the column-dependent terms for (x, y) with |base_offset| (as
// produced by TiledOffset2DOuter) to form the final tiled offset.
uint32_t TextureResource::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
                                             uint32_t base_offset) const {
  const uint32_t macro_term = (x >> 5) << (bpp + 7);
  const uint32_t micro_term = (x & 7) << bpp;
  const uint32_t linear =
      base_offset + (macro_term + ((micro_term & ~15) << 1) + (micro_term & 15));

  // Spread the linear offset's bit groups and mix in the remaining x/y bits.
  const uint32_t high_bits = (linear & ~511) << 3;
  const uint32_t mid_bits = (linear & 448) << 2;
  const uint32_t low_bits = linear & 63;
  const uint32_t y_bit = (y & 16) << 7;
  const uint32_t bank_bits = ((((y & 8) >> 2) + (x >> 3)) & 3) << 6;
  return high_bits + mid_bits + low_bits + y_bit + bank_bits;
}

View File

@ -10,7 +10,7 @@
#ifndef XENIA_GPU_TEXTURE_RESOURCE_H_
#define XENIA_GPU_TEXTURE_RESOURCE_H_
#include <xenia/core.h>
#include <xenia/gpu/resource.h>
#include <xenia/gpu/xenos/xenos.h>
// TODO(benvanik): replace DXGI constants with xenia constants.
@ -21,8 +21,85 @@ namespace xe {
namespace gpu {
class TextureResource : public Resource {
// Texture dimensionality. TextureResource::Info::Prepare casts the fetch
// constant's dimension field directly to this enum, so the numeric values
// must match the guest encoding.
enum TextureDimension {
TEXTURE_DIMENSION_1D = 0,
TEXTURE_DIMENSION_2D = 1,
TEXTURE_DIMENSION_3D = 2,
TEXTURE_DIMENSION_CUBE = 3,
};
// Abstract texture resource backed by a range of guest memory. Backends
// implement CreateHandle/InvalidateRegion.
class TextureResource : public PagedResource {
public:
// Description of a texture derived from a guest fetch constant; filled by
// Info::Prepare. Instances are compared bytewise (see Equals), so every
// byte, including fields unused for a given dimension, affects equality.
struct Info {
TextureDimension dimension;
// Raw width/height/depth values copied from the fetch constant.
uint32_t width;
uint32_t height;
uint32_t depth;
// Block edge length in texels (1 for uncompressed formats, 4 for DXT).
uint32_t block_size;
// Bytes per block.
uint32_t texel_pitch;
xenos::XE_GPU_ENDIAN endianness;
bool is_tiled;
bool is_compressed;
// Length of the source data in bytes (block_height * logical_pitch for 2D).
uint32_t input_length;
// TODO(benvanik): replace with our own constants.
DXGI_FORMAT format;
// Dimension-specific sizes; only the member matching |dimension| is
// written by Prepare.
union {
struct {
uint32_t width;
} size_1d;
struct {
// Fetch-constant width/height plus one.
uint32_t logical_width;
uint32_t logical_height;
// Logical size divided by block_size.
uint32_t block_width;
uint32_t block_height;
// Logical size rounded up to the source alignment.
uint32_t input_width;
uint32_t input_height;
// Logical size (uncompressed) or next power of two (compressed).
uint32_t output_width;
uint32_t output_height;
// Bytes per row of blocks for the logical and input widths.
uint32_t logical_pitch;
uint32_t input_pitch;
} size_2d;
struct {
} size_3d;
struct {
} size_cube;
};
// Fills |out_info| from |fetch|. Returns false for unsupported formats and
// for 3D/cube textures.
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
Info& out_info);
private:
void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch);
void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch);
};
TextureResource(const MemoryRange& memory_range,
const Info& info);
~TextureResource() override;
const Info& info() const { return info_; }
// True when |info_ptr| points at an Info whose bytes are identical to this
// resource's Info.
bool Equals(const void* info_ptr, size_t info_length) override {
return info_length == sizeof(Info) &&
memcmp(info_ptr, &info_, info_length) == 0;
}
// Creates the handle if needed and refreshes the texture contents when
// dirtied. Returns 0 on success, non-zero on failure.
virtual int Prepare();
protected:
// Backend hook: creates the native handle. Non-zero means failure.
virtual int CreateHandle() = 0;
// Backend hook: re-reads |memory_range|; its result is returned from
// Prepare.
virtual int InvalidateRegion(const MemoryRange& memory_range) = 0;
// Copies |pitch| bytes applying the swap selected by info_.endianness.
void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch) const;
// Row-dependent part of a tiled offset; feed the result to
// TiledOffset2DInner as base_offset.
uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp) const;
// Final tiled offset for (x, y) given the outer base offset.
uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
uint32_t base_offset) const;
Info info_;
};

View File

@ -1,51 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_XENOS_REGISTERS_H_
#define XENIA_GPU_XENOS_REGISTERS_H_
#include <xenia/core.h>
namespace xe {
namespace gpu {
namespace xenos {
static const uint32_t kXEGpuRegisterCount = 0x5003;
// Register indices, generated from register_table.inc: every
// XE_GPU_REGISTER(index, type, name) entry becomes XE_GPU_REG_<name> = index.
enum Registers {
#define XE_GPU_REGISTER(index, type, name) \
XE_GPU_REG_##name = index,
#include <xenia/gpu/xenos/register_table.inc>
#undef XE_GPU_REGISTER
};
const char* GetRegisterName(uint32_t index);
// A single register value, accessible either as an unsigned integer or as a
// float sharing the same storage.
union RegisterValue {
uint32_t u32;
float f32;
};
// The register file: one RegisterValue per register index, with
// kXEGpuRegisterCount entries in total.
struct RegisterFile {
RegisterValue values[kXEGpuRegisterCount];
};
} // namespace xenos
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_XENOS_REGISTERS_H_

View File

@ -3,8 +3,6 @@
'sources': [
'packets.h',
'register_table.inc',
'registers.cc',
'registers.h',
'ucode.h',
'ucode_disassembler.cc',
'ucode_disassembler.h',