Initial refactoring of gpu/.
Runs too fast - now there are ringbuffer wrapping issues.
This commit is contained in:
parent
4072640a64
commit
295910c3d8
|
@ -1,42 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/buffer.h>
|
||||
|
||||
#include <xenia/gpu/xenos/ucode_disassembler.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Records the source pointer and length for later fetches; the pointer is
// stored as-is and the data it refers to is not copied here.
//
// hash_ is zero-initialized so that hash() returns a well-defined value
// before anything assigns it (it was previously left uninitialized).
Buffer::Buffer(const uint8_t* src_ptr, size_t length)
    : src_(src_ptr), length_(length), hash_(0) {
}
|
||||
|
||||
// Nothing to release: src_ is a borrowed pointer that this class never frees.
Buffer::~Buffer() = default;
|
||||
|
||||
// Forwards the source range to Buffer and keeps a copy of the index
// description in info_.
IndexBuffer::IndexBuffer(
    const IndexBufferInfo& info,
    const uint8_t* src_ptr,
    size_t length)
    : Buffer(src_ptr, length), info_(info) {}
|
||||
|
||||
// No resources of its own to release.
IndexBuffer::~IndexBuffer() = default;
|
||||
|
||||
// Forwards the source range to Buffer and keeps a copy of the vertex layout
// description in info_.
VertexBuffer::VertexBuffer(
    const VertexBufferInfo& info,
    const uint8_t* src_ptr,
    size_t length)
    : Buffer(src_ptr, length), info_(info) {}
|
||||
|
||||
// No resources of its own to release.
VertexBuffer::~VertexBuffer() = default;
|
|
@ -1,91 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_BUFFER_H_
|
||||
#define XENIA_GPU_BUFFER_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
// Abstract base for buffers tracked by BufferCache. Holds a borrowed pointer
// to the source bytes, their length, and a 64-bit content hash.
class Buffer {
 public:
  Buffer(const uint8_t* src_ptr, size_t length);
  virtual ~Buffer();

  // Borrowed pointer to the source data (never freed by this class).
  const uint8_t* src() const { return src_; }
  // Length associated with src().
  size_t length() const { return length_; }
  // Content hash last recorded for this buffer.
  uint64_t hash() const { return hash_; }

  // Called by the cache right after the buffer is created; `hash` is the
  // hash of the current source contents. Returns false on failure.
  virtual bool FetchNew(uint64_t hash) = 0;
  // Called by the cache when the source hash no longer matches hash().
  // Returns false on failure.
  virtual bool FetchDirty(uint64_t hash) = 0;

 protected:
  const uint8_t* src_;
  size_t length_;
  uint64_t hash_;
};
|
||||
|
||||
|
||||
// Plain description of an index buffer; all four fields feed the cache key
// computed in BufferCache::FetchIndexBuffer.
struct IndexBufferInfo {
  bool index_32bit;      // true for 32-bit indices, false otherwise
  uint32_t index_count;  // number of indices
  uint32_t index_size;   // NOTE(review): units not visible here - confirm
  uint32_t endianness;   // raw endianness selector
};
|
||||
|
||||
|
||||
class IndexBuffer : public Buffer {
|
||||
public:
|
||||
IndexBuffer(const IndexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
virtual ~IndexBuffer();
|
||||
|
||||
protected:
|
||||
IndexBufferInfo info_;
|
||||
};
|
||||
|
||||
|
||||
// Describes how vertex data is laid out: an overall stride plus up to 16
// per-element entries.
struct VertexBufferLayout {
  uint32_t stride_words;
  // NOTE(review): presumably the number of valid entries in elements[] - confirm.
  uint32_t element_count;
  struct {
    uint32_t format;
    uint32_t offset_words;
    uint32_t size_words;
  } elements[16];
};
|
||||
|
||||
struct VertexBufferInfo {
|
||||
VertexBufferLayout layout;
|
||||
};
|
||||
|
||||
|
||||
class VertexBuffer : public Buffer {
|
||||
public:
|
||||
VertexBuffer(const VertexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
virtual ~VertexBuffer();
|
||||
|
||||
protected:
|
||||
VertexBufferInfo info_;
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_BUFFER_H_
|
|
@ -1,79 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/buffer_cache.h>
|
||||
|
||||
#include <xenia/gpu/buffer.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Both maps start out empty; there is nothing else to set up.
BufferCache::BufferCache() = default;
|
||||
|
||||
// Releases whatever Clear() releases when the cache goes away.
BufferCache::~BufferCache() { Clear(); }
|
||||
|
||||
// Returns the cached index buffer for `info`, creating or refreshing it as
// needed.
//
//  - No entry for the key: a buffer is created, inserted, and FetchNew() is
//    called with the current content hash.
//  - Entry exists and its hash matches the source: returned unchanged.
//  - Entry exists but the hash differs: FetchDirty() is called.
//
// Returns nullptr when FetchNew()/FetchDirty() reports failure. A buffer
// whose FetchNew() failed stays in the map.
//
// NOTE(review): the key is derived from `info` only, not from src_ptr, so
// two buffers with identical info share one entry - confirm this is intended.
IndexBuffer* BufferCache::FetchIndexBuffer(
    const IndexBufferInfo& info,
    const uint8_t* src_ptr, size_t length) {
  size_t key = hash_combine(info.endianness, info.index_32bit,
                            info.index_count, info.index_size);
  // Kept at 64 bits: Buffer::hash() and the Fetch* hooks use uint64_t, and
  // narrowing through size_t would corrupt the comparison where size_t is
  // narrower than 64 bits.
  const uint64_t hash = xe_hash64(src_ptr, length);

  auto it = index_buffer_map_.find(key);
  if (it == index_buffer_map_.end()) {
    auto buffer = CreateIndexBuffer(info, src_ptr, length);
    index_buffer_map_.insert({ key, buffer });
    if (!buffer->FetchNew(hash)) {
      return nullptr;
    }
    return buffer;
  }

  IndexBuffer* cached = it->second;
  if (hash == cached->hash()) {
    return cached;
  }
  return cached->FetchDirty(hash) ? cached : nullptr;
}
|
||||
|
||||
// Returns the cached vertex buffer for the source address `src_ptr`,
// creating or refreshing it as needed.
//
//  - No entry for the address: a buffer is created, inserted, and FetchNew()
//    is called with the current content hash.
//  - Entry exists and its hash matches the source: returned unchanged.
//  - Entry exists but the hash differs: FetchDirty() is called.
//
// Returns nullptr when FetchNew()/FetchDirty() reports failure. A buffer
// whose FetchNew() failed stays in the map.
VertexBuffer* BufferCache::FetchVertexBuffer(
    const VertexBufferInfo& info,
    const uint8_t* src_ptr, size_t length) {
  // Vertex buffers are keyed by where they live, not by their layout.
  size_t key = reinterpret_cast<size_t>(src_ptr);
  // Kept at 64 bits: Buffer::hash() and the Fetch* hooks use uint64_t, and
  // narrowing through size_t would corrupt the comparison where size_t is
  // narrower than 64 bits.
  const uint64_t hash = xe_hash64(src_ptr, length);

  auto it = vertex_buffer_map_.find(key);
  if (it == vertex_buffer_map_.end()) {
    auto buffer = CreateVertexBuffer(info, src_ptr, length);
    vertex_buffer_map_.insert({ key, buffer });
    if (!buffer->FetchNew(hash)) {
      return nullptr;
    }
    return buffer;
  }

  VertexBuffer* cached = it->second;
  if (hash == cached->hash()) {
    return cached;
  }
  return cached->FetchDirty(hash) ? cached : nullptr;
}
|
||||
|
||||
void BufferCache::Clear() {
|
||||
for (auto it = index_buffer_map_.begin();
|
||||
it != index_buffer_map_.end(); ++it) {
|
||||
auto buffer = it->second;
|
||||
delete buffer;
|
||||
}
|
||||
index_buffer_map_.clear();
|
||||
}
|
|
@ -1,55 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_BUFFER_CACHE_H_
|
||||
#define XENIA_GPU_BUFFER_CACHE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/buffer.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
class BufferCache {
|
||||
public:
|
||||
BufferCache();
|
||||
virtual ~BufferCache();
|
||||
|
||||
IndexBuffer* FetchIndexBuffer(
|
||||
const IndexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
|
||||
VertexBuffer* FetchVertexBuffer(
|
||||
const VertexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
|
||||
void Clear();
|
||||
|
||||
protected:
|
||||
virtual IndexBuffer* CreateIndexBuffer(
|
||||
const IndexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length) = 0;
|
||||
virtual VertexBuffer* CreateVertexBuffer(
|
||||
const VertexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length) = 0;
|
||||
|
||||
private:
|
||||
std::unordered_map<uint64_t, IndexBuffer*> index_buffer_map_;
|
||||
std::unordered_map<uint64_t, VertexBuffer*> vertex_buffer_map_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_BUFFER_CACHE_H_
|
|
@ -15,3 +15,42 @@ using namespace xe;
|
|||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Hands the memory range straight to the PagedResource base.
BufferResource::BufferResource(const MemoryRange& memory_range)
    : PagedResource(memory_range) {}
|
||||
|
||||
// Defined out of line; the compiler-generated behavior is sufficient.
BufferResource::~BufferResource() = default;
|
||||
|
||||
int BufferResource::Prepare() {
|
||||
if (!handle()) {
|
||||
if (CreateHandle()) {
|
||||
XELOGE("Unable to create buffer handle");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!dirtied_) {
|
||||
return 0;
|
||||
}
|
||||
dirtied_ = false;
|
||||
|
||||
// pass dirty regions?
|
||||
return InvalidateRegion(memory_range_);
|
||||
}
|
||||
|
||||
// Passes the memory range to BufferResource and stores a copy of `info`.
IndexBufferResource::IndexBufferResource(
    const MemoryRange& memory_range,
    const Info& info)
    : BufferResource(memory_range), info_(info) {}
|
||||
|
||||
// Defined out of line; the compiler-generated behavior is sufficient.
IndexBufferResource::~IndexBufferResource() = default;
|
||||
|
||||
// Passes the memory range to BufferResource and stores a copy of `info`.
VertexBufferResource::VertexBufferResource(
    const MemoryRange& memory_range,
    const Info& info)
    : BufferResource(memory_range), info_(info) {}
|
||||
|
||||
// Defined out of line; the compiler-generated behavior is sufficient.
VertexBufferResource::~VertexBufferResource() = default;
|
||||
|
|
|
@ -10,7 +10,8 @@
|
|||
#ifndef XENIA_GPU_BUFFER_RESOURCE_H_
|
||||
#define XENIA_GPU_BUFFER_RESOURCE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/resource.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
|
@ -18,8 +19,76 @@ namespace xe {
|
|||
namespace gpu {
|
||||
|
||||
|
||||
class BufferResource : public Resource {
|
||||
class BufferResource : public PagedResource {
|
||||
public:
|
||||
BufferResource(const MemoryRange& memory_range);
|
||||
~BufferResource() override;
|
||||
|
||||
virtual int Prepare();
|
||||
|
||||
protected:
|
||||
virtual int CreateHandle() = 0;
|
||||
virtual int InvalidateRegion(const MemoryRange& memory_range) = 0;
|
||||
};
|
||||
|
||||
|
||||
// Width of each index in an index buffer.
enum IndexFormat {
  INDEX_FORMAT_16BIT = 0,  // 16-bit indices
  INDEX_FORMAT_32BIT = 1,  // 32-bit indices
};
|
||||
|
||||
class IndexBufferResource : public BufferResource {
|
||||
public:
|
||||
struct Info {
|
||||
IndexFormat format;
|
||||
xenos::XE_GPU_ENDIAN endianness;
|
||||
};
|
||||
|
||||
IndexBufferResource(const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~IndexBufferResource() override;
|
||||
|
||||
const Info& info() const { return info_; }
|
||||
|
||||
bool Equals(const void* info_ptr, size_t info_length) override {
|
||||
return info_length == sizeof(Info) &&
|
||||
memcmp(info_ptr, &info_, info_length) == 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
Info info_;
|
||||
};
|
||||
|
||||
|
||||
class VertexBufferResource : public BufferResource {
|
||||
public:
|
||||
struct DeclElement {
|
||||
xenos::instr_fetch_vtx_t vtx_fetch;
|
||||
uint32_t format;
|
||||
uint32_t offset_words;
|
||||
uint32_t size_words;
|
||||
bool is_signed;
|
||||
bool is_normalized;
|
||||
};
|
||||
struct Info {
|
||||
uint32_t stride_words;
|
||||
uint32_t element_count;
|
||||
DeclElement elements[16];
|
||||
};
|
||||
|
||||
VertexBufferResource(const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~VertexBufferResource() override;
|
||||
|
||||
const Info& info() const { return info_; }
|
||||
|
||||
bool Equals(const void* info_ptr, size_t info_length) override {
|
||||
return info_length == sizeof(Info) &&
|
||||
memcmp(info_ptr, &info_, info_length) == 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
Info info_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -9,9 +9,782 @@
|
|||
|
||||
#include <xenia/gpu/command_processor.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/graphics_driver.h>
|
||||
#include <xenia/gpu/graphics_system.h>
|
||||
#include <xenia/gpu/xenos/packets.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Logs through XELOGGPU only when ring buffer tracing is enabled.
// Wrapped in do/while(0) so the macro behaves as a single statement: with a
// bare `if`, an `else` following `if (x) XETRACECP(...);` would silently
// attach to the macro's own condition.
#define XETRACECP(fmt, ...) \
  do { \
    if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__); \
  } while (0)
|
||||
|
||||
|
||||
// Sets up an idle command processor: no driver, no ring buffer, all ring
// indices at zero, and the time base captured from the performance counter.
CommandProcessor::CommandProcessor(GraphicsSystem* graphics_system,
                                   Memory* memory)
    : graphics_system_(graphics_system), memory_(memory), driver_(0) {
  // Auto-reset, initially unsignaled, unnamed event used to announce write
  // pointer updates.
  write_ptr_index_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);

  // Ring buffer location and size are filled in by Initialize().
  primary_buffer_ptr_ = 0;
  primary_buffer_size_ = 0;

  // Read side.
  read_ptr_index_ = 0;
  read_ptr_update_freq_ = 0;
  read_ptr_writeback_ptr_ = 0;

  // Write side.
  write_ptr_index_ = 0;
  write_ptr_max_index_ = 0;

  // QueryTime() reports ticks relative to this moment.
  LARGE_INTEGER start_ticks;
  QueryPerformanceCounter(&start_ticks);
  time_base_ = start_ticks.QuadPart;
  counter_ = 0;
}
|
||||
|
||||
// Signals the write pointer event one last time and then releases it.
CommandProcessor::~CommandProcessor() {
  // NOTE(review): presumably wakes a thread blocked in Pump() before the
  // handle disappears - confirm.
  SetEvent(write_ptr_index_event_);
  CloseHandle(write_ptr_index_event_);
}
|
||||
|
||||
uint64_t CommandProcessor::QueryTime() {
|
||||
LARGE_INTEGER perf_counter;
|
||||
QueryPerformanceCounter(&perf_counter);
|
||||
return perf_counter.QuadPart - time_base_;
|
||||
}
|
||||
|
||||
// Binds the driver and records where the primary ring buffer lives.
//
// `ptr` is the ring buffer address; `page_count` is the size value supplied
// by the caller, converted back to a size below.
void CommandProcessor::Initialize(GraphicsDriver* driver,
                                  uint32_t ptr, uint32_t page_count) {
  driver_ = driver;
  primary_buffer_ptr_ = ptr;
  read_ptr_index_ = 0;

  // Not sure this is correct, but it's a way to take the page_count back to
  // the number of bytes allocated by the physical alloc.
  const uint32_t byte_count = 1 << (0x1C - page_count - 1);
  primary_buffer_size_ = byte_count;

  // Tell the driver what to use for translation: the top three address bits
  // of the ring buffer pointer.
  driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF);
}
|
||||
|
||||
// Enables writing the read pointer back to memory.
//
// `ptr` is combined with the top three address bits of the primary buffer to
// form the write-back address. `block_size` is a log2 value: the update
// frequency stored is (2^block_size) / 4.
void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
                                                  uint32_t block_size) {
  // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C
  // ptr = RB_RPTR_ADDR, pointer to write back the address to.
  read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr;

  // CP_RB_CNTL Ring Buffer Control 0x704
  // block_size = RB_BLKSZ, number of quadwords read between updates of the
  //              read pointer.
  // Computed with an integer shift instead of floating-point pow(). Exponents
  // of 32 or more cannot be represented in 32 bits (the old cast was
  // undefined there), so they yield 0.
  const uint32_t block_quadwords = block_size < 32 ? (1u << block_size) : 0u;
  read_ptr_update_freq_ = block_quadwords / 4;
}
|
||||
|
||||
// Publishes a new write index: tracks the highest index seen so far, stores
// the new value, and signals the event that Pump() waits on.
void CommandProcessor::UpdateWritePointer(uint32_t value) {
  write_ptr_max_index_ =
      (write_ptr_max_index_ > value) ? write_ptr_max_index_ : value;
  write_ptr_index_ = value;
  SetEvent(write_ptr_index_event_);
}
|
||||
|
||||
void CommandProcessor::Pump() {
|
||||
uint8_t* p = memory_->membase();
|
||||
|
||||
while (write_ptr_index_ == 0xBAADF00D ||
|
||||
read_ptr_index_ == write_ptr_index_) {
|
||||
// Check if the pointer has moved.
|
||||
// We wait a short bit here to yield time. Since we are also running the
|
||||
// main window display we don't want to pause too long, though.
|
||||
// YieldProcessor();
|
||||
const int wait_time_ms = 1;
|
||||
if (WaitForSingleObject(write_ptr_index_event_,
|
||||
wait_time_ms) == WAIT_TIMEOUT) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Bring local so we don't have to worry about them changing out from under
|
||||
// us.
|
||||
uint32_t write_ptr_index = write_ptr_index_;
|
||||
uint32_t write_ptr_max_index = write_ptr_max_index_;
|
||||
if (read_ptr_index_ == write_ptr_index) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Process the new commands.
|
||||
XETRACECP("Command processor thread work");
|
||||
|
||||
// Execute. Note that we handle wraparound transparently.
|
||||
ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
|
||||
read_ptr_index_ = write_ptr_index;
|
||||
|
||||
// TODO(benvanik): use read_ptr_update_freq_ and only issue after moving
|
||||
// that many indices.
|
||||
if (read_ptr_writeback_ptr_) {
|
||||
XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_);
|
||||
}
|
||||
}
|
||||
|
||||
void CommandProcessor::ExecutePrimaryBuffer(
|
||||
uint32_t start_index, uint32_t end_index) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Adjust pointer base.
|
||||
uint32_t ptr = primary_buffer_ptr_ + start_index * 4;
|
||||
ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);
|
||||
uint32_t end_ptr = primary_buffer_ptr_ + end_index * 4;
|
||||
end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF);
|
||||
|
||||
XETRACECP("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)",
|
||||
ptr, start_index, end_index);
|
||||
|
||||
// Execute commands!
|
||||
PacketArgs args;
|
||||
args.ptr = ptr;
|
||||
args.base_ptr = primary_buffer_ptr_;
|
||||
args.max_address = primary_buffer_ptr_ + primary_buffer_size_;
|
||||
args.ptr_mask = (primary_buffer_size_ / 4) - 1;
|
||||
uint32_t n = 0;
|
||||
while (args.ptr != end_ptr) {
|
||||
n += ExecutePacket(args);
|
||||
XEASSERT(args.ptr < args.max_address);
|
||||
}
|
||||
if (end_index > start_index) {
|
||||
XEASSERT(n == (end_index - start_index));
|
||||
}
|
||||
|
||||
XETRACECP(" ExecutePrimaryBuffer End");
|
||||
}
|
||||
|
||||
void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
|
||||
XETRACECP("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length);
|
||||
|
||||
// Execute commands!
|
||||
PacketArgs args;
|
||||
args.ptr = ptr;
|
||||
args.base_ptr = ptr;
|
||||
args.max_address = ptr + length * 4;
|
||||
args.ptr_mask = 0;
|
||||
for (uint32_t n = 0; n < length;) {
|
||||
n += ExecutePacket(args);
|
||||
XEASSERT(n <= length);
|
||||
}
|
||||
|
||||
XETRACECP(" ExecuteIndirectBuffer End");
|
||||
}
|
||||
|
||||
// Traces the `count` data words that follow the current packet header.
// Expects `packet_ptr` and `packet_base` to be in scope at the point of use.
// The argument is parenthesized so expressions such as `a & b` compare
// correctly, and the loop variable avoids the reserved `__` prefix.
#define LOG_DATA(count) \
  for (uint32_t log_data_i_ = 0; log_data_i_ < (count); log_data_i_++) { \
    XETRACECP("[%.8X] %.8X", \
              packet_ptr + (1 + log_data_i_) * 4, \
              XEGETUINT32BE(packet_base + 1 * 4 + log_data_i_ * 4)); \
  }
|
||||
|
||||
// Moves the read position forward by `n` 32-bit words. When a pointer mask is
// set, the word offset from base_ptr is masked so the position wraps within
// the buffer; a zero mask means no wrapping.
void CommandProcessor::AdvancePtr(PacketArgs& args, uint32_t n) {
  args.ptr += n * 4;
  if (!args.ptr_mask) {
    return;
  }
  const auto word_offset = (args.ptr - args.base_ptr) / 4;
  args.ptr = args.base_ptr + (word_offset & args.ptr_mask) * 4;
}
|
||||
// Packet stream helpers. All three expect `p` (memory base pointer) and
// `args` (PacketArgs) to be in scope at the point of use.
//
// ADVANCE_PTR(n): skip n words.
// PEEK_PTR():     read the big-endian word at the current position.
// READ_PTR():     read the current word, then advance by one word.
//
// READ_PTR() is a single expression, so the advance stays tied to the read
// even under an unbraced `if`; as two statements the advance would run
// unconditionally.
#define ADVANCE_PTR(n) AdvancePtr(args, n)
#define PEEK_PTR() \
  XEGETUINT32BE(p + args.ptr)
#define READ_PTR() \
  ([&]() -> uint32_t { \
    const uint32_t read_ptr_value_ = PEEK_PTR(); \
    ADVANCE_PTR(1); \
    return read_ptr_value_; \
  }())
|
||||
|
||||
uint32_t CommandProcessor::ExecutePacket(PacketArgs& args) {
|
||||
uint8_t* p = memory_->membase();
|
||||
RegisterFile* regs = driver_->register_file();
|
||||
|
||||
uint32_t packet_ptr = args.ptr;
|
||||
const uint8_t* packet_base = p + packet_ptr;
|
||||
const uint32_t packet = PEEK_PTR();
|
||||
ADVANCE_PTR(1);
|
||||
const uint32_t packet_type = packet >> 30;
|
||||
if (packet == 0) {
|
||||
XETRACECP("[%.8X] Packet(%.8X): 0?",
|
||||
packet_ptr, packet);
|
||||
return 1;
|
||||
}
|
||||
|
||||
switch (packet_type) {
|
||||
case 0x00:
|
||||
{
|
||||
// Type-0 packet.
|
||||
// Write count registers in sequence to the registers starting at
|
||||
// (base_index << 2).
|
||||
XETRACECP("[%.8X] Packet(%.8X): set registers:",
|
||||
packet_ptr, packet);
|
||||
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
|
||||
uint32_t base_index = (packet & 0x7FFF);
|
||||
uint32_t write_one_reg = (packet >> 15) & 0x1;
|
||||
for (uint32_t m = 0; m < count; m++) {
|
||||
uint32_t reg_data = PEEK_PTR();
|
||||
uint32_t target_index = write_one_reg ? base_index : base_index + m;
|
||||
const char* reg_name = regs->GetRegisterName(target_index);
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s",
|
||||
args.ptr,
|
||||
reg_data, target_index, reg_name ? reg_name : "");
|
||||
ADVANCE_PTR(1);
|
||||
WriteRegister(packet_ptr, target_index, reg_data);
|
||||
}
|
||||
return 1 + count;
|
||||
}
|
||||
break;
|
||||
case 0x01:
|
||||
{
|
||||
// Type-1 packet.
|
||||
// Contains two registers of data. Type-0 should be more common.
|
||||
XETRACECP("[%.8X] Packet(%.8X): set registers:",
|
||||
packet_ptr, packet);
|
||||
uint32_t reg_index_1 = packet & 0x7FF;
|
||||
uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
|
||||
uint32_t reg_ptr_1 = args.ptr;
|
||||
uint32_t reg_data_1 = READ_PTR();
|
||||
uint32_t reg_ptr_2 = args.ptr;
|
||||
uint32_t reg_data_2 = READ_PTR();
|
||||
const char* reg_name_1 = regs->GetRegisterName(reg_index_1);
|
||||
const char* reg_name_2 = regs->GetRegisterName(reg_index_2);
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s",
|
||||
reg_ptr_1,
|
||||
reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s",
|
||||
reg_ptr_2,
|
||||
reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
|
||||
WriteRegister(packet_ptr, reg_index_1, reg_data_1);
|
||||
WriteRegister(packet_ptr, reg_index_2, reg_data_2);
|
||||
return 1 + 2;
|
||||
}
|
||||
break;
|
||||
case 0x02:
|
||||
// Type-2 packet.
|
||||
// No-op. Do nothing.
|
||||
XETRACECP("[%.8X] Packet(%.8X): padding",
|
||||
packet_ptr, packet);
|
||||
return 1;
|
||||
case 0x03:
|
||||
{
|
||||
// Type-3 packet.
|
||||
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
|
||||
uint32_t opcode = (packet >> 8) & 0x7F;
|
||||
// & 1 == predicate, maybe?
|
||||
|
||||
switch (opcode) {
|
||||
case PM4_ME_INIT:
|
||||
// initialize CP's micro-engine
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_ME_INIT",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
ADVANCE_PTR(count);
|
||||
break;
|
||||
|
||||
case PM4_NOP:
|
||||
// skip N 32-bit words to get to the next packet
|
||||
// No-op, ignore some data.
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_NOP",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
ADVANCE_PTR(count);
|
||||
break;
|
||||
|
||||
case PM4_INTERRUPT:
|
||||
// generate interrupt from the command stream
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t cpu_mask = READ_PTR();
|
||||
for (int n = 0; n < 6; n++) {
|
||||
if (cpu_mask & (1 << n)) {
|
||||
graphics_system_->DispatchInterruptCallback(1, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_INDIRECT_BUFFER:
|
||||
// indirect buffer dispatch
|
||||
{
|
||||
uint32_t list_ptr = READ_PTR();
|
||||
uint32_t list_length = READ_PTR();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
|
||||
packet_ptr, packet, list_ptr, list_length);
|
||||
ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_WAIT_REG_MEM:
|
||||
// wait until a register or memory location is a specific value
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t wait_info = READ_PTR();
|
||||
uint32_t poll_reg_addr = READ_PTR();
|
||||
uint32_t ref = READ_PTR();
|
||||
uint32_t mask = READ_PTR();
|
||||
uint32_t wait = READ_PTR();
|
||||
bool matched = false;
|
||||
do {
|
||||
uint32_t value;
|
||||
if (wait_info & 0x10) {
|
||||
// Memory.
|
||||
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
|
||||
poll_reg_addr &= ~0x3;
|
||||
value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
|
||||
value = GpuSwap(value, endianness);
|
||||
} else {
|
||||
// Register.
|
||||
XEASSERT(poll_reg_addr < RegisterFile::kRegisterCount);
|
||||
|
||||
if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
|
||||
// Waiting for coherency. We should have all the info we need
|
||||
// now (base+size+mode), so kick it off.
|
||||
MakeCoherent();
|
||||
}
|
||||
|
||||
value = regs->values[poll_reg_addr].u32;
|
||||
}
|
||||
switch (wait_info & 0x7) {
|
||||
case 0x0: // Never.
|
||||
matched = false;
|
||||
break;
|
||||
case 0x1: // Less than reference.
|
||||
matched = (value & mask) < ref;
|
||||
break;
|
||||
case 0x2: // Less than or equal to reference.
|
||||
matched = (value & mask) <= ref;
|
||||
break;
|
||||
case 0x3: // Equal to reference.
|
||||
matched = (value & mask) == ref;
|
||||
break;
|
||||
case 0x4: // Not equal to reference.
|
||||
matched = (value & mask) != ref;
|
||||
break;
|
||||
case 0x5: // Greater than or equal to reference.
|
||||
matched = (value & mask) >= ref;
|
||||
break;
|
||||
case 0x6: // Greater than reference.
|
||||
matched = (value & mask) > ref;
|
||||
break;
|
||||
case 0x7: // Always
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
if (!matched) {
|
||||
// Wait.
|
||||
if (wait >= 0x100) {
|
||||
Sleep(wait / 0x100);
|
||||
} else {
|
||||
SwitchToThread();
|
||||
}
|
||||
}
|
||||
} while (!matched);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_REG_RMW:
|
||||
// register read/modify/write
|
||||
// ? (used during shader upload and edram setup)
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_REG_RMW",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t rmw_info = READ_PTR();
|
||||
uint32_t and_mask = READ_PTR();
|
||||
uint32_t or_mask = READ_PTR();
|
||||
uint32_t value = regs->values[rmw_info & 0x1FFF].u32;
|
||||
if ((rmw_info >> 30) & 0x1) {
|
||||
// | reg
|
||||
value |= regs->values[or_mask & 0x1FFF].u32;
|
||||
} else {
|
||||
// | imm
|
||||
value |= or_mask;
|
||||
}
|
||||
if ((rmw_info >> 31) & 0x1) {
|
||||
// & reg
|
||||
value &= regs->values[and_mask & 0x1FFF].u32;
|
||||
} else {
|
||||
// & imm
|
||||
value &= and_mask;
|
||||
}
|
||||
WriteRegister(packet_ptr, rmw_info & 0x1FFF, value);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_COND_WRITE:
|
||||
// conditional write to memory or register
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_COND_WRITE",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t wait_info = READ_PTR();
|
||||
uint32_t poll_reg_addr = READ_PTR();
|
||||
uint32_t ref = READ_PTR();
|
||||
uint32_t mask = READ_PTR();
|
||||
uint32_t write_reg_addr = READ_PTR();
|
||||
uint32_t write_data = READ_PTR();
|
||||
uint32_t value;
|
||||
if (wait_info & 0x10) {
|
||||
// Memory.
|
||||
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
|
||||
poll_reg_addr &= ~0x3;
|
||||
value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
|
||||
value = GpuSwap(value, endianness);
|
||||
} else {
|
||||
// Register.
|
||||
XEASSERT(poll_reg_addr < RegisterFile::kRegisterCount);
|
||||
value = regs->values[poll_reg_addr].u32;
|
||||
}
|
||||
bool matched = false;
|
||||
switch (wait_info & 0x7) {
|
||||
case 0x0: // Never.
|
||||
matched = false;
|
||||
break;
|
||||
case 0x1: // Less than reference.
|
||||
matched = (value & mask) < ref;
|
||||
break;
|
||||
case 0x2: // Less than or equal to reference.
|
||||
matched = (value & mask) <= ref;
|
||||
break;
|
||||
case 0x3: // Equal to reference.
|
||||
matched = (value & mask) == ref;
|
||||
break;
|
||||
case 0x4: // Not equal to reference.
|
||||
matched = (value & mask) != ref;
|
||||
break;
|
||||
case 0x5: // Greater than or equal to reference.
|
||||
matched = (value & mask) >= ref;
|
||||
break;
|
||||
case 0x6: // Greater than reference.
|
||||
matched = (value & mask) > ref;
|
||||
break;
|
||||
case 0x7: // Always
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
if (matched) {
|
||||
// Write.
|
||||
if (wait_info & 0x100) {
|
||||
// Memory.
|
||||
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3);
|
||||
write_reg_addr &= ~0x3;
|
||||
write_data = GpuSwap(write_data, endianness);
|
||||
XESETUINT32LE(p + GpuToCpu(packet_ptr, write_reg_addr),
|
||||
write_data);
|
||||
} else {
|
||||
// Register.
|
||||
WriteRegister(packet_ptr, write_reg_addr, write_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_EVENT_WRITE:
|
||||
// generate an event that creates a write to memory when completed
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t initiator = READ_PTR();
|
||||
if (count == 1) {
|
||||
// Just an event flag? Where does this write?
|
||||
} else {
|
||||
// Write to an address.
|
||||
XEASSERTALWAYS();
|
||||
ADVANCE_PTR(count - 1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PM4_EVENT_WRITE_SHD:
|
||||
// generate a VS|PS_done event
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t initiator = READ_PTR();
|
||||
uint32_t address = READ_PTR();
|
||||
uint32_t value = READ_PTR();
|
||||
// Writeback initiator.
|
||||
WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR,
|
||||
initiator & 0x1F);
|
||||
uint32_t data_value;
|
||||
if ((initiator >> 31) & 0x1) {
|
||||
// Write counter (GPU vblank counter?).
|
||||
data_value = counter_;
|
||||
} else {
|
||||
// Write value.
|
||||
data_value = value;
|
||||
}
|
||||
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3);
|
||||
address &= ~0x3;
|
||||
data_value = GpuSwap(data_value, endianness);
|
||||
XESETUINT32LE(p + GpuToCpu(address), data_value);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_DRAW_INDX:
|
||||
// initiate fetch of index buffer and draw
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
// d0 = viz query info
|
||||
uint32_t d0 = READ_PTR();
|
||||
uint32_t d1 = READ_PTR();
|
||||
uint32_t index_count = d1 >> 16;
|
||||
uint32_t prim_type = d1 & 0x3F;
|
||||
uint32_t src_sel = (d1 >> 6) & 0x3;
|
||||
if (!driver_->PrepareDraw(draw_command_)) {
|
||||
draw_command_.prim_type = (XE_GPU_PRIMITIVE_TYPE)prim_type;
|
||||
draw_command_.start_index = 0;
|
||||
draw_command_.index_count = index_count;
|
||||
draw_command_.base_vertex = 0;
|
||||
if (src_sel == 0x0) {
|
||||
// Indexed draw.
|
||||
// TODO(benvanik): detect subregions of larger index buffers!
|
||||
uint32_t index_base = READ_PTR();
|
||||
uint32_t index_size = READ_PTR();
|
||||
uint32_t endianness = index_size >> 29;
|
||||
index_size &= 0x00FFFFFF;
|
||||
bool index_32bit = (d1 >> 11) & 0x1;
|
||||
index_size *= index_32bit ? 4 : 2;
|
||||
driver_->PrepareDrawIndexBuffer(
|
||||
draw_command_,
|
||||
index_base, index_size,
|
||||
(XE_GPU_ENDIAN)endianness,
|
||||
index_32bit ? INDEX_FORMAT_32BIT : INDEX_FORMAT_16BIT);
|
||||
} else if (src_sel == 0x2) {
|
||||
// Auto draw.
|
||||
draw_command_.index_buffer = nullptr;
|
||||
} else {
|
||||
// Unknown source select.
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
driver_->Draw(draw_command_);
|
||||
} else {
|
||||
if (src_sel == 0x0) {
|
||||
ADVANCE_PTR(2); // skip
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PM4_DRAW_INDX_2:
|
||||
// draw using supplied indices in packet
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t d0 = READ_PTR();
|
||||
uint32_t index_count = d0 >> 16;
|
||||
uint32_t prim_type = d0 & 0x3F;
|
||||
uint32_t src_sel = (d0 >> 6) & 0x3;
|
||||
XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex'
|
||||
if (!driver_->PrepareDraw(draw_command_)) {
|
||||
draw_command_.prim_type = (XE_GPU_PRIMITIVE_TYPE)prim_type;
|
||||
draw_command_.start_index = 0;
|
||||
draw_command_.index_count = index_count;
|
||||
draw_command_.base_vertex = 0;
|
||||
draw_command_.index_buffer = nullptr;
|
||||
driver_->Draw(draw_command_);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_SET_CONSTANT:
|
||||
// load constant into chip and to memory
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_CONSTANT",
|
||||
packet_ptr, packet);
|
||||
// PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
|
||||
// reg - 0x2000
|
||||
uint32_t offset_type = READ_PTR();
|
||||
uint32_t index = offset_type & 0x7FF;
|
||||
uint32_t type = (offset_type >> 16) & 0xFF;
|
||||
switch (type) {
|
||||
case 0x4: // REGISTER
|
||||
index += 0x2000; // registers
|
||||
for (uint32_t n = 0; n < count - 1; n++, index++) {
|
||||
uint32_t data = READ_PTR();
|
||||
const char* reg_name = regs->GetRegisterName(index);
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s",
|
||||
packet_ptr + (1 + n) * 4,
|
||||
data, index, reg_name ? reg_name : "");
|
||||
WriteRegister(packet_ptr, index, data);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PM4_LOAD_ALU_CONSTANT:
|
||||
// load constants from memory
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT",
|
||||
packet_ptr, packet);
|
||||
uint32_t address = READ_PTR();
|
||||
address &= 0x3FFFFFFF;
|
||||
uint32_t offset_type = READ_PTR();
|
||||
uint32_t index = offset_type & 0x7FF;
|
||||
uint32_t size = READ_PTR();
|
||||
size &= 0xFFF;
|
||||
index += 0x4000; // alu constants
|
||||
for (uint32_t n = 0; n < size; n++, index++) {
|
||||
uint32_t data = XEGETUINT32BE(
|
||||
p + GpuToCpu(packet_ptr, address + n * 4));
|
||||
const char* reg_name = regs->GetRegisterName(index);
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s",
|
||||
packet_ptr,
|
||||
data, index, reg_name ? reg_name : "");
|
||||
WriteRegister(packet_ptr, index, data);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_IM_LOAD:
|
||||
// load sequencer instruction memory (pointer-based)
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t addr_type = READ_PTR();
|
||||
uint32_t type = addr_type & 0x3;
|
||||
uint32_t addr = addr_type & ~0x3;
|
||||
uint32_t start_size = READ_PTR();
|
||||
uint32_t start = start_size >> 16;
|
||||
uint32_t size = start_size & 0xFFFF; // dwords
|
||||
XEASSERT(start == 0);
|
||||
driver_->LoadShader((XE_GPU_SHADER_TYPE)type,
|
||||
GpuToCpu(packet_ptr, addr), size * 4, start);
|
||||
}
|
||||
break;
|
||||
case PM4_IM_LOAD_IMMEDIATE:
|
||||
// load sequencer instruction memory (code embedded in packet)
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t type = READ_PTR();
|
||||
uint32_t start_size = READ_PTR();
|
||||
uint32_t start = start_size >> 16;
|
||||
uint32_t size = start_size & 0xFFFF; // dwords
|
||||
XEASSERT(start == 0);
|
||||
// TODO(benvanik): figure out if this could wrap.
|
||||
XEASSERT(args.ptr + size * 4 < args.max_address);
|
||||
driver_->LoadShader((XE_GPU_SHADER_TYPE)type,
|
||||
args.ptr, size * 4, start);
|
||||
ADVANCE_PTR(size);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_INVALIDATE_STATE:
|
||||
// selective invalidation of state pointers
|
||||
{
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t mask = READ_PTR();
|
||||
//driver_->InvalidateState(mask);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_SET_BIN_MASK_LO:
|
||||
{
|
||||
uint32_t value = READ_PTR();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X",
|
||||
packet_ptr, packet, value);
|
||||
}
|
||||
break;
|
||||
case PM4_SET_BIN_MASK_HI:
|
||||
{
|
||||
uint32_t value = READ_PTR();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X",
|
||||
packet_ptr, packet, value);
|
||||
}
|
||||
break;
|
||||
case PM4_SET_BIN_SELECT_LO:
|
||||
{
|
||||
uint32_t value = READ_PTR();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X",
|
||||
packet_ptr, packet, value);
|
||||
}
|
||||
break;
|
||||
case PM4_SET_BIN_SELECT_HI:
|
||||
{
|
||||
uint32_t value = READ_PTR();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X",
|
||||
packet_ptr, packet, value);
|
||||
}
|
||||
break;
|
||||
|
||||
// Ignored packets - useful if breaking on the default handler below.
|
||||
case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000
|
||||
XETRACECP("[%.8X] Packet(%.8X): unknown!",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
ADVANCE_PTR(count);
|
||||
break;
|
||||
|
||||
default:
|
||||
XETRACECP("[%.8X] Packet(%.8X): unknown!",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
ADVANCE_PTR(count);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1 + count;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CommandProcessor::WriteRegister(
|
||||
uint32_t packet_ptr, uint32_t index, uint32_t value) {
|
||||
RegisterFile* regs = driver_->register_file();
|
||||
XEASSERT(index < RegisterFile::kRegisterCount);
|
||||
regs->values[index].u32 = value;
|
||||
|
||||
// If this is a COHER register, set the dirty flag.
|
||||
// This will block the command processor the next time it WAIT_MEM_REGs and
|
||||
// allow us to synchronize the memory.
|
||||
if (index == XE_GPU_REG_COHER_STATUS_HOST) {
|
||||
regs->values[index].u32 |= 0x80000000ul;
|
||||
}
|
||||
|
||||
// Scratch register writeback.
|
||||
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
|
||||
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
|
||||
if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) {
|
||||
// Enabled - write to address.
|
||||
uint8_t* p = memory_->membase();
|
||||
uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
|
||||
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
|
||||
XESETUINT32BE(p + GpuToCpu(primary_buffer_ptr_, mem_addr), value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CommandProcessor::MakeCoherent() {
|
||||
RegisterFile* regs = driver_->register_file();
|
||||
auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32;
|
||||
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
|
||||
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
|
||||
|
||||
// Status host often has 0x01000000 or 0x03000000.
|
||||
// This is likely toggling VC (vertex cache) or TC (texture cache).
|
||||
// Or, it also has a direction in here maybe - there is probably
|
||||
// some way to check for dest coherency (what all the COHER_DEST_BASE_*
|
||||
// registers are for).
|
||||
|
||||
// TODO(benvanik): notify resource cache of base->size and type.
|
||||
XETRACECP("Make %.8X -> %.8X (%db) coherent",
|
||||
base_host, base_host + size_host, size_host);
|
||||
driver_->resource_cache()->SyncRange(base_host, size_host);
|
||||
|
||||
// Mark coherent.
|
||||
status_host &= ~0x80000000ul;
|
||||
regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host;
|
||||
}
|
||||
|
|
|
@ -11,15 +11,70 @@
|
|||
#define XENIA_GPU_COMMAND_PROCESSOR_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/draw_command.h>
|
||||
#include <xenia/gpu/register_file.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
class GraphicsDriver;
|
||||
class GraphicsSystem;
|
||||
|
||||
|
||||
class CommandProcessor {
|
||||
public:
|
||||
CommandProcessor(GraphicsSystem* graphics_system, Memory* memory);
|
||||
virtual ~CommandProcessor();
|
||||
|
||||
Memory* memory() const { return memory_; }
|
||||
|
||||
uint64_t QueryTime();
|
||||
uint32_t counter() const { return counter_; }
|
||||
void increment_counter() { counter_++; }
|
||||
|
||||
void Initialize(GraphicsDriver* driver, uint32_t ptr, uint32_t page_count);
|
||||
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
|
||||
|
||||
void UpdateWritePointer(uint32_t value);
|
||||
|
||||
void Pump();
|
||||
|
||||
private:
|
||||
typedef struct {
|
||||
uint32_t ptr;
|
||||
uint32_t base_ptr;
|
||||
uint32_t max_address;
|
||||
uint32_t ptr_mask;
|
||||
} PacketArgs;
|
||||
|
||||
void AdvancePtr(PacketArgs& args, uint32_t n);
|
||||
void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
|
||||
void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
|
||||
uint32_t ExecutePacket(PacketArgs& args);
|
||||
void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
|
||||
void MakeCoherent();
|
||||
|
||||
Memory* memory_;
|
||||
GraphicsSystem* graphics_system_;
|
||||
GraphicsDriver* driver_;
|
||||
|
||||
uint64_t time_base_;
|
||||
uint32_t counter_;
|
||||
|
||||
uint32_t primary_buffer_ptr_;
|
||||
uint32_t primary_buffer_size_;
|
||||
|
||||
uint32_t read_ptr_index_;
|
||||
uint32_t read_ptr_update_freq_;
|
||||
uint32_t read_ptr_writeback_ptr_;
|
||||
|
||||
HANDLE write_ptr_index_event_;
|
||||
volatile uint32_t write_ptr_index_;
|
||||
volatile uint32_t write_ptr_max_index_;
|
||||
|
||||
DrawCommand draw_command_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,150 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_buffer.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_buffer_cache.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Records the owning cache and source data; the D3D11 buffer itself is not
// created until FetchNew() runs, so handle_ starts out null.
D3D11IndexBuffer::D3D11IndexBuffer(D3D11BufferCache* buffer_cache,
                                   const IndexBufferInfo& info,
                                   const uint8_t* src_ptr, size_t length)
    : IndexBuffer(info, src_ptr, length),
      buffer_cache_(buffer_cache),
      handle_(nullptr) {}
|
||||
|
||||
// Drops this object's reference on the D3D11 buffer.
D3D11IndexBuffer::~D3D11IndexBuffer() {
  XESAFERELEASE(handle_);
}
|
||||
|
||||
bool D3D11IndexBuffer::FetchNew(uint64_t hash) {
|
||||
hash_ = hash;
|
||||
|
||||
D3D11_BUFFER_DESC buffer_desc;
|
||||
xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
|
||||
buffer_desc.ByteWidth = info_.index_size;
|
||||
buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
|
||||
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
HRESULT hr = buffer_cache_->device()->CreateBuffer(&buffer_desc, NULL, &handle_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGW("D3D11: failed to create index buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
return FetchDirty(hash);
|
||||
}
|
||||
|
||||
bool D3D11IndexBuffer::FetchDirty(uint64_t hash) {
|
||||
hash_ = hash;
|
||||
|
||||
// All that's done so far:
|
||||
XEASSERT(info_.endianness == 0x2);
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE res;
|
||||
HRESULT hr = buffer_cache_->context()->Map(
|
||||
handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: unable to map index buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info_.index_32bit) {
|
||||
const uint32_t* src = reinterpret_cast<const uint32_t*>(src_);
|
||||
uint32_t* dest = reinterpret_cast<uint32_t*>(res.pData);
|
||||
for (uint32_t n = 0; n < info_.index_count; n++) {
|
||||
uint32_t d = { XESWAP32(src[n]) };
|
||||
dest[n] = d;
|
||||
}
|
||||
} else {
|
||||
const uint16_t* src = reinterpret_cast<const uint16_t*>(src_);
|
||||
uint16_t* dest = reinterpret_cast<uint16_t*>(res.pData);
|
||||
for (uint32_t n = 0; n < info_.index_count; n++) {
|
||||
uint16_t d = XESWAP16(src[n]);
|
||||
dest[n] = d;
|
||||
}
|
||||
}
|
||||
buffer_cache_->context()->Unmap(handle_, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Records the owning cache and source data; the D3D11 buffer itself is not
// created until FetchNew() runs, so handle_ starts out null.
D3D11VertexBuffer::D3D11VertexBuffer(D3D11BufferCache* buffer_cache,
                                     const VertexBufferInfo& info,
                                     const uint8_t* src_ptr, size_t length)
    : VertexBuffer(info, src_ptr, length),
      buffer_cache_(buffer_cache),
      handle_(nullptr) {}
|
||||
|
||||
// Drops this object's reference on the D3D11 buffer.
D3D11VertexBuffer::~D3D11VertexBuffer() {
  XESAFERELEASE(handle_);
}
|
||||
|
||||
bool D3D11VertexBuffer::FetchNew(uint64_t hash) {
|
||||
hash_ = hash;
|
||||
|
||||
D3D11_BUFFER_DESC buffer_desc;
|
||||
xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
|
||||
buffer_desc.ByteWidth = static_cast<UINT>(length_);
|
||||
buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
|
||||
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
HRESULT hr = buffer_cache_->device()->CreateBuffer(&buffer_desc, NULL, &handle_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGW("D3D11: failed to create index buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
return FetchDirty(hash);
|
||||
}
|
||||
|
||||
bool D3D11VertexBuffer::FetchDirty(uint64_t hash) {
|
||||
hash_ = hash;
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE res;
|
||||
HRESULT hr = buffer_cache_->context()->Map(
|
||||
handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: unable to map vertex buffer");
|
||||
return false;
|
||||
}
|
||||
uint8_t* dest = reinterpret_cast<uint8_t*>(res.pData);
|
||||
|
||||
// TODO(benvanik): rewrite to be faster/special case common/etc
|
||||
uint32_t stride = info_.layout.stride_words;
|
||||
size_t count = (length_ / 4) / stride;
|
||||
for (size_t n = 0; n < info_.layout.element_count; n++) {
|
||||
const auto& el = info_.layout.elements[n];
|
||||
const uint32_t* src_ptr = (const uint32_t*)(src_ + el.offset_words * 4);
|
||||
uint32_t* dest_ptr = (uint32_t*)(dest + el.offset_words * 4);
|
||||
uint32_t o = 0;
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
for (uint32_t j = 0; j < el.size_words; j++) {
|
||||
dest_ptr[o + j] = XESWAP32(src_ptr[o + j]);
|
||||
}
|
||||
o += stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
buffer_cache_->context()->Unmap(handle_, 0);
|
||||
return true;
|
||||
}
|
|
@ -1,69 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_BUFFER_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/buffer.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
class D3D11BufferCache;
|
||||
|
||||
|
||||
class D3D11IndexBuffer : public IndexBuffer {
|
||||
public:
|
||||
D3D11IndexBuffer(D3D11BufferCache* buffer_cache,
|
||||
const IndexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
virtual ~D3D11IndexBuffer();
|
||||
|
||||
ID3D11Buffer* handle() const { return handle_; }
|
||||
|
||||
bool FetchNew(uint64_t hash) override;
|
||||
bool FetchDirty(uint64_t hash) override;
|
||||
|
||||
private:
|
||||
D3D11BufferCache* buffer_cache_;
|
||||
ID3D11Buffer* handle_;
|
||||
};
|
||||
|
||||
|
||||
class D3D11VertexBuffer : public VertexBuffer {
|
||||
public:
|
||||
D3D11VertexBuffer(D3D11BufferCache* buffer_cache,
|
||||
const VertexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
virtual ~D3D11VertexBuffer();
|
||||
|
||||
ID3D11Buffer* handle() const { return handle_; }
|
||||
|
||||
bool FetchNew(uint64_t hash) override;
|
||||
bool FetchDirty(uint64_t hash) override;
|
||||
|
||||
private:
|
||||
D3D11BufferCache* buffer_cache_;
|
||||
ID3D11Buffer* handle_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_BUFFER_H_
|
|
@ -1,44 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_buffer_cache.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_buffer.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Stores the device context and device, taking a reference on each; the
// destructor releases both.
D3D11BufferCache::D3D11BufferCache(ID3D11DeviceContext* context,
                                   ID3D11Device* device)
    : context_(context), device_(device) {
  context_->AddRef();
  device_->AddRef();
}
|
||||
|
||||
// Releases the references taken in the constructor (device, then context).
D3D11BufferCache::~D3D11BufferCache() {
  XESAFERELEASE(device_);
  XESAFERELEASE(context_);
}
|
||||
|
||||
// Allocates a D3D11-backed index buffer bound to this cache.
// The caller receives a raw pointer to a heap-allocated object.
IndexBuffer* D3D11BufferCache::CreateIndexBuffer(
    const IndexBufferInfo& info,
    const uint8_t* src_ptr, size_t length) {
  D3D11IndexBuffer* buffer = new D3D11IndexBuffer(this, info, src_ptr, length);
  return buffer;
}
|
||||
|
||||
// Allocates a D3D11-backed vertex buffer bound to this cache.
// The caller receives a raw pointer to a heap-allocated object.
VertexBuffer* D3D11BufferCache::CreateVertexBuffer(
    const VertexBufferInfo& info,
    const uint8_t* src_ptr, size_t length) {
  D3D11VertexBuffer* buffer =
      new D3D11VertexBuffer(this, info, src_ptr, length);
  return buffer;
}
|
|
@ -1,53 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/buffer_cache.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
|
||||
class D3D11BufferCache : public BufferCache {
|
||||
public:
|
||||
D3D11BufferCache(ID3D11DeviceContext* context, ID3D11Device* device);
|
||||
virtual ~D3D11BufferCache();
|
||||
|
||||
ID3D11DeviceContext* context() const { return context_; }
|
||||
ID3D11Device* device() const { return device_; }
|
||||
|
||||
protected:
|
||||
IndexBuffer* CreateIndexBuffer(
|
||||
const IndexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length) override;
|
||||
VertexBuffer* CreateVertexBuffer(
|
||||
const VertexBufferInfo& info,
|
||||
const uint8_t* src_ptr, size_t length) override;
|
||||
|
||||
protected:
|
||||
ID3D11DeviceContext* context_;
|
||||
ID3D11Device* device_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_BUFFER_CACHE_H_
|
|
@ -0,0 +1,149 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_buffer_resource.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Records the owning resource cache; the D3D11 buffer is created later by
// CreateHandle(), so handle_ starts out null.
D3D11IndexBufferResource::D3D11IndexBufferResource(
    D3D11ResourceCache* resource_cache,
    const MemoryRange& memory_range,
    const Info& info)
    : IndexBufferResource(memory_range, info),
      resource_cache_(resource_cache),
      handle_(nullptr) {}
|
||||
|
||||
// Drops this object's reference on the D3D11 buffer.
D3D11IndexBufferResource::~D3D11IndexBufferResource() {
  XESAFERELEASE(handle_);
}
|
||||
|
||||
int D3D11IndexBufferResource::CreateHandle() {
|
||||
D3D11_BUFFER_DESC buffer_desc;
|
||||
xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
|
||||
buffer_desc.ByteWidth = static_cast<UINT>(memory_range_.length);
|
||||
buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
buffer_desc.BindFlags = D3D11_BIND_INDEX_BUFFER;
|
||||
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
HRESULT hr = resource_cache_->device()->CreateBuffer(
|
||||
&buffer_desc, nullptr, &handle_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGW("D3D11: failed to create index buffer");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int D3D11IndexBufferResource::InvalidateRegion(
|
||||
const MemoryRange& memory_range) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// All that's done so far:
|
||||
XEASSERT(info_.endianness == 0x2);
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE res;
|
||||
HRESULT hr = resource_cache_->context()->Map(
|
||||
handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: unable to map index buffer");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (info_.format == INDEX_FORMAT_32BIT) {
|
||||
uint32_t index_count = memory_range_.length / 4;
|
||||
const uint32_t* src = reinterpret_cast<const uint32_t*>(
|
||||
memory_range_.host_base);
|
||||
uint32_t* dest = reinterpret_cast<uint32_t*>(res.pData);
|
||||
for (uint32_t n = 0; n < index_count; n++) {
|
||||
dest[n] = XESWAP32(src[n]);
|
||||
}
|
||||
} else {
|
||||
uint32_t index_count = memory_range_.length / 2;
|
||||
const uint16_t* src = reinterpret_cast<const uint16_t*>(
|
||||
memory_range_.host_base);
|
||||
uint16_t* dest = reinterpret_cast<uint16_t*>(res.pData);
|
||||
for (uint32_t n = 0; n < index_count; n++) {
|
||||
dest[n] = XESWAP16(src[n]);
|
||||
}
|
||||
}
|
||||
resource_cache_->context()->Unmap(handle_, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Records the owning resource cache; the D3D11 buffer is created later by
// CreateHandle(), so handle_ starts out null.
D3D11VertexBufferResource::D3D11VertexBufferResource(
    D3D11ResourceCache* resource_cache,
    const MemoryRange& memory_range,
    const Info& info)
    : VertexBufferResource(memory_range, info),
      resource_cache_(resource_cache),
      handle_(nullptr) {}
|
||||
|
||||
// Drops this object's reference on the D3D11 buffer.
D3D11VertexBufferResource::~D3D11VertexBufferResource() {
  XESAFERELEASE(handle_);
}
|
||||
|
||||
int D3D11VertexBufferResource::CreateHandle() {
|
||||
D3D11_BUFFER_DESC buffer_desc;
|
||||
xe_zero_struct(&buffer_desc, sizeof(buffer_desc));
|
||||
buffer_desc.ByteWidth = static_cast<UINT>(memory_range_.length);
|
||||
buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
|
||||
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
HRESULT hr = resource_cache_->device()->CreateBuffer(
|
||||
&buffer_desc, nullptr, &handle_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGW("D3D11: failed to create vertex buffer");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int D3D11VertexBufferResource::InvalidateRegion(
|
||||
const MemoryRange& memory_range) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE res;
|
||||
HRESULT hr = resource_cache_->context()->Map(
|
||||
handle_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: unable to map vertex buffer");
|
||||
return 1;
|
||||
}
|
||||
uint8_t* dest = reinterpret_cast<uint8_t*>(res.pData);
|
||||
|
||||
// TODO(benvanik): rewrite to be faster/special case common/etc
|
||||
uint32_t stride = info_.stride_words;
|
||||
size_t count = (memory_range_.length / 4) / stride;
|
||||
for (size_t n = 0; n < info_.element_count; n++) {
|
||||
const auto& el = info_.elements[n];
|
||||
const uint32_t* src_ptr = (const uint32_t*)(
|
||||
memory_range_.host_base + el.offset_words * 4);
|
||||
uint32_t* dest_ptr = (uint32_t*)(dest + el.offset_words * 4);
|
||||
uint32_t o = 0;
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
for (uint32_t j = 0; j < el.size_words; j++) {
|
||||
dest_ptr[o + j] = XESWAP32(src_ptr[o + j]);
|
||||
}
|
||||
o += stride;
|
||||
}
|
||||
}
|
||||
|
||||
resource_cache_->context()->Unmap(handle_, 0);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_
|
||||
|
||||
#include <xenia/gpu/buffer_resource.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
class D3D11ResourceCache;
|
||||
|
||||
|
||||
class D3D11IndexBufferResource : public IndexBufferResource {
|
||||
public:
|
||||
D3D11IndexBufferResource(D3D11ResourceCache* resource_cache,
|
||||
const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~D3D11IndexBufferResource() override;
|
||||
|
||||
void* handle() const override { return handle_; }
|
||||
|
||||
protected:
|
||||
int CreateHandle() override;
|
||||
int InvalidateRegion(const MemoryRange& memory_range) override;
|
||||
|
||||
private:
|
||||
D3D11ResourceCache* resource_cache_;
|
||||
ID3D11Buffer* handle_;
|
||||
};
|
||||
|
||||
|
||||
class D3D11VertexBufferResource : public VertexBufferResource {
|
||||
public:
|
||||
D3D11VertexBufferResource(D3D11ResourceCache* resource_cache,
|
||||
const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~D3D11VertexBufferResource() override;
|
||||
|
||||
void* handle() const override { return handle_; }
|
||||
|
||||
protected:
|
||||
int CreateHandle() override;
|
||||
int InvalidateRegion(const MemoryRange& memory_range) override;
|
||||
|
||||
private:
|
||||
D3D11ResourceCache* resource_cache_;
|
||||
ID3D11Buffer* handle_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_BUFFER_RESOURCE_H_
|
|
@ -10,7 +10,8 @@
|
|||
#include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_shader.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_shader_translator.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
|
||||
#include <d3dcompiler.h>
|
||||
|
@ -22,8 +23,8 @@ using namespace xe::gpu::d3d11;
|
|||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device, uint64_t hash) :
|
||||
hash_(hash), handle_(NULL) {
|
||||
D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device)
|
||||
: handle_(nullptr) {
|
||||
device_ = device;
|
||||
device_->AddRef();
|
||||
}
|
||||
|
@ -33,7 +34,7 @@ D3D11GeometryShader::~D3D11GeometryShader() {
|
|||
XESAFERELEASE(device_);
|
||||
}
|
||||
|
||||
int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) {
|
||||
int D3D11GeometryShader::Prepare(D3D11VertexShaderResource* vertex_shader) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
if (handle_) {
|
||||
|
@ -94,11 +95,12 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
|
|||
if (FLAGS_dump_shaders.size()) {
|
||||
base_path = FLAGS_dump_shaders.c_str();
|
||||
}
|
||||
uint64_t hash = xe_hash64(shader_source, xestrlena(shader_source)); // ?
|
||||
char file_name[XE_MAX_PATH];
|
||||
xesnprintfa(file_name, XECOUNT(file_name),
|
||||
"%s/gen_%.16llX.gs",
|
||||
base_path,
|
||||
hash_);
|
||||
hash);
|
||||
|
||||
if (FLAGS_dump_shaders.size()) {
|
||||
FILE* f = fopen(file_name, "w");
|
||||
|
@ -128,7 +130,7 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
|
|||
return shader_blob;
|
||||
}
|
||||
|
||||
int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
||||
int D3D11GeometryShader::Generate(D3D11VertexShaderResource* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
output->Append(
|
||||
"struct VERTEX {\n"
|
||||
|
@ -138,7 +140,7 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
|||
// TODO(benvanik): only add used ones?
|
||||
output->Append(
|
||||
" float4 o[%d] : XE_O;\n",
|
||||
D3D11Shader::MAX_INTERPOLATORS);
|
||||
D3D11ShaderTranslator::kMaxInterpolators);
|
||||
}
|
||||
if (alloc_counts.point_size) {
|
||||
output->Append(
|
||||
|
@ -156,15 +158,15 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
|||
|
||||
|
||||
D3D11PointSpriteGeometryShader::D3D11PointSpriteGeometryShader(
|
||||
ID3D11Device* device, uint64_t hash) :
|
||||
D3D11GeometryShader(device, hash) {
|
||||
ID3D11Device* device) : D3D11GeometryShader(device) {
|
||||
}
|
||||
|
||||
D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() {
|
||||
}
|
||||
|
||||
int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
int D3D11PointSpriteGeometryShader::Generate(
|
||||
D3D11VertexShaderResource* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
|
||||
return 1;
|
||||
|
@ -211,15 +213,15 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
|||
|
||||
|
||||
D3D11RectListGeometryShader::D3D11RectListGeometryShader(
|
||||
ID3D11Device* device, uint64_t hash) :
|
||||
D3D11GeometryShader(device, hash) {
|
||||
ID3D11Device* device) : D3D11GeometryShader(device) {
|
||||
}
|
||||
|
||||
D3D11RectListGeometryShader::~D3D11RectListGeometryShader() {
|
||||
}
|
||||
|
||||
int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
int D3D11RectListGeometryShader::Generate(
|
||||
D3D11VertexShaderResource* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
|
||||
return 1;
|
||||
|
@ -256,15 +258,15 @@ int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
|||
|
||||
|
||||
D3D11QuadListGeometryShader::D3D11QuadListGeometryShader(
|
||||
ID3D11Device* device, uint64_t hash) :
|
||||
D3D11GeometryShader(device, hash) {
|
||||
ID3D11Device* device) : D3D11GeometryShader(device) {
|
||||
}
|
||||
|
||||
D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() {
|
||||
}
|
||||
|
||||
int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
int D3D11QuadListGeometryShader::Generate(
|
||||
D3D11VertexShaderResource* vertex_shader,
|
||||
alloy::StringBuffer* output) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (D3D11GeometryShader::Generate(vertex_shader, output)) {
|
||||
return 1;
|
||||
|
|
|
@ -21,7 +21,7 @@ namespace xe {
|
|||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
class D3D11VertexShader;
|
||||
class D3D11VertexShaderResource;
|
||||
|
||||
|
||||
class D3D11GeometryShader {
|
||||
|
@ -30,53 +30,52 @@ public:
|
|||
|
||||
ID3D11GeometryShader* handle() const { return handle_; }
|
||||
|
||||
int Prepare(D3D11VertexShader* vertex_shader);
|
||||
int Prepare(D3D11VertexShaderResource* vertex_shader);
|
||||
|
||||
protected:
|
||||
D3D11GeometryShader(ID3D11Device* device, uint64_t hash);
|
||||
D3D11GeometryShader(ID3D11Device* device);
|
||||
|
||||
ID3D10Blob* Compile(const char* shader_source);
|
||||
|
||||
virtual int Generate(D3D11VertexShader* vertex_shader,
|
||||
virtual int Generate(D3D11VertexShaderResource* vertex_shader,
|
||||
alloy::StringBuffer* output);
|
||||
|
||||
protected:
|
||||
ID3D11Device* device_;
|
||||
uint64_t hash_;
|
||||
ID3D11GeometryShader* handle_;
|
||||
};
|
||||
|
||||
|
||||
class D3D11PointSpriteGeometryShader : public D3D11GeometryShader {
|
||||
public:
|
||||
D3D11PointSpriteGeometryShader(ID3D11Device* device, uint64_t hash);
|
||||
virtual ~D3D11PointSpriteGeometryShader();
|
||||
D3D11PointSpriteGeometryShader(ID3D11Device* device);
|
||||
~D3D11PointSpriteGeometryShader() override;
|
||||
|
||||
protected:
|
||||
virtual int Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output);
|
||||
int Generate(D3D11VertexShaderResource* vertex_shader,
|
||||
alloy::StringBuffer* output) override;
|
||||
};
|
||||
|
||||
|
||||
class D3D11RectListGeometryShader : public D3D11GeometryShader {
|
||||
public:
|
||||
D3D11RectListGeometryShader(ID3D11Device* device, uint64_t hash);
|
||||
virtual ~D3D11RectListGeometryShader();
|
||||
D3D11RectListGeometryShader(ID3D11Device* device);
|
||||
~D3D11RectListGeometryShader() override;
|
||||
|
||||
protected:
|
||||
virtual int Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output);
|
||||
int Generate(D3D11VertexShaderResource* vertex_shader,
|
||||
alloy::StringBuffer* output) override;
|
||||
};
|
||||
|
||||
|
||||
class D3D11QuadListGeometryShader : public D3D11GeometryShader {
|
||||
public:
|
||||
D3D11QuadListGeometryShader(ID3D11Device* device, uint64_t hash);
|
||||
virtual ~D3D11QuadListGeometryShader();
|
||||
D3D11QuadListGeometryShader(ID3D11Device* device);
|
||||
~D3D11QuadListGeometryShader() override;
|
||||
|
||||
protected:
|
||||
virtual int Generate(D3D11VertexShader* vertex_shader,
|
||||
alloy::StringBuffer* output);
|
||||
int Generate(D3D11VertexShaderResource* vertex_shader,
|
||||
alloy::StringBuffer* output) override;
|
||||
};
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -13,8 +13,8 @@
|
|||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/graphics_driver.h>
|
||||
#include <xenia/gpu/shader.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
@ -24,13 +24,6 @@ namespace xe {
|
|||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
class D3D11BufferCache;
|
||||
class D3D11PixelShader;
|
||||
class D3D11ShaderCache;
|
||||
class D3D11TextureCache;
|
||||
struct D3D11TextureView;
|
||||
class D3D11VertexShader;
|
||||
|
||||
|
||||
class D3D11GraphicsDriver : public GraphicsDriver {
|
||||
public:
|
||||
|
@ -38,48 +31,32 @@ public:
|
|||
Memory* memory, IDXGISwapChain* swap_chain, ID3D11Device* device);
|
||||
virtual ~D3D11GraphicsDriver();
|
||||
|
||||
virtual void Initialize();
|
||||
ResourceCache* resource_cache() const override { return resource_cache_; }
|
||||
|
||||
virtual void InvalidateState(
|
||||
uint32_t mask);
|
||||
virtual void SetShader(
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
uint32_t address,
|
||||
uint32_t start,
|
||||
uint32_t length);
|
||||
virtual void DrawIndexBuffer(
|
||||
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
bool index_32bit, uint32_t index_count,
|
||||
uint32_t index_base, uint32_t index_size, uint32_t endianness);
|
||||
virtual void DrawIndexAuto(
|
||||
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
uint32_t index_count);
|
||||
int Initialize() override;
|
||||
|
||||
int Draw(const DrawCommand& command) override;
|
||||
|
||||
// TODO(benvanik): figure this out.
|
||||
virtual int Resolve();
|
||||
int Resolve() override;
|
||||
|
||||
private:
|
||||
int SetupDraw(xenos::XE_GPU_PRIMITIVE_TYPE prim_type);
|
||||
void InitializeInvalidTexture();
|
||||
|
||||
int UpdateState(const DrawCommand& command);
|
||||
int SetupConstantBuffers(const DrawCommand& command);
|
||||
int SetupShaders(const DrawCommand& command);
|
||||
int SetupInputAssembly(const DrawCommand& command);
|
||||
int SetupSamplers(const DrawCommand& command);
|
||||
|
||||
int RebuildRenderTargets(uint32_t width, uint32_t height);
|
||||
int UpdateState(uint32_t state_overrides = 0);
|
||||
int UpdateConstantBuffers();
|
||||
int BindShaders();
|
||||
int PrepareFetchers();
|
||||
int PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc);
|
||||
int PrepareTextureFetchers();
|
||||
int PrepareTextureSampler(xenos::XE_GPU_SHADER_TYPE shader_type,
|
||||
Shader::tex_buffer_desc_t& desc);
|
||||
int PrepareIndexBuffer(
|
||||
bool index_32bit, uint32_t index_count,
|
||||
uint32_t index_base, uint32_t index_size, uint32_t endianness);
|
||||
|
||||
private:
|
||||
IDXGISwapChain* swap_chain_;
|
||||
ID3D11Device* device_;
|
||||
ID3D11DeviceContext* context_;
|
||||
D3D11BufferCache* buffer_cache_;
|
||||
D3D11ShaderCache* shader_cache_;
|
||||
D3D11TextureCache* texture_cache_;
|
||||
|
||||
D3D11ResourceCache* resource_cache_;
|
||||
|
||||
ID3D11ShaderResourceView* invalid_texture_view_;
|
||||
ID3D11SamplerState* invalid_texture_sampler_state_;
|
||||
|
@ -97,9 +74,6 @@ private:
|
|||
} render_targets_;
|
||||
|
||||
struct {
|
||||
D3D11VertexShader* vertex_shader;
|
||||
D3D11PixelShader* pixel_shader;
|
||||
|
||||
struct {
|
||||
ID3D11Buffer* float_constants;
|
||||
ID3D11Buffer* bool_constants;
|
||||
|
@ -107,17 +81,7 @@ private:
|
|||
ID3D11Buffer* vs_consts;
|
||||
ID3D11Buffer* gs_consts;
|
||||
} constant_buffers;
|
||||
|
||||
struct {
|
||||
bool enabled;
|
||||
xenos::xe_gpu_texture_fetch_t fetch;
|
||||
D3D11TextureView* view;
|
||||
} texture_fetchers[32];
|
||||
} state_;
|
||||
|
||||
enum StateOverrides {
|
||||
STATE_OVERRIDE_DISABLE_CULLING = (1 << 0),
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -146,12 +146,18 @@ void D3D11GraphicsSystem::Initialize() {
|
|||
XEASSERTNULL(driver_);
|
||||
driver_ = new D3D11GraphicsDriver(
|
||||
memory_, window_->swap_chain(), device_);
|
||||
if (driver_->Initialize()) {
|
||||
XELOGE("Unable to initialize D3D11 driver");
|
||||
return;
|
||||
}
|
||||
|
||||
// Initial vsync kick.
|
||||
DispatchInterruptCallback(0);
|
||||
}
|
||||
|
||||
void D3D11GraphicsSystem::Pump() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
if (swap_pending_) {
|
||||
swap_pending_ = false;
|
||||
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_buffer_resource.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_sampler_state_resource.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_texture_resource.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
|
||||
|
||||
// Constructs a resource cache bound to the given D3D11 device and context.
// A reference is added to both COM objects here; the destructor releases them.
D3D11ResourceCache::D3D11ResourceCache(
    Memory* memory, ID3D11Device* device, ID3D11DeviceContext* context)
    : ResourceCache(memory), device_(device), context_(context) {
  // Keep the device and context alive for as long as the cache exists.
  device_->AddRef();
  context_->AddRef();
}
|
||||
|
||||
// Releases the device and context references taken by the constructor.
D3D11ResourceCache::~D3D11ResourceCache() {
  XESAFERELEASE(device_);
  XESAFERELEASE(context_);
}
|
||||
|
||||
// Factory hook: returns a newly heap-allocated D3D11 vertex shader resource
// for the given memory range and info, bound to this cache.
VertexShaderResource* D3D11ResourceCache::CreateVertexShader(
    const MemoryRange& memory_range,
    const VertexShaderResource::Info& info) {
  auto* resource = new D3D11VertexShaderResource(this, memory_range, info);
  return resource;
}
|
||||
|
||||
// Factory hook: returns a newly heap-allocated D3D11 pixel shader resource
// for the given memory range and info, bound to this cache.
PixelShaderResource* D3D11ResourceCache::CreatePixelShader(
    const MemoryRange& memory_range,
    const PixelShaderResource::Info& info) {
  auto* resource = new D3D11PixelShaderResource(this, memory_range, info);
  return resource;
}
|
||||
|
||||
// Factory hook: returns a newly heap-allocated D3D11 texture resource for the
// given memory range and info, bound to this cache.
TextureResource* D3D11ResourceCache::CreateTexture(
    const MemoryRange& memory_range,
    const TextureResource::Info& info) {
  auto* resource = new D3D11TextureResource(this, memory_range, info);
  return resource;
}
|
||||
|
||||
// Factory hook: returns a newly heap-allocated D3D11 sampler state resource
// for the given info. Unlike the other factories, no memory range is involved.
SamplerStateResource* D3D11ResourceCache::CreateSamplerState(
    const SamplerStateResource::Info& info) {
  auto* resource = new D3D11SamplerStateResource(this, info);
  return resource;
}
|
||||
|
||||
// Factory hook: returns a newly heap-allocated D3D11 index buffer resource
// for the given memory range and info, bound to this cache.
IndexBufferResource* D3D11ResourceCache::CreateIndexBuffer(
    const MemoryRange& memory_range,
    const IndexBufferResource::Info& info) {
  auto* resource = new D3D11IndexBufferResource(this, memory_range, info);
  return resource;
}
|
||||
|
||||
// Factory hook: returns a newly heap-allocated D3D11 vertex buffer resource
// for the given memory range and info, bound to this cache.
VertexBufferResource* D3D11ResourceCache::CreateVertexBuffer(
    const MemoryRange& memory_range,
    const VertexBufferResource::Info& info) {
  auto* resource = new D3D11VertexBufferResource(this, memory_range, info);
  return resource;
}
|
|
@ -0,0 +1,64 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/resource_cache.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
|
||||
class D3D11ResourceCache : public ResourceCache {
|
||||
public:
|
||||
D3D11ResourceCache(Memory* memory,
|
||||
ID3D11Device* device, ID3D11DeviceContext* context);
|
||||
virtual ~D3D11ResourceCache();
|
||||
|
||||
ID3D11Device* device() const { return device_; }
|
||||
ID3D11DeviceContext* context() const { return context_; }
|
||||
|
||||
protected:
|
||||
VertexShaderResource* CreateVertexShader(
|
||||
const MemoryRange& memory_range,
|
||||
const VertexShaderResource::Info& info) override;
|
||||
PixelShaderResource* CreatePixelShader(
|
||||
const MemoryRange& memory_range,
|
||||
const PixelShaderResource::Info& info) override;
|
||||
TextureResource* CreateTexture(
|
||||
const MemoryRange& memory_range,
|
||||
const TextureResource::Info& info) override;
|
||||
SamplerStateResource* CreateSamplerState(
|
||||
const SamplerStateResource::Info& info) override;
|
||||
IndexBufferResource* CreateIndexBuffer(
|
||||
const MemoryRange& memory_range,
|
||||
const IndexBufferResource::Info& info) override;
|
||||
VertexBufferResource* CreateVertexBuffer(
|
||||
const MemoryRange& memory_range,
|
||||
const VertexBufferResource::Info& info) override;
|
||||
|
||||
private:
|
||||
ID3D11Device* device_;
|
||||
ID3D11DeviceContext* context_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_RESOURCE_CACHE_H_
|
|
@ -7,53 +7,36 @@
|
|||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_texture_cache.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_sampler_state_resource.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
D3D11TextureCache::D3D11TextureCache(
|
||||
Memory* memory,
|
||||
ID3D11DeviceContext* context, ID3D11Device* device)
|
||||
: TextureCache(memory),
|
||||
context_(context), device_(device) {
|
||||
context_->AddRef();
|
||||
device_->AddRef();
|
||||
D3D11SamplerStateResource::D3D11SamplerStateResource(
|
||||
D3D11ResourceCache* resource_cache, const Info& info)
|
||||
: SamplerStateResource(info),
|
||||
resource_cache_(resource_cache),
|
||||
handle_(nullptr) {
|
||||
}
|
||||
|
||||
D3D11TextureCache::~D3D11TextureCache() {
|
||||
for (auto it = samplers_.begin(); it != samplers_.end(); ++it) {
|
||||
auto& cached_state = it->second;
|
||||
XESAFERELEASE(cached_state.state);
|
||||
D3D11SamplerStateResource::~D3D11SamplerStateResource() {
|
||||
XESAFERELEASE(handle_);
|
||||
}
|
||||
|
||||
int D3D11SamplerStateResource::Prepare() {
|
||||
if (handle_) {
|
||||
return 0;
|
||||
}
|
||||
samplers_.clear();
|
||||
|
||||
XESAFERELEASE(device_);
|
||||
XESAFERELEASE(context_);
|
||||
}
|
||||
|
||||
Texture* D3D11TextureCache::CreateTexture(
|
||||
uint32_t address, const uint8_t* host_address,
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch) {
|
||||
return new D3D11Texture(this, address, host_address);
|
||||
}
|
||||
|
||||
ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch,
|
||||
const Shader::tex_buffer_desc_t& desc) {
|
||||
D3D11_SAMPLER_DESC sampler_desc;
|
||||
xe_zero_struct(&sampler_desc, sizeof(sampler_desc));
|
||||
uint32_t min_filter = desc.tex_fetch.min_filter == 3 ?
|
||||
fetch.min_filter : desc.tex_fetch.min_filter;
|
||||
uint32_t mag_filter = desc.tex_fetch.mag_filter == 3 ?
|
||||
fetch.mag_filter : desc.tex_fetch.mag_filter;
|
||||
uint32_t mip_filter = desc.tex_fetch.mip_filter == 3 ?
|
||||
fetch.mip_filter : desc.tex_fetch.mip_filter;
|
||||
// MIN, MAG, MIP
|
||||
static const D3D11_FILTER filter_matrix[2][2][3] = {
|
||||
{
|
||||
|
@ -87,7 +70,8 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
|
|||
},
|
||||
},
|
||||
};
|
||||
sampler_desc.Filter = filter_matrix[min_filter][mag_filter][mip_filter];
|
||||
sampler_desc.Filter =
|
||||
filter_matrix[info_.min_filter][info_.mag_filter][info_.mip_filter];
|
||||
static const D3D11_TEXTURE_ADDRESS_MODE mode_map[] = {
|
||||
D3D11_TEXTURE_ADDRESS_WRAP,
|
||||
D3D11_TEXTURE_ADDRESS_MIRROR,
|
||||
|
@ -98,9 +82,9 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
|
|||
D3D11_TEXTURE_ADDRESS_BORDER, // ?
|
||||
D3D11_TEXTURE_ADDRESS_MIRROR, // ?
|
||||
};
|
||||
sampler_desc.AddressU = mode_map[fetch.clamp_x];
|
||||
sampler_desc.AddressV = mode_map[fetch.clamp_y];
|
||||
sampler_desc.AddressW = mode_map[fetch.clamp_z];
|
||||
sampler_desc.AddressU = mode_map[info_.clamp_u];
|
||||
sampler_desc.AddressV = mode_map[info_.clamp_v];
|
||||
sampler_desc.AddressW = mode_map[info_.clamp_w];
|
||||
sampler_desc.MipLODBias;
|
||||
sampler_desc.MaxAnisotropy = 1;
|
||||
sampler_desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
|
||||
|
@ -111,29 +95,12 @@ ID3D11SamplerState* D3D11TextureCache::GetSamplerState(
|
|||
sampler_desc.MinLOD;
|
||||
sampler_desc.MaxLOD;
|
||||
|
||||
// TODO(benvanik): do this earlier without having to setup the whole struct?
|
||||
size_t hash = hash_combine(
|
||||
sampler_desc.Filter,
|
||||
sampler_desc.AddressU,
|
||||
sampler_desc.AddressV,
|
||||
sampler_desc.AddressW);
|
||||
auto range = samplers_.equal_range(hash);
|
||||
for (auto it = range.first; it != range.second; ++it) {
|
||||
const auto& cached_state = it->second;
|
||||
// TODO(benvanik): faster compare?
|
||||
if (memcmp(&sampler_desc, &cached_state.desc, sizeof(sampler_desc)) == 0) {
|
||||
return cached_state.state;
|
||||
}
|
||||
}
|
||||
|
||||
ID3D11SamplerState* sampler_state = NULL;
|
||||
HRESULT hr = device_->CreateSamplerState(&sampler_desc, &sampler_state);
|
||||
HRESULT hr = resource_cache_->device()->CreateSamplerState(
|
||||
&sampler_desc, &handle_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: unable to create sampler state");
|
||||
return nullptr;
|
||||
return 1;
|
||||
}
|
||||
|
||||
samplers_.insert({ hash, { sampler_desc, sampler_state } });
|
||||
|
||||
return sampler_state;
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_
|
||||
|
||||
#include <xenia/gpu/sampler_state_resource.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
class D3D11ResourceCache;
|
||||
|
||||
|
||||
class D3D11SamplerStateResource : public SamplerStateResource {
|
||||
public:
|
||||
D3D11SamplerStateResource(D3D11ResourceCache* resource_cache,
|
||||
const Info& info);
|
||||
~D3D11SamplerStateResource() override;
|
||||
|
||||
void* handle() const override { return handle_; }
|
||||
|
||||
int Prepare() override;
|
||||
|
||||
protected:
|
||||
D3D11ResourceCache* resource_cache_;
|
||||
ID3D11SamplerState* handle_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_SAMPLER_STATE_RESOURCE_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -1,125 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_SHADER_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_SHADER_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/shader.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
struct Output;
|
||||
|
||||
typedef struct {
|
||||
Output* output;
|
||||
xenos::XE_GPU_SHADER_TYPE type;
|
||||
uint32_t tex_fetch_index;
|
||||
} xe_gpu_translate_ctx_t;
|
||||
|
||||
class D3D11GeometryShader;
|
||||
|
||||
|
||||
class D3D11Shader : public Shader {
|
||||
public:
|
||||
virtual ~D3D11Shader();
|
||||
|
||||
const static uint32_t MAX_INTERPOLATORS = 16;
|
||||
|
||||
protected:
|
||||
D3D11Shader(
|
||||
ID3D11Device* device,
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length,
|
||||
uint64_t hash);
|
||||
|
||||
const char* translated_src() const { return translated_src_; }
|
||||
void set_translated_src(char* value);
|
||||
|
||||
void AppendTextureHeader(Output* output);
|
||||
int TranslateExec(
|
||||
xe_gpu_translate_ctx_t& ctx, const xenos::instr_cf_exec_t& cf);
|
||||
|
||||
ID3D10Blob* Compile(const char* shader_source);
|
||||
|
||||
protected:
|
||||
ID3D11Device* device_;
|
||||
|
||||
char* translated_src_;
|
||||
};
|
||||
|
||||
|
||||
class D3D11VertexShader : public D3D11Shader {
|
||||
public:
|
||||
D3D11VertexShader(
|
||||
ID3D11Device* device,
|
||||
const uint8_t* src_ptr, size_t length,
|
||||
uint64_t hash);
|
||||
virtual ~D3D11VertexShader();
|
||||
|
||||
ID3D11VertexShader* handle() const { return handle_; }
|
||||
ID3D11InputLayout* input_layout() const { return input_layout_; }
|
||||
|
||||
int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl);
|
||||
|
||||
enum GeometryShaderType {
|
||||
POINT_SPRITE_SHADER,
|
||||
RECT_LIST_SHADER,
|
||||
QUAD_LIST_SHADER,
|
||||
|
||||
MAX_GEOMETRY_SHADER_TYPE,
|
||||
};
|
||||
int DemandGeometryShader(GeometryShaderType type,
|
||||
D3D11GeometryShader** out_shader);
|
||||
|
||||
private:
|
||||
const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl);
|
||||
|
||||
private:
|
||||
ID3D11VertexShader* handle_;
|
||||
ID3D11InputLayout* input_layout_;
|
||||
D3D11GeometryShader* geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE];
|
||||
};
|
||||
|
||||
|
||||
class D3D11PixelShader : public D3D11Shader {
|
||||
public:
|
||||
D3D11PixelShader(
|
||||
ID3D11Device* device,
|
||||
const uint8_t* src_ptr, size_t length,
|
||||
uint64_t hash);
|
||||
virtual ~D3D11PixelShader();
|
||||
|
||||
ID3D11PixelShader* handle() const { return handle_; }
|
||||
|
||||
int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl,
|
||||
D3D11VertexShader* input_shader);
|
||||
|
||||
private:
|
||||
const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl,
|
||||
D3D11VertexShader* input_shader);
|
||||
|
||||
private:
|
||||
ID3D11PixelShader* handle_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_SHADER_H_
|
|
@ -1,45 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_shader_cache.h>
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_shader.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Stores the device and takes a reference on it; the destructor releases it.
D3D11ShaderCache::D3D11ShaderCache(ID3D11Device* device)
    : device_(device) {
  device_->AddRef();
}
|
||||
|
||||
// Releases the device reference taken in the constructor.
D3D11ShaderCache::~D3D11ShaderCache() {
  device_->Release();
}
|
||||
|
||||
// Allocates the D3D11 shader object matching |type|.
// Returns NULL (after asserting) for any type other than vertex or pixel.
Shader* D3D11ShaderCache::CreateCore(
    xenos::XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length,
    uint64_t hash) {
  if (type == XE_GPU_SHADER_TYPE_VERTEX) {
    return new D3D11VertexShader(device_, src_ptr, length, hash);
  }
  if (type == XE_GPU_SHADER_TYPE_PIXEL) {
    return new D3D11PixelShader(device_, src_ptr, length, hash);
  }
  // Unrecognized shader type.
  XEASSERTALWAYS();
  return NULL;
}
|
|
@ -1,46 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/shader_cache.h>
|
||||
|
||||
#include <D3D11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
|
||||
class D3D11ShaderCache : public ShaderCache {
|
||||
public:
|
||||
D3D11ShaderCache(ID3D11Device* device);
|
||||
virtual ~D3D11ShaderCache();
|
||||
|
||||
protected:
|
||||
Shader* CreateCore(
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length,
|
||||
uint64_t hash) override;
|
||||
|
||||
protected:
|
||||
ID3D11Device* device_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_SHADER_CACHE_H_
|
|
@ -0,0 +1,381 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_shader_resource.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_shader_translator.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
|
||||
#include <d3dcompiler.h>
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// Compiles translated HLSL source into a bytecode blob.
//
// |type| selects the target profile: vs_5_0 for XE_GPU_SHADER_TYPE_VERTEX and
// ps_5_0 for anything else. |disasm_source| is hashed to build the generated
// source name handed to the compiler. When --dump_shaders is set, both the
// HLSL and the disassembly (inside a comment) are written to that file.
//
// Returns the compiled blob, which the caller is responsible for releasing,
// or nullptr if compilation failed. Compiler messages are logged.
ID3D10Blob* D3D11ShaderCompile(XE_GPU_SHADER_TYPE type,
                               const char* shader_source,
                               const char* disasm_source) {
  SCOPE_profile_cpu_f("gpu");

  // TODO(benvanik): pick shared runtime mode defines.
  D3D10_SHADER_MACRO defines[] = {
    "TEST_DEFINE", "1",
    0, 0,
  };

  uint32_t flags1 = 0;
  flags1 |= D3D10_SHADER_DEBUG;
  flags1 |= D3D10_SHADER_ENABLE_STRICTNESS;
  uint32_t flags2 = 0;

  // Create a name.
  const bool dump_enabled = FLAGS_dump_shaders.size() != 0;
  const char* base_path = dump_enabled ? FLAGS_dump_shaders.c_str() : "";
  // Keep the full 64 bits so the value matches the %llX conversion below even
  // where size_t is narrower.
  uint64_t hash = xe_hash64(disasm_source, xestrlena(disasm_source)); // ?
  char file_name[XE_MAX_PATH];
  xesnprintfa(file_name, XECOUNT(file_name),
              "%s/gen_%.16llX.%s",
              base_path,
              static_cast<unsigned long long>(hash),
              type == XE_GPU_SHADER_TYPE_VERTEX ? "vs" : "ps");

  if (dump_enabled) {
    FILE* f = fopen(file_name, "w");
    if (f) {
      // Shader text routinely contains '%'; write it verbatim rather than
      // using it as a printf format string.
      fputs(shader_source, f);
      fputs("\n\n", f);
      fputs("/*\n", f);
      fputs(disasm_source, f);
      fputs(" */\n", f);
      fclose(f);
    } else {
      // Dumping is best-effort; a bad dump path must not break compilation.
      XELOGW("D3D11: unable to open %s for shader dump", file_name);
    }
  }

  // Compile shader to bytecode blob.
  ID3D10Blob* shader_blob = 0;
  ID3D10Blob* error_blob = 0;
  HRESULT hr = D3DCompile(
      shader_source, strlen(shader_source),
      file_name,
      defines, nullptr,
      "main",
      type == XE_GPU_SHADER_TYPE_VERTEX ? "vs_5_0" : "ps_5_0",
      flags1, flags2,
      &shader_blob, &error_blob);
  if (error_blob) {
    char* msg = (char*)error_blob->GetBufferPointer();
    XELOGE("D3D11: shader compile failed with %s", msg);
  }
  XESAFERELEASE(error_blob);
  if (FAILED(hr)) {
    return nullptr;
  }
  return shader_blob;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
// Creates an unprepared vertex shader resource. All D3D handles and the
// translated source start out null, and every geometry shader slot is zeroed.
D3D11VertexShaderResource::D3D11VertexShaderResource(
    D3D11ResourceCache* resource_cache,
    const MemoryRange& memory_range,
    const Info& info)
    : VertexShaderResource(memory_range, info),
      resource_cache_(resource_cache),
      handle_(nullptr),
      input_layout_(nullptr),
      translated_src_(nullptr) {
  // Clear the whole geometry shader table in one go.
  xe_zero_struct(geometry_shaders_, sizeof(geometry_shaders_));
}
|
||||
|
||||
// Releases the D3D shader and input layout, deletes any geometry shaders that
// were created for this vertex shader, and frees the translated source.
D3D11VertexShaderResource::~D3D11VertexShaderResource() {
  XESAFERELEASE(handle_);
  XESAFERELEASE(input_layout_);
  // Slots that were never filled are still null, which delete accepts.
  for (auto* geometry_shader : geometry_shaders_) {
    delete geometry_shader;
  }
  xe_free(translated_src_);
}
|
||||
|
||||
int D3D11VertexShaderResource::Prepare(
|
||||
const xe_gpu_program_cntl_t& program_cntl) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (is_prepared_ || handle_) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO(benvanik): look in file based on hash/etc.
|
||||
void* byte_code = NULL;
|
||||
size_t byte_code_length = 0;
|
||||
|
||||
// Translate and compile source.
|
||||
D3D11ShaderTranslator translator;
|
||||
int ret = translator.TranslateVertexShader(this, program_cntl);
|
||||
if (ret) {
|
||||
XELOGE("D3D11: failed to translate vertex shader");
|
||||
return ret;
|
||||
}
|
||||
translated_src_ = xestrdupa(translator.translated_src());
|
||||
|
||||
ID3D10Blob* shader_blob = D3D11ShaderCompile(
|
||||
XE_GPU_SHADER_TYPE_VERTEX, translated_src_, disasm_src());
|
||||
if (!shader_blob) {
|
||||
return 1;
|
||||
}
|
||||
byte_code_length = shader_blob->GetBufferSize();
|
||||
byte_code = xe_malloc(byte_code_length);
|
||||
xe_copy_struct(
|
||||
byte_code, shader_blob->GetBufferPointer(), byte_code_length);
|
||||
XESAFERELEASE(shader_blob);
|
||||
|
||||
// Create shader.
|
||||
HRESULT hr = resource_cache_->device()->CreateVertexShader(
|
||||
byte_code, byte_code_length,
|
||||
nullptr,
|
||||
&handle_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: failed to create vertex shader");
|
||||
xe_free(byte_code);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Create input layout.
|
||||
ret = CreateInputLayout(byte_code, byte_code_length);
|
||||
xe_free(byte_code);
|
||||
if (ret) {
|
||||
return 1;
|
||||
}
|
||||
is_prepared_ = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int D3D11VertexShaderResource::CreateInputLayout(const void* byte_code,
|
||||
size_t byte_code_length) {
|
||||
size_t element_count = 0;
|
||||
const auto& inputs = buffer_inputs();
|
||||
for (uint32_t n = 0; n < inputs.count; n++) {
|
||||
element_count += inputs.descs[n].info.element_count;
|
||||
}
|
||||
if (!element_count) {
|
||||
XELOGW("D3D11: vertex shader with zero inputs -- retaining previous values?");
|
||||
input_layout_ = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
D3D11_INPUT_ELEMENT_DESC* element_descs =
|
||||
(D3D11_INPUT_ELEMENT_DESC*)xe_alloca(
|
||||
sizeof(D3D11_INPUT_ELEMENT_DESC) * element_count);
|
||||
uint32_t el_index = 0;
|
||||
for (uint32_t n = 0; n < inputs.count; n++) {
|
||||
const auto& input = inputs.descs[n];
|
||||
for (uint32_t m = 0; m < input.info.element_count; m++) {
|
||||
const auto& el = input.info.elements[m];
|
||||
uint32_t vb_slot = input.input_index;
|
||||
DXGI_FORMAT vtx_format;
|
||||
switch (el.format) {
|
||||
case FMT_8_8_8_8:
|
||||
if (el.is_normalized) {
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R8G8B8A8_SNORM : DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
} else {
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R8G8B8A8_SINT : DXGI_FORMAT_R8G8B8A8_UINT;
|
||||
}
|
||||
break;
|
||||
case FMT_2_10_10_10:
|
||||
if (el.is_normalized) {
|
||||
vtx_format = DXGI_FORMAT_R10G10B10A2_UNORM;
|
||||
} else {
|
||||
vtx_format = DXGI_FORMAT_R10G10B10A2_UINT;
|
||||
}
|
||||
break;
|
||||
// DXGI_FORMAT_R11G11B10_FLOAT?
|
||||
case FMT_16_16:
|
||||
if (el.is_normalized) {
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R16G16_SNORM : DXGI_FORMAT_R16G16_UNORM;
|
||||
} else {
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R16G16_SINT : DXGI_FORMAT_R16G16_UINT;
|
||||
}
|
||||
break;
|
||||
case FMT_16_16_16_16:
|
||||
if (el.is_normalized) {
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R16G16B16A16_SNORM : DXGI_FORMAT_R16G16B16A16_UNORM;
|
||||
} else {
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R16G16B16A16_SINT : DXGI_FORMAT_R16G16B16A16_UINT;
|
||||
}
|
||||
break;
|
||||
case FMT_16_16_FLOAT:
|
||||
vtx_format = DXGI_FORMAT_R16G16_FLOAT;
|
||||
break;
|
||||
case FMT_16_16_16_16_FLOAT:
|
||||
vtx_format = DXGI_FORMAT_R16G16B16A16_FLOAT;
|
||||
break;
|
||||
case FMT_32:
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R32_SINT : DXGI_FORMAT_R32_UINT;
|
||||
break;
|
||||
case FMT_32_32:
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R32G32_SINT : DXGI_FORMAT_R32G32_UINT;
|
||||
break;
|
||||
case FMT_32_32_32_32:
|
||||
vtx_format = el.is_signed ?
|
||||
DXGI_FORMAT_R32G32B32A32_SINT : DXGI_FORMAT_R32G32B32A32_UINT;
|
||||
break;
|
||||
case FMT_32_FLOAT:
|
||||
vtx_format = DXGI_FORMAT_R32_FLOAT;
|
||||
break;
|
||||
case FMT_32_32_FLOAT:
|
||||
vtx_format = DXGI_FORMAT_R32G32_FLOAT;
|
||||
break;
|
||||
case FMT_32_32_32_FLOAT:
|
||||
vtx_format = DXGI_FORMAT_R32G32B32_FLOAT;
|
||||
break;
|
||||
case FMT_32_32_32_32_FLOAT:
|
||||
vtx_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
break;
|
||||
default:
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
}
|
||||
element_descs[el_index].SemanticName = "XE_VF";
|
||||
element_descs[el_index].SemanticIndex = el_index;
|
||||
element_descs[el_index].Format = vtx_format;
|
||||
element_descs[el_index].InputSlot = vb_slot;
|
||||
element_descs[el_index].AlignedByteOffset = el.offset_words * 4;
|
||||
element_descs[el_index].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
|
||||
element_descs[el_index].InstanceDataStepRate = 0;
|
||||
el_index++;
|
||||
}
|
||||
}
|
||||
HRESULT hr = resource_cache_->device()->CreateInputLayout(
|
||||
element_descs,
|
||||
(UINT)element_count,
|
||||
byte_code, byte_code_length,
|
||||
&input_layout_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: failed to create vertex shader input layout");
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns (creating and caching on first use) the geometry shader of the
// requested type that pairs with this vertex shader.
//
// type: which geometry shader variant is required.
// out_shader: receives the cached or newly prepared shader on success.
//
// Returns 0 on success, 1 if the type is invalid or preparation fails.
int D3D11VertexShaderResource::DemandGeometryShader(
    GeometryShaderType type, D3D11GeometryShader** out_shader) {
  // Validate before touching the cache array; the original indexed
  // geometry_shaders_ first and only rejected bad values in the switch below.
  if (static_cast<int>(type) < 0 ||
      static_cast<int>(type) >= MAX_GEOMETRY_SHADER_TYPE) {
    XEASSERTALWAYS();
    return 1;
  }

  if (geometry_shaders_[type]) {
    *out_shader = geometry_shaders_[type];
    return 0;
  }

  // Demand generate.
  auto device = resource_cache_->device();
  D3D11GeometryShader* shader = nullptr;
  switch (type) {
    case POINT_SPRITE_SHADER:
      shader = new D3D11PointSpriteGeometryShader(device);
      break;
    case RECT_LIST_SHADER:
      shader = new D3D11RectListGeometryShader(device);
      break;
    case QUAD_LIST_SHADER:
      shader = new D3D11QuadListGeometryShader(device);
      break;
    default:
      XEASSERTALWAYS();
      return 1;
  }
  if (!shader) {
    return 1;
  }

  if (shader->Prepare(this)) {
    // Preparation failed; do not cache a half-built shader.
    delete shader;
    return 1;
  }

  geometry_shaders_[type] = shader;
  *out_shader = geometry_shaders_[type];
  return 0;
}
|
||||
|
||||
// Creates an unprepared pixel shader resource: the owning cache is recorded
// and the D3D handle and translated source start out empty until Prepare().
D3D11PixelShaderResource::D3D11PixelShaderResource(
    D3D11ResourceCache* resource_cache, const MemoryRange& memory_range,
    const Info& info)
    : PixelShaderResource(memory_range, info),
      resource_cache_(resource_cache),
      handle_(nullptr),
      translated_src_(nullptr) {}
|
||||
|
||||
// Releases the D3D pixel shader object and frees the translated source text.
D3D11PixelShaderResource::~D3D11PixelShaderResource() {
  XESAFERELEASE(handle_);      // D3D shader object, if one was created.
  xe_free(translated_src_);    // Copy of the translated shader source.
}
|
||||
|
||||
int D3D11PixelShaderResource::Prepare(const xe_gpu_program_cntl_t& program_cntl,
|
||||
VertexShaderResource* input_shader) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
if (is_prepared_ || handle_) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO(benvanik): look in file based on hash/etc.
|
||||
void* byte_code = NULL;
|
||||
size_t byte_code_length = 0;
|
||||
|
||||
// Translate and compile source.
|
||||
D3D11ShaderTranslator translator;
|
||||
int ret = translator.TranslatePixelShader(this,
|
||||
program_cntl,
|
||||
input_shader->alloc_counts());
|
||||
if (ret) {
|
||||
XELOGE("D3D11: failed to translate pixel shader");
|
||||
return ret;
|
||||
}
|
||||
translated_src_ = xestrdupa(translator.translated_src());
|
||||
|
||||
ID3D10Blob* shader_blob = D3D11ShaderCompile(
|
||||
XE_GPU_SHADER_TYPE_PIXEL, translated_src_, disasm_src());
|
||||
if (!shader_blob) {
|
||||
return 1;
|
||||
}
|
||||
byte_code_length = shader_blob->GetBufferSize();
|
||||
byte_code = xe_malloc(byte_code_length);
|
||||
xe_copy_struct(
|
||||
byte_code, shader_blob->GetBufferPointer(), byte_code_length);
|
||||
XESAFERELEASE(shader_blob);
|
||||
|
||||
// Create shader.
|
||||
HRESULT hr = resource_cache_->device()->CreatePixelShader(
|
||||
byte_code, byte_code_length,
|
||||
nullptr,
|
||||
&handle_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: failed to create pixel shader");
|
||||
xe_free(byte_code);
|
||||
return 1;
|
||||
}
|
||||
|
||||
xe_free(byte_code);
|
||||
is_prepared_ = true;
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_
|
||||
|
||||
#include <xenia/gpu/shader_resource.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
class D3D11GeometryShader;
|
||||
class D3D11ResourceCache;
|
||||
|
||||
struct Output;
|
||||
typedef struct {
|
||||
Output* output;
|
||||
xenos::XE_GPU_SHADER_TYPE type;
|
||||
uint32_t tex_fetch_index;
|
||||
} xe_gpu_translate_ctx_t;
|
||||
|
||||
class D3D11VertexShaderResource : public VertexShaderResource {
|
||||
public:
|
||||
D3D11VertexShaderResource(D3D11ResourceCache* resource_cache,
|
||||
const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~D3D11VertexShaderResource() override;
|
||||
|
||||
void* handle() const override { return handle_; }
|
||||
ID3D11InputLayout* input_layout() const { return input_layout_; }
|
||||
const char* translated_src() const { return translated_src_; }
|
||||
|
||||
int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl) override;
|
||||
|
||||
enum GeometryShaderType {
|
||||
POINT_SPRITE_SHADER,
|
||||
RECT_LIST_SHADER,
|
||||
QUAD_LIST_SHADER,
|
||||
MAX_GEOMETRY_SHADER_TYPE, // keep at the end
|
||||
};
|
||||
int DemandGeometryShader(GeometryShaderType type,
|
||||
D3D11GeometryShader** out_shader);
|
||||
|
||||
private:
|
||||
int CreateInputLayout(const void* byte_code, size_t byte_code_length);
|
||||
|
||||
D3D11ResourceCache* resource_cache_;
|
||||
ID3D11VertexShader* handle_;
|
||||
ID3D11InputLayout* input_layout_;
|
||||
D3D11GeometryShader* geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE];
|
||||
char* translated_src_;
|
||||
};
|
||||
|
||||
|
||||
class D3D11PixelShaderResource : public PixelShaderResource {
|
||||
public:
|
||||
D3D11PixelShaderResource(D3D11ResourceCache* resource_cache,
|
||||
const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~D3D11PixelShaderResource() override;
|
||||
|
||||
void* handle() const override { return handle_; }
|
||||
const char* translated_src() const { return translated_src_; }
|
||||
|
||||
int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl,
|
||||
VertexShaderResource* vertex_shader) override;
|
||||
|
||||
private:
|
||||
D3D11ResourceCache* resource_cache_;
|
||||
ID3D11PixelShader* handle_;
|
||||
char* translated_src_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_SHADER_RESOURCE_H_
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,125 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_
|
||||
|
||||
#include <xenia/gpu/shader_resource.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
|
||||
class D3D11ShaderTranslator {
|
||||
public:
|
||||
const static uint32_t kMaxInterpolators = 16;
|
||||
|
||||
D3D11ShaderTranslator();
|
||||
|
||||
int TranslateVertexShader(VertexShaderResource* vertex_shader,
|
||||
const xenos::xe_gpu_program_cntl_t& program_cntl);
|
||||
int TranslatePixelShader(
|
||||
PixelShaderResource* pixel_shader,
|
||||
const xenos::xe_gpu_program_cntl_t& program_cntl,
|
||||
const VertexShaderResource::AllocCounts& alloc_counts);
|
||||
|
||||
const char* translated_src() const { return buffer_; }
|
||||
|
||||
private:
|
||||
xenos::XE_GPU_SHADER_TYPE type_;
|
||||
uint32_t tex_fetch_index_;
|
||||
const uint32_t* dwords_;
|
||||
|
||||
static const int kCapacity = 64 * 1024;
|
||||
char buffer_[kCapacity];
|
||||
size_t capacity_;
|
||||
size_t offset_;
|
||||
void append(const char* format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
int len = xevsnprintfa(buffer_ + offset_, capacity_ - offset_,
|
||||
format, args);
|
||||
va_end(args);
|
||||
offset_ += len;
|
||||
buffer_[offset_] = 0;
|
||||
}
|
||||
|
||||
void AppendTextureHeader(
|
||||
const ShaderResource::SamplerInputs& sampler_inputs);
|
||||
|
||||
void AppendSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
|
||||
uint32_t abs);
|
||||
void AppendDestRegName(uint32_t num, uint32_t dst_exp);
|
||||
void AppendDestReg(uint32_t num, uint32_t mask, uint32_t dst_exp);
|
||||
void AppendDestRegPost(uint32_t num, uint32_t mask, uint32_t dst_exp);
|
||||
void PrintSrcReg(uint32_t num, uint32_t type, uint32_t swiz, uint32_t negate,
|
||||
uint32_t abs);
|
||||
void PrintDstReg(uint32_t num, uint32_t mask, uint32_t dst_exp);
|
||||
void PrintExportComment(uint32_t num);
|
||||
|
||||
int TranslateALU(const xenos::instr_alu_t* alu, int sync);
|
||||
int TranslateALU_ADDv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_MULv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_MAXv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_MINv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SETXXv(const xenos::instr_alu_t& alu, const char* op);
|
||||
int TranslateALU_SETEv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SETGTv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SETGTEv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SETNEv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_FRACv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_TRUNCv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_FLOORv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_MULADDv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_CNDXXv(const xenos::instr_alu_t& alu, const char* op);
|
||||
int TranslateALU_CNDEv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_CNDGTEv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_CNDGTv(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_DOT4v(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_DOT3v(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_DOT2ADDv(const xenos::instr_alu_t& alu);
|
||||
// CUBEv
|
||||
int TranslateALU_MAX4v(const xenos::instr_alu_t& alu);
|
||||
// ...
|
||||
int TranslateALU_MAXs(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_MINs(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SETXXs(const xenos::instr_alu_t& alu, const char* op);
|
||||
int TranslateALU_SETEs(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SETGTs(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SETGTEs(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SETNEs(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_RECIP_IEEE(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_MUL_CONST_0(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_MUL_CONST_1(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_ADD_CONST_0(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_ADD_CONST_1(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SUB_CONST_0(const xenos::instr_alu_t& alu);
|
||||
int TranslateALU_SUB_CONST_1(const xenos::instr_alu_t& alu);
|
||||
|
||||
void PrintDestFecth(uint32_t dst_reg, uint32_t dst_swiz);
|
||||
void AppendFetchDest(uint32_t dst_reg, uint32_t dst_swiz);
|
||||
int GetFormatComponentCount(uint32_t format);
|
||||
|
||||
int TranslateExec(const xenos::instr_cf_exec_t& cf);
|
||||
int TranslateVertexFetch(const xenos::instr_fetch_vtx_t* vtx, int sync);
|
||||
int TranslateTextureFetch(const xenos::instr_fetch_tex_t* tex, int sync);
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_SHADER_TRANSLATOR_H_
|
|
@ -1,264 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_texture.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_texture_cache.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Creates a texture for the given guest address / host pointer pair and
// remembers the cache that supplies the D3D device and context.
D3D11Texture::D3D11Texture(
    D3D11TextureCache* cache, uint32_t address, const uint8_t* host_address)
    : Texture(address, host_address), cache_(cache) {}
|
||||
|
||||
// Nothing to release here: this class only holds a non-owned cache pointer.
D3D11Texture::~D3D11Texture() {}
|
||||
|
||||
// Creates a new view for the given fetch constant: fills in the view info,
// creates and uploads the backing D3D texture, then creates its shader
// resource view.
//
// Returns the new view (the caller takes ownership) or nullptr on failure.
// Every failure path deletes the partially built view; the original leaked it.
TextureView* D3D11Texture::FetchNew(
    const xenos::xe_gpu_texture_fetch_t& fetch) {
  D3D11TextureView* view = new D3D11TextureView();
  if (!FillViewInfo(view, fetch)) {
    delete view;
    return nullptr;
  }

  D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc;
  xe_zero_struct(&srv_desc, sizeof(srv_desc));
  // TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?)
  srv_desc.Format = view->format;

  // Only the top mip level is exposed for every dimension.
  bool created = false;
  switch (view->dimensions) {
    case DIMENSION_1D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
      srv_desc.Texture1D.MipLevels = 1;
      srv_desc.Texture1D.MostDetailedMip = 0;
      created = CreateTexture1D(view, fetch);
      if (!created) {
        XELOGE("D3D11: failed to fetch Texture1D");
      }
      break;
    case DIMENSION_2D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
      srv_desc.Texture2D.MipLevels = 1;
      srv_desc.Texture2D.MostDetailedMip = 0;
      created = CreateTexture2D(view, fetch);
      if (!created) {
        XELOGE("D3D11: failed to fetch Texture2D");
      }
      break;
    case DIMENSION_3D:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
      srv_desc.Texture3D.MipLevels = 1;
      srv_desc.Texture3D.MostDetailedMip = 0;
      created = CreateTexture3D(view, fetch);
      if (!created) {
        XELOGE("D3D11: failed to fetch Texture3D");
      }
      break;
    case DIMENSION_CUBE:
      srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE;
      srv_desc.TextureCube.MipLevels = 1;
      srv_desc.TextureCube.MostDetailedMip = 0;
      created = CreateTextureCube(view, fetch);
      if (!created) {
        XELOGE("D3D11: failed to fetch TextureCube");
      }
      break;
  }
  if (!created) {
    // Also covers an unrecognized dimension, where no resource exists.
    delete view;
    return nullptr;
  }

  HRESULT hr = cache_->device()->CreateShaderResourceView(
      view->resource, &srv_desc, &view->srv);
  if (FAILED(hr)) {
    XELOGE("D3D11: unable to create texture resource view");
    delete view;
    return nullptr;
  }

  return view;
}
|
||||
|
||||
// Re-uploads the contents of an existing view by dispatching to the fetch
// routine for its dimensionality. Returns false for unknown dimensions.
bool D3D11Texture::FetchDirty(
    TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  auto d3d_view = static_cast<D3D11TextureView*>(view);
  const auto dims = view->dimensions;
  if (dims == DIMENSION_1D) {
    return FetchTexture1D(d3d_view, fetch);
  }
  if (dims == DIMENSION_2D) {
    return FetchTexture2D(d3d_view, fetch);
  }
  if (dims == DIMENSION_3D) {
    return FetchTexture3D(d3d_view, fetch);
  }
  if (dims == DIMENSION_CUBE) {
    return FetchTextureCube(d3d_view, fetch);
  }
  return false;
}
|
||||
|
||||
// Creates the dynamic, CPU-writable 1D texture behind `view` and then
// uploads its contents. Returns false if creation or the upload fails.
bool D3D11Texture::CreateTexture1D(
    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  D3D11_TEXTURE1D_DESC desc;
  xe_zero_struct(&desc, sizeof(desc));
  // The fetch constant stores width - 1.
  desc.Width          = 1 + fetch.size_1d.width;
  desc.MipLevels      = 1;
  desc.ArraySize      = 1;
  desc.Format         = view->format;
  desc.Usage          = D3D11_USAGE_DYNAMIC;
  desc.BindFlags      = D3D11_BIND_SHADER_RESOURCE;
  desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
  desc.MiscFlags      = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?

  HRESULT hr = cache_->device()->CreateTexture1D(
      &desc, NULL, (ID3D11Texture1D**)&view->resource);
  return FAILED(hr) ? false : FetchTexture1D(view, fetch);
}
|
||||
|
||||
// Placeholder for the 1D texture upload: logs an error and reports failure.
bool D3D11Texture::FetchTexture1D(
    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
  SCOPE_profile_cpu_f("gpu");

  // TODO(benvanik): upload!
  XELOGE("D3D11: FetchTexture1D not yet implemented");
  return false;
}
|
||||
|
||||
// Creates the dynamic, CPU-writable 2D texture behind `view`, sized from the
// view's computed output dimensions, and then uploads its contents.
// Returns false if creation or the upload fails.
bool D3D11Texture::CreateTexture2D(
    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  XEASSERTTRUE(fetch.dimension == 1);

  D3D11_TEXTURE2D_DESC desc;
  xe_zero_struct(&desc, sizeof(desc));
  desc.Width              = view->sizes_2d.output_width;
  desc.Height             = view->sizes_2d.output_height;
  desc.MipLevels          = 1;
  desc.ArraySize          = 1;
  desc.Format             = view->format;
  desc.SampleDesc.Count   = 1;
  desc.SampleDesc.Quality = 0;
  desc.Usage              = D3D11_USAGE_DYNAMIC;
  desc.BindFlags          = D3D11_BIND_SHADER_RESOURCE;
  desc.CPUAccessFlags     = D3D11_CPU_ACCESS_WRITE;
  desc.MiscFlags          = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?

  HRESULT hr = cache_->device()->CreateTexture2D(
      &desc, NULL, (ID3D11Texture2D**)&view->resource);
  return FAILED(hr) ? false : FetchTexture2D(view, fetch);
}
|
||||
|
||||
// Uploads the guest 2D texture into the mapped D3D resource, endian-swapping
// one texel/block at a time. Linear sources are copied row by row; tiled
// sources are read through the tiled-offset helpers.
// Returns false if the resource cannot be mapped.
bool D3D11Texture::FetchTexture2D(
    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
  SCOPE_profile_cpu_f("gpu");

  XEASSERTTRUE(fetch.dimension == 1);

  auto sizes = GetTextureSizes2D(view);

  // TODO(benvanik): all mip levels.
  D3D11_MAPPED_SUBRESOURCE mapped;
  HRESULT hr = cache_->context()->Map(view->resource, 0,
                                      D3D11_MAP_WRITE_DISCARD, 0, &mapped);
  if (FAILED(hr)) {
    XELOGE("D3D11: failed to map texture");
    return false;
  }

  const uint8_t* src = cache_->memory()->Translate(address_);
  uint8_t* dest = (uint8_t*)mapped.pData;

  //memset(dest, 0, output_pitch * (output_height / view->block_size)); // TODO(gibbed): remove me later

  // Destination rows use the pitch D3D reports, not the logical pitch.
  uint32_t output_pitch = mapped.RowPitch; // (output_width / info.block_size) * info.texel_pitch;
  if (fetch.tiled) {
    auto bpp = (view->texel_pitch >> 2) + ((view->texel_pitch >> 1) >> (view->texel_pitch >> 2));
    uint32_t output_base_offset = 0;
    for (uint32_t y = 0; y < sizes.block_height; y++) {
      auto input_base_offset = TiledOffset2DOuter(y, (sizes.input_width / view->block_size), bpp);
      uint32_t output_offset = output_base_offset;
      for (uint32_t x = 0; x < sizes.block_width; x++) {
        auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp;
        TextureSwap(dest + output_offset,
                    src + input_offset * view->texel_pitch,
                    view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness);
        output_offset += view->texel_pitch;
      }
      output_base_offset += output_pitch;
    }
  } else {
    const uint8_t* src_row = src;
    uint8_t* dest_row = dest;
    for (uint32_t y = 0; y < sizes.block_height; y++) {
      for (uint32_t x = 0; x < sizes.logical_pitch; x += view->texel_pitch) {
        TextureSwap(dest_row + x, src_row + x, view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness);
      }
      src_row += sizes.input_pitch;
      dest_row += output_pitch;
    }
  }
  cache_->context()->Unmap(view->resource, 0);
  return true;
}
|
||||
|
||||
// Placeholder: 3D texture creation is unsupported, so this logs, asserts and
// reports failure.
bool D3D11Texture::CreateTexture3D(
    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  XELOGE("D3D11: CreateTexture3D not yet implemented");
  XEASSERTALWAYS();
  return false;
}
|
||||
|
||||
// Placeholder: 3D texture upload is unsupported, so this logs, asserts and
// reports failure. The commented-out sketch below is kept for reference.
bool D3D11Texture::FetchTexture3D(
    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
  SCOPE_profile_cpu_f("gpu");

  XELOGE("D3D11: FetchTexture3D not yet implemented");
  XEASSERTALWAYS();
  return false;
  //D3D11_TEXTURE3D_DESC texture_desc;
  //xe_zero_struct(&texture_desc, sizeof(texture_desc));
  //texture_desc.Width;
  //texture_desc.Height;
  //texture_desc.Depth;
  //texture_desc.MipLevels;
  //texture_desc.Format;
  //texture_desc.Usage;
  //texture_desc.BindFlags;
  //texture_desc.CPUAccessFlags;
  //texture_desc.MiscFlags;
  //hr = device_->CreateTexture3D(
  //    &texture_desc, &initial_data, (ID3D11Texture3D**)&view->resource);
}
|
||||
|
||||
// Placeholder: cube texture creation is unsupported, so this logs, asserts
// and reports failure.
bool D3D11Texture::CreateTextureCube(
    D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) {
  XELOGE("D3D11: CreateTextureCube not yet implemented");
  XEASSERTALWAYS();
  return false;
}
|
||||
|
||||
// Placeholder: cube texture upload is unsupported, so this logs, asserts and
// reports failure.
bool D3D11Texture::FetchTextureCube(
    D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) {
  SCOPE_profile_cpu_f("gpu");

  XELOGE("D3D11: FetchTextureCube not yet implemented");
  XEASSERTALWAYS();
  return false;
}
|
|
@ -1,78 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_TEXTURE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/texture.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
class D3D11TextureCache;
|
||||
|
||||
|
||||
struct D3D11TextureView : TextureView {
|
||||
ID3D11Resource* resource;
|
||||
ID3D11ShaderResourceView* srv;
|
||||
|
||||
D3D11TextureView()
|
||||
: resource(nullptr), srv(nullptr) {}
|
||||
virtual ~D3D11TextureView() {
|
||||
XESAFERELEASE(srv);
|
||||
XESAFERELEASE(resource);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class D3D11Texture : public Texture {
|
||||
public:
|
||||
D3D11Texture(D3D11TextureCache* cache, uint32_t address,
|
||||
const uint8_t* host_address);
|
||||
virtual ~D3D11Texture();
|
||||
|
||||
protected:
|
||||
TextureView* FetchNew(
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch) override;
|
||||
bool FetchDirty(
|
||||
TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) override;
|
||||
|
||||
bool CreateTexture1D(
|
||||
D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
bool FetchTexture1D(
|
||||
D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
bool CreateTexture2D(
|
||||
D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
bool FetchTexture2D(
|
||||
D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
bool CreateTexture3D(
|
||||
D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
bool FetchTexture3D(
|
||||
D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
bool CreateTextureCube(
|
||||
D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
bool FetchTextureCube(
|
||||
D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
|
||||
D3D11TextureCache* cache_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_H_
|
|
@ -1,61 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/texture_cache.h>
|
||||
#include <xenia/gpu/shader.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_texture.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
|
||||
class D3D11TextureCache : public TextureCache {
|
||||
public:
|
||||
D3D11TextureCache(Memory* memory,
|
||||
ID3D11DeviceContext* context, ID3D11Device* device);
|
||||
virtual ~D3D11TextureCache();
|
||||
|
||||
ID3D11DeviceContext* context() const { return context_; }
|
||||
ID3D11Device* device() const { return device_; }
|
||||
|
||||
ID3D11SamplerState* GetSamplerState(
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch,
|
||||
const Shader::tex_buffer_desc_t& desc);
|
||||
|
||||
protected:
|
||||
Texture* CreateTexture(uint32_t address, const uint8_t* host_address,
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch) override;
|
||||
|
||||
private:
|
||||
ID3D11DeviceContext* context_;
|
||||
ID3D11Device* device_;
|
||||
|
||||
struct CachedSamplerState {
|
||||
D3D11_SAMPLER_DESC desc;
|
||||
ID3D11SamplerState* state;
|
||||
};
|
||||
std::unordered_multimap<size_t, CachedSamplerState> samplers_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_CACHE_H_
|
|
@ -0,0 +1,219 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/d3d11/d3d11_texture_resource.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/d3d11/d3d11_resource_cache.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::d3d11;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Creates a texture resource with no D3D objects yet; the texture and its
// shader resource view are created later by CreateHandle().
D3D11TextureResource::D3D11TextureResource(
    D3D11ResourceCache* resource_cache, const MemoryRange& memory_range,
    const Info& info)
    : TextureResource(memory_range, info),
      resource_cache_(resource_cache),
      texture_(nullptr),
      handle_(nullptr) {}
|
||||
|
||||
// Releases the D3D texture and the shader resource view created for it.
D3D11TextureResource::~D3D11TextureResource() {
  XESAFERELEASE(texture_);  // Underlying texture object.
  XESAFERELEASE(handle_);   // Shader resource view over the texture.
}
|
||||
|
||||
int D3D11TextureResource::CreateHandle() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc;
|
||||
xe_zero_struct(&srv_desc, sizeof(srv_desc));
|
||||
// TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?)
|
||||
srv_desc.Format = info_.format;
|
||||
|
||||
D3D_SRV_DIMENSION dimension = D3D11_SRV_DIMENSION_UNKNOWN;
|
||||
switch (info_.dimension) {
|
||||
case TEXTURE_DIMENSION_1D:
|
||||
srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
|
||||
srv_desc.Texture1D.MipLevels = 1;
|
||||
srv_desc.Texture1D.MostDetailedMip = 0;
|
||||
if (CreateHandle1D()) {
|
||||
XELOGE("D3D11: failed to create Texture1D");
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case TEXTURE_DIMENSION_2D:
|
||||
srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
|
||||
srv_desc.Texture2D.MipLevels = 1;
|
||||
srv_desc.Texture2D.MostDetailedMip = 0;
|
||||
if (CreateHandle2D()) {
|
||||
XELOGE("D3D11: failed to create Texture2D");
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case TEXTURE_DIMENSION_3D:
|
||||
srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
|
||||
srv_desc.Texture3D.MipLevels = 1;
|
||||
srv_desc.Texture3D.MostDetailedMip = 0;
|
||||
if (CreateHandle3D()) {
|
||||
XELOGE("D3D11: failed to create Texture3D");
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
case TEXTURE_DIMENSION_CUBE:
|
||||
srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE;
|
||||
srv_desc.TextureCube.MipLevels = 1;
|
||||
srv_desc.TextureCube.MostDetailedMip = 0;
|
||||
if (CreateHandleCube()) {
|
||||
XELOGE("D3D11: failed to create TextureCube");
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
HRESULT hr = resource_cache_->device()->CreateShaderResourceView(
|
||||
texture_, &srv_desc, &handle_);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: unable to create texture resource view");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int D3D11TextureResource::CreateHandle1D() {
|
||||
uint32_t width = 1 + info_.size_1d.width;
|
||||
|
||||
D3D11_TEXTURE1D_DESC texture_desc;
|
||||
xe_zero_struct(&texture_desc, sizeof(texture_desc));
|
||||
texture_desc.Width = width;
|
||||
texture_desc.MipLevels = 1;
|
||||
texture_desc.ArraySize = 1;
|
||||
texture_desc.Format = info_.format;
|
||||
texture_desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
|
||||
HRESULT hr = resource_cache_->device()->CreateTexture1D(
|
||||
&texture_desc, NULL, (ID3D11Texture1D**)&texture_);
|
||||
if (FAILED(hr)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int D3D11TextureResource::CreateHandle2D() {
|
||||
D3D11_TEXTURE2D_DESC texture_desc;
|
||||
xe_zero_struct(&texture_desc, sizeof(texture_desc));
|
||||
texture_desc.Width = info_.size_2d.output_width;
|
||||
texture_desc.Height = info_.size_2d.output_height;
|
||||
texture_desc.MipLevels = 1;
|
||||
texture_desc.ArraySize = 1;
|
||||
texture_desc.Format = info_.format;
|
||||
texture_desc.SampleDesc.Count = 1;
|
||||
texture_desc.SampleDesc.Quality = 0;
|
||||
texture_desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
|
||||
HRESULT hr = resource_cache_->device()->CreateTexture2D(
|
||||
&texture_desc, NULL, (ID3D11Texture2D**)&texture_);
|
||||
if (FAILED(hr)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Placeholder: 3D texture creation is not implemented. Logs, asserts, and
// always reports failure (1).
int D3D11TextureResource::CreateHandle3D() {
  XELOGE("D3D11: CreateTexture3D not yet implemented");
  XEASSERTALWAYS();
  return 1;
}
|
||||
|
||||
// Placeholder: cube texture creation is not implemented. Logs, asserts, and
// always reports failure (1).
int D3D11TextureResource::CreateHandleCube() {
  XELOGE("D3D11: CreateTextureCube not yet implemented");
  XEASSERTALWAYS();
  return 1;
}
|
||||
|
||||
int D3D11TextureResource::InvalidateRegion(const MemoryRange& memory_range) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
switch (info_.dimension) {
|
||||
case TEXTURE_DIMENSION_1D:
|
||||
return InvalidateRegion1D(memory_range);
|
||||
case TEXTURE_DIMENSION_2D:
|
||||
return InvalidateRegion2D(memory_range);
|
||||
case TEXTURE_DIMENSION_3D:
|
||||
return InvalidateRegion3D(memory_range);
|
||||
case TEXTURE_DIMENSION_CUBE:
|
||||
return InvalidateRegionCube(memory_range);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Stub: 1D texture uploads are not handled; always reports failure (1).
int D3D11TextureResource::InvalidateRegion1D(const MemoryRange& memory_range) {
  return 1;
}
|
||||
|
||||
int D3D11TextureResource::InvalidateRegion2D(const MemoryRange& memory_range) {
|
||||
// TODO(benvanik): all mip levels.
|
||||
D3D11_MAPPED_SUBRESOURCE res;
|
||||
HRESULT hr = resource_cache_->context()->Map(
|
||||
texture_, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
|
||||
if (FAILED(hr)) {
|
||||
XELOGE("D3D11: failed to map texture");
|
||||
return 1;
|
||||
}
|
||||
|
||||
const uint8_t* src = memory_range_.host_base;
|
||||
uint8_t* dest = (uint8_t*)res.pData;
|
||||
|
||||
uint32_t output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch;
|
||||
if (!info_.is_tiled) {
|
||||
dest = (uint8_t*)res.pData;
|
||||
for (uint32_t y = 0; y < info_.size_2d.block_height; y++) {
|
||||
for (uint32_t x = 0; x < info_.size_2d.logical_pitch; x += info_.texel_pitch) {
|
||||
TextureSwap(dest + x, src + x, info_.texel_pitch);
|
||||
}
|
||||
src += info_.size_2d.input_pitch;
|
||||
dest += output_pitch;
|
||||
}
|
||||
} else {
|
||||
auto bpp = (info_.texel_pitch >> 2) + ((info_.texel_pitch >> 1) >> (info_.texel_pitch >> 2));
|
||||
for (uint32_t y = 0, output_base_offset = 0;
|
||||
y < info_.size_2d.block_height;
|
||||
y++, output_base_offset += output_pitch) {
|
||||
auto input_base_offset = TiledOffset2DOuter(y, (info_.size_2d.input_width / info_.block_size), bpp);
|
||||
for (uint32_t x = 0, output_offset = output_base_offset;
|
||||
x < info_.size_2d.block_width;
|
||||
x++, output_offset += info_.texel_pitch) {
|
||||
auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp;
|
||||
TextureSwap(dest + output_offset,
|
||||
src + input_offset * info_.texel_pitch,
|
||||
info_.texel_pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
resource_cache_->context()->Unmap(texture_, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Stub: 3D texture uploads are not handled; always reports failure (1).
int D3D11TextureResource::InvalidateRegion3D(const MemoryRange& memory_range) {
  return 1;
}
|
||||
|
||||
int D3D11TextureResource::InvalidateRegionCube(
|
||||
const MemoryRange& memory_range) {
|
||||
return 1;
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_
|
||||
#define XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_
|
||||
|
||||
#include <xenia/gpu/texture_resource.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace d3d11 {
|
||||
|
||||
class D3D11ResourceCache;
|
||||
|
||||
|
||||
class D3D11TextureResource : public TextureResource {
|
||||
public:
|
||||
D3D11TextureResource(D3D11ResourceCache* resource_cache,
|
||||
const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~D3D11TextureResource() override;
|
||||
|
||||
void* handle() const override { return handle_; }
|
||||
|
||||
protected:
|
||||
int CreateHandle() override;
|
||||
int CreateHandle1D();
|
||||
int CreateHandle2D();
|
||||
int CreateHandle3D();
|
||||
int CreateHandleCube();
|
||||
|
||||
int InvalidateRegion(const MemoryRange& memory_range) override;
|
||||
int InvalidateRegion1D(const MemoryRange& memory_range);
|
||||
int InvalidateRegion2D(const MemoryRange& memory_range);
|
||||
int InvalidateRegion3D(const MemoryRange& memory_range);
|
||||
int InvalidateRegionCube(const MemoryRange& memory_range);
|
||||
|
||||
private:
|
||||
D3D11ResourceCache* resource_cache_;
|
||||
ID3D11Resource* texture_;
|
||||
ID3D11ShaderResourceView* handle_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace d3d11
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_D3D11_D3D11_TEXTURE_RESOURCE_H_
|
|
@ -1,10 +1,8 @@
|
|||
# Copyright 2013 Ben Vanik. All Rights Reserved.
|
||||
{
|
||||
'sources': [
|
||||
'd3d11_buffer.cc',
|
||||
'd3d11_buffer.h',
|
||||
'd3d11_buffer_cache.cc',
|
||||
'd3d11_buffer_cache.h',
|
||||
'd3d11_buffer_resource.cc',
|
||||
'd3d11_buffer_resource.h',
|
||||
'd3d11_geometry_shader.cc',
|
||||
'd3d11_geometry_shader.h',
|
||||
'd3d11_gpu-private.h',
|
||||
|
@ -16,14 +14,16 @@
|
|||
'd3d11_graphics_system.h',
|
||||
'd3d11_profiler_display.cc',
|
||||
'd3d11_profiler_display.h',
|
||||
'd3d11_shader.cc',
|
||||
'd3d11_shader.h',
|
||||
'd3d11_shader_cache.cc',
|
||||
'd3d11_shader_cache.h',
|
||||
'd3d11_texture.cc',
|
||||
'd3d11_texture.h',
|
||||
'd3d11_texture_cache.cc',
|
||||
'd3d11_texture_cache.h',
|
||||
'd3d11_resource_cache.cc',
|
||||
'd3d11_resource_cache.h',
|
||||
'd3d11_sampler_state_resource.cc',
|
||||
'd3d11_sampler_state_resource.h',
|
||||
'd3d11_shader_resource.cc',
|
||||
'd3d11_shader_resource.h',
|
||||
'd3d11_shader_translator.cc',
|
||||
'd3d11_shader_translator.h',
|
||||
'd3d11_texture_resource.cc',
|
||||
'd3d11_texture_resource.h',
|
||||
'd3d11_window.cc',
|
||||
'd3d11_window.h',
|
||||
],
|
||||
|
|
|
@ -2,26 +2,16 @@
|
|||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/xenos/registers.h>
|
||||
#include <xenia/gpu/draw_command.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
const char* xe::gpu::xenos::GetRegisterName(uint32_t index) {
|
||||
switch (index) {
|
||||
#define XE_GPU_REGISTER(index, type, name) \
|
||||
case index: return #name;
|
||||
#include <xenia/gpu/xenos/register_table.inc>
|
||||
#undef XE_GPU_REGISTER
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,78 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_DRAW_COMMAND_H_
|
||||
#define XENIA_GPU_DRAW_COMMAND_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/buffer_resource.h>
|
||||
#include <xenia/gpu/sampler_state_resource.h>
|
||||
#include <xenia/gpu/shader_resource.h>
|
||||
#include <xenia/gpu/texture_resource.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
// TODO(benvanik): move more of the enums in here?
|
||||
struct DrawCommand {
|
||||
xenos::XE_GPU_PRIMITIVE_TYPE prim_type;
|
||||
uint32_t start_index;
|
||||
uint32_t index_count;
|
||||
uint32_t base_vertex;
|
||||
|
||||
VertexShaderResource* vertex_shader;
|
||||
PixelShaderResource* pixel_shader;
|
||||
|
||||
// TODO(benvanik): dirty tracking/max ranges/etc.
|
||||
struct {
|
||||
float* values;
|
||||
size_t count;
|
||||
} float4_constants;
|
||||
struct {
|
||||
uint32_t* values;
|
||||
size_t count;
|
||||
} loop_constants;
|
||||
struct {
|
||||
uint32_t* values;
|
||||
size_t count;
|
||||
} bool_constants;
|
||||
|
||||
// Index buffer, if present. If index_count > 0 then auto draw.
|
||||
IndexBufferResource* index_buffer;
|
||||
|
||||
// Vertex buffers.
|
||||
struct {
|
||||
uint32_t input_index;
|
||||
VertexBufferResource* buffer;
|
||||
uint32_t stride;
|
||||
uint32_t offset;
|
||||
} vertex_buffers[96];
|
||||
size_t vertex_buffer_count;
|
||||
|
||||
// Texture samplers.
|
||||
struct SamplerInput {
|
||||
uint32_t input_index;
|
||||
TextureResource* texture;
|
||||
SamplerStateResource* sampler_state;
|
||||
};
|
||||
SamplerInput vertex_shader_samplers[32];
|
||||
size_t vertex_shader_sampler_count;
|
||||
SamplerInput pixel_shader_samplers[32];
|
||||
size_t pixel_shader_sampler_count;
|
||||
};
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_DRAW_COMMAND_H_
|
|
@ -12,12 +12,300 @@
|
|||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Constructs the driver over the given guest memory with no address
// translation and no shaders bound.
// The register file needs no explicit clearing here: RegisterFile's own
// constructor zeroes its values.
GraphicsDriver::GraphicsDriver(Memory* memory) :
    memory_(memory), address_translation_(0),
    // PopulateShaders() tests these for null before a draw, so they must not
    // start out uninitialized.
    vertex_shader_(nullptr), pixel_shader_(nullptr) {
}
|
||||
|
||||
// Nothing to release here.
GraphicsDriver::~GraphicsDriver() = default;
|
||||
|
||||
// Fetches the shader stored at the given guest address from the resource cache
// and makes it the current vertex or pixel shader (any type other than vertex
// is treated as pixel). If the shader has not been prepared yet, its
// disassembly is logged. The start argument is not used.
// Returns 0 on success, 1 if the cache could not provide the shader.
// NOTE(review): unlike the buffer/texture paths, address_translation_ is not
// added to the address here - confirm that this is intended.
int GraphicsDriver::LoadShader(XE_GPU_SHADER_TYPE type,
                               uint32_t address, uint32_t length,
                               uint32_t start) {
  MemoryRange memory_range(memory_->Translate(address), address, length);

  ShaderResource* shader = nullptr;
  if (type != XE_GPU_SHADER_TYPE_VERTEX) {
    PixelShaderResource::Info info;
    pixel_shader_ = resource_cache()->FetchPixelShader(memory_range, info);
    if (!pixel_shader_) {
      XELOGE("Unable to fetch pixel shader");
      return 1;
    }
    shader = pixel_shader_;
  } else {
    VertexShaderResource::Info info;
    vertex_shader_ = resource_cache()->FetchVertexShader(memory_range, info);
    if (!vertex_shader_) {
      XELOGE("Unable to fetch vertex shader");
      return 1;
    }
    shader = vertex_shader_;
  }

  if (shader->is_prepared()) {
    return 0;
  }

  // Disassemble.
  const char* source = shader->disasm_src();
  if (!source) {
    source = "<failed to disassemble>";
  }
  XELOGGPU("Set shader %d at %0.8X (%db):\n%s",
           type, address, length, source);
  return 0;
}
|
||||
|
||||
// Fills the draw command from the current register state: resets the fields
// this function does not derive, then populates state, constants, shaders,
// input assembly and samplers in that order, stopping at the first failure.
// Returns 0 on success; 1 when the draw is skipped because the enable mode is
// not 4; otherwise the nonzero code of the step that failed.
int GraphicsDriver::PrepareDraw(DrawCommand& command) {
  SCOPE_profile_cpu_f("gpu");

  // Ignore copies for now.
  uint32_t enable_mode = register_file_[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7;
  if (enable_mode != 4) {
    XELOGW("GPU: ignoring draw with enable mode %d", enable_mode);
    return 1;
  }

  // Reset the things we don't modify so that we have clean state.
  command.prim_type = XE_GPU_PRIMITIVE_TYPE_POINT_LIST;
  command.index_count = 0;
  command.index_buffer = nullptr;

  // Generic stuff.
  command.start_index = register_file_[XE_GPU_REG_VGT_INDX_OFFSET].u32;
  command.base_vertex = 0;

  if (int ret = PopulateState(command)) {
    XELOGE("Unable to prepare draw state");
    return ret;
  }
  if (int ret = PopulateConstantBuffers(command)) {
    XELOGE("Unable to prepare draw constant buffers");
    return ret;
  }
  if (int ret = PopulateShaders(command)) {
    XELOGE("Unable to prepare draw shaders");
    return ret;
  }
  if (int ret = PopulateInputAssembly(command)) {
    XELOGE("Unable to prepare draw input assembly");
    return ret;
  }
  if (int ret = PopulateSamplers(command)) {
    XELOGE("Unable to prepare draw samplers");
    return ret;
  }
  return 0;
}
|
||||
|
||||
// Looks up the index buffer for the given guest range (after applying the
// address translation) in the resource cache and attaches it to the command.
// Returns 0 on success, 1 if the cache could not provide the buffer.
int GraphicsDriver::PrepareDrawIndexBuffer(
    DrawCommand& command,
    uint32_t address, uint32_t length,
    xenos::XE_GPU_ENDIAN endianness,
    IndexFormat format) {
  SCOPE_profile_cpu_f("gpu");

  const uint32_t guest_address = address + address_translation_;
  MemoryRange memory_range(
      memory_->Translate(guest_address), guest_address, length);

  IndexBufferResource::Info info;
  info.endianness = endianness;
  info.format = format;

  auto index_buffer = resource_cache()->FetchIndexBuffer(memory_range, info);
  command.index_buffer = index_buffer;
  return index_buffer ? 0 : 1;
}
|
||||
|
||||
// Currently a no-op: nothing is written to the command. Always returns 0.
int GraphicsDriver::PopulateState(DrawCommand& command) {
  return 0;
}
|
||||
|
||||
// Points the command's constant blocks directly at the register file: no data
// is copied, so the pointers refer to live register storage.
// Always returns 0.
int GraphicsDriver::PopulateConstantBuffers(DrawCommand& command) {
  // Float constants start at SHADER_CONSTANT_000_X.
  command.float4_constants.count = 512;
  command.float4_constants.values =
      &register_file_[XE_GPU_REG_SHADER_CONSTANT_000_X].f32;
  // Loop constants start at SHADER_CONSTANT_LOOP_00.
  command.loop_constants.count = 32;
  command.loop_constants.values =
      &register_file_[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32;
  // Bool constants start at SHADER_CONSTANT_BOOL_000_031.
  command.bool_constants.count = 8;
  command.bool_constants.values =
      &register_file_[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32;
  return 0;
}
|
||||
|
||||
// Ensures both currently bound shaders are prepared (using the SQ_PROGRAM_CNTL
// register value) and stores them on the command. The pixel shader is prepared
// against the vertex shader.
// Returns 0 on success, 1 if a shader is missing or fails to prepare.
int GraphicsDriver::PopulateShaders(DrawCommand& command) {
  SCOPE_profile_cpu_f("gpu");

  if (!vertex_shader_) {
    XELOGE("No vertex shader bound; ignoring");
    return 1;
  }
  if (!pixel_shader_) {
    XELOGE("No pixel shader bound; ignoring");
    return 1;
  }

  xe_gpu_program_cntl_t program_cntl;
  program_cntl.dword_0 = register_file_[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;

  // Prepare() is only invoked for shaders that are not yet prepared; a nonzero
  // result means preparation failed.
  if (!vertex_shader_->is_prepared() &&
      vertex_shader_->Prepare(program_cntl)) {
    XELOGE("Unable to prepare vertex shader");
    return 1;
  }
  if (!pixel_shader_->is_prepared() &&
      pixel_shader_->Prepare(program_cntl, vertex_shader_)) {
    XELOGE("Unable to prepare pixel shader");
    return 1;
  }

  command.vertex_shader = vertex_shader_;
  command.pixel_shader = pixel_shader_;
  return 0;
}
|
||||
|
||||
// Resolves every vertex buffer input declared by the command's vertex shader:
// reads the matching vertex fetch constant from the register file, fetches the
// buffer for that guest range from the resource cache, and records it in
// command.vertex_buffers.
// Requires command.vertex_shader to be set already.
// Returns 0 on success, 1 if a buffer could not be fetched.
int GraphicsDriver::PopulateInputAssembly(DrawCommand& command) {
  SCOPE_profile_cpu_f("gpu");

  const auto& buffer_inputs = command.vertex_shader->buffer_inputs();
  command.vertex_buffer_count = buffer_inputs.count;
  for (size_t n = 0; n < buffer_inputs.count; n++) {
    const auto& desc = buffer_inputs.descs[n];

    // Fetch constants are grouped: each group spans 6 registers and holds
    // three vertex fetch entries, so slot / 3 picks the group and slot % 3 the
    // entry within it.
    int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (desc.fetch_slot / 3) * 6;
    auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&register_file_.values[r]);
    xe_gpu_vertex_fetch_t* fetch = nullptr;
    switch (desc.fetch_slot % 3) {
      case 0:
        fetch = &group->vertex_fetch_0;
        break;
      case 1:
        fetch = &group->vertex_fetch_1;
        break;
      case 2:
        fetch = &group->vertex_fetch_2;
        break;
    }
    XEASSERTNOTNULL(fetch);
    // If this assert doesn't hold, maybe we just abort?
    XEASSERT(fetch->type == 0x3);
    XEASSERTNOTZERO(fetch->size);

    const auto& info = desc.info;

    // The fetch constant stores the address and size in 4-byte units.
    MemoryRange memory_range;
    memory_range.guest_base = (fetch->address << 2) + address_translation_;
    memory_range.host_base = memory_->Translate(memory_range.guest_base);
    memory_range.length = fetch->size * 4;
    // TODO(benvanik): if the memory range is within the command buffer, we
    //     should use a cached transient buffer.

    auto buffer = resource_cache()->FetchVertexBuffer(memory_range, info);
    if (!buffer) {
      XELOGE("Unable to create vertex fetch buffer");
      return 1;
    }

    command.vertex_buffers[n].input_index = desc.input_index;
    command.vertex_buffers[n].buffer = buffer;
    command.vertex_buffers[n].stride = desc.info.stride_words * 4;
    command.vertex_buffers[n].offset = 0;
  }
  return 0;
}
|
||||
|
||||
// Fills the command's vertex and pixel sampler slots from the sampler inputs
// declared by the command's shaders, one PopulateSamplerSet call per input.
// Requires command.vertex_shader and command.pixel_shader to be set already.
// Returns 0 on success, 1 as soon as any sampler set fails to populate.
int GraphicsDriver::PopulateSamplers(DrawCommand& command) {
  SCOPE_profile_cpu_f("gpu");

  // Vertex texture samplers.
  const auto& vs_inputs = command.vertex_shader->sampler_inputs();
  command.vertex_shader_sampler_count = vs_inputs.count;
  for (size_t slot = 0; slot < command.vertex_shader_sampler_count; ++slot) {
    int failed = PopulateSamplerSet(vs_inputs.descs[slot],
                                    command.vertex_shader_samplers[slot]);
    if (failed) {
      return 1;
    }
  }

  // Pixel shader texture sampler.
  const auto& ps_inputs = command.pixel_shader->sampler_inputs();
  command.pixel_shader_sampler_count = ps_inputs.count;
  for (size_t slot = 0; slot < command.pixel_shader_sampler_count; ++slot) {
    int failed = PopulateSamplerSet(ps_inputs.descs[slot],
                                    command.pixel_shader_samplers[slot]);
    if (failed) {
      return 1;
    }
  }

  return 0;
}
|
||||
|
||||
int GraphicsDriver::PopulateSamplerSet(
|
||||
const ShaderResource::SamplerDesc& src_input,
|
||||
DrawCommand::SamplerInput& dst_input) {
|
||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + src_input.fetch_slot * 6;
|
||||
const auto group = (const xe_gpu_fetch_group_t*)®ister_file_.values[r];
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch = group->texture_fetch;
|
||||
if (fetch.type != 0x2) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
dst_input.input_index = src_input.input_index;
|
||||
dst_input.texture = nullptr;
|
||||
dst_input.sampler_state = nullptr;
|
||||
|
||||
TextureResource::Info info;
|
||||
if (!TextureResource::Info::Prepare(fetch, info)) {
|
||||
XELOGE("D3D11: unable to parse texture fetcher info");
|
||||
return 0; // invalid texture used
|
||||
}
|
||||
if (info.format == DXGI_FORMAT_UNKNOWN) {
|
||||
XELOGW("D3D11: unknown texture format %d", info.format);
|
||||
return 0; // invalid texture used
|
||||
}
|
||||
|
||||
// TODO(benvanik): quick validate without refetching intraframe.
|
||||
// Fetch texture from the cache.
|
||||
MemoryRange memory_range;
|
||||
memory_range.guest_base = (fetch.address << 12) + address_translation_;
|
||||
memory_range.host_base = memory_->Translate(memory_range.guest_base);
|
||||
memory_range.length = info.input_length;
|
||||
|
||||
auto texture = resource_cache()->FetchTexture(memory_range, info);
|
||||
if (!texture) {
|
||||
XELOGW("D3D11: unable to fetch texture");
|
||||
return 0; // invalid texture used
|
||||
}
|
||||
|
||||
SamplerStateResource::Info sampler_info;
|
||||
if (!SamplerStateResource::Info::Prepare(fetch,
|
||||
src_input.tex_fetch,
|
||||
sampler_info)) {
|
||||
XELOGW("D3D11: unable to parse sampler info");
|
||||
return 0; // invalid texture used
|
||||
}
|
||||
auto sampler_state = resource_cache()->FetchSamplerState(sampler_info);
|
||||
if (!sampler_state) {
|
||||
XELOGW("D3D11: unable to fetch sampler");
|
||||
return 0; // invalid texture used
|
||||
}
|
||||
|
||||
dst_input.texture = texture;
|
||||
dst_input.sampler_state = sampler_state;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -11,7 +11,9 @@
|
|||
#define XENIA_GPU_GRAPHICS_DRIVER_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/xenos/registers.h>
|
||||
#include <xenia/gpu/draw_command.h>
|
||||
#include <xenia/gpu/register_file.h>
|
||||
#include <xenia/gpu/resource_cache.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
|
@ -24,38 +26,45 @@ public:
|
|||
virtual ~GraphicsDriver();
|
||||
|
||||
Memory* memory() const { return memory_; }
|
||||
xenos::RegisterFile* register_file() { return ®ister_file_; };
|
||||
virtual ResourceCache* resource_cache() const = 0;
|
||||
RegisterFile* register_file() { return ®ister_file_; };
|
||||
void set_address_translation(uint32_t value) {
|
||||
address_translation_ = value;
|
||||
}
|
||||
|
||||
virtual void Initialize() = 0;
|
||||
virtual int Initialize() = 0;
|
||||
|
||||
virtual void InvalidateState(
|
||||
uint32_t mask) = 0;
|
||||
virtual void SetShader(
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
uint32_t address,
|
||||
uint32_t start,
|
||||
uint32_t length) = 0;
|
||||
virtual void DrawIndexBuffer(
|
||||
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
bool index_32bit, uint32_t index_count,
|
||||
uint32_t index_base, uint32_t index_size, uint32_t endianness) = 0;
|
||||
//virtual void DrawIndexImmediate();
|
||||
virtual void DrawIndexAuto(
|
||||
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
uint32_t index_count) = 0;
|
||||
int LoadShader(xenos::XE_GPU_SHADER_TYPE type,
|
||||
uint32_t address, uint32_t length,
|
||||
uint32_t start);
|
||||
|
||||
int PrepareDraw(DrawCommand& command);
|
||||
int PrepareDrawIndexBuffer(DrawCommand& command,
|
||||
uint32_t address, uint32_t length,
|
||||
xenos::XE_GPU_ENDIAN endianness,
|
||||
IndexFormat format);
|
||||
virtual int Draw(const DrawCommand& command) = 0;
|
||||
|
||||
virtual int Resolve() = 0;
|
||||
|
||||
private:
|
||||
int PopulateState(DrawCommand& command);
|
||||
int PopulateConstantBuffers(DrawCommand& command);
|
||||
int PopulateShaders(DrawCommand& command);
|
||||
int PopulateInputAssembly(DrawCommand& command);
|
||||
int PopulateSamplers(DrawCommand& command);
|
||||
int PopulateSamplerSet(const ShaderResource::SamplerDesc& src_input,
|
||||
DrawCommand::SamplerInput& dst_input);
|
||||
|
||||
protected:
|
||||
GraphicsDriver(Memory* memory);
|
||||
|
||||
Memory* memory_;
|
||||
|
||||
xenos::RegisterFile register_file_;
|
||||
RegisterFile register_file_;
|
||||
uint32_t address_translation_;
|
||||
|
||||
VertexShaderResource* vertex_shader_;
|
||||
PixelShaderResource* pixel_shader_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -11,9 +11,10 @@
|
|||
|
||||
#include <xenia/emulator.h>
|
||||
#include <xenia/cpu/processor.h>
|
||||
#include <xenia/gpu/command_processor.h>
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/graphics_driver.h>
|
||||
#include <xenia/gpu/ring_buffer_worker.h>
|
||||
#include <xenia/gpu/xenos/registers.h>
|
||||
#include <xenia/gpu/register_file.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
|
@ -24,10 +25,10 @@ using namespace xe::gpu::xenos;
|
|||
|
||||
GraphicsSystem::GraphicsSystem(Emulator* emulator) :
|
||||
emulator_(emulator), memory_(emulator->memory()),
|
||||
thread_(0), running_(false), driver_(0), worker_(0),
|
||||
thread_(nullptr), running_(false), driver_(nullptr),
|
||||
command_processor_(nullptr),
|
||||
interrupt_callback_(0), interrupt_callback_data_(0),
|
||||
last_interrupt_time_(0), swap_pending_(false),
|
||||
thread_wait_(NULL) {
|
||||
last_interrupt_time_(0), swap_pending_(false), thread_wait_(nullptr) {
|
||||
// Create the run loop used for any windows/etc.
|
||||
// This must be done on the thread we create the driver.
|
||||
run_loop_ = xe_run_loop_create();
|
||||
|
@ -42,7 +43,7 @@ X_STATUS GraphicsSystem::Setup() {
|
|||
processor_ = emulator_->processor();
|
||||
|
||||
// Create worker.
|
||||
worker_ = new RingBufferWorker(this, memory_);
|
||||
command_processor_ = new CommandProcessor(this, memory_);
|
||||
|
||||
// Let the processor know we want register access callbacks.
|
||||
emulator_->memory()->AddMappedRange(
|
||||
|
@ -77,15 +78,18 @@ void GraphicsSystem::ThreadStart() {
|
|||
// Main run loop.
|
||||
while (running_) {
|
||||
// Peek main run loop.
|
||||
if (xe_run_loop_pump(run_loop)) {
|
||||
break;
|
||||
{
|
||||
SCOPE_profile_cpu_i("gpu", "GraphicsSystemRunLoopPump");
|
||||
if (xe_run_loop_pump(run_loop)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!running_) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Pump worker.
|
||||
worker_->Pump();
|
||||
command_processor_->Pump();
|
||||
|
||||
if (!running_) {
|
||||
break;
|
||||
|
@ -107,7 +111,7 @@ void GraphicsSystem::Shutdown() {
|
|||
xe_thread_join(thread_);
|
||||
xe_thread_release(thread_);
|
||||
|
||||
delete worker_;
|
||||
delete command_processor_;
|
||||
|
||||
xe_run_loop_release(run_loop_);
|
||||
}
|
||||
|
@ -125,17 +129,19 @@ void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) {
|
|||
Sleep(0);
|
||||
}
|
||||
XEASSERTNOTNULL(driver_);
|
||||
worker_->Initialize(driver_, ptr, page_count);
|
||||
command_processor_->Initialize(driver_, ptr, page_count);
|
||||
}
|
||||
|
||||
void GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr,
|
||||
uint32_t block_size) {
|
||||
worker_->EnableReadPointerWriteBack(ptr, block_size);
|
||||
command_processor_->EnableReadPointerWriteBack(ptr, block_size);
|
||||
}
|
||||
|
||||
uint64_t GraphicsSystem::ReadRegister(uint64_t addr) {
|
||||
uint32_t r = addr & 0xFFFF;
|
||||
XELOGGPU("ReadRegister(%.4X)", r);
|
||||
if (FLAGS_trace_ring_buffer) {
|
||||
XELOGGPU("ReadRegister(%.4X)", r);
|
||||
}
|
||||
|
||||
RegisterFile* regs = driver_->register_file();
|
||||
|
||||
|
@ -148,31 +154,33 @@ uint64_t GraphicsSystem::ReadRegister(uint64_t addr) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
XEASSERT(r >= 0 && r < kXEGpuRegisterCount);
|
||||
XEASSERT(r >= 0 && r < RegisterFile::kRegisterCount);
|
||||
return regs->values[r].u32;
|
||||
}
|
||||
|
||||
void GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) {
|
||||
uint32_t r = addr & 0xFFFF;
|
||||
XELOGGPU("WriteRegister(%.4X, %.8X)", r, value);
|
||||
if (FLAGS_trace_ring_buffer) {
|
||||
XELOGGPU("WriteRegister(%.4X, %.8X)", r, value);
|
||||
}
|
||||
|
||||
RegisterFile* regs = driver_->register_file();
|
||||
|
||||
switch (r) {
|
||||
case 0x0714: // CP_RB_WPTR
|
||||
worker_->UpdateWritePointer((uint32_t)value);
|
||||
command_processor_->UpdateWritePointer((uint32_t)value);
|
||||
break;
|
||||
default:
|
||||
XELOGW("Unknown GPU register %.4X write: %.8X", r, value);
|
||||
break;
|
||||
}
|
||||
|
||||
XEASSERT(r >= 0 && r < kXEGpuRegisterCount);
|
||||
XEASSERT(r >= 0 && r < RegisterFile::kRegisterCount);
|
||||
regs->values[r].u32 = (uint32_t)value;
|
||||
}
|
||||
|
||||
void GraphicsSystem::MarkVblank() {
|
||||
worker_->increment_counter();
|
||||
command_processor_->increment_counter();
|
||||
}
|
||||
|
||||
void GraphicsSystem::DispatchInterruptCallback(
|
||||
|
|
|
@ -21,8 +21,8 @@ XEDECLARECLASS2(xe, cpu, Processor);
|
|||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
class CommandProcessor;
|
||||
class GraphicsDriver;
|
||||
class RingBufferWorker;
|
||||
|
||||
|
||||
class GraphicsSystem {
|
||||
|
@ -78,7 +78,7 @@ protected:
|
|||
bool running_;
|
||||
|
||||
GraphicsDriver* driver_;
|
||||
RingBufferWorker* worker_;
|
||||
CommandProcessor* command_processor_;
|
||||
|
||||
uint32_t interrupt_callback_;
|
||||
uint32_t interrupt_callback_data_;
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
#include <xenia/gpu/nop/nop_graphics_driver.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/shader_cache.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
|
@ -19,69 +18,19 @@ using namespace xe::gpu::nop;
|
|||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
NopGraphicsDriver::NopGraphicsDriver(Memory* memory) :
|
||||
GraphicsDriver(memory) {
|
||||
shader_cache_ = new ShaderCache();
|
||||
NopGraphicsDriver::NopGraphicsDriver(Memory* memory)
|
||||
: GraphicsDriver(memory), resource_cache_(nullptr) {
|
||||
}
|
||||
|
||||
NopGraphicsDriver::~NopGraphicsDriver() {
|
||||
delete shader_cache_;
|
||||
}
|
||||
|
||||
void NopGraphicsDriver::Initialize() {
|
||||
int NopGraphicsDriver::Initialize() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void NopGraphicsDriver::InvalidateState(
|
||||
uint32_t mask) {
|
||||
if (mask == XE_GPU_INVALIDATE_MASK_ALL) {
|
||||
XELOGGPU("NOP: (invalidate all)");
|
||||
}
|
||||
if (mask & XE_GPU_INVALIDATE_MASK_VERTEX_SHADER) {
|
||||
XELOGGPU("NOP: invalidate vertex shader");
|
||||
}
|
||||
if (mask & XE_GPU_INVALIDATE_MASK_PIXEL_SHADER) {
|
||||
XELOGGPU("NOP: invalidate pixel shader");
|
||||
}
|
||||
}
|
||||
|
||||
void NopGraphicsDriver::SetShader(
|
||||
XE_GPU_SHADER_TYPE type,
|
||||
uint32_t address,
|
||||
uint32_t start,
|
||||
uint32_t length) {
|
||||
// Find or create shader in the cache.
|
||||
uint8_t* p = memory_->Translate(address);
|
||||
Shader* shader = shader_cache_->FindOrCreate(
|
||||
type, p, length);
|
||||
|
||||
// Disassemble.
|
||||
const char* source = shader->disasm_src();
|
||||
if (!source) {
|
||||
source = "<failed to disassemble>";
|
||||
}
|
||||
XELOGGPU("NOP: set shader %d at %0.8X (%db):\n%s",
|
||||
type, address, length, source);
|
||||
}
|
||||
|
||||
void NopGraphicsDriver::DrawIndexBuffer(
|
||||
XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
bool index_32bit, uint32_t index_count,
|
||||
uint32_t index_base, uint32_t index_size, uint32_t endianness) {
|
||||
XELOGGPU("NOP: draw index buffer");
|
||||
}
|
||||
|
||||
void NopGraphicsDriver::DrawIndexAuto(
|
||||
XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
uint32_t index_count) {
|
||||
XELOGGPU("NOP: draw indexed %d (%d indicies)",
|
||||
prim_type, index_count);
|
||||
|
||||
// TODO(benvanik):
|
||||
// program control
|
||||
// context misc
|
||||
// interpolator control
|
||||
// shader constants / bools / integers
|
||||
// fetch constants
|
||||
int NopGraphicsDriver::Draw(const DrawCommand& command) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int NopGraphicsDriver::Resolve() {
|
||||
|
|
|
@ -19,9 +19,6 @@
|
|||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
class ShaderCache;
|
||||
|
||||
namespace nop {
|
||||
|
||||
|
||||
|
@ -30,27 +27,16 @@ public:
|
|||
NopGraphicsDriver(Memory* memory);
|
||||
virtual ~NopGraphicsDriver();
|
||||
|
||||
virtual void Initialize();
|
||||
ResourceCache* resource_cache() const override { return resource_cache_; }
|
||||
|
||||
virtual void InvalidateState(
|
||||
uint32_t mask);
|
||||
virtual void SetShader(
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
uint32_t address,
|
||||
uint32_t start,
|
||||
uint32_t length);
|
||||
virtual void DrawIndexBuffer(
|
||||
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
bool index_32bit, uint32_t index_count,
|
||||
uint32_t index_base, uint32_t index_size, uint32_t endianness);
|
||||
virtual void DrawIndexAuto(
|
||||
xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
|
||||
uint32_t index_count);
|
||||
int Initialize() override;
|
||||
|
||||
virtual int Resolve();
|
||||
int Draw(const DrawCommand& command) override;
|
||||
|
||||
int Resolve() override;
|
||||
|
||||
protected:
|
||||
ShaderCache* shader_cache_;
|
||||
ResourceCache* resource_cache_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -10,8 +10,21 @@
|
|||
#include <xenia/gpu/register_file.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Starts with every register value cleared via xe_zero_struct.
RegisterFile::RegisterFile() {
  const size_t total_bytes = sizeof(values);
  xe_zero_struct(values, total_bytes);
}
|
||||
|
||||
// Maps a register index to its name as listed in register_table.inc.
// Returns a null pointer for indices the table does not define.
const char* RegisterFile::GetRegisterName(uint32_t index) {
  // Each table entry expands to one `case` that returns the stringized name.
#define XE_GPU_REGISTER(index, type, name) case index: return #name;
  switch (index) {
#include <xenia/gpu/xenos/register_table.inc>
    default:
      return nullptr;
  }
#undef XE_GPU_REGISTER
}
|
||||
|
|
|
@ -11,15 +11,36 @@
|
|||
#define XENIA_GPU_REGISTER_FILE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
// One enumerator per entry of register_table.inc, named XE_GPU_REG_<name>
// and valued with the entry's register index.
#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index,
enum Register {
#include <xenia/gpu/xenos/register_table.inc>
};
#undef XE_GPU_REGISTER
|
||||
|
||||
|
||||
class RegisterFile {
|
||||
public:
|
||||
RegisterFile();
|
||||
|
||||
const char* GetRegisterName(uint32_t index);
|
||||
|
||||
static const size_t kRegisterCount = 0x5003;
|
||||
union RegisterValue {
|
||||
uint32_t u32;
|
||||
float f32;
|
||||
};
|
||||
RegisterValue values[kRegisterCount];
|
||||
|
||||
RegisterValue& operator[](Register reg) {
|
||||
return values[reg];
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -15,3 +15,23 @@ using namespace xe;
|
|||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Keeps a copy of the memory range this resource was created from.
HashedResource::HashedResource(const MemoryRange& memory_range)
    : memory_range_(memory_range) {}

HashedResource::~HashedResource() = default;
|
||||
|
||||
PagedResource::PagedResource(const MemoryRange& memory_range)
|
||||
: memory_range_(memory_range), dirtied_(true) {
|
||||
}
|
||||
|
||||
PagedResource::~PagedResource() = default;
|
||||
|
||||
void PagedResource::MarkDirty(uint32_t lo_address, uint32_t hi_address) {
|
||||
dirtied_ = true;
|
||||
}
|
||||
|
||||
// StaticResource adds no state of its own; both special members are
// compiler-generated.
StaticResource::StaticResource() = default;
StaticResource::~StaticResource() = default;
|
||||
|
|
|
@ -18,8 +18,82 @@ namespace xe {
|
|||
namespace gpu {
|
||||
|
||||
|
||||
// Describes a span of memory by host pointer, guest address and length.
struct MemoryRange {
  uint8_t* host_base;
  uint32_t guest_base;
  uint32_t length;

  // Empty range: null host pointer, zero guest base, zero length.
  MemoryRange()
      : host_base(nullptr),
        guest_base(0),
        length(0) {}

  // Fully specified range.
  MemoryRange(uint8_t* _host_base, uint32_t _guest_base, uint32_t _length)
      : host_base(_host_base),
        guest_base(_guest_base),
        length(_length) {}

  // Member-wise copy.
  MemoryRange(const MemoryRange& other)
      : host_base(other.host_base),
        guest_base(other.guest_base),
        length(other.length) {}
};
|
||||
|
||||
|
||||
// Abstract base for every cached resource. Subclasses expose their
// backend object through handle().
class Resource {
public:
  virtual ~Resource() = default;

  // Opaque pointer to the backend object; meaning is subclass-defined.
  virtual void* handle() const = 0;

  // Convenience accessor: handle() reinterpreted as a T*.
  template <typename T>
  T* handle_as() {
    void* raw = handle();
    return reinterpret_cast<T*>(raw);
  }

protected:
  // Only constructible through subclasses.
  Resource() = default;

  // last use/LRU stuff
};
|
||||
|
||||
|
||||
class HashedResource : public Resource {
|
||||
public:
|
||||
~HashedResource() override;
|
||||
|
||||
const MemoryRange& memory_range() const { return memory_range_; }
|
||||
|
||||
protected:
|
||||
HashedResource(const MemoryRange& memory_range);
|
||||
|
||||
MemoryRange memory_range_;
|
||||
// key
|
||||
};
|
||||
|
||||
|
||||
class PagedResource : public Resource {
|
||||
public:
|
||||
~PagedResource() override;
|
||||
|
||||
const MemoryRange& memory_range() const { return memory_range_; }
|
||||
|
||||
template <typename T>
|
||||
bool Equals(const T& info) {
|
||||
return Equals(&info, sizeof(info));
|
||||
}
|
||||
virtual bool Equals(const void* info_ptr, size_t info_length) = 0;
|
||||
|
||||
bool is_dirty() const { return dirtied_; }
|
||||
void MarkDirty(uint32_t lo_address, uint32_t hi_address);
|
||||
|
||||
protected:
|
||||
PagedResource(const MemoryRange& memory_range);
|
||||
|
||||
MemoryRange memory_range_;
|
||||
bool dirtied_;
|
||||
// dirtied pages list
|
||||
};
|
||||
|
||||
|
||||
class StaticResource : public Resource {
|
||||
public:
|
||||
~StaticResource() override;
|
||||
|
||||
protected:
|
||||
StaticResource();
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -15,3 +15,140 @@ using namespace xe;
|
|||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Stores the memory interface used later for page-table access.
ResourceCache::ResourceCache(Memory* memory) : memory_(memory) {}
|
||||
|
||||
// Deletes every resource recorded in resources_, then empties the list.
ResourceCache::~ResourceCache() {
  for (Resource* owned : resources_) {
    delete owned;
  }
  resources_.clear();
}
|
||||
|
||||
// Returns the cached vertex shader for the hashed contents of
// `memory_range`, creating it through CreateVertexShader on a miss.
VertexShaderResource* ResourceCache::FetchVertexShader(
    const MemoryRange& memory_range,
    const VertexShaderResource::Info& info) {
  auto factory = &ResourceCache::CreateVertexShader;
  return FetchHashedResource<VertexShaderResource>(
      memory_range, info, factory);
}
|
||||
|
||||
// Returns the cached pixel shader for the hashed contents of
// `memory_range`, creating it through CreatePixelShader on a miss.
PixelShaderResource* ResourceCache::FetchPixelShader(
    const MemoryRange& memory_range,
    const PixelShaderResource::Info& info) {
  auto factory = &ResourceCache::CreatePixelShader;
  return FetchHashedResource<PixelShaderResource>(
      memory_range, info, factory);
}
|
||||
|
||||
// Looks up (or creates) the texture for `memory_range` and prepares it.
// Returns nullptr when the lookup yields nothing or Prepare() reports
// failure (non-zero).
TextureResource* ResourceCache::FetchTexture(
    const MemoryRange& memory_range,
    const TextureResource::Info& info) {
  TextureResource* texture = FetchPagedResource<TextureResource>(
      memory_range, info, &ResourceCache::CreateTexture);
  if (texture) {
    if (texture->Prepare()) {
      XELOGE("Unable to prepare texture");
      return nullptr;
    }
    return texture;
  }
  return nullptr;
}
|
||||
|
||||
// Returns the sampler state matching `info`, keyed on info.hash().
// A new sampler state is created and prepared on a cache miss.
//
// Returns nullptr when creation fails or Prepare() reports failure
// (non-zero). A resource that fails to prepare is never registered with
// the cache, so it is deleted here rather than leaked.
SamplerStateResource* ResourceCache::FetchSamplerState(
    const SamplerStateResource::Info& info) {
  auto key = info.hash();
  auto it = static_resources_.find(key);
  if (it != static_resources_.end()) {
    return static_cast<SamplerStateResource*>(it->second);
  }

  auto resource = CreateSamplerState(info);
  if (!resource) {
    // Mirror the other Fetch* methods: a failed creation yields nullptr
    // instead of a null dereference below.
    return nullptr;
  }
  if (resource->Prepare()) {
    XELOGE("Unable to prepare sampler state");
    // Not yet in resources_, so the destructor would never free it.
    delete resource;
    return nullptr;
  }

  static_resources_.insert({ key, resource });
  resources_.push_back(resource);
  return resource;
}
|
||||
|
||||
// Looks up (or creates) the index buffer for `memory_range` and prepares
// it. Returns nullptr when the lookup yields nothing or Prepare() reports
// failure (non-zero).
IndexBufferResource* ResourceCache::FetchIndexBuffer(
    const MemoryRange& memory_range,
    const IndexBufferResource::Info& info) {
  IndexBufferResource* buffer = FetchPagedResource<IndexBufferResource>(
      memory_range, info, &ResourceCache::CreateIndexBuffer);
  if (buffer) {
    if (buffer->Prepare()) {
      XELOGE("Unable to prepare index buffer");
      return nullptr;
    }
    return buffer;
  }
  return nullptr;
}
|
||||
|
||||
// Looks up (or creates) the vertex buffer for `memory_range` and prepares
// it. Returns nullptr when the lookup yields nothing or Prepare() reports
// failure (non-zero).
VertexBufferResource* ResourceCache::FetchVertexBuffer(
    const MemoryRange& memory_range,
    const VertexBufferResource::Info& info) {
  VertexBufferResource* buffer = FetchPagedResource<VertexBufferResource>(
      memory_range, info, &ResourceCache::CreateVertexBuffer);
  if (buffer) {
    if (buffer->Prepare()) {
      XELOGE("Unable to prepare vertex buffer");
      return nullptr;
    }
    return buffer;
  }
  return nullptr;
}
|
||||
|
||||
uint64_t ResourceCache::HashRange(const MemoryRange& memory_range) {
|
||||
// We could do something smarter here to potentially early exit.
|
||||
return xe_hash64(memory_range.host_base, memory_range.length);
|
||||
}
|
||||
|
||||
void ResourceCache::SyncRange(uint32_t address, int length) {
|
||||
// Scan the page table in sync with our resource list. This means
|
||||
// we have O(n) complexity for updates, though we could definitely
|
||||
// make this faster/cleaner.
|
||||
// TODO(benvanik): actually do this right.
|
||||
// For now we assume the page table in the range of our resources
|
||||
// will not be changing, which allows us to do a foreach(res) and reload
|
||||
// and then clear the table.
|
||||
|
||||
// total bytes = (512 * 1024 * 1024) / (16 * 1024) = 32768
|
||||
// each byte = 1 page
|
||||
// Walk as qwords so we can clear things up faster.
|
||||
uint64_t* page_table = reinterpret_cast<uint64_t*>(
|
||||
memory_->Translate(memory_->page_table()));
|
||||
int page_size = 16 * 1024; // 16KB pages
|
||||
|
||||
uint32_t lo_address = address % 0x20000000;
|
||||
uint32_t hi_address = lo_address + length;
|
||||
hi_address = (hi_address / page_size) * page_size + page_size;
|
||||
int start_page = lo_address / page_size;
|
||||
int end_page = hi_address / page_size;
|
||||
|
||||
auto it = paged_resources_.upper_bound(lo_address);
|
||||
auto end_it = paged_resources_.lower_bound(hi_address);
|
||||
while (it != end_it) {
|
||||
const auto& memory_range = it->second->memory_range();
|
||||
int lo_page = (memory_range.guest_base % 0x20000000) / page_size;
|
||||
int hi_page = lo_page + (memory_range.length / page_size);
|
||||
for (int i = lo_page / 8; i <= hi_page / 8; ++i) {
|
||||
uint64_t page_flags = page_table[i];
|
||||
if (page_flags) {
|
||||
// Dirty!
|
||||
it->second->MarkDirty(i * 8 * page_size, (i * 8 + 7) * page_size);
|
||||
}
|
||||
}
|
||||
++it;
|
||||
}
|
||||
|
||||
// Reset page table.
|
||||
for (auto i = start_page / 8; i <= end_page / 8; ++i) {
|
||||
page_table[i] = 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,7 +10,14 @@
|
|||
#ifndef XENIA_GPU_RESOURCE_CACHE_H_
|
||||
#define XENIA_GPU_RESOURCE_CACHE_H_
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/buffer_resource.h>
|
||||
#include <xenia/gpu/resource.h>
|
||||
#include <xenia/gpu/sampler_state_resource.h>
|
||||
#include <xenia/gpu/shader_resource.h>
|
||||
#include <xenia/gpu/texture_resource.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
|
@ -20,6 +27,96 @@ namespace gpu {
|
|||
|
||||
class ResourceCache {
|
||||
public:
|
||||
virtual ~ResourceCache();
|
||||
|
||||
VertexShaderResource* FetchVertexShader(
|
||||
const MemoryRange& memory_range,
|
||||
const VertexShaderResource::Info& info);
|
||||
PixelShaderResource* FetchPixelShader(
|
||||
const MemoryRange& memory_range,
|
||||
const PixelShaderResource::Info& info);
|
||||
|
||||
TextureResource* FetchTexture(
|
||||
const MemoryRange& memory_range,
|
||||
const TextureResource::Info& info);
|
||||
SamplerStateResource* FetchSamplerState(
|
||||
const SamplerStateResource::Info& info);
|
||||
|
||||
IndexBufferResource* FetchIndexBuffer(
|
||||
const MemoryRange& memory_range,
|
||||
const IndexBufferResource::Info& info);
|
||||
VertexBufferResource* FetchVertexBuffer(
|
||||
const MemoryRange& memory_range,
|
||||
const VertexBufferResource::Info& info);
|
||||
|
||||
uint64_t HashRange(const MemoryRange& memory_range);
|
||||
|
||||
void SyncRange(uint32_t address, int length);
|
||||
|
||||
protected:
|
||||
ResourceCache(Memory* memory);
|
||||
|
||||
template <typename T, typename V>
|
||||
T* FetchHashedResource(const MemoryRange& memory_range,
|
||||
const typename T::Info& info,
|
||||
const V& factory) {
|
||||
// TODO(benvanik): if there's no way it's changed and it's been checked,
|
||||
// just lookup. This way we don't rehash 100x a frame.
|
||||
auto key = HashRange(memory_range);
|
||||
auto it = hashed_resources_.find(key);
|
||||
if (it != hashed_resources_.end()) {
|
||||
return static_cast<T*>(it->second);
|
||||
}
|
||||
auto resource = (this->*factory)(memory_range, info);
|
||||
hashed_resources_.insert({ key, resource });
|
||||
resources_.push_back(resource);
|
||||
return resource;
|
||||
}
|
||||
|
||||
template <typename T, typename V>
|
||||
T* FetchPagedResource(const MemoryRange& memory_range,
|
||||
const typename T::Info& info,
|
||||
const V& factory) {
|
||||
uint32_t lo_address = memory_range.guest_base % 0x20000000;
|
||||
auto key = uint64_t(lo_address);
|
||||
auto range = paged_resources_.equal_range(key);
|
||||
for (auto it = range.first; it != range.second; ++it) {
|
||||
if (it->second->memory_range().length == memory_range.length &&
|
||||
it->second->Equals(info)) {
|
||||
return static_cast<T*>(it->second);
|
||||
}
|
||||
}
|
||||
auto resource = (this->*factory)(memory_range, info);
|
||||
paged_resources_.insert({ key, resource });
|
||||
resources_.push_back(resource);
|
||||
return resource;
|
||||
}
|
||||
|
||||
virtual VertexShaderResource* CreateVertexShader(
|
||||
const MemoryRange& memory_range,
|
||||
const VertexShaderResource::Info& info) = 0;
|
||||
virtual PixelShaderResource* CreatePixelShader(
|
||||
const MemoryRange& memory_range,
|
||||
const PixelShaderResource::Info& info) = 0;
|
||||
virtual TextureResource* CreateTexture(
|
||||
const MemoryRange& memory_range,
|
||||
const TextureResource::Info& info) = 0;
|
||||
virtual SamplerStateResource* CreateSamplerState(
|
||||
const SamplerStateResource::Info& info) = 0;
|
||||
virtual IndexBufferResource* CreateIndexBuffer(
|
||||
const MemoryRange& memory_range,
|
||||
const IndexBufferResource::Info& info) = 0;
|
||||
virtual VertexBufferResource* CreateVertexBuffer(
|
||||
const MemoryRange& memory_range,
|
||||
const VertexBufferResource::Info& info) = 0;
|
||||
|
||||
private:
|
||||
Memory* memory_;
|
||||
|
||||
std::vector<Resource*> resources_;
|
||||
std::unordered_map<uint64_t, HashedResource*> hashed_resources_;
|
||||
std::unordered_map<uint64_t, StaticResource*> static_resources_;
|
||||
std::multimap<uint64_t, PagedResource*> paged_resources_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,741 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/ring_buffer_worker.h>
|
||||
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/graphics_driver.h>
|
||||
#include <xenia/gpu/graphics_system.h>
|
||||
#include <xenia/gpu/xenos/packets.h>
|
||||
#include <xenia/gpu/xenos/registers.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
#define XETRACERB(fmt, ...) if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__)
|
||||
|
||||
|
||||
// Sets up an idle worker: no driver, no ring buffer configured, all
// read/write indices at zero, and the time base captured from the
// performance counter.
RingBufferWorker::RingBufferWorker(
    GraphicsSystem* graphics_system, Memory* memory)
    : graphics_system_(graphics_system),
      memory_(memory),
      driver_(0) {
  // Ring buffer location and read side.
  primary_buffer_ptr_ = 0;
  primary_buffer_size_ = 0;
  read_ptr_index_ = 0;
  read_ptr_update_freq_ = 0;
  read_ptr_writeback_ptr_ = 0;

  // Write side.
  write_ptr_index_ = 0;
  write_ptr_max_index_ = 0;

  // Event signaled by UpdateWritePointer when the write index changes.
  write_ptr_index_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);

  // Reference point for QueryTime().
  LARGE_INTEGER start_ticks;
  QueryPerformanceCounter(&start_ticks);
  time_base_ = start_ticks.QuadPart;
  counter_ = 0;
}
|
||||
|
||||
// Signals the write-pointer event one last time, then releases its handle.
RingBufferWorker::~RingBufferWorker() {
  HANDLE wake_event = write_ptr_index_event_;
  SetEvent(wake_event);
  CloseHandle(wake_event);
}
|
||||
|
||||
uint64_t RingBufferWorker::QueryTime() {
|
||||
LARGE_INTEGER perf_counter;
|
||||
QueryPerformanceCounter(&perf_counter);
|
||||
return perf_counter.QuadPart - time_base_;
|
||||
}
|
||||
|
||||
// Binds the worker to `driver` and records where the primary ring buffer
// lives. The read index is reset to zero.
void RingBufferWorker::Initialize(GraphicsDriver* driver,
                                  uint32_t ptr, uint32_t page_count) {
  driver_ = driver;
  primary_buffer_ptr_ = ptr;

  // Not sure this is correct, but it's a way to take the page_count back to
  // the number of bytes allocated by the physical alloc.
  const uint32_t size_shift = 0x1C - page_count - 1;
  const uint32_t byte_size = 1 << size_shift;
  primary_buffer_size_ = byte_size;
  read_ptr_index_ = 0;

  // Tell the driver what to use for translation: the buffer address with
  // its low 29 bits cleared.
  const auto translation_base = primary_buffer_ptr_ & ~0x1FFFFFFF;
  driver_->set_address_translation(translation_base);
}
|
||||
|
||||
// Records where the read pointer should be written back and how often.
void RingBufferWorker::EnableReadPointerWriteBack(uint32_t ptr,
                                                  uint32_t block_size) {
  // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C
  // ptr = RB_RPTR_ADDR, pointer to write back the address to.
  // Rebased onto the same upper address bits as the primary buffer.
  const auto segment_base = primary_buffer_ptr_ & ~0x1FFFFFFF;
  read_ptr_writeback_ptr_ = segment_base + ptr;

  // CP_RB_CNTL Ring Buffer Control 0x704
  // block_size = RB_BLKSZ, number of quadwords read between updates of the
  //              read pointer.
  // Stored as 2^block_size, truncated to an integer, divided by four.
  const uint32_t block_units =
      (uint32_t)pow(2.0, (double)block_size);
  read_ptr_update_freq_ = block_units / 4;
}
|
||||
|
||||
// Publishes a new write index, tracks the largest index seen so far, and
// signals the event that Pump() waits on.
void RingBufferWorker::UpdateWritePointer(uint32_t value) {
  if (value > write_ptr_max_index_) {
    write_ptr_max_index_ = value;
  }
  write_ptr_index_ = value;
  SetEvent(write_ptr_index_event_);
}
|
||||
|
||||
void RingBufferWorker::Pump() {
|
||||
uint8_t* p = memory_->membase();
|
||||
|
||||
if (write_ptr_index_ == 0xBAADF00D ||
|
||||
read_ptr_index_ == write_ptr_index_) {
|
||||
// Check if the pointer has moved.
|
||||
// We wait a short bit here to yield time. Since we are also running the
|
||||
// main window display we don't want to pause too long, though.
|
||||
const int wait_time_ms = 1;
|
||||
if (WaitForSingleObject(write_ptr_index_event_,
|
||||
wait_time_ms) == WAIT_TIMEOUT) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Bring local so we don't have to worry about them changing out from under
|
||||
// us.
|
||||
uint32_t write_ptr_index = write_ptr_index_;
|
||||
uint32_t write_ptr_max_index = write_ptr_max_index_;
|
||||
if (read_ptr_index_ == write_ptr_index) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Process the new commands.
|
||||
XETRACERB("Ring buffer thread work");
|
||||
|
||||
// Execute. Note that we handle wraparound transparently.
|
||||
ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
|
||||
read_ptr_index_ = write_ptr_index;
|
||||
|
||||
// TODO(benvanik): use read_ptr_update_freq_ and only issue after moving
|
||||
// that many indices.
|
||||
if (read_ptr_writeback_ptr_) {
|
||||
XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_);
|
||||
}
|
||||
}
|
||||
|
||||
void RingBufferWorker::ExecutePrimaryBuffer(
|
||||
uint32_t start_index, uint32_t end_index) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Adjust pointer base.
|
||||
uint32_t ptr = primary_buffer_ptr_ + start_index * 4;
|
||||
ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);
|
||||
uint32_t end_ptr = primary_buffer_ptr_ + end_index * 4;
|
||||
end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF);
|
||||
|
||||
XETRACERB("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)",
|
||||
ptr, start_index, end_index);
|
||||
|
||||
// Execute commands!
|
||||
PacketArgs args;
|
||||
args.ptr = ptr;
|
||||
args.base_ptr = primary_buffer_ptr_;
|
||||
args.max_address = primary_buffer_ptr_ + primary_buffer_size_ * 4;
|
||||
args.ptr_mask = (primary_buffer_size_ / 4) - 1;
|
||||
uint32_t n = 0;
|
||||
while (args.ptr != end_ptr) {
|
||||
n += ExecutePacket(args);
|
||||
}
|
||||
if (end_index > start_index) {
|
||||
XEASSERT(n == (end_index - start_index));
|
||||
}
|
||||
|
||||
XETRACERB(" ExecutePrimaryBuffer End");
|
||||
}
|
||||
|
||||
void RingBufferWorker::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
|
||||
XETRACERB("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length);
|
||||
|
||||
// Execute commands!
|
||||
PacketArgs args;
|
||||
args.ptr = ptr;
|
||||
args.base_ptr = ptr;
|
||||
args.max_address = ptr + length * 4;
|
||||
args.ptr_mask = 0;
|
||||
for (uint32_t n = 0; n < length;) {
|
||||
n += ExecutePacket(args);
|
||||
XEASSERT(n <= length);
|
||||
}
|
||||
|
||||
XETRACERB(" ExecuteIndirectBuffer End");
|
||||
}
|
||||
|
||||
#define LOG_DATA(count) \
|
||||
for (uint32_t __m = 0; __m < count; __m++) { \
|
||||
XETRACERB("[%.8X] %.8X", \
|
||||
packet_ptr + (1 + __m) * 4, \
|
||||
XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
|
||||
}
|
||||
|
||||
// Moves args.ptr forward by `n` 32-bit words. When a pointer mask is set,
// the word offset from base_ptr is masked so the pointer wraps.
void RingBufferWorker::AdvancePtr(PacketArgs& args, uint32_t n) {
  args.ptr = args.ptr + n * 4;
  if (!args.ptr_mask) {
    return;
  }
  const auto word_offset = (args.ptr - args.base_ptr) / 4;
  args.ptr = args.base_ptr + (word_offset & args.ptr_mask) * 4;
}
|
||||
#define ADVANCE_PTR(n) AdvancePtr(args, n)
|
||||
#define PEEK_PTR() \
|
||||
XEGETUINT32BE(p + args.ptr)
|
||||
#define READ_PTR() \
|
||||
XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1);
|
||||
|
||||
uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
|
||||
uint8_t* p = memory_->membase();
|
||||
RegisterFile* regs = driver_->register_file();
|
||||
|
||||
uint32_t packet_ptr = args.ptr;
|
||||
const uint8_t* packet_base = p + packet_ptr;
|
||||
const uint32_t packet = PEEK_PTR();
|
||||
ADVANCE_PTR(1);
|
||||
const uint32_t packet_type = packet >> 30;
|
||||
if (packet == 0) {
|
||||
XETRACERB("[%.8X] Packet(%.8X): 0?",
|
||||
packet_ptr, packet);
|
||||
return 1;
|
||||
}
|
||||
|
||||
switch (packet_type) {
|
||||
case 0x00:
|
||||
{
|
||||
// Type-0 packet.
|
||||
// Write count registers in sequence to the registers starting at
|
||||
// (base_index << 2).
|
||||
XETRACERB("[%.8X] Packet(%.8X): set registers:",
|
||||
packet_ptr, packet);
|
||||
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
|
||||
uint32_t base_index = (packet & 0x7FFF);
|
||||
uint32_t write_one_reg = (packet >> 15) & 0x1;
|
||||
for (uint32_t m = 0; m < count; m++) {
|
||||
uint32_t reg_data = PEEK_PTR();
|
||||
uint32_t target_index = write_one_reg ? base_index : base_index + m;
|
||||
const char* reg_name = xenos::GetRegisterName(target_index);
|
||||
XETRACERB("[%.8X] %.8X -> %.4X %s",
|
||||
args.ptr,
|
||||
reg_data, target_index, reg_name ? reg_name : "");
|
||||
ADVANCE_PTR(1);
|
||||
WriteRegister(packet_ptr, target_index, reg_data);
|
||||
}
|
||||
return 1 + count;
|
||||
}
|
||||
break;
|
||||
case 0x01:
|
||||
{
|
||||
// Type-1 packet.
|
||||
// Contains two registers of data. Type-0 should be more common.
|
||||
XETRACERB("[%.8X] Packet(%.8X): set registers:",
|
||||
packet_ptr, packet);
|
||||
uint32_t reg_index_1 = packet & 0x7FF;
|
||||
uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
|
||||
uint32_t reg_ptr_1 = args.ptr;
|
||||
uint32_t reg_data_1 = READ_PTR();
|
||||
uint32_t reg_ptr_2 = args.ptr;
|
||||
uint32_t reg_data_2 = READ_PTR();
|
||||
const char* reg_name_1 = xenos::GetRegisterName(reg_index_1);
|
||||
const char* reg_name_2 = xenos::GetRegisterName(reg_index_2);
|
||||
XETRACERB("[%.8X] %.8X -> %.4X %s",
|
||||
reg_ptr_1,
|
||||
reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
|
||||
XETRACERB("[%.8X] %.8X -> %.4X %s",
|
||||
reg_ptr_2,
|
||||
reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
|
||||
WriteRegister(packet_ptr, reg_index_1, reg_data_1);
|
||||
WriteRegister(packet_ptr, reg_index_2, reg_data_2);
|
||||
return 1 + 2;
|
||||
}
|
||||
break;
|
||||
case 0x02:
|
||||
// Type-2 packet.
|
||||
// No-op. Do nothing.
|
||||
XETRACERB("[%.8X] Packet(%.8X): padding",
|
||||
packet_ptr, packet);
|
||||
return 1;
|
||||
case 0x03:
|
||||
{
|
||||
// Type-3 packet.
|
||||
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
|
||||
uint32_t opcode = (packet >> 8) & 0x7F;
|
||||
// & 1 == predicate, maybe?
|
||||
|
||||
switch (opcode) {
|
||||
case PM4_ME_INIT:
|
||||
// initialize CP's micro-engine
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_ME_INIT",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
ADVANCE_PTR(count);
|
||||
break;
|
||||
|
||||
case PM4_NOP:
|
||||
// skip N 32-bit words to get to the next packet
|
||||
// No-op, ignore some data.
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_NOP",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
ADVANCE_PTR(count);
|
||||
break;
|
||||
|
||||
case PM4_INTERRUPT:
|
||||
// generate interrupt from the command stream
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_INTERRUPT",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t cpu_mask = READ_PTR();
|
||||
for (int n = 0; n < 6; n++) {
|
||||
if (cpu_mask & (1 << n)) {
|
||||
graphics_system_->DispatchInterruptCallback(1, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_INDIRECT_BUFFER:
|
||||
// indirect buffer dispatch
|
||||
{
|
||||
uint32_t list_ptr = READ_PTR();
|
||||
uint32_t list_length = READ_PTR();
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
|
||||
packet_ptr, packet, list_ptr, list_length);
|
||||
ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_WAIT_REG_MEM:
|
||||
// wait until a register or memory location is a specific value
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t wait_info = READ_PTR();
|
||||
uint32_t poll_reg_addr = READ_PTR();
|
||||
uint32_t ref = READ_PTR();
|
||||
uint32_t mask = READ_PTR();
|
||||
uint32_t wait = READ_PTR();
|
||||
bool matched = false;
|
||||
do {
|
||||
uint32_t value;
|
||||
if (wait_info & 0x10) {
|
||||
// Memory.
|
||||
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
|
||||
poll_reg_addr &= ~0x3;
|
||||
value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
|
||||
value = GpuSwap(value, endianness);
|
||||
} else {
|
||||
// Register.
|
||||
XEASSERT(poll_reg_addr < kXEGpuRegisterCount);
|
||||
value = regs->values[poll_reg_addr].u32;
|
||||
}
|
||||
switch (wait_info & 0x7) {
|
||||
case 0x0: // Never.
|
||||
matched = false;
|
||||
break;
|
||||
case 0x1: // Less than reference.
|
||||
matched = (value & mask) < ref;
|
||||
break;
|
||||
case 0x2: // Less than or equal to reference.
|
||||
matched = (value & mask) <= ref;
|
||||
break;
|
||||
case 0x3: // Equal to reference.
|
||||
matched = (value & mask) == ref;
|
||||
break;
|
||||
case 0x4: // Not equal to reference.
|
||||
matched = (value & mask) != ref;
|
||||
break;
|
||||
case 0x5: // Greater than or equal to reference.
|
||||
matched = (value & mask) >= ref;
|
||||
break;
|
||||
case 0x6: // Greater than reference.
|
||||
matched = (value & mask) > ref;
|
||||
break;
|
||||
case 0x7: // Always
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
if (!matched) {
|
||||
// Wait.
|
||||
if (wait >= 0x100) {
|
||||
Sleep(wait / 0x100);
|
||||
} else {
|
||||
SwitchToThread();
|
||||
}
|
||||
}
|
||||
} while (!matched);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_REG_RMW:
|
||||
// register read/modify/write
|
||||
// ? (used during shader upload and edram setup)
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_REG_RMW",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t rmw_info = READ_PTR();
|
||||
uint32_t and_mask = READ_PTR();
|
||||
uint32_t or_mask = READ_PTR();
|
||||
uint32_t value = regs->values[rmw_info & 0x1FFF].u32;
|
||||
if ((rmw_info >> 30) & 0x1) {
|
||||
// | reg
|
||||
value |= regs->values[or_mask & 0x1FFF].u32;
|
||||
} else {
|
||||
// | imm
|
||||
value |= or_mask;
|
||||
}
|
||||
if ((rmw_info >> 31) & 0x1) {
|
||||
// & reg
|
||||
value &= regs->values[and_mask & 0x1FFF].u32;
|
||||
} else {
|
||||
// & imm
|
||||
value &= and_mask;
|
||||
}
|
||||
WriteRegister(packet_ptr, rmw_info & 0x1FFF, value);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_COND_WRITE:
|
||||
// conditional write to memory or register
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_COND_WRITE",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t wait_info = READ_PTR();
|
||||
uint32_t poll_reg_addr = READ_PTR();
|
||||
uint32_t ref = READ_PTR();
|
||||
uint32_t mask = READ_PTR();
|
||||
uint32_t write_reg_addr = READ_PTR();
|
||||
uint32_t write_data = READ_PTR();
|
||||
uint32_t value;
|
||||
if (wait_info & 0x10) {
|
||||
// Memory.
|
||||
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
|
||||
poll_reg_addr &= ~0x3;
|
||||
value = XEGETUINT32LE(p + GpuToCpu(packet_ptr, poll_reg_addr));
|
||||
value = GpuSwap(value, endianness);
|
||||
} else {
|
||||
// Register.
|
||||
XEASSERT(poll_reg_addr < kXEGpuRegisterCount);
|
||||
value = regs->values[poll_reg_addr].u32;
|
||||
}
|
||||
bool matched = false;
|
||||
switch (wait_info & 0x7) {
|
||||
case 0x0: // Never.
|
||||
matched = false;
|
||||
break;
|
||||
case 0x1: // Less than reference.
|
||||
matched = (value & mask) < ref;
|
||||
break;
|
||||
case 0x2: // Less than or equal to reference.
|
||||
matched = (value & mask) <= ref;
|
||||
break;
|
||||
case 0x3: // Equal to reference.
|
||||
matched = (value & mask) == ref;
|
||||
break;
|
||||
case 0x4: // Not equal to reference.
|
||||
matched = (value & mask) != ref;
|
||||
break;
|
||||
case 0x5: // Greater than or equal to reference.
|
||||
matched = (value & mask) >= ref;
|
||||
break;
|
||||
case 0x6: // Greater than reference.
|
||||
matched = (value & mask) > ref;
|
||||
break;
|
||||
case 0x7: // Always
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
if (matched) {
|
||||
// Write.
|
||||
if (wait_info & 0x100) {
|
||||
// Memory.
|
||||
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3);
|
||||
write_reg_addr &= ~0x3;
|
||||
write_data = GpuSwap(write_data, endianness);
|
||||
XESETUINT32LE(p + GpuToCpu(packet_ptr, write_reg_addr),
|
||||
write_data);
|
||||
} else {
|
||||
// Register.
|
||||
WriteRegister(packet_ptr, write_reg_addr, write_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_EVENT_WRITE:
|
||||
// generate an event that creates a write to memory when completed
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t initiator = READ_PTR();
|
||||
if (count == 1) {
|
||||
// Just an event flag? Where does this write?
|
||||
} else {
|
||||
// Write to an address.
|
||||
XEASSERTALWAYS();
|
||||
ADVANCE_PTR(count - 1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PM4_EVENT_WRITE_SHD:
|
||||
// generate a VS|PS_done event
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t initiator = READ_PTR();
|
||||
uint32_t address = READ_PTR();
|
||||
uint32_t value = READ_PTR();
|
||||
// Writeback initiator.
|
||||
WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR,
|
||||
initiator & 0x1F);
|
||||
uint32_t data_value;
|
||||
if ((initiator >> 31) & 0x1) {
|
||||
// Write counter (GPU vblank counter?).
|
||||
data_value = counter_;
|
||||
} else {
|
||||
// Write value.
|
||||
data_value = value;
|
||||
}
|
||||
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3);
|
||||
address &= ~0x3;
|
||||
data_value = GpuSwap(data_value, endianness);
|
||||
XESETUINT32LE(p + GpuToCpu(address), data_value);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_DRAW_INDX:
|
||||
// initiate fetch of index buffer and draw
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
// d0 = viz query info
|
||||
uint32_t d0 = READ_PTR();
|
||||
uint32_t d1 = READ_PTR();
|
||||
uint32_t index_count = d1 >> 16;
|
||||
uint32_t prim_type = d1 & 0x3F;
|
||||
uint32_t src_sel = (d1 >> 6) & 0x3;
|
||||
if (src_sel == 0x0) {
|
||||
uint32_t index_base = READ_PTR();
|
||||
uint32_t index_size = READ_PTR();
|
||||
uint32_t endianness = index_size >> 29;
|
||||
index_size &= 0x00FFFFFF;
|
||||
bool index_32bit = (d1 >> 11) & 0x1;
|
||||
index_size *= index_32bit ? 4 : 2;
|
||||
driver_->DrawIndexBuffer(
|
||||
(XE_GPU_PRIMITIVE_TYPE)prim_type,
|
||||
index_32bit, index_count, index_base, index_size, endianness);
|
||||
} else if (src_sel == 0x2) {
|
||||
driver_->DrawIndexAuto(
|
||||
(XE_GPU_PRIMITIVE_TYPE)prim_type,
|
||||
index_count);
|
||||
} else {
|
||||
// Unknown source select.
|
||||
XEASSERTALWAYS();
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PM4_DRAW_INDX_2:
|
||||
// draw using supplied indices in packet
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t d0 = READ_PTR();
|
||||
uint32_t index_count = d0 >> 16;
|
||||
uint32_t prim_type = d0 & 0x3F;
|
||||
uint32_t src_sel = (d0 >> 6) & 0x3;
|
||||
XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex'
|
||||
driver_->DrawIndexAuto(
|
||||
(XE_GPU_PRIMITIVE_TYPE)prim_type,
|
||||
index_count);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_SET_CONSTANT:
|
||||
// load constant into chip and to memory
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_CONSTANT",
|
||||
packet_ptr, packet);
|
||||
// PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
|
||||
// reg - 0x2000
|
||||
uint32_t offset_type = READ_PTR();
|
||||
uint32_t index = offset_type & 0x7FF;
|
||||
uint32_t type = (offset_type >> 16) & 0xFF;
|
||||
switch (type) {
|
||||
case 0x4: // REGISTER
|
||||
index += 0x2000; // registers
|
||||
for (uint32_t n = 0; n < count - 1; n++, index++) {
|
||||
uint32_t data = READ_PTR();
|
||||
const char* reg_name = xenos::GetRegisterName(index);
|
||||
XETRACERB("[%.8X] %.8X -> %.4X %s",
|
||||
packet_ptr + (1 + n) * 4,
|
||||
data, index, reg_name ? reg_name : "");
|
||||
WriteRegister(packet_ptr, index, data);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PM4_LOAD_ALU_CONSTANT:
|
||||
// load constants from memory
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT",
|
||||
packet_ptr, packet);
|
||||
uint32_t address = READ_PTR();
|
||||
address &= 0x3FFFFFFF;
|
||||
uint32_t offset_type = READ_PTR();
|
||||
uint32_t index = offset_type & 0x7FF;
|
||||
uint32_t size = READ_PTR();
|
||||
size &= 0xFFF;
|
||||
index += 0x4000; // alu constants
|
||||
for (uint32_t n = 0; n < size; n++, index++) {
|
||||
uint32_t data = XEGETUINT32BE(
|
||||
p + GpuToCpu(packet_ptr, address + n * 4));
|
||||
const char* reg_name = xenos::GetRegisterName(index);
|
||||
XETRACERB("[%.8X] %.8X -> %.4X %s",
|
||||
packet_ptr,
|
||||
data, index, reg_name ? reg_name : "");
|
||||
WriteRegister(packet_ptr, index, data);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_IM_LOAD:
|
||||
// load sequencer instruction memory (pointer-based)
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_IM_LOAD",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t addr_type = READ_PTR();
|
||||
uint32_t type = addr_type & 0x3;
|
||||
uint32_t addr = addr_type & ~0x3;
|
||||
uint32_t start_size = READ_PTR();
|
||||
uint32_t start = start_size >> 16;
|
||||
uint32_t size = start_size & 0xFFFF; // dwords
|
||||
XEASSERT(start == 0);
|
||||
driver_->SetShader(
|
||||
(XE_GPU_SHADER_TYPE)type,
|
||||
GpuToCpu(packet_ptr, addr),
|
||||
start,
|
||||
size * 4);
|
||||
}
|
||||
break;
|
||||
case PM4_IM_LOAD_IMMEDIATE:
|
||||
// load sequencer instruction memory (code embedded in packet)
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t type = READ_PTR();
|
||||
uint32_t start_size = READ_PTR();
|
||||
uint32_t start = start_size >> 16;
|
||||
uint32_t size = start_size & 0xFFFF; // dwords
|
||||
XEASSERT(start == 0);
|
||||
// TODO(benvanik): figure out if this could wrap.
|
||||
XEASSERT(args.ptr + size * 4 < args.max_address);
|
||||
driver_->SetShader(
|
||||
(XE_GPU_SHADER_TYPE)type,
|
||||
args.ptr,
|
||||
start,
|
||||
size * 4);
|
||||
ADVANCE_PTR(size);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_INVALIDATE_STATE:
|
||||
// selective invalidation of state pointers
|
||||
{
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
uint32_t mask = READ_PTR();
|
||||
driver_->InvalidateState(mask);
|
||||
}
|
||||
break;
|
||||
|
||||
case PM4_SET_BIN_MASK_LO:
|
||||
{
|
||||
uint32_t value = READ_PTR();
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X",
|
||||
packet_ptr, packet, value);
|
||||
}
|
||||
break;
|
||||
case PM4_SET_BIN_MASK_HI:
|
||||
{
|
||||
uint32_t value = READ_PTR();
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X",
|
||||
packet_ptr, packet, value);
|
||||
}
|
||||
break;
|
||||
case PM4_SET_BIN_SELECT_LO:
|
||||
{
|
||||
uint32_t value = READ_PTR();
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X",
|
||||
packet_ptr, packet, value);
|
||||
}
|
||||
break;
|
||||
case PM4_SET_BIN_SELECT_HI:
|
||||
{
|
||||
uint32_t value = READ_PTR();
|
||||
XETRACERB("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X",
|
||||
packet_ptr, packet, value);
|
||||
}
|
||||
break;
|
||||
|
||||
// Ignored packets - useful if breaking on the default handler below.
|
||||
case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000
|
||||
XETRACERB("[%.8X] Packet(%.8X): unknown!",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
ADVANCE_PTR(count);
|
||||
break;
|
||||
|
||||
default:
|
||||
XETRACERB("[%.8X] Packet(%.8X): unknown!",
|
||||
packet_ptr, packet);
|
||||
LOG_DATA(count);
|
||||
ADVANCE_PTR(count);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1 + count;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void RingBufferWorker::WriteRegister(
|
||||
uint32_t packet_ptr, uint32_t index, uint32_t value) {
|
||||
RegisterFile* regs = driver_->register_file();
|
||||
XEASSERT(index < kXEGpuRegisterCount);
|
||||
regs->values[index].u32 = value;
|
||||
|
||||
// Scratch register writeback.
|
||||
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
|
||||
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
|
||||
if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) {
|
||||
// Enabled - write to address.
|
||||
uint8_t* p = memory_->membase();
|
||||
uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
|
||||
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
|
||||
XESETUINT32BE(p + GpuToCpu(primary_buffer_ptr_, mem_addr), value);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,81 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_RING_BUFFER_WORKER_H_
|
||||
#define XENIA_GPU_RING_BUFFER_WORKER_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
#include <xenia/gpu/xenos/registers.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
class GraphicsDriver;
|
||||
class GraphicsSystem;
|
||||
|
||||
class RingBufferWorker {
|
||||
public:
|
||||
RingBufferWorker(GraphicsSystem* graphics_system, Memory* memory);
|
||||
virtual ~RingBufferWorker();
|
||||
|
||||
Memory* memory() const { return memory_; }
|
||||
|
||||
uint64_t QueryTime();
|
||||
uint32_t counter() const { return counter_; }
|
||||
void increment_counter() { counter_++; }
|
||||
|
||||
void Initialize(GraphicsDriver* driver,
|
||||
uint32_t ptr, uint32_t page_count);
|
||||
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
|
||||
|
||||
void UpdateWritePointer(uint32_t value);
|
||||
|
||||
void Pump();
|
||||
|
||||
private:
|
||||
typedef struct {
|
||||
uint32_t ptr;
|
||||
uint32_t base_ptr;
|
||||
uint32_t max_address;
|
||||
uint32_t ptr_mask;
|
||||
} PacketArgs;
|
||||
void AdvancePtr(PacketArgs& args, uint32_t n);
|
||||
void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
|
||||
void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
|
||||
uint32_t ExecutePacket(PacketArgs& args);
|
||||
void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
|
||||
|
||||
protected:
|
||||
Memory* memory_;
|
||||
GraphicsSystem* graphics_system_;
|
||||
GraphicsDriver* driver_;
|
||||
|
||||
uint64_t time_base_;
|
||||
uint32_t counter_;
|
||||
|
||||
uint32_t primary_buffer_ptr_;
|
||||
uint32_t primary_buffer_size_;
|
||||
|
||||
uint32_t read_ptr_index_;
|
||||
uint32_t read_ptr_update_freq_;
|
||||
uint32_t read_ptr_writeback_ptr_;
|
||||
|
||||
HANDLE write_ptr_index_event_;
|
||||
volatile uint32_t write_ptr_index_;
|
||||
volatile uint32_t write_ptr_max_index_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_RING_BUFFER_WORKER_H_
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/sampler_state_resource.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
bool SamplerStateResource::Info::Prepare(
|
||||
const xe_gpu_texture_fetch_t& fetch, const instr_fetch_tex_t& fetch_instr,
|
||||
Info& out_info) {
|
||||
out_info.min_filter = static_cast<instr_tex_filter_t>(
|
||||
fetch_instr.min_filter == 3 ? fetch.min_filter : fetch_instr.min_filter);
|
||||
out_info.mag_filter = static_cast<instr_tex_filter_t>(
|
||||
fetch_instr.mag_filter == 3 ? fetch.mag_filter : fetch_instr.mag_filter);
|
||||
out_info.mip_filter = static_cast<instr_tex_filter_t>(
|
||||
fetch_instr.mip_filter == 3 ? fetch.mip_filter : fetch_instr.mip_filter);
|
||||
out_info.clamp_u = fetch.clamp_x;
|
||||
out_info.clamp_v = fetch.clamp_y;
|
||||
out_info.clamp_w = fetch.clamp_z;
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
|
||||
#define XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
|
||||
|
||||
#include <xenia/gpu/resource.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
class SamplerStateResource : public StaticResource {
|
||||
public:
|
||||
struct Info {
|
||||
xenos::instr_tex_filter_t min_filter;
|
||||
xenos::instr_tex_filter_t mag_filter;
|
||||
xenos::instr_tex_filter_t mip_filter;
|
||||
uint32_t clamp_u;
|
||||
uint32_t clamp_v;
|
||||
uint32_t clamp_w;
|
||||
|
||||
uint64_t hash() const {
|
||||
return hash_combine(0,
|
||||
min_filter, mag_filter, mip_filter,
|
||||
clamp_u, clamp_v, clamp_w);
|
||||
}
|
||||
bool Equals(const Info& other) const {
|
||||
return min_filter == other.min_filter &&
|
||||
mag_filter == other.mag_filter &&
|
||||
mip_filter == other.mip_filter &&
|
||||
clamp_u == other.clamp_u &&
|
||||
clamp_v == other.clamp_v &&
|
||||
clamp_w == other.clamp_w;
|
||||
}
|
||||
|
||||
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
||||
const xenos::instr_fetch_tex_t& fetch_instr,
|
||||
Info& out_info);
|
||||
};
|
||||
|
||||
SamplerStateResource(const Info& info) : info_(info) {}
|
||||
virtual ~SamplerStateResource() = default;
|
||||
|
||||
const Info& info() const { return info_; }
|
||||
|
||||
virtual int Prepare() = 0;
|
||||
|
||||
protected:
|
||||
Info info_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
|
|
@ -1,266 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/shader.h>
|
||||
|
||||
#include <xenia/gpu/xenos/ucode_disassembler.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Builds a shader from |length| bytes of big-endian microcode at |src_ptr|.
// The words are byte-swapped into a private copy, scanned by GatherIO(), and
// then disassembled for debugging.
Shader::Shader(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length,
    uint64_t hash) :
    type_(type), hash_(hash), is_prepared_(false), disasm_src_(nullptr) {
  xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_));
  xe_zero_struct(&vtx_buffer_inputs_, sizeof(vtx_buffer_inputs_));
  xe_zero_struct(&tex_buffer_inputs_, sizeof(tex_buffer_inputs_));

  // Verify.
  dword_count_ = length / 4;
  XEASSERT(dword_count_ <= 512);

  // Copy bytes and swap.
  const size_t copy_bytes = dword_count_ * sizeof(uint32_t);
  dwords_ = (uint32_t*)xe_malloc(copy_bytes);
  const uint8_t* src = src_ptr;
  for (size_t i = 0; i < dword_count_; ++i, src += 4) {
    dwords_[i] = XEGETUINT32BE(src);
  }

  // Gather input/output registers/etc.
  GatherIO();

  // Disassemble, for debugging.
  disasm_src_ = DisassembleShader(type_, dwords_, dword_count_);
}
|
||||
|
||||
// Releases the disassembly text (when present) and the swapped dword copy.
Shader::~Shader() {
  if (disasm_src_ != nullptr) {
    xe_free(disasm_src_);
  }
  xe_free(dwords_);
}
|
||||
|
||||
void Shader::GatherIO() {
|
||||
// Process all execution blocks.
|
||||
instr_cf_t cfa;
|
||||
instr_cf_t cfb;
|
||||
for (int idx = 0; idx < dword_count_; idx += 3) {
|
||||
uint32_t dword_0 = dwords_[idx + 0];
|
||||
uint32_t dword_1 = dwords_[idx + 1];
|
||||
uint32_t dword_2 = dwords_[idx + 2];
|
||||
cfa.dword_0 = dword_0;
|
||||
cfa.dword_1 = dword_1 & 0xFFFF;
|
||||
cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
|
||||
cfb.dword_1 = dword_2 >> 16;
|
||||
if (cfa.opc == ALLOC) {
|
||||
GatherAlloc(&cfa.alloc);
|
||||
} else if (cfa.is_exec()) {
|
||||
GatherExec(&cfa.exec);
|
||||
}
|
||||
if (cfb.opc == ALLOC) {
|
||||
GatherAlloc(&cfb.alloc);
|
||||
} else if (cfb.is_exec()) {
|
||||
GatherExec(&cfb.exec);
|
||||
}
|
||||
if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Shader::GatherAlloc(const instr_cf_alloc_t* cf) {
|
||||
allocs_.push_back(*cf);
|
||||
|
||||
switch (cf->buffer_select) {
|
||||
case SQ_POSITION:
|
||||
// Position (SV_POSITION).
|
||||
alloc_counts_.positions += cf->size + 1;
|
||||
break;
|
||||
case SQ_PARAMETER_PIXEL:
|
||||
// Output to PS (if VS), or frag output (if PS).
|
||||
alloc_counts_.params += cf->size + 1;
|
||||
break;
|
||||
case SQ_MEMORY:
|
||||
// MEMEXPORT?
|
||||
alloc_counts_.memories += cf->size + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Shader::GatherExec(const instr_cf_exec_t* cf) {
|
||||
execs_.push_back(*cf);
|
||||
|
||||
uint32_t sequence = cf->serialize;
|
||||
for (uint32_t i = 0; i < cf->count; i++) {
|
||||
uint32_t alu_off = (cf->address + i);
|
||||
int sync = sequence & 0x2;
|
||||
if (sequence & 0x1) {
|
||||
const instr_fetch_t* fetch =
|
||||
(const instr_fetch_t*)(dwords_ + alu_off * 3);
|
||||
switch (fetch->opc) {
|
||||
case VTX_FETCH:
|
||||
GatherVertexFetch(&fetch->vtx);
|
||||
break;
|
||||
case TEX_FETCH:
|
||||
GatherTextureFetch(&fetch->tex);
|
||||
break;
|
||||
case TEX_GET_BORDER_COLOR_FRAC:
|
||||
case TEX_GET_COMP_TEX_LOD:
|
||||
case TEX_GET_GRADIENTS:
|
||||
case TEX_GET_WEIGHTS:
|
||||
case TEX_SET_TEX_LOD:
|
||||
case TEX_SET_GRADIENTS_H:
|
||||
case TEX_SET_GRADIENTS_V:
|
||||
default:
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// TODO(benvanik): gather registers used, predicate bits used, etc.
|
||||
const instr_alu_t* alu =
|
||||
(const instr_alu_t*)(dwords_ + alu_off * 3);
|
||||
if (alu->vector_write_mask) {
|
||||
if (alu->export_data && alu->vector_dest == 63) {
|
||||
alloc_counts_.point_size = true;
|
||||
}
|
||||
}
|
||||
if (alu->scalar_write_mask || !alu->vector_write_mask) {
|
||||
if (alu->export_data && alu->scalar_dest == 63) {
|
||||
alloc_counts_.point_size = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
sequence >>= 2;
|
||||
}
|
||||
}
|
||||
|
||||
void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
|
||||
// dst_reg/dst_swiz
|
||||
// src_reg/src_swiz
|
||||
// format = a2xx_sq_surfaceformat
|
||||
// format_comp_all ? signed : unsigned
|
||||
// num_format_all ? normalized
|
||||
// stride
|
||||
// offset
|
||||
// const_index/const_index_sel -- fetch constant register
|
||||
// num_format_all ? integer : fraction
|
||||
// exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default
|
||||
|
||||
// Sometimes games have fetches that just produce constants. We can
|
||||
// ignore those.
|
||||
uint32_t dst_swiz = vtx->dst_swiz;
|
||||
bool fetches_any_data = false;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if ((dst_swiz & 0x7) == 4) {
|
||||
// 0.0
|
||||
} else if ((dst_swiz & 0x7) == 5) {
|
||||
// 1.0
|
||||
} else if ((dst_swiz & 0x7) == 6) {
|
||||
// ?
|
||||
} else if ((dst_swiz & 0x7) == 7) {
|
||||
// Previous register value.
|
||||
} else {
|
||||
fetches_any_data = true;
|
||||
break;
|
||||
}
|
||||
dst_swiz >>= 3;
|
||||
}
|
||||
if (!fetches_any_data) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
|
||||
auto& inputs = vtx_buffer_inputs_;
|
||||
vtx_buffer_element_t* el = NULL;
|
||||
for (size_t n = 0; n < inputs.count; n++) {
|
||||
auto& input = inputs.descs[n];
|
||||
if (input.fetch_slot == fetch_slot) {
|
||||
XEASSERT(input.element_count + 1 < XECOUNT(input.elements));
|
||||
// It may not hold that all strides are equal, but I hope it does.
|
||||
XEASSERT(!vtx->stride || input.stride_words == vtx->stride);
|
||||
el = &input.elements[input.element_count++];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!el) {
|
||||
XEASSERTNOTZERO(vtx->stride);
|
||||
XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs));
|
||||
auto& input = inputs.descs[inputs.count++];
|
||||
input.input_index = inputs.count - 1;
|
||||
input.fetch_slot = fetch_slot;
|
||||
input.stride_words = vtx->stride;
|
||||
el = &input.elements[input.element_count++];
|
||||
}
|
||||
|
||||
el->vtx_fetch = *vtx;
|
||||
el->format = vtx->format;
|
||||
el->offset_words = vtx->offset;
|
||||
el->size_words = 0;
|
||||
switch (el->format) {
|
||||
case FMT_8_8_8_8:
|
||||
case FMT_2_10_10_10:
|
||||
case FMT_10_11_11:
|
||||
case FMT_11_11_10:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_16_16:
|
||||
case FMT_16_16_FLOAT:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_16_16_16_16:
|
||||
case FMT_16_16_16_16_FLOAT:
|
||||
el->size_words = 2;
|
||||
break;
|
||||
case FMT_32:
|
||||
case FMT_32_FLOAT:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_32_32:
|
||||
case FMT_32_32_FLOAT:
|
||||
el->size_words = 2;
|
||||
break;
|
||||
case FMT_32_32_32_FLOAT:
|
||||
el->size_words = 3;
|
||||
break;
|
||||
case FMT_32_32_32_32:
|
||||
case FMT_32_32_32_32_FLOAT:
|
||||
el->size_words = 4;
|
||||
break;
|
||||
default:
|
||||
XELOGE("Unknown vertex format: %d", el->format);
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a pointer to the vertex buffer inputs stored in this shader.
const Shader::vtx_buffer_inputs_t* Shader::GetVertexBufferInputs() {
  const vtx_buffer_inputs_t* inputs = &vtx_buffer_inputs_;
  return inputs;
}
|
||||
|
||||
// Appends a texture fetch instruction to the texture input table. The fetch
// is dropped when the fixed-size table is already full.
void Shader::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) {
  // TODO(benvanik): check dest_swiz to see if we are writing anything.

  auto& inputs = tex_buffer_inputs_;
  // The next entry must fit in the table (the last slot is usable).
  XEASSERT(inputs.count < XECOUNT(inputs.descs));
  if (inputs.count >= XECOUNT(inputs.descs)) {
    return;
  }
  auto& input = inputs.descs[inputs.count++];
  input.input_index = inputs.count - 1;
  input.fetch_slot = tex->const_idx & 0xF; // ?
  input.tex_fetch = *tex;

  // Format mangling, size estimation, etc.
}
|
||||
|
||||
// Returns a pointer to the texture inputs stored in this shader.
const Shader::tex_buffer_inputs_t* Shader::GetTextureBufferInputs() {
  const tex_buffer_inputs_t* inputs = &tex_buffer_inputs_;
  return inputs;
}
|
|
@ -1,104 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_SHADER_H_
|
||||
#define XENIA_GPU_SHADER_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
class Shader {
|
||||
public:
|
||||
Shader(xenos::XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length,
|
||||
uint64_t hash);
|
||||
virtual ~Shader();
|
||||
|
||||
xenos::XE_GPU_SHADER_TYPE type() const { return type_; }
|
||||
const uint32_t* dwords() const { return dwords_; }
|
||||
size_t dword_count() const { return dword_count_; }
|
||||
uint64_t hash() const { return hash_; }
|
||||
bool is_prepared() const { return is_prepared_; }
|
||||
|
||||
const char* disasm_src() const { return disasm_src_; }
|
||||
|
||||
typedef struct {
|
||||
xenos::instr_fetch_vtx_t vtx_fetch;
|
||||
uint32_t format;
|
||||
uint32_t offset_words;
|
||||
uint32_t size_words;
|
||||
} vtx_buffer_element_t;
|
||||
typedef struct {
|
||||
uint32_t input_index;
|
||||
uint32_t fetch_slot;
|
||||
uint32_t stride_words;
|
||||
uint32_t element_count;
|
||||
vtx_buffer_element_t elements[16];
|
||||
} vtx_buffer_desc_t;
|
||||
typedef struct {
|
||||
uint32_t count;
|
||||
vtx_buffer_desc_t descs[16];
|
||||
} vtx_buffer_inputs_t;
|
||||
const vtx_buffer_inputs_t* GetVertexBufferInputs();
|
||||
|
||||
typedef struct {
|
||||
uint32_t input_index;
|
||||
uint32_t fetch_slot;
|
||||
xenos::instr_fetch_tex_t tex_fetch;
|
||||
uint32_t format;
|
||||
} tex_buffer_desc_t;
|
||||
typedef struct {
|
||||
uint32_t count;
|
||||
tex_buffer_desc_t descs[32];
|
||||
} tex_buffer_inputs_t;
|
||||
const tex_buffer_inputs_t* GetTextureBufferInputs();
|
||||
|
||||
typedef struct {
|
||||
uint32_t positions;
|
||||
uint32_t params;
|
||||
uint32_t memories;
|
||||
bool point_size;
|
||||
} alloc_counts_t;
|
||||
const alloc_counts_t& alloc_counts() const { return alloc_counts_; }
|
||||
|
||||
private:
|
||||
void GatherIO();
|
||||
void GatherAlloc(const xenos::instr_cf_alloc_t* cf);
|
||||
void GatherExec(const xenos::instr_cf_exec_t* cf);
|
||||
void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx);
|
||||
void GatherTextureFetch(const xenos::instr_fetch_tex_t* tex);
|
||||
|
||||
protected:
|
||||
xenos::XE_GPU_SHADER_TYPE type_;
|
||||
uint32_t* dwords_;
|
||||
size_t dword_count_;
|
||||
uint64_t hash_;
|
||||
bool is_prepared_;
|
||||
|
||||
char* disasm_src_;
|
||||
|
||||
alloc_counts_t alloc_counts_;
|
||||
std::vector<xenos::instr_cf_exec_t> execs_;
|
||||
std::vector<xenos::instr_cf_alloc_t> allocs_;
|
||||
vtx_buffer_inputs_t vtx_buffer_inputs_;
|
||||
tex_buffer_inputs_t tex_buffer_inputs_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_SHADER_H_
|
|
@ -1,80 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/shader_cache.h>
|
||||
|
||||
#include <xenia/gpu/shader.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Creates an empty cache.
ShaderCache::ShaderCache() {}
|
||||
|
||||
// Deletes every cached shader via Clear().
ShaderCache::~ShaderCache() {
  this->Clear();
}
|
||||
|
||||
// Creates a shader for the given microcode and registers it in the cache
// under the hash of its bytes. The cache owns the returned shader.
//
// When an entry with the same hash already exists that entry is returned:
// unordered_map::insert would not store a second shader under the same key,
// so a newly created one would be tracked by nothing and never be deleted
// by Clear().
Shader* ShaderCache::Create(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length) {
  const uint64_t hash = Hash(src_ptr, length);
  auto it = map_.find(hash);
  if (it != map_.end()) {
    return it->second;
  }
  Shader* shader = CreateCore(type, src_ptr, length, hash);
  map_.insert({ hash, shader });
  return shader;
}
|
||||
|
||||
// Allocates a plain Shader for the given microcode. Declared virtual in the
// class so subclasses can supply their own shader type.
Shader* ShaderCache::CreateCore(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length,
    uint64_t hash) {
  Shader* created = new Shader(type, src_ptr, length, hash);
  return created;
}
|
||||
|
||||
// Looks up a cached shader by the hash of its microcode bytes. Returns null
// when nothing is cached for that hash. |type| is not part of the key.
Shader* ShaderCache::Find(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length) {
  const uint64_t key = Hash(src_ptr, length);
  auto entry = map_.find(key);
  if (entry == map_.end()) {
    return NULL;
  }
  return entry->second;
}
|
||||
|
||||
// Returns the cached shader for the given microcode, creating and caching a
// new one through CreateCore() when no entry exists for its hash.
Shader* ShaderCache::FindOrCreate(
    XE_GPU_SHADER_TYPE type,
    const uint8_t* src_ptr, size_t length) {
  SCOPE_profile_cpu_f("gpu");

  const uint64_t key = Hash(src_ptr, length);
  auto existing = map_.find(key);
  if (existing == map_.end()) {
    // Miss: build the shader and remember it.
    Shader* created = CreateCore(type, src_ptr, length, key);
    map_.insert({ key, created });
    return created;
  }
  return existing->second;
}
|
||||
|
||||
// Deletes every cached shader and empties the map.
void ShaderCache::Clear() {
  for (auto& entry : map_) {
    delete entry.second;
  }
  map_.clear();
}
|
||||
|
||||
uint64_t ShaderCache::Hash(const uint8_t* src_ptr, size_t length) {
|
||||
return xe_hash64(src_ptr, length, 0);
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_SHADER_CACHE_H_
|
||||
#define XENIA_GPU_SHADER_CACHE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/shader.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
class ShaderCache {
|
||||
public:
|
||||
ShaderCache();
|
||||
virtual ~ShaderCache();
|
||||
|
||||
Shader* Create(
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
Shader* Find(
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
Shader* FindOrCreate(
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length);
|
||||
|
||||
void Clear();
|
||||
|
||||
private:
|
||||
uint64_t Hash(const uint8_t* src_ptr, size_t length);
|
||||
|
||||
std::unordered_map<uint64_t, Shader*> map_;
|
||||
|
||||
protected:
|
||||
virtual Shader* CreateCore(
|
||||
xenos::XE_GPU_SHADER_TYPE type,
|
||||
const uint8_t* src_ptr, size_t length,
|
||||
uint64_t hash);
|
||||
};
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_SHADER_CACHE_H_
|
|
@ -9,9 +9,267 @@
|
|||
|
||||
#include <xenia/gpu/shader_resource.h>
|
||||
|
||||
#include <xenia/gpu/xenos/ucode_disassembler.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Builds a shader resource from the big-endian microcode in |memory_range|.
// The words are byte-swapped into a private copy, disassembled for
// debugging, and then scanned by GatherIO().
ShaderResource::ShaderResource(const MemoryRange& memory_range,
                               const Info& info,
                               xenos::XE_GPU_SHADER_TYPE type)
    : HashedResource(memory_range),
      info_(info), type_(type), is_prepared_(false), disasm_src_(nullptr) {
  xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_));
  xe_zero_struct(&buffer_inputs_, sizeof(buffer_inputs_));
  xe_zero_struct(&sampler_inputs_, sizeof(sampler_inputs_));

  // Verify.
  dword_count_ = memory_range.length / 4;
  XEASSERT(dword_count_ <= 512);

  // Copy bytes and swap.
  const size_t copy_bytes = dword_count_ * sizeof(uint32_t);
  dwords_ = (uint32_t*)xe_malloc(copy_bytes);
  for (uint32_t word = 0; word < dword_count_; ++word) {
    const uint32_t byte_offset = word * 4;
    dwords_[word] = XEGETUINT32BE(memory_range.host_base + byte_offset);
  }

  // Disassemble, for debugging.
  disasm_src_ = DisassembleShader(type_, dwords_, dword_count_);

  // Gather input/output registers/etc.
  GatherIO();
}
|
||||
|
||||
// Frees the disassembly text and the swapped dword copy, in that order.
ShaderResource::~ShaderResource() {
  // Debug disassembly produced in the constructor.
  xe_free(disasm_src_);
  // Byte-swapped microcode copy.
  xe_free(dwords_);
}
|
||||
|
||||
void ShaderResource::GatherIO() {
|
||||
// Process all execution blocks.
|
||||
instr_cf_t cfa;
|
||||
instr_cf_t cfb;
|
||||
for (int idx = 0; idx < dword_count_; idx += 3) {
|
||||
uint32_t dword_0 = dwords_[idx + 0];
|
||||
uint32_t dword_1 = dwords_[idx + 1];
|
||||
uint32_t dword_2 = dwords_[idx + 2];
|
||||
cfa.dword_0 = dword_0;
|
||||
cfa.dword_1 = dword_1 & 0xFFFF;
|
||||
cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
|
||||
cfb.dword_1 = dword_2 >> 16;
|
||||
if (cfa.opc == ALLOC) {
|
||||
GatherAlloc(&cfa.alloc);
|
||||
} else if (cfa.is_exec()) {
|
||||
GatherExec(&cfa.exec);
|
||||
}
|
||||
if (cfb.opc == ALLOC) {
|
||||
GatherAlloc(&cfb.alloc);
|
||||
} else if (cfb.is_exec()) {
|
||||
GatherExec(&cfb.exec);
|
||||
}
|
||||
if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderResource::GatherAlloc(const instr_cf_alloc_t* cf) {
|
||||
allocs_.push_back(*cf);
|
||||
|
||||
switch (cf->buffer_select) {
|
||||
case SQ_POSITION:
|
||||
// Position (SV_POSITION).
|
||||
alloc_counts_.positions += cf->size + 1;
|
||||
break;
|
||||
case SQ_PARAMETER_PIXEL:
|
||||
// Output to PS (if VS), or frag output (if PS).
|
||||
alloc_counts_.params += cf->size + 1;
|
||||
break;
|
||||
case SQ_MEMORY:
|
||||
// MEMEXPORT?
|
||||
alloc_counts_.memories += cf->size + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderResource::GatherExec(const instr_cf_exec_t* cf) {
|
||||
execs_.push_back(*cf);
|
||||
|
||||
uint32_t sequence = cf->serialize;
|
||||
for (uint32_t i = 0; i < cf->count; i++) {
|
||||
uint32_t alu_off = (cf->address + i);
|
||||
int sync = sequence & 0x2;
|
||||
if (sequence & 0x1) {
|
||||
const instr_fetch_t* fetch =
|
||||
(const instr_fetch_t*)(dwords_ + alu_off * 3);
|
||||
switch (fetch->opc) {
|
||||
case VTX_FETCH:
|
||||
GatherVertexFetch(&fetch->vtx);
|
||||
break;
|
||||
case TEX_FETCH:
|
||||
GatherTextureFetch(&fetch->tex);
|
||||
break;
|
||||
case TEX_GET_BORDER_COLOR_FRAC:
|
||||
case TEX_GET_COMP_TEX_LOD:
|
||||
case TEX_GET_GRADIENTS:
|
||||
case TEX_GET_WEIGHTS:
|
||||
case TEX_SET_TEX_LOD:
|
||||
case TEX_SET_GRADIENTS_H:
|
||||
case TEX_SET_GRADIENTS_V:
|
||||
default:
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// TODO(benvanik): gather registers used, predicate bits used, etc.
|
||||
const instr_alu_t* alu =
|
||||
(const instr_alu_t*)(dwords_ + alu_off * 3);
|
||||
if (alu->vector_write_mask) {
|
||||
if (alu->export_data && alu->vector_dest == 63) {
|
||||
alloc_counts_.point_size = true;
|
||||
}
|
||||
}
|
||||
if (alu->scalar_write_mask || !alu->vector_write_mask) {
|
||||
if (alu->export_data && alu->scalar_dest == 63) {
|
||||
alloc_counts_.point_size = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
sequence >>= 2;
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderResource::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
|
||||
XEASSERT(type_ == XE_GPU_SHADER_TYPE_VERTEX);
|
||||
|
||||
// dst_reg/dst_swiz
|
||||
// src_reg/src_swiz
|
||||
// format = a2xx_sq_surfaceformat
|
||||
// format_comp_all ? signed : unsigned
|
||||
// num_format_all ? normalized
|
||||
// stride
|
||||
// offset
|
||||
// const_index/const_index_sel -- fetch constant register
|
||||
// num_format_all ? integer : fraction
|
||||
// exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default
|
||||
|
||||
// Sometimes games have fetches that just produce constants. We can
|
||||
// ignore those.
|
||||
uint32_t dst_swiz = vtx->dst_swiz;
|
||||
bool fetches_any_data = false;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if ((dst_swiz & 0x7) == 4) {
|
||||
// 0.0
|
||||
} else if ((dst_swiz & 0x7) == 5) {
|
||||
// 1.0
|
||||
} else if ((dst_swiz & 0x7) == 6) {
|
||||
// ?
|
||||
} else if ((dst_swiz & 0x7) == 7) {
|
||||
// Previous register value.
|
||||
} else {
|
||||
fetches_any_data = true;
|
||||
break;
|
||||
}
|
||||
dst_swiz >>= 3;
|
||||
}
|
||||
if (!fetches_any_data) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
|
||||
auto& inputs = buffer_inputs_;
|
||||
VertexBufferResource::DeclElement* el = nullptr;
|
||||
for (size_t n = 0; n < inputs.count; n++) {
|
||||
auto& desc = inputs.descs[n];
|
||||
auto& info = desc.info;
|
||||
if (desc.fetch_slot == fetch_slot) {
|
||||
XEASSERT(info.element_count + 1 < XECOUNT(info.elements));
|
||||
// It may not hold that all strides are equal, but I hope it does.
|
||||
XEASSERT(!vtx->stride || info.stride_words == vtx->stride);
|
||||
el = &info.elements[info.element_count++];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!el) {
|
||||
XEASSERTNOTZERO(vtx->stride);
|
||||
XEASSERT(inputs.count + 1 < XECOUNT(inputs.descs));
|
||||
auto& desc = inputs.descs[inputs.count++];
|
||||
desc.input_index = inputs.count - 1;
|
||||
desc.fetch_slot = fetch_slot;
|
||||
desc.info.stride_words = vtx->stride;
|
||||
el = &desc.info.elements[desc.info.element_count++];
|
||||
}
|
||||
|
||||
el->vtx_fetch = *vtx;
|
||||
el->format = vtx->format;
|
||||
el->is_normalized = vtx->num_format_all == 0;
|
||||
el->is_signed = vtx->format_comp_all == 1;
|
||||
el->offset_words = vtx->offset;
|
||||
el->size_words = 0;
|
||||
switch (el->format) {
|
||||
case FMT_8_8_8_8:
|
||||
case FMT_2_10_10_10:
|
||||
case FMT_10_11_11:
|
||||
case FMT_11_11_10:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_16_16:
|
||||
case FMT_16_16_FLOAT:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_16_16_16_16:
|
||||
case FMT_16_16_16_16_FLOAT:
|
||||
el->size_words = 2;
|
||||
break;
|
||||
case FMT_32:
|
||||
case FMT_32_FLOAT:
|
||||
el->size_words = 1;
|
||||
break;
|
||||
case FMT_32_32:
|
||||
case FMT_32_32_FLOAT:
|
||||
el->size_words = 2;
|
||||
break;
|
||||
case FMT_32_32_32_FLOAT:
|
||||
el->size_words = 3;
|
||||
break;
|
||||
case FMT_32_32_32_32:
|
||||
case FMT_32_32_32_32_FLOAT:
|
||||
el->size_words = 4;
|
||||
break;
|
||||
default:
|
||||
XELOGE("Unknown vertex format: %d", el->format);
|
||||
XEASSERTALWAYS();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Appends a sampler input for a TEX_FETCH instruction. Every texture fetch
// gets its own entry; no de-duplication by fetch slot is done. The entry's
// format field is not written here. If the fixed-size descriptor array is
// already full the fetch is logged and dropped instead of overflowing it.
void ShaderResource::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) {
  // TODO(benvanik): check dest_swiz to see if we are writing anything.

  auto& inputs = sampler_inputs_;
  if (inputs.count >= XECOUNT(inputs.descs)) {
    XELOGE("Too many sampler inputs; dropping fetch constant %u",
           static_cast<uint32_t>(tex->const_idx));
    XEASSERTALWAYS();
    return;
  }

  auto& input = inputs.descs[inputs.count];
  input.input_index = inputs.count++;
  input.fetch_slot = tex->const_idx & 0xF;  // ?
  input.tex_fetch = *tex;

  // Format mangling, size estimation, etc.
}
|
||||
|
||||
// Forwards to ShaderResource, tagging the shader as a vertex shader.
VertexShaderResource::VertexShaderResource(const MemoryRange& memory_range,
                                           const Info& info)
    : ShaderResource(memory_range, info, XE_GPU_SHADER_TYPE_VERTEX) {}
|
||||
|
||||
// Defaulted; the ShaderResource destructor releases the shader's buffers.
VertexShaderResource::~VertexShaderResource() = default;
|
||||
|
||||
// Forwards to ShaderResource, tagging the shader as a pixel shader.
PixelShaderResource::PixelShaderResource(const MemoryRange& memory_range,
                                         const Info& info)
    : ShaderResource(memory_range, info, XE_GPU_SHADER_TYPE_PIXEL) {}
|
||||
|
||||
// Defaulted; the ShaderResource destructor releases the shader's buffers.
PixelShaderResource::~PixelShaderResource() = default;
|
||||
|
|
|
@ -10,7 +10,9 @@
|
|||
#ifndef XENIA_GPU_SHADER_RESOURCE_H_
|
||||
#define XENIA_GPU_SHADER_RESOURCE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/buffer_resource.h>
|
||||
#include <xenia/gpu/resource.h>
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
|
@ -18,8 +20,104 @@ namespace xe {
|
|||
namespace gpu {
|
||||
|
||||
|
||||
class ShaderResource : public Resource {
|
||||
class ShaderResource : public HashedResource {
|
||||
public:
|
||||
struct Info {
|
||||
// type, etc?
|
||||
};
|
||||
|
||||
~ShaderResource() override;
|
||||
|
||||
const Info& info() const { return info_; }
|
||||
xenos::XE_GPU_SHADER_TYPE type() const { return type_; }
|
||||
const uint32_t* dwords() const { return dwords_; }
|
||||
const size_t dword_count() const { return dword_count_; }
|
||||
|
||||
bool is_prepared() const { return is_prepared_; }
|
||||
const char* disasm_src() const { return disasm_src_; }
|
||||
|
||||
struct BufferDesc {
|
||||
uint32_t input_index;
|
||||
uint32_t fetch_slot;
|
||||
VertexBufferResource::Info info;
|
||||
// xenos::instr_fetch_vtx_t vtx_fetch; for each el
|
||||
};
|
||||
struct BufferInputs {
|
||||
uint32_t count;
|
||||
BufferDesc descs[32];
|
||||
};
|
||||
const BufferInputs& buffer_inputs() { return buffer_inputs_; }
|
||||
|
||||
struct SamplerDesc {
|
||||
uint32_t input_index;
|
||||
uint32_t fetch_slot;
|
||||
uint32_t format;
|
||||
xenos::instr_fetch_tex_t tex_fetch;
|
||||
};
|
||||
struct SamplerInputs {
|
||||
uint32_t count;
|
||||
SamplerDesc descs[32];
|
||||
};
|
||||
const SamplerInputs& sampler_inputs() { return sampler_inputs_; }
|
||||
|
||||
struct AllocCounts {
|
||||
uint32_t positions;
|
||||
uint32_t params;
|
||||
uint32_t memories;
|
||||
bool point_size;
|
||||
};
|
||||
const AllocCounts& alloc_counts() const { return alloc_counts_; }
|
||||
const std::vector<xenos::instr_cf_exec_t>& execs() const { return execs_; }
|
||||
const std::vector<xenos::instr_cf_alloc_t>& allocs() const { return allocs_; }
|
||||
|
||||
private:
|
||||
void GatherIO();
|
||||
void GatherAlloc(const xenos::instr_cf_alloc_t* cf);
|
||||
void GatherExec(const xenos::instr_cf_exec_t* cf);
|
||||
void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx);
|
||||
void GatherTextureFetch(const xenos::instr_fetch_tex_t* tex);
|
||||
|
||||
protected:
|
||||
ShaderResource(const MemoryRange& memory_range,
|
||||
const Info& info,
|
||||
xenos::XE_GPU_SHADER_TYPE type);
|
||||
|
||||
Info info_;
|
||||
xenos::XE_GPU_SHADER_TYPE type_;
|
||||
size_t dword_count_;
|
||||
uint32_t* dwords_;
|
||||
char* disasm_src_;
|
||||
|
||||
AllocCounts alloc_counts_;
|
||||
std::vector<xenos::instr_cf_exec_t> execs_;
|
||||
std::vector<xenos::instr_cf_alloc_t> allocs_;
|
||||
BufferInputs buffer_inputs_;
|
||||
SamplerInputs sampler_inputs_;
|
||||
|
||||
bool is_prepared_;
|
||||
};
|
||||
|
||||
|
||||
class VertexShaderResource : public ShaderResource {
|
||||
public:
|
||||
VertexShaderResource(const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~VertexShaderResource() override;
|
||||
|
||||
// buffer_inputs() matching VertexBufferResource::Info
|
||||
|
||||
virtual int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl) = 0;
|
||||
};
|
||||
|
||||
|
||||
class PixelShaderResource : public ShaderResource {
|
||||
public:
|
||||
PixelShaderResource(const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~PixelShaderResource() override;
|
||||
|
||||
virtual int Prepare(const xenos::xe_gpu_program_cntl_t& program_cntl,
|
||||
VertexShaderResource* vertex_shader) = 0;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
'buffer_resource.h',
|
||||
'command_processor.cc',
|
||||
'command_processor.h',
|
||||
'draw_command.cc',
|
||||
'draw_command.h',
|
||||
'gpu-private.h',
|
||||
'gpu.cc',
|
||||
'gpu.h',
|
||||
|
@ -18,6 +20,8 @@
|
|||
'resource.h',
|
||||
'resource_cache.cc',
|
||||
'resource_cache.h',
|
||||
'sampler_state_resource.cc',
|
||||
'sampler_state_resource.h',
|
||||
'shader_resource.cc',
|
||||
'shader_resource.h',
|
||||
'texture_resource.cc',
|
||||
|
|
|
@ -1,369 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/texture.h>
|
||||
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
// TODO(benvanik): replace DXGI constants with xenia constants.
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// Stores the guest address of the texture and the host pointer it maps to.
// No views are created until Fetch() is called.
Texture::Texture(uint32_t address, const uint8_t* host_address)
    : address_(address),
      host_address_(host_address) {}
|
||||
|
||||
// Deletes every view created by Fetch(); the texture owns them.
Texture::~Texture() {
  for (auto* view : views_) {
    delete view;
  }
  views_.clear();
}
|
||||
|
||||
// Returns the view matching |fetch|, creating it on first use.
//
// An existing view is reused when its stored fetch constant is bytewise equal
// to |fetch|. If its stored hash differs from the current one, FetchDirty()
// is asked to refresh it, and nullptr is returned if that fails. When no view
// matches, FetchNew() creates one, which is then owned by this texture.
// Returns nullptr if FetchNew() fails.
TextureView* Texture::Fetch(const xenos::xe_gpu_texture_fetch_t& fetch) {
  // TODO(benvanik): compute length for hash check.
  // NOTE(review): length is never changed from 0 below, so the hash is taken
  // over a zero-length range -- presumably the same value every time, which
  // would mean the dirty path is never taken; confirm.
  size_t length = 0;
  switch (fetch.dimension) {
    case DIMENSION_1D:
    case DIMENSION_2D:
    case DIMENSION_3D:
    case DIMENSION_CUBE:
      break;
  }
  const uint64_t hash = xe_hash64(host_address_, length);

  for (auto* view : views_) {
    if (memcmp(&view->fetch, &fetch, sizeof(fetch)) != 0) {
      continue;
    }
    if (view->hash == hash) {
      return view;
    }
    // Contents changed since the view was built. The stored hash is not
    // updated here.
    return FetchDirty(view, fetch) ? view : nullptr;
  }

  TextureView* created = FetchNew(fetch);
  if (created) {
    created->hash = hash;
    views_.push_back(created);
  }
  return created;
}
|
||||
|
||||
// Fills |view| from a texture fetch constant: owning texture, a copy of the
// fetch constant, dimensions, and the DXGI format with its block size and
// texel pitch. For 2D textures the derived size information is computed too.
// Returns false (leaving view->format as DXGI_FORMAT_UNKNOWN) when the guest
// format has no mapping here; a known format with an unexpected swizzle only
// logs a warning and uses a fallback DXGI format.
bool Texture::FillViewInfo(TextureView* view,
                           const xenos::xe_gpu_texture_fetch_t& fetch) {
  // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
  // a2xx_sq_surfaceformat

  view->texture = this;
  view->fetch = fetch;

  // Raw size fields, copied as stored in the fetch constant.
  view->dimensions = fetch.dimension;
  switch (fetch.dimension) {
    case DIMENSION_1D:
      view->width = fetch.size_1d.width;
      break;
    case DIMENSION_2D:
      view->width = fetch.size_2d.width;
      view->height = fetch.size_2d.height;
      break;
    case DIMENSION_3D:
      view->width = fetch.size_3d.width;
      view->height = fetch.size_3d.height;
      view->depth = fetch.size_3d.depth;
      break;
    case DIMENSION_CUBE:
      view->width = fetch.size_stack.width;
      view->height = fetch.size_stack.height;
      view->depth = fetch.size_stack.depth;
      break;
  }

  // Start from "unsupported"; the format switch overrides these.
  view->format = DXGI_FORMAT_UNKNOWN;
  view->block_size = 0;
  view->texel_pitch = 0;
  view->is_compressed = false;

  switch (fetch.format) {
    case FMT_8:
      if (fetch.swizzle == XE_GPU_SWIZZLE_RRR1) {
        view->format = DXGI_FORMAT_R8_UNORM;
      } else {
        if (fetch.swizzle != XE_GPU_SWIZZLE_000R) {
          XELOGW("D3D11: unhandled swizzle for FMT_8");
        }
        view->format = DXGI_FORMAT_A8_UNORM;
      }
      view->block_size = 1;
      view->texel_pitch = 1;
      break;
    case FMT_1_5_5_5:
      if (fetch.swizzle != XE_GPU_SWIZZLE_BGRA) {
        XELOGW("D3D11: unhandled swizzle for FMT_1_5_5_5");
      }
      view->format = DXGI_FORMAT_B5G5R5A1_UNORM;
      view->block_size = 1;
      view->texel_pitch = 2;
      break;
    case FMT_8_8_8_8:
      switch (fetch.swizzle) {
        case XE_GPU_SWIZZLE_RGBA:
        case XE_GPU_SWIZZLE_RGB1:  // ?
          view->format = DXGI_FORMAT_R8G8B8A8_UNORM;
          break;
        case XE_GPU_SWIZZLE_BGRA:
          view->format = DXGI_FORMAT_B8G8R8A8_UNORM;
          break;
        case XE_GPU_SWIZZLE_BGR1:
          view->format = DXGI_FORMAT_B8G8R8X8_UNORM;
          break;
        default:
          XELOGW("D3D11: unhandled swizzle for FMT_8_8_8_8");
          view->format = DXGI_FORMAT_R8G8B8A8_UNORM;
          break;
      }
      view->block_size = 1;
      view->texel_pitch = 4;
      break;
    case FMT_4_4_4_4:
      if (fetch.swizzle != XE_GPU_SWIZZLE_BGRA) {
        XELOGW("D3D11: unhandled swizzle for FMT_4_4_4_4");
      }
      // only supported on Windows 8+
      view->format = DXGI_FORMAT_B4G4R4A4_UNORM;
      view->block_size = 1;
      view->texel_pitch = 2;
      break;
    case FMT_16_16_16_16_FLOAT:
      if (fetch.swizzle != XE_GPU_SWIZZLE_RGBA) {
        XELOGW("D3D11: unhandled swizzle for FMT_16_16_16_16_FLOAT");
      }
      view->format = DXGI_FORMAT_R16G16B16A16_FLOAT;
      view->block_size = 1;
      view->texel_pitch = 8;
      break;
    case FMT_32_FLOAT:
      if (fetch.swizzle != XE_GPU_SWIZZLE_R111) {
        XELOGW("D3D11: unhandled swizzle for FMT_32_FLOAT");
      }
      view->format = DXGI_FORMAT_R32_FLOAT;
      view->block_size = 1;
      view->texel_pitch = 4;
      break;
    case FMT_DXT1:
      view->format = DXGI_FORMAT_BC1_UNORM;
      view->block_size = 4;
      view->texel_pitch = 8;
      view->is_compressed = true;
      break;
    case FMT_DXT2_3:
      view->format = DXGI_FORMAT_BC2_UNORM;
      view->block_size = 4;
      view->texel_pitch = 16;
      view->is_compressed = true;
      break;
    case FMT_DXT4_5:
      view->format = DXGI_FORMAT_BC3_UNORM;
      view->block_size = 4;
      view->texel_pitch = 16;
      view->is_compressed = true;
      break;
    // Everything below is recognized but has no mapping yet.
    case FMT_1_REVERSE:
    case FMT_1:
    case FMT_5_6_5:
    case FMT_6_5_5:
    case FMT_2_10_10_10:
    case FMT_8_A:
    case FMT_8_B:
    case FMT_8_8:
    case FMT_Cr_Y1_Cb_Y0:
    case FMT_Y1_Cr_Y0_Cb:
    case FMT_5_5_5_1:
    case FMT_8_8_8_8_A:
    case FMT_10_11_11:
    case FMT_11_11_10:
    case FMT_24_8:
    case FMT_24_8_FLOAT:
    case FMT_16:
    case FMT_16_16:
    case FMT_16_16_16_16:
    case FMT_16_EXPAND:
    case FMT_16_16_EXPAND:
    case FMT_16_16_16_16_EXPAND:
    case FMT_16_FLOAT:
    case FMT_16_16_FLOAT:
    case FMT_32:
    case FMT_32_32:
    case FMT_32_32_32_32:
    case FMT_32_32_FLOAT:
    case FMT_32_32_32_32_FLOAT:
    case FMT_32_AS_8:
    case FMT_32_AS_8_8:
    case FMT_16_MPEG:
    case FMT_16_16_MPEG:
    case FMT_8_INTERLACED:
    case FMT_32_AS_8_INTERLACED:
    case FMT_32_AS_8_8_INTERLACED:
    case FMT_16_INTERLACED:
    case FMT_16_MPEG_INTERLACED:
    case FMT_16_16_MPEG_INTERLACED:
    case FMT_DXN:
    case FMT_8_8_8_8_AS_16_16_16_16:
    case FMT_DXT1_AS_16_16_16_16:
    case FMT_DXT2_3_AS_16_16_16_16:
    case FMT_DXT4_5_AS_16_16_16_16:
    case FMT_2_10_10_10_AS_16_16_16_16:
    case FMT_10_11_11_AS_16_16_16_16:
    case FMT_11_11_10_AS_16_16_16_16:
    case FMT_32_32_32_FLOAT:
    case FMT_DXT3A:
    case FMT_DXT5A:
    case FMT_CTX1:
    case FMT_DXT3A_AS_1_1_1_1:
      view->format = DXGI_FORMAT_UNKNOWN;
      break;
  }

  if (view->format == DXGI_FORMAT_UNKNOWN) {
    return false;
  }

  // Only 2D textures have derived size information so far; 1D, 3D and cube
  // views are left as they are.
  if (fetch.dimension == DIMENSION_2D) {
    view->sizes_2d = GetTextureSizes2D(view);
  }
  return true;
}
|
||||
|
||||
// Derives the 2D size set for a view whose block_size, texel_pitch and
// is_compressed fields are already filled in.
//   logical_*: the fetch constant's stored width/height plus one.
//   block_*:   logical size divided by the block size.
//   input_*:   logical size rounded up to the tile multiple.
//   output_*:  logical size, or the next power of two when compressed.
//   *_pitch:   (width / block_size) * texel_pitch.
// block_size must be non-zero; it is used as a divisor.
const TextureSizes2D Texture::GetTextureSizes2D(TextureView* view) {
  const uint32_t block_size = view->block_size;
  const uint32_t texel_pitch = view->texel_pitch;

  TextureSizes2D sizes;
  sizes.logical_width = 1 + view->fetch.size_2d.width;
  sizes.logical_height = 1 + view->fetch.size_2d.height;

  sizes.block_width = sizes.logical_width / block_size;
  sizes.block_height = sizes.logical_height / block_size;

  if (view->is_compressed) {
    // must be 128x128
    sizes.input_width = XEROUNDUP(sizes.logical_width, 128);
    sizes.input_height = XEROUNDUP(sizes.logical_height, 128);
    sizes.output_width = XENEXTPOW2(sizes.logical_width);
    sizes.output_height = XENEXTPOW2(sizes.logical_height);
  } else {
    // must be 32x32, but also must have a pitch that is a multiple of 256 bytes
    const uint32_t bytes_per_block = block_size * block_size * texel_pitch;
    uint32_t width_multiple = 32;
    if (bytes_per_block) {
      const uint32_t minimum_multiple = 256 / bytes_per_block;
      width_multiple =
          width_multiple < minimum_multiple ? minimum_multiple : width_multiple;
    }
    sizes.input_width = XEROUNDUP(sizes.logical_width, width_multiple);
    sizes.input_height = XEROUNDUP(sizes.logical_height, 32);
    sizes.output_width = sizes.logical_width;
    sizes.output_height = sizes.logical_height;
  }

  sizes.logical_pitch = (sizes.logical_width / block_size) * texel_pitch;
  sizes.input_pitch = (sizes.input_width / block_size) * texel_pitch;

  return sizes;
}
|
||||
|
||||
void Texture::TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch,
|
||||
XE_GPU_ENDIAN endianness) {
|
||||
switch (endianness) {
|
||||
case XE_GPU_ENDIAN_8IN16:
|
||||
for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) {
|
||||
*(uint16_t*)dest = XESWAP16(*(uint16_t*)src);
|
||||
}
|
||||
break;
|
||||
case XE_GPU_ENDIAN_8IN32: // Swap bytes.
|
||||
for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) {
|
||||
*(uint32_t*)dest = XESWAP32(*(uint32_t*)src);
|
||||
}
|
||||
break;
|
||||
case XE_GPU_ENDIAN_16IN32: // Swap half words.
|
||||
for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) {
|
||||
uint32_t value = *(uint32_t*)src;
|
||||
*(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
case XE_GPU_ENDIAN_NONE:
|
||||
memcpy(dest, src, pitch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
|
||||
// Computes the row-dependent part of a tiled 2D texture offset; the result
// is meant to be passed as base_offset to TiledOffset2DInner().
// NOTE(review): presumably |width| is in texels/blocks and |log_bpp| is
// log2(bytes per element), following the crunch reference -- confirm.
uint32_t Texture::TiledOffset2DOuter(uint32_t y, uint32_t width,
                                     uint32_t log_bpp) {
  const uint32_t macro_part = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
  const uint32_t micro_part = ((y & 6) << 2) << log_bpp;

  uint32_t offset = macro_part;
  offset += (micro_part & ~15) << 1;
  offset += micro_part & 15;
  offset += (y & 8) << (3 + log_bpp);
  offset += (y & 1) << 4;
  return offset;
}
|
||||
|
||||
// Combines the column |x| with a row base offset (see TiledOffset2DOuter())
// into the final tiled offset.
// NOTE(review): presumably |bpp| is the same log2 bytes-per-element value
// passed to TiledOffset2DOuter() as log_bpp -- confirm against callers.
uint32_t Texture::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
                                     uint32_t base_offset) {
  const uint32_t macro_part = (x >> 5) << (bpp + 7);
  const uint32_t micro_part = (x & 7) << bpp;
  const uint32_t offset =
      base_offset +
      (macro_part + ((micro_part & ~15) << 1) + (micro_part & 15));

  // Rearrange the linear offset bits and mix in the bank bits taken from x
  // and y.
  const uint32_t high_bits = (offset & ~511) << 3;
  const uint32_t mid_bits = (offset & 448) << 2;
  const uint32_t low_bits = offset & 63;
  const uint32_t y_bit = (y & 16) << 7;
  const uint32_t bank_bits = ((((y & 8) >> 2) + (x >> 3)) & 3) << 6;
  return high_bits + mid_bits + low_bits + y_bit + bank_bits;
}
|
|
@ -1,110 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_TEXTURE_H_
|
||||
#define XENIA_GPU_TEXTURE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
// TODO(benvanik): replace DXGI constants with xenia constants.
|
||||
#include <d3d11.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
class Texture;
|
||||
|
||||
// Per-dimension derived size information. Only the 2D variant carries data
// so far; the others are empty placeholders.
struct TextureSizes1D {};

// Sizes computed by Texture::GetTextureSizes2D().
struct TextureSizes2D {
  // Width/height from the fetch constant, plus one.
  uint32_t logical_width;
  uint32_t logical_height;
  // Logical size divided by the format's block size.
  uint32_t block_width;
  uint32_t block_height;
  // Logical size rounded up to the tile multiple.
  uint32_t input_width;
  uint32_t input_height;
  // Logical size, or the next power of two for compressed formats.
  uint32_t output_width;
  uint32_t output_height;
  // (width / block size) * texel pitch for the logical and input widths.
  uint32_t logical_pitch;
  uint32_t input_pitch;
};

struct TextureSizes3D {};

struct TextureSizesCube {};
|
||||
|
||||
struct TextureView {
|
||||
Texture* texture;
|
||||
xenos::xe_gpu_texture_fetch_t fetch;
|
||||
uint64_t hash;
|
||||
|
||||
union {
|
||||
TextureSizes1D sizes_1d;
|
||||
TextureSizes2D sizes_2d;
|
||||
TextureSizes3D sizes_3d;
|
||||
TextureSizesCube sizes_cube;
|
||||
};
|
||||
|
||||
int dimensions;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t depth;
|
||||
uint32_t block_size;
|
||||
uint32_t texel_pitch;
|
||||
bool is_compressed;
|
||||
DXGI_FORMAT format;
|
||||
|
||||
TextureView()
|
||||
: texture(nullptr),
|
||||
dimensions(0),
|
||||
width(0), height(0), depth(0),
|
||||
block_size(0), texel_pitch(0),
|
||||
is_compressed(false), format(DXGI_FORMAT_UNKNOWN) {}
|
||||
};
|
||||
|
||||
|
||||
class Texture {
|
||||
public:
|
||||
Texture(uint32_t address, const uint8_t* host_address);
|
||||
virtual ~Texture();
|
||||
|
||||
TextureView* Fetch(
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
|
||||
protected:
|
||||
bool FillViewInfo(TextureView* view,
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
|
||||
virtual TextureView* FetchNew(
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
|
||||
virtual bool FetchDirty(
|
||||
TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
|
||||
|
||||
const TextureSizes2D GetTextureSizes2D(TextureView* view);
|
||||
|
||||
static void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch,
|
||||
xenos::XE_GPU_ENDIAN endianness);
|
||||
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
|
||||
uint32_t log_bpp);
|
||||
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
|
||||
uint32_t base_offset);
|
||||
|
||||
uint32_t address_;
|
||||
const uint8_t* host_address_;
|
||||
|
||||
// TODO(benvanik): replace with LRU keyed list.
|
||||
std::vector<TextureView*> views_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_TEXTURE_H_
|
|
@ -1,50 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/texture_cache.h>
|
||||
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
|
||||
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
// https://github.com/ivmai/bdwgc/blob/master/os_dep.c
|
||||
|
||||
// Remembers the memory object used to translate guest texture addresses.
// The cache starts out empty.
TextureCache::TextureCache(Memory* memory) : memory_(memory) {}
|
||||
|
||||
// Deletes every cached texture; the cache owns them.
TextureCache::~TextureCache() {
  for (auto& entry : textures_) {
    delete entry.second;
  }
  textures_.clear();
}
|
||||
|
||||
// Returns the view for the texture at guest |address| described by |fetch|.
// A texture not yet in the cache is created through CreateTexture() and
// added first. Returns nullptr if the texture cannot be created, or whatever
// Texture::Fetch() returns otherwise.
TextureView* TextureCache::FetchTexture(
    uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch) {
  auto it = textures_.find(address);
  if (it != textures_.end()) {
    // Texture found.
    return it->second->Fetch(fetch);
  }

  // Texture not found.
  const uint8_t* host_address = memory_->Translate(address);
  Texture* texture = CreateTexture(address, host_address, fetch);
  if (!texture) {
    return nullptr;
  }
  textures_.insert({ address, texture });
  return texture->Fetch(fetch);
}
|
|
@ -1,50 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_TEXTURE_CACHE_H_
|
||||
#define XENIA_GPU_TEXTURE_CACHE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/texture.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
// TODO(benvanik): overlapping textures.
|
||||
// TODO(benvanik): multiple textures (differing formats/etc) per address.
|
||||
class TextureCache {
|
||||
public:
|
||||
TextureCache(Memory* memory);
|
||||
virtual ~TextureCache();
|
||||
|
||||
Memory* memory() const { return memory_; }
|
||||
|
||||
TextureView* FetchTexture(
|
||||
uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
|
||||
protected:
|
||||
virtual Texture* CreateTexture(
|
||||
uint32_t address, const uint8_t* host_address,
|
||||
const xenos::xe_gpu_texture_fetch_t& fetch) = 0;
|
||||
|
||||
Memory* memory_;
|
||||
|
||||
// Mapped by guest address.
|
||||
std::unordered_map<uint32_t, Texture*> textures_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_TEXTURE_CACHE_H_
|
|
@ -9,9 +9,342 @@
|
|||
|
||||
#include <xenia/gpu/texture_resource.h>
|
||||
|
||||
#include <xenia/gpu/xenos/ucode.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace xe;
|
||||
using namespace xe::gpu;
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
|
||||
bool TextureResource::Info::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
||||
Info& info) {
|
||||
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
|
||||
// a2xx_sq_surfaceformat
|
||||
|
||||
info.dimension = (TextureDimension)fetch.dimension;
|
||||
switch (info.dimension) {
|
||||
case TEXTURE_DIMENSION_1D:
|
||||
info.width = fetch.size_1d.width;
|
||||
break;
|
||||
case TEXTURE_DIMENSION_2D:
|
||||
info.width = fetch.size_2d.width;
|
||||
info.height = fetch.size_2d.height;
|
||||
break;
|
||||
case TEXTURE_DIMENSION_3D:
|
||||
case TEXTURE_DIMENSION_CUBE:
|
||||
info.width = fetch.size_3d.width;
|
||||
info.height = fetch.size_3d.height;
|
||||
info.depth = fetch.size_3d.depth;
|
||||
break;
|
||||
}
|
||||
info.block_size = 0;
|
||||
info.texel_pitch = 0;
|
||||
info.endianness = (XE_GPU_ENDIAN)fetch.endianness;
|
||||
info.is_tiled = fetch.tiled;
|
||||
info.is_compressed = false;
|
||||
info.input_length = 0;
|
||||
info.format = DXGI_FORMAT_UNKNOWN;
|
||||
switch (fetch.format) {
|
||||
case FMT_8:
|
||||
switch (fetch.swizzle) {
|
||||
case XE_GPU_SWIZZLE_RRR1:
|
||||
info.format = DXGI_FORMAT_R8_UNORM;
|
||||
break;
|
||||
case XE_GPU_SWIZZLE_000R:
|
||||
info.format = DXGI_FORMAT_A8_UNORM;
|
||||
break;
|
||||
default:
|
||||
XELOGW("D3D11: unhandled swizzle for FMT_8");
|
||||
info.format = DXGI_FORMAT_A8_UNORM;
|
||||
break;
|
||||
}
|
||||
info.block_size = 1;
|
||||
info.texel_pitch = 1;
|
||||
break;
|
||||
case FMT_1_5_5_5:
|
||||
switch (fetch.swizzle) {
|
||||
case XE_GPU_SWIZZLE_BGRA:
|
||||
info.format = DXGI_FORMAT_B5G5R5A1_UNORM;
|
||||
break;
|
||||
default:
|
||||
XELOGW("D3D11: unhandled swizzle for FMT_1_5_5_5");
|
||||
info.format = DXGI_FORMAT_B5G5R5A1_UNORM;
|
||||
break;
|
||||
}
|
||||
info.block_size = 1;
|
||||
info.texel_pitch = 2;
|
||||
break;
|
||||
case FMT_8_8_8_8:
|
||||
switch (fetch.swizzle) {
|
||||
case XE_GPU_SWIZZLE_RGBA:
|
||||
info.format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
break;
|
||||
case XE_GPU_SWIZZLE_BGRA:
|
||||
info.format = DXGI_FORMAT_B8G8R8A8_UNORM;
|
||||
break;
|
||||
case XE_GPU_SWIZZLE_RGB1:
|
||||
info.format = DXGI_FORMAT_R8G8B8A8_UNORM; // ?
|
||||
break;
|
||||
case XE_GPU_SWIZZLE_BGR1:
|
||||
info.format = DXGI_FORMAT_B8G8R8X8_UNORM;
|
||||
break;
|
||||
default:
|
||||
XELOGW("D3D11: unhandled swizzle for FMT_8_8_8_8");
|
||||
info.format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
break;
|
||||
}
|
||||
info.block_size = 1;
|
||||
info.texel_pitch = 4;
|
||||
break;
|
||||
case FMT_4_4_4_4:
|
||||
switch (fetch.swizzle) {
|
||||
case XE_GPU_SWIZZLE_BGRA:
|
||||
info.format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
|
||||
break;
|
||||
default:
|
||||
XELOGW("D3D11: unhandled swizzle for FMT_4_4_4_4");
|
||||
info.format = DXGI_FORMAT_B4G4R4A4_UNORM; // only supported on Windows 8+
|
||||
break;
|
||||
}
|
||||
info.block_size = 1;
|
||||
info.texel_pitch = 2;
|
||||
break;
|
||||
case FMT_16_16_16_16_FLOAT:
|
||||
switch (fetch.swizzle) {
|
||||
case XE_GPU_SWIZZLE_RGBA:
|
||||
info.format = DXGI_FORMAT_R16G16B16A16_FLOAT;
|
||||
break;
|
||||
default:
|
||||
XELOGW("D3D11: unhandled swizzle for FMT_16_16_16_16_FLOAT");
|
||||
info.format = DXGI_FORMAT_R16G16B16A16_FLOAT;
|
||||
break;
|
||||
}
|
||||
info.block_size = 1;
|
||||
info.texel_pitch = 8;
|
||||
break;
|
||||
case FMT_32_FLOAT:
|
||||
switch (fetch.swizzle) {
|
||||
case XE_GPU_SWIZZLE_R111:
|
||||
info.format = DXGI_FORMAT_R32_FLOAT;
|
||||
break;
|
||||
default:
|
||||
XELOGW("D3D11: unhandled swizzle for FMT_32_FLOAT");
|
||||
info.format = DXGI_FORMAT_R32_FLOAT;
|
||||
break;
|
||||
}
|
||||
info.block_size = 1;
|
||||
info.texel_pitch = 4;
|
||||
break;
|
||||
case FMT_DXT1:
|
||||
info.format = DXGI_FORMAT_BC1_UNORM;
|
||||
info.block_size = 4;
|
||||
info.texel_pitch = 8;
|
||||
info.is_compressed = true;
|
||||
break;
|
||||
case FMT_DXT2_3:
|
||||
case FMT_DXT4_5:
|
||||
info.format = (fetch.format == FMT_DXT4_5 ? DXGI_FORMAT_BC3_UNORM : DXGI_FORMAT_BC2_UNORM);
|
||||
info.block_size = 4;
|
||||
info.texel_pitch = 16;
|
||||
info.is_compressed = true;
|
||||
break;
|
||||
case FMT_1_REVERSE:
|
||||
case FMT_1:
|
||||
case FMT_5_6_5:
|
||||
case FMT_6_5_5:
|
||||
case FMT_2_10_10_10:
|
||||
case FMT_8_A:
|
||||
case FMT_8_B:
|
||||
case FMT_8_8:
|
||||
case FMT_Cr_Y1_Cb_Y0:
|
||||
case FMT_Y1_Cr_Y0_Cb:
|
||||
case FMT_5_5_5_1:
|
||||
case FMT_8_8_8_8_A:
|
||||
case FMT_10_11_11:
|
||||
case FMT_11_11_10:
|
||||
case FMT_24_8:
|
||||
case FMT_24_8_FLOAT:
|
||||
case FMT_16:
|
||||
case FMT_16_16:
|
||||
case FMT_16_16_16_16:
|
||||
case FMT_16_EXPAND:
|
||||
case FMT_16_16_EXPAND:
|
||||
case FMT_16_16_16_16_EXPAND:
|
||||
case FMT_16_FLOAT:
|
||||
case FMT_16_16_FLOAT:
|
||||
case FMT_32:
|
||||
case FMT_32_32:
|
||||
case FMT_32_32_32_32:
|
||||
case FMT_32_32_FLOAT:
|
||||
case FMT_32_32_32_32_FLOAT:
|
||||
case FMT_32_AS_8:
|
||||
case FMT_32_AS_8_8:
|
||||
case FMT_16_MPEG:
|
||||
case FMT_16_16_MPEG:
|
||||
case FMT_8_INTERLACED:
|
||||
case FMT_32_AS_8_INTERLACED:
|
||||
case FMT_32_AS_8_8_INTERLACED:
|
||||
case FMT_16_INTERLACED:
|
||||
case FMT_16_MPEG_INTERLACED:
|
||||
case FMT_16_16_MPEG_INTERLACED:
|
||||
case FMT_DXN:
|
||||
case FMT_8_8_8_8_AS_16_16_16_16:
|
||||
case FMT_DXT1_AS_16_16_16_16:
|
||||
case FMT_DXT2_3_AS_16_16_16_16:
|
||||
case FMT_DXT4_5_AS_16_16_16_16:
|
||||
case FMT_2_10_10_10_AS_16_16_16_16:
|
||||
case FMT_10_11_11_AS_16_16_16_16:
|
||||
case FMT_11_11_10_AS_16_16_16_16:
|
||||
case FMT_32_32_32_FLOAT:
|
||||
case FMT_DXT3A:
|
||||
case FMT_DXT5A:
|
||||
case FMT_CTX1:
|
||||
case FMT_DXT3A_AS_1_1_1_1:
|
||||
info.format = DXGI_FORMAT_UNKNOWN;
|
||||
break;
|
||||
}
|
||||
|
||||
if (info.format == DXGI_FORMAT_UNKNOWN) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Must be called here when we know the format.
|
||||
switch (info.dimension) {
|
||||
case TEXTURE_DIMENSION_1D:
|
||||
info.CalculateTextureSizes1D(fetch);
|
||||
break;
|
||||
case TEXTURE_DIMENSION_2D:
|
||||
info.CalculateTextureSizes2D(fetch);
|
||||
break;
|
||||
case TEXTURE_DIMENSION_3D:
|
||||
// TODO(benvanik): calculate size.
|
||||
return false;
|
||||
case TEXTURE_DIMENSION_CUBE:
|
||||
// TODO(benvanik): calculate size.
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void TextureResource::Info::CalculateTextureSizes1D(
|
||||
const xe_gpu_texture_fetch_t& fetch) {
|
||||
// ?
|
||||
size_1d.width = fetch.size_1d.width;
|
||||
}
|
||||
|
||||
void TextureResource::Info::CalculateTextureSizes2D(
|
||||
const xe_gpu_texture_fetch_t& fetch) {
|
||||
size_2d.logical_width = 1 + fetch.size_2d.width;
|
||||
size_2d.logical_height = 1 + fetch.size_2d.height;
|
||||
|
||||
size_2d.block_width = size_2d.logical_width / block_size;
|
||||
size_2d.block_height = size_2d.logical_height / block_size;
|
||||
|
||||
if (!is_compressed) {
|
||||
// must be 32x32 but also must have a pitch that is a multiple of 256 bytes
|
||||
uint32_t bytes_per_block = block_size * block_size * texel_pitch;
|
||||
uint32_t width_multiple = 32;
|
||||
if (bytes_per_block) {
|
||||
uint32_t minimum_multiple = 256 / bytes_per_block;
|
||||
if (width_multiple < minimum_multiple) {
|
||||
width_multiple = minimum_multiple;
|
||||
}
|
||||
}
|
||||
size_2d.input_width = XEROUNDUP(size_2d.logical_width, width_multiple);
|
||||
size_2d.input_height = XEROUNDUP(size_2d.logical_height, 32);
|
||||
size_2d.output_width = size_2d.logical_width;
|
||||
size_2d.output_height = size_2d.logical_height;
|
||||
} else {
|
||||
// must be 128x128
|
||||
size_2d.input_width = XEROUNDUP(size_2d.logical_width, 128);
|
||||
size_2d.input_height = XEROUNDUP(size_2d.logical_height, 128);
|
||||
size_2d.output_width = XENEXTPOW2(size_2d.logical_width);
|
||||
size_2d.output_height = XENEXTPOW2(size_2d.logical_height);
|
||||
}
|
||||
|
||||
size_2d.logical_pitch = (size_2d.logical_width / block_size) * texel_pitch;
|
||||
size_2d.input_pitch = (size_2d.input_width / block_size) * texel_pitch;
|
||||
|
||||
if (!is_tiled) {
|
||||
input_length = size_2d.block_height * size_2d.logical_pitch;
|
||||
} else {
|
||||
input_length = size_2d.block_height * size_2d.logical_pitch; // ?
|
||||
}
|
||||
}
|
||||
|
||||
// Stores a copy of |info| and hands |memory_range| to the PagedResource base.
TextureResource::TextureResource(const MemoryRange& memory_range,
                                 const Info& info)
    : PagedResource(memory_range), info_(info) {}
|
||||
|
||||
// Nothing to release at this level.
TextureResource::~TextureResource() = default;
|
||||
|
||||
int TextureResource::Prepare() {
|
||||
if (!handle()) {
|
||||
if (CreateHandle()) {
|
||||
XELOGE("Unable to create texture handle");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!dirtied_) {
|
||||
return 0;
|
||||
}
|
||||
dirtied_ = false;
|
||||
|
||||
// pass dirty regions?
|
||||
return InvalidateRegion(memory_range_);
|
||||
}
|
||||
|
||||
void TextureResource::TextureSwap(uint8_t* dest, const uint8_t* src,
|
||||
uint32_t pitch) const {
|
||||
// TODO(benvanik): optimize swapping paths.
|
||||
switch (info_.endianness) {
|
||||
case XE_GPU_ENDIAN_8IN16:
|
||||
for (uint32_t i = 0; i < pitch; i += 2, src += 2, dest += 2) {
|
||||
*(uint16_t*)dest = XESWAP16(*(uint16_t*)src);
|
||||
}
|
||||
break;
|
||||
case XE_GPU_ENDIAN_8IN32: // Swap bytes.
|
||||
for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) {
|
||||
*(uint32_t*)dest = XESWAP32(*(uint32_t*)src);
|
||||
}
|
||||
break;
|
||||
case XE_GPU_ENDIAN_16IN32: // Swap half words.
|
||||
for (uint32_t i = 0; i < pitch; i += 4, src += 4, dest += 4) {
|
||||
uint32_t value = *(uint32_t*)src;
|
||||
*(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
case XE_GPU_ENDIAN_NONE:
|
||||
memcpy(dest, src, pitch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
|
||||
uint32_t TextureResource::TiledOffset2DOuter(uint32_t y, uint32_t width,
|
||||
uint32_t log_bpp) const {
|
||||
uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
|
||||
uint32_t micro = ((y & 6) << 2) << log_bpp;
|
||||
return macro +
|
||||
((micro & ~15) << 1) +
|
||||
(micro & 15) +
|
||||
((y & 8) << (3 + log_bpp)) +
|
||||
((y & 1) << 4);
|
||||
}
|
||||
|
||||
uint32_t TextureResource::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
|
||||
uint32_t base_offset) const {
|
||||
uint32_t macro = (x >> 5) << (bpp + 7);
|
||||
uint32_t micro = (x & 7) << bpp;
|
||||
uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15));
|
||||
return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) +
|
||||
((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#ifndef XENIA_GPU_TEXTURE_RESOURCE_H_
|
||||
#define XENIA_GPU_TEXTURE_RESOURCE_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
#include <xenia/gpu/resource.h>
|
||||
#include <xenia/gpu/xenos/xenos.h>
|
||||
|
||||
// TODO(benvanik): replace DXGI constants with xenia constants.
|
||||
|
@ -21,8 +21,85 @@ namespace xe {
|
|||
namespace gpu {
|
||||
|
||||
|
||||
class TextureResource : public Resource {
|
||||
// Texture dimensionality. The numeric values matter: the fetch constant's
// dimension field is cast directly to this enum.
enum TextureDimension {
  TEXTURE_DIMENSION_1D = 0,
  TEXTURE_DIMENSION_2D = 1,
  TEXTURE_DIMENSION_3D = 2,
  TEXTURE_DIMENSION_CUBE = 3,
};
|
||||
|
||||
|
||||
class TextureResource : public PagedResource {
|
||||
public:
|
||||
struct Info {
|
||||
TextureDimension dimension;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t depth;
|
||||
uint32_t block_size;
|
||||
uint32_t texel_pitch;
|
||||
xenos::XE_GPU_ENDIAN endianness;
|
||||
bool is_tiled;
|
||||
bool is_compressed;
|
||||
uint32_t input_length;
|
||||
|
||||
// TODO(benvanik): replace with our own constants.
|
||||
DXGI_FORMAT format;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t width;
|
||||
} size_1d;
|
||||
struct {
|
||||
uint32_t logical_width;
|
||||
uint32_t logical_height;
|
||||
uint32_t block_width;
|
||||
uint32_t block_height;
|
||||
uint32_t input_width;
|
||||
uint32_t input_height;
|
||||
uint32_t output_width;
|
||||
uint32_t output_height;
|
||||
uint32_t logical_pitch;
|
||||
uint32_t input_pitch;
|
||||
} size_2d;
|
||||
struct {
|
||||
} size_3d;
|
||||
struct {
|
||||
} size_cube;
|
||||
};
|
||||
|
||||
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
||||
Info& out_info);
|
||||
|
||||
private:
|
||||
void CalculateTextureSizes1D(const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
void CalculateTextureSizes2D(const xenos::xe_gpu_texture_fetch_t& fetch);
|
||||
};
|
||||
|
||||
TextureResource(const MemoryRange& memory_range,
|
||||
const Info& info);
|
||||
~TextureResource() override;
|
||||
|
||||
const Info& info() const { return info_; }
|
||||
|
||||
bool Equals(const void* info_ptr, size_t info_length) override {
|
||||
return info_length == sizeof(Info) &&
|
||||
memcmp(info_ptr, &info_, info_length) == 0;
|
||||
}
|
||||
|
||||
virtual int Prepare();
|
||||
|
||||
protected:
|
||||
virtual int CreateHandle() = 0;
|
||||
virtual int InvalidateRegion(const MemoryRange& memory_range) = 0;
|
||||
|
||||
void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch) const;
|
||||
uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
|
||||
uint32_t log_bpp) const;
|
||||
uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
|
||||
uint32_t base_offset) const;
|
||||
|
||||
Info info_;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1,51 +0,0 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2013 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_XENOS_REGISTERS_H_
|
||||
#define XENIA_GPU_XENOS_REGISTERS_H_
|
||||
|
||||
#include <xenia/core.h>
|
||||
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace xenos {
|
||||
|
||||
|
||||
// Number of entries in the register file (see RegisterFile::values).
static const uint32_t kXEGpuRegisterCount = 0x5003;

// One enumerator per XE_GPU_REGISTER(index, type, name) entry in
// register_table.inc, named XE_GPU_REG_<name> and valued <index>.
enum Registers {
#define XE_GPU_REGISTER(index, type, name) \
    XE_GPU_REG_##name = index,
#include <xenia/gpu/xenos/register_table.inc>
#undef XE_GPU_REGISTER
};
|
||||
|
||||
|
||||
const char* GetRegisterName(uint32_t index);
|
||||
|
||||
|
||||
// A single register's contents, viewable as either a 32-bit unsigned integer
// or a float.
union RegisterValue {
  uint32_t u32;
  float f32;
};
|
||||
|
||||
|
||||
struct RegisterFile {
|
||||
RegisterValue values[kXEGpuRegisterCount];
|
||||
};
|
||||
|
||||
|
||||
} // namespace xenos
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
|
||||
#endif // XENIA_GPU_XENOS_REGISTERS_H_
|
|
@ -3,8 +3,6 @@
|
|||
'sources': [
|
||||
'packets.h',
|
||||
'register_table.inc',
|
||||
'registers.cc',
|
||||
'registers.h',
|
||||
'ucode.h',
|
||||
'ucode_disassembler.cc',
|
||||
'ucode_disassembler.h',
|
||||
|
|
Loading…
Reference in New Issue