[D3D12] Primitive converter cache and strip restart, texture invalidation acquire/release
This commit is contained in:
parent
128ac2a3f9
commit
9194c3f8b0
|
@ -632,8 +632,8 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
primitive_converter_ = std::make_unique<PrimitiveConverter>(
|
primitive_converter_ =
|
||||||
this, register_file_, memory_, shared_memory_.get());
|
std::make_unique<PrimitiveConverter>(this, register_file_, memory_);
|
||||||
if (!primitive_converter_->Initialize()) {
|
if (!primitive_converter_->Initialize()) {
|
||||||
XELOGE("Failed to initialize the geometric primitive converter");
|
XELOGE("Failed to initialize the geometric primitive converter");
|
||||||
return false;
|
return false;
|
||||||
|
@ -1060,30 +1060,6 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
|
|
||||||
if (indexed && regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) {
|
|
||||||
uint32_t reset_index = regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
|
|
||||||
uint32_t reset_index_expected;
|
|
||||||
if (index_buffer_info->format == IndexFormat::kInt32) {
|
|
||||||
reset_index_expected = 0xFFFFFFFFu;
|
|
||||||
} else {
|
|
||||||
reset_index_expected = 0xFFFFu;
|
|
||||||
}
|
|
||||||
if (reset_index != reset_index_expected) {
|
|
||||||
// Only 0xFFFF and 0xFFFFFFFF primitive restart indices are supported by
|
|
||||||
// Direct3D 12 (endianness doesn't matter for them). With shared memory,
|
|
||||||
// it's impossible to replace the cut index in the buffer without
|
|
||||||
// affecting the game memory.
|
|
||||||
XELOGE(
|
|
||||||
"The game uses the primitive restart index 0x%X that isn't 0xFFFF or "
|
|
||||||
"0xFFFFFFFF. Report the game to Xenia developers so geometry shaders "
|
|
||||||
"will be added to handle this!",
|
|
||||||
reset_index);
|
|
||||||
assert_always();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Shaders will have already been defined by previous loads.
|
// Shaders will have already been defined by previous loads.
|
||||||
// We need them to do just about anything so validate here.
|
// We need them to do just about anything so validate here.
|
||||||
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
|
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
|
||||||
|
@ -1122,6 +1098,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
|
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
|
||||||
render_target_cache_->GetCurrentPipelineRenderTargets();
|
render_target_cache_->GetCurrentPipelineRenderTargets();
|
||||||
|
|
||||||
|
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
|
||||||
|
|
||||||
// Set the primitive topology.
|
// Set the primitive topology.
|
||||||
PrimitiveType primitive_type_converted =
|
PrimitiveType primitive_type_converted =
|
||||||
PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
|
PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/base/memory.h"
|
||||||
#include "xenia/base/platform.h"
|
#include "xenia/base/platform.h"
|
||||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||||
#include "xenia/ui/d3d12/d3d12_util.h"
|
#include "xenia/ui/d3d12/d3d12_util.h"
|
||||||
|
@ -24,12 +25,12 @@ namespace d3d12 {
|
||||||
|
|
||||||
PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor* command_processor,
|
PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor* command_processor,
|
||||||
RegisterFile* register_file,
|
RegisterFile* register_file,
|
||||||
Memory* memory,
|
Memory* memory)
|
||||||
SharedMemory* shared_memory)
|
|
||||||
: command_processor_(command_processor),
|
: command_processor_(command_processor),
|
||||||
register_file_(register_file),
|
register_file_(register_file),
|
||||||
memory_(memory),
|
memory_(memory) {
|
||||||
shared_memory_(shared_memory) {}
|
system_page_size_ = uint32_t(memory::page_size());
|
||||||
|
}
|
||||||
|
|
||||||
PrimitiveConverter::~PrimitiveConverter() { Shutdown(); }
|
PrimitiveConverter::~PrimitiveConverter() { Shutdown(); }
|
||||||
|
|
||||||
|
@ -94,10 +95,18 @@ bool PrimitiveConverter::Initialize() {
|
||||||
}
|
}
|
||||||
static_ib_gpu_address_ = static_ib_->GetGPUVirtualAddress();
|
static_ib_gpu_address_ = static_ib_->GetGPUVirtualAddress();
|
||||||
|
|
||||||
|
memory_regions_invalidated_.store(0ull, std::memory_order_relaxed);
|
||||||
|
physical_write_watch_handle_ =
|
||||||
|
memory_->RegisterPhysicalWriteWatch(MemoryWriteCallbackThunk, this);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PrimitiveConverter::Shutdown() {
|
void PrimitiveConverter::Shutdown() {
|
||||||
|
if (physical_write_watch_handle_ != nullptr) {
|
||||||
|
memory_->UnregisterPhysicalWriteWatch(physical_write_watch_handle_);
|
||||||
|
physical_write_watch_handle_ = nullptr;
|
||||||
|
}
|
||||||
ui::d3d12::util::ReleaseAndNull(static_ib_);
|
ui::d3d12::util::ReleaseAndNull(static_ib_);
|
||||||
ui::d3d12::util::ReleaseAndNull(static_ib_upload_);
|
ui::d3d12::util::ReleaseAndNull(static_ib_upload_);
|
||||||
buffer_pool_.reset();
|
buffer_pool_.reset();
|
||||||
|
@ -106,8 +115,6 @@ void PrimitiveConverter::Shutdown() {
|
||||||
void PrimitiveConverter::ClearCache() { buffer_pool_->ClearCache(); }
|
void PrimitiveConverter::ClearCache() { buffer_pool_->ClearCache(); }
|
||||||
|
|
||||||
void PrimitiveConverter::BeginFrame() {
|
void PrimitiveConverter::BeginFrame() {
|
||||||
buffer_pool_->BeginFrame();
|
|
||||||
|
|
||||||
// Got a command list now - upload and transition the static index buffer if
|
// Got a command list now - upload and transition the static index buffer if
|
||||||
// needed.
|
// needed.
|
||||||
if (static_ib_upload_ != nullptr) {
|
if (static_ib_upload_ != nullptr) {
|
||||||
|
@ -126,6 +133,11 @@ void PrimitiveConverter::BeginFrame() {
|
||||||
static_ib_upload_ = nullptr;
|
static_ib_upload_ = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
buffer_pool_->BeginFrame();
|
||||||
|
|
||||||
|
converted_indices_cache_.clear();
|
||||||
|
memory_regions_used_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PrimitiveConverter::EndFrame() { buffer_pool_->EndFrame(); }
|
void PrimitiveConverter::EndFrame() { buffer_pool_->EndFrame(); }
|
||||||
|
@ -142,6 +154,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
||||||
PrimitiveType source_type, uint32_t address, uint32_t index_count,
|
PrimitiveType source_type, uint32_t address, uint32_t index_count,
|
||||||
IndexFormat index_format, Endian index_endianness,
|
IndexFormat index_format, Endian index_endianness,
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) {
|
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) {
|
||||||
|
bool index_32bit = index_format == IndexFormat::kInt32;
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
bool reset = (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) != 0;
|
bool reset = (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) != 0;
|
||||||
// Swap the reset index because we will be comparing unswapped values to it.
|
// Swap the reset index because we will be comparing unswapped values to it.
|
||||||
|
@ -150,40 +163,79 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
||||||
// If the specified reset index is the same as the one used by Direct3D 12
|
// If the specified reset index is the same as the one used by Direct3D 12
|
||||||
// (0xFFFF or 0xFFFFFFFF - in the pipeline cache, we use the former for
|
// (0xFFFF or 0xFFFFFFFF - in the pipeline cache, we use the former for
|
||||||
// 16-bit and the latter for 32-bit indices), we can use the buffer directly.
|
// 16-bit and the latter for 32-bit indices), we can use the buffer directly.
|
||||||
uint32_t reset_index_host =
|
uint32_t reset_index_host = index_32bit ? 0xFFFFFFFFu : 0xFFFFu;
|
||||||
index_format == IndexFormat::kInt32 ? 0xFFFFFFFFu : 0xFFFFu;
|
|
||||||
|
|
||||||
// Check if need to convert at all.
|
// Check if need to convert at all.
|
||||||
if (source_type != PrimitiveType::kTriangleFan) {
|
if (source_type != PrimitiveType::kTriangleFan) {
|
||||||
if (!reset || reset_index == reset_index_host) {
|
if (!reset || reset_index == reset_index_host) {
|
||||||
return ConversionResult::kConversionNotNeeded;
|
return ConversionResult::kConversionNotNeeded;
|
||||||
}
|
}
|
||||||
if (source_type != PrimitiveType::kTriangleStrip ||
|
if (source_type != PrimitiveType::kTriangleStrip &&
|
||||||
source_type != PrimitiveType::kLineStrip) {
|
source_type != PrimitiveType::kLineStrip) {
|
||||||
return ConversionResult::kConversionNotNeeded;
|
return ConversionResult::kConversionNotNeeded;
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Write conversion for triangle and line strip reset index
|
|
||||||
// and for indexed line loops.
|
|
||||||
return ConversionResult::kConversionNotNeeded;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exit early for clearly empty draws, without even reading the memory.
|
// Exit early for clearly empty draws, without even reading the memory.
|
||||||
if (source_type == PrimitiveType::kTriangleFan ||
|
uint32_t index_count_min;
|
||||||
source_type == PrimitiveType::kTriangleStrip) {
|
if (source_type == PrimitiveType::kLineStrip ||
|
||||||
if (index_count < 3) {
|
|
||||||
return ConversionResult::kPrimitiveEmpty;
|
|
||||||
}
|
|
||||||
} else if (source_type == PrimitiveType::kLineStrip ||
|
|
||||||
source_type == PrimitiveType::kLineLoop) {
|
source_type == PrimitiveType::kLineLoop) {
|
||||||
if (index_count < 2) {
|
index_count_min = 2;
|
||||||
|
} else {
|
||||||
|
index_count_min = 3;
|
||||||
|
}
|
||||||
|
if (index_count < index_count_min) {
|
||||||
return ConversionResult::kPrimitiveEmpty;
|
return ConversionResult::kPrimitiveEmpty;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Invalidate the cache if data behind any entry was modified.
|
||||||
|
if (memory_regions_invalidated_.exchange(0ull, std::memory_order_acquire) &
|
||||||
|
memory_regions_used_) {
|
||||||
|
converted_indices_cache_.clear();
|
||||||
|
memory_regions_used_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): Find the converted data in the cache.
|
address &= index_32bit ? 0x1FFFFFFC : 0x1FFFFFFE;
|
||||||
|
uint32_t index_size = index_32bit ? sizeof(uint32_t) : sizeof(uint16_t);
|
||||||
|
uint32_t address_last = address + index_size * (index_count - 1);
|
||||||
|
|
||||||
// Calculate the index count, and also check if there's nothing to convert in
|
// Create the cache entry, currently only for the key.
|
||||||
// the buffer (for instance, if not using primitive reset).
|
ConvertedIndices converted_indices;
|
||||||
|
converted_indices.key.address = address;
|
||||||
|
converted_indices.key.source_type = source_type;
|
||||||
|
converted_indices.key.format = index_format;
|
||||||
|
converted_indices.key.count = index_count;
|
||||||
|
converted_indices.key.reset = reset ? 1 : 0;
|
||||||
|
converted_indices.reset_index = reset_index;
|
||||||
|
|
||||||
|
// Try to find the previously converted index buffer.
|
||||||
|
auto found_range =
|
||||||
|
converted_indices_cache_.equal_range(converted_indices.key.value);
|
||||||
|
for (auto iter = found_range.first; iter != found_range.second; ++iter) {
|
||||||
|
const ConvertedIndices& found_converted = iter->second;
|
||||||
|
if (reset && found_converted.reset_index != reset_index) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (found_converted.converted_index_count == 0) {
|
||||||
|
return ConversionResult::kPrimitiveEmpty;
|
||||||
|
}
|
||||||
|
if (!found_converted.gpu_address) {
|
||||||
|
return ConversionResult::kConversionNotNeeded;
|
||||||
|
}
|
||||||
|
gpu_address_out = found_converted.gpu_address;
|
||||||
|
index_count_out = found_converted.converted_index_count;
|
||||||
|
return ConversionResult::kConverted;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the memory usage mask for cache invalidation.
|
||||||
|
// 1 bit = (512 / 64) MB = 8 MB.
|
||||||
|
uint64_t memory_regions_used_bits = ~((1ull << (address >> 23)) - 1);
|
||||||
|
if (address_last < (63 << 23)) {
|
||||||
|
memory_regions_used_bits = (1ull << ((address_last >> 23) + 1)) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the new index count, and also check if there's nothing to convert
|
||||||
|
// in the buffer (for instance, if not actually using primitive reset).
|
||||||
uint32_t converted_index_count = 0;
|
uint32_t converted_index_count = 0;
|
||||||
bool conversion_needed = false;
|
bool conversion_needed = false;
|
||||||
bool simd = false;
|
bool simd = false;
|
||||||
|
@ -196,22 +248,44 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
||||||
} else {
|
} else {
|
||||||
converted_index_count = 3 * (index_count - 2);
|
converted_index_count = 3 * (index_count - 2);
|
||||||
}
|
}
|
||||||
|
} else if (source_type == PrimitiveType::kTriangleStrip ||
|
||||||
|
source_type == PrimitiveType::kLineStrip) {
|
||||||
|
// TODO(Triang3l): Check if the restart index is used at all in this buffer.
|
||||||
|
conversion_needed = true;
|
||||||
|
converted_index_count = index_count;
|
||||||
|
simd = true;
|
||||||
|
}
|
||||||
|
converted_indices.converted_index_count = converted_index_count;
|
||||||
|
|
||||||
|
// If nothing to convert, store this result so the check won't be happening
|
||||||
|
// again and again and exit.
|
||||||
|
if (!conversion_needed || converted_index_count == 0) {
|
||||||
|
converted_indices.gpu_address = 0;
|
||||||
|
converted_indices_cache_.insert(
|
||||||
|
std::make_pair(converted_indices.key.value, converted_indices));
|
||||||
|
memory_regions_used_ |= memory_regions_used_bits;
|
||||||
|
return converted_index_count == 0 ? ConversionResult::kPrimitiveEmpty
|
||||||
|
: ConversionResult::kConversionNotNeeded;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert.
|
||||||
|
|
||||||
union {
|
union {
|
||||||
void* source;
|
const void* source;
|
||||||
uint16_t* source_16;
|
const uint8_t* source_8;
|
||||||
uint32_t* source_32;
|
const uint16_t* source_16;
|
||||||
|
const uint32_t* source_32;
|
||||||
};
|
};
|
||||||
source = memory_->TranslatePhysical(address);
|
source = memory_->TranslatePhysical(address);
|
||||||
union {
|
union {
|
||||||
void* target;
|
void* target;
|
||||||
|
uint8_t* target_8;
|
||||||
uint16_t* target_16;
|
uint16_t* target_16;
|
||||||
uint32_t* target_32;
|
uint32_t* target_32;
|
||||||
};
|
};
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
|
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
|
||||||
target = AllocateIndices(index_format, index_count, simd ? address & 15 : 0,
|
target = AllocateIndices(index_format, converted_index_count,
|
||||||
gpu_address);
|
simd ? address & 15 : 0, gpu_address);
|
||||||
if (target == nullptr) {
|
if (target == nullptr) {
|
||||||
return ConversionResult::kFailed;
|
return ConversionResult::kFailed;
|
||||||
}
|
}
|
||||||
|
@ -237,11 +311,62 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (source_type == PrimitiveType::kTriangleStrip ||
|
||||||
|
source_type == PrimitiveType::kLineStrip) {
|
||||||
|
// Replace the reset index with the maximum representable value - vector OR
|
||||||
|
// gives 0 or 0xFFFF/0xFFFFFFFF, which is exactly what is needed.
|
||||||
|
// Allocations in the target index buffer are aligned with 16-byte
|
||||||
|
// granularity, and within 16-byte vectors, both the source and the target
|
||||||
|
// start at the same offset.
|
||||||
|
#if XE_ARCH_AMD64
|
||||||
|
source = reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(source) &
|
||||||
|
~(uintptr_t(15)));
|
||||||
|
target = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(target) &
|
||||||
|
~(uintptr_t(15)));
|
||||||
|
uint32_t vector_count = (address_last >> 4) - (address >> 4) + 1;
|
||||||
|
if (index_format == IndexFormat::kInt32) {
|
||||||
|
__m128i reset_index_vector = _mm_set1_epi32(reset_index);
|
||||||
|
for (uint32_t i = 0; i < vector_count; ++i) {
|
||||||
|
__m128i indices_vector =
|
||||||
|
_mm_load_si128(reinterpret_cast<const __m128i*>(&source_8[i << 4]));
|
||||||
|
__m128i indices_are_reset_vector =
|
||||||
|
_mm_cmpeq_epi32(indices_vector, reset_index_vector);
|
||||||
|
_mm_store_si128(reinterpret_cast<__m128i*>(&target_8[i << 4]),
|
||||||
|
_mm_or_si128(indices_vector, indices_are_reset_vector));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
__m128i reset_index_vector = _mm_set1_epi16(reset_index);
|
||||||
|
for (uint32_t i = 0; i < vector_count; ++i) {
|
||||||
|
__m128i indices_vector =
|
||||||
|
_mm_load_si128(reinterpret_cast<const __m128i*>(&source_8[i << 4]));
|
||||||
|
__m128i indices_are_reset_vector =
|
||||||
|
_mm_cmpeq_epi16(indices_vector, reset_index_vector);
|
||||||
|
_mm_store_si128(reinterpret_cast<__m128i*>(&target_8[i << 4]),
|
||||||
|
_mm_or_si128(indices_vector, indices_are_reset_vector));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (index_format == IndexFormat::kInt32) {
|
||||||
|
for (uint32_t i = 0; i < index_count; ++i) {
|
||||||
|
uint32_t index = source_32[i];
|
||||||
|
target_32[i] = index == reset_index ? 0xFFFFFFFFu : index;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (uint32_t i = 0; i < index_count; ++i) {
|
||||||
|
uint16_t index = source_16[i];
|
||||||
|
target_16[i] = index == reset_index ? 0xFFFFu : index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): Replace primitive reset index in triangle and line strips.
|
|
||||||
// TODO(Triang3l): Line loops.
|
// TODO(Triang3l): Line loops.
|
||||||
|
|
||||||
|
// Cache and return the indices.
|
||||||
|
converted_indices.gpu_address = gpu_address;
|
||||||
|
converted_indices_cache_.insert(
|
||||||
|
std::make_pair(converted_indices.key.value, converted_indices));
|
||||||
|
memory_regions_used_ |= memory_regions_used_bits;
|
||||||
gpu_address_out = gpu_address;
|
gpu_address_out = gpu_address;
|
||||||
index_count_out = converted_index_count;
|
index_count_out = converted_index_count;
|
||||||
return ConversionResult::kConverted;
|
return ConversionResult::kConverted;
|
||||||
|
@ -277,6 +402,25 @@ void* PrimitiveConverter::AllocateIndices(
|
||||||
return mapping + simd_offset;
|
return mapping + simd_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PrimitiveConverter::MemoryWriteCallback(uint32_t page_first,
|
||||||
|
uint32_t page_last) {
|
||||||
|
// 1 bit = (512 / 64) MB = 8 MB. Invalidate a region of this size.
|
||||||
|
uint32_t bit_index_first = (page_first * system_page_size_) >> 23;
|
||||||
|
uint32_t bit_index_last = (page_last * system_page_size_) >> 23;
|
||||||
|
uint64_t bits = ~((1ull << bit_index_first) - 1);
|
||||||
|
if (bit_index_last < 63) {
|
||||||
|
bits &= (1ull << (bit_index_last + 1)) - 1;
|
||||||
|
}
|
||||||
|
memory_regions_invalidated_ |= bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Static trampoline passed to Memory::RegisterPhysicalWriteWatch together
// with `this` as the context; forwards the written page range to the
// instance's MemoryWriteCallback.
void PrimitiveConverter::MemoryWriteCallbackThunk(void* context_ptr,
                                                  uint32_t page_first,
                                                  uint32_t page_last) {
  auto* converter = static_cast<PrimitiveConverter*>(context_ptr);
  converter->MemoryWriteCallback(page_first, page_last);
}
|
||||||
|
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer(
|
D3D12_GPU_VIRTUAL_ADDRESS PrimitiveConverter::GetStaticIndexBuffer(
|
||||||
PrimitiveType source_type, uint32_t index_count,
|
PrimitiveType source_type, uint32_t index_count,
|
||||||
uint32_t& index_count_out) const {
|
uint32_t& index_count_out) const {
|
||||||
|
|
|
@ -14,11 +14,11 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "xenia/gpu/d3d12/shared_memory.h"
|
|
||||||
#include "xenia/gpu/register_file.h"
|
#include "xenia/gpu/register_file.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/memory.h"
|
#include "xenia/memory.h"
|
||||||
#include "xenia/ui/d3d12/d3d12_context.h"
|
#include "xenia/ui/d3d12/d3d12_context.h"
|
||||||
|
#include "xenia/ui/d3d12/pools.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
@ -35,8 +35,7 @@ class D3D12CommandProcessor;
|
||||||
class PrimitiveConverter {
|
class PrimitiveConverter {
|
||||||
public:
|
public:
|
||||||
PrimitiveConverter(D3D12CommandProcessor* command_processor,
|
PrimitiveConverter(D3D12CommandProcessor* command_processor,
|
||||||
RegisterFile* register_file, Memory* memory,
|
RegisterFile* register_file, Memory* memory);
|
||||||
SharedMemory* shared_memory);
|
|
||||||
~PrimitiveConverter();
|
~PrimitiveConverter();
|
||||||
|
|
||||||
bool Initialize();
|
bool Initialize();
|
||||||
|
@ -78,8 +77,6 @@ class PrimitiveConverter {
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS GetStaticIndexBuffer(
|
D3D12_GPU_VIRTUAL_ADDRESS GetStaticIndexBuffer(
|
||||||
PrimitiveType source_type, uint32_t index_count,
|
PrimitiveType source_type, uint32_t index_count,
|
||||||
uint32_t& index_count_out) const;
|
uint32_t& index_count_out) const;
|
||||||
// TODO(Triang3l): A function that returns a static index buffer for
|
|
||||||
// non-indexed drawing of unsupported primitives
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// simd_offset is source address & 15 - if SIMD is used, the source and the
|
// simd_offset is source address & 15 - if SIMD is used, the source and the
|
||||||
|
@ -89,10 +86,14 @@ class PrimitiveConverter {
|
||||||
uint32_t simd_offset,
|
uint32_t simd_offset,
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out);
|
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out);
|
||||||
|
|
||||||
|
// Callback for invalidating buffers mid-frame.
|
||||||
|
void MemoryWriteCallback(uint32_t page_first, uint32_t page_last);
|
||||||
|
static void MemoryWriteCallbackThunk(void* context_ptr, uint32_t page_first,
|
||||||
|
uint32_t page_last);
|
||||||
|
|
||||||
D3D12CommandProcessor* command_processor_;
|
D3D12CommandProcessor* command_processor_;
|
||||||
RegisterFile* register_file_;
|
RegisterFile* register_file_;
|
||||||
Memory* memory_;
|
Memory* memory_;
|
||||||
SharedMemory* shared_memory_;
|
|
||||||
|
|
||||||
std::unique_ptr<ui::d3d12::UploadBufferPool> buffer_pool_ = nullptr;
|
std::unique_ptr<ui::d3d12::UploadBufferPool> buffer_pool_ = nullptr;
|
||||||
|
|
||||||
|
@ -113,17 +114,56 @@ class PrimitiveConverter {
|
||||||
static constexpr uint32_t kStaticIBTotalCount =
|
static constexpr uint32_t kStaticIBTotalCount =
|
||||||
kStaticIBTriangleFanOffset + kStaticIBTriangleFanCount;
|
kStaticIBTriangleFanOffset + kStaticIBTriangleFanCount;
|
||||||
|
|
||||||
struct ConvertedIndices {
|
// Not identifying the index buffer uniquely - reset index must also be
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
|
// checked if reset is enabled.
|
||||||
PrimitiveType primitive_type;
|
union ConvertedIndicesKey {
|
||||||
uint32_t index_count;
|
uint64_t value;
|
||||||
IndexFormat index_format;
|
struct {
|
||||||
// Index pre-swapped - in guest storage endian.
|
uint32_t address; // 32
|
||||||
uint32_t reset_index;
|
PrimitiveType source_type : 6; // 38
|
||||||
bool reset;
|
IndexFormat format : 1; // 39
|
||||||
|
uint32_t count : 16; // 55
|
||||||
|
uint32_t reset : 1; // 56;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Clearing the unused bits.
|
||||||
|
ConvertedIndicesKey() : value(0) {}
|
||||||
|
ConvertedIndicesKey(const ConvertedIndicesKey& key) : value(key.value) {}
|
||||||
|
ConvertedIndicesKey& operator=(const ConvertedIndicesKey& key) {
|
||||||
|
value = key.value;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
bool operator==(const ConvertedIndicesKey& key) const {
|
||||||
|
return value == key.value;
|
||||||
|
}
|
||||||
|
bool operator!=(const ConvertedIndicesKey& key) const {
|
||||||
|
return value != key.value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ConvertedIndices {
|
||||||
|
ConvertedIndicesKey key;
|
||||||
|
// If reset is enabled, this also must be checked to find cached indices.
|
||||||
|
uint32_t reset_index;
|
||||||
|
|
||||||
|
// Zero GPU address if conversion not needed or the resulting index buffer
|
||||||
|
// is empty.
|
||||||
|
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
|
||||||
|
// When conversion is not needed, this must be equal to the original index
|
||||||
|
// count.
|
||||||
|
uint32_t converted_index_count;
|
||||||
|
};
|
||||||
|
|
||||||
// Cache for a single frame.
|
// Cache for a single frame.
|
||||||
std::unordered_multimap<uint32_t, ConvertedIndices> converted_indices_;
|
std::unordered_multimap<uint64_t, ConvertedIndices> converted_indices_cache_;
|
||||||
|
|
||||||
|
// Very coarse cache invalidation - if something is modified in an 8 MB portion
|
||||||
|
// of the physical memory and converted indices are also there, invalidate all
|
||||||
|
// the cache.
|
||||||
|
uint64_t memory_regions_used_;
|
||||||
|
std::atomic<uint64_t> memory_regions_invalidated_ = 0;
|
||||||
|
void* physical_write_watch_handle_ = nullptr;
|
||||||
|
uint32_t system_page_size_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
|
|
|
@ -440,7 +440,7 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask,
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
||||||
if (texture_invalidated_.exchange(false, std::memory_order_relaxed)) {
|
if (texture_invalidated_.exchange(false, std::memory_order_acquire)) {
|
||||||
// Clear the bindings not only for this draw call, but entirely, because
|
// Clear the bindings not only for this draw call, but entirely, because
|
||||||
// loading may be needed in some draw call later, which may have the same
|
// loading may be needed in some draw call later, which may have the same
|
||||||
// key for some binding as before the invalidation, but texture_invalidated_
|
// key for some binding as before the invalidation, but texture_invalidated_
|
||||||
|
@ -1297,7 +1297,7 @@ void TextureCache::WatchCallback(Texture* texture, bool is_mip) {
|
||||||
texture->base_in_sync = false;
|
texture->base_in_sync = false;
|
||||||
texture->base_watch_handle = nullptr;
|
texture->base_watch_handle = nullptr;
|
||||||
}
|
}
|
||||||
texture_invalidated_.store(true, std::memory_order_relaxed);
|
texture_invalidated_.store(true, std::memory_order_release);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCache::ClearBindings() {
|
void TextureCache::ClearBindings() {
|
||||||
|
|
|
@ -384,8 +384,7 @@ class TextureCache {
|
||||||
|
|
||||||
// Whether a texture has been invalidated (a watch has been triggered), so
|
// Whether a texture has been invalidated (a watch has been triggered), so
|
||||||
// need to try to reload textures, disregarding whether fetch constants have
|
// need to try to reload textures, disregarding whether fetch constants have
|
||||||
// been changed. A simple notification (texture validity is protected by a
|
// been changed.
|
||||||
// mutex), so memory_order_relaxed is enough.
|
|
||||||
std::atomic<bool> texture_invalidated_ = false;
|
std::atomic<bool> texture_invalidated_ = false;
|
||||||
|
|
||||||
// Unsupported texture formats used during this frame (for research and
|
// Unsupported texture formats used during this frame (for research and
|
||||||
|
|
Loading…
Reference in New Issue