[D3D12] Triangle fan conversion (untested, only indexed and without cache)

This commit is contained in:
Triang3l 2018-09-15 14:40:00 +03:00
parent a6304cd9fe
commit d1f185c744
4 changed files with 356 additions and 12 deletions

View File

@ -623,6 +623,10 @@ bool D3D12CommandProcessor::SetupContext() {
return false; return false;
} }
primitive_converter_ = std::make_unique<PrimitiveConverter>(
context, register_file_, memory_, shared_memory_.get());
primitive_converter_->Initialize();
D3D12_HEAP_PROPERTIES swap_texture_heap_properties = {}; D3D12_HEAP_PROPERTIES swap_texture_heap_properties = {};
swap_texture_heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT; swap_texture_heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
D3D12_RESOURCE_DESC swap_texture_desc; D3D12_RESOURCE_DESC swap_texture_desc;
@ -719,6 +723,8 @@ void D3D12CommandProcessor::ShutdownContext() {
view_heap_pool_.reset(); view_heap_pool_.reset();
constant_buffer_pool_.reset(); constant_buffer_pool_.reset();
primitive_converter_.reset();
render_target_cache_.reset(); render_target_cache_.reset();
texture_cache_.reset(); texture_cache_.reset();
@ -826,6 +832,8 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
view_heap_pool_->ClearCache(); view_heap_pool_->ClearCache();
constant_buffer_pool_->ClearCache(); constant_buffer_pool_->ClearCache();
primitive_converter_->ClearCache();
render_target_cache_->ClearCache(); render_target_cache_->ClearCache();
texture_cache_->ClearCache(); texture_cache_->ClearCache();
@ -945,8 +953,10 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
render_target_cache_->GetCurrentPipelineRenderTargets(); render_target_cache_->GetCurrentPipelineRenderTargets();
// Set the primitive topology. // Set the primitive topology.
PrimitiveType primitive_type_converted =
PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
D3D_PRIMITIVE_TOPOLOGY primitive_topology; D3D_PRIMITIVE_TOPOLOGY primitive_topology;
switch (primitive_type) { switch (primitive_type_converted) {
case PrimitiveType::kPointList: case PrimitiveType::kPointList:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
break; break;
@ -978,7 +988,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
ID3D12PipelineState* pipeline; ID3D12PipelineState* pipeline;
ID3D12RootSignature* root_signature; ID3D12RootSignature* root_signature;
auto pipeline_status = pipeline_cache_->ConfigurePipeline( auto pipeline_status = pipeline_cache_->ConfigurePipeline(
vertex_shader, pixel_shader, primitive_type, vertex_shader, pixel_shader, primitive_type_converted,
indexed ? index_buffer_info->format : IndexFormat::kInt16, indexed ? index_buffer_info->format : IndexFormat::kInt16,
pipeline_render_targets, &pipeline, &root_signature); pipeline_render_targets, &pipeline, &root_signature);
if (pipeline_status == PipelineCache::UpdateStatus::kError) { if (pipeline_status == PipelineCache::UpdateStatus::kError) {
@ -1031,27 +1041,47 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC); regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
} }
if (indexed) { if (indexed) {
uint32_t index_base = index_buffer_info->guest_base & 0x1FFFFFFF;
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32 uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
? sizeof(uint32_t) ? sizeof(uint32_t)
: sizeof(uint16_t); : sizeof(uint16_t);
index_base &= ~(index_size - 1); assert_false(index_buffer_info->guest_base & (index_size - 1));
uint32_t index_buffer_size = index_buffer_info->count * index_size; uint32_t index_base = index_buffer_info->guest_base & ~(index_size - 1);
shared_memory_->RequestRange(index_base, index_buffer_size);
shared_memory_->UseForReading();
D3D12_INDEX_BUFFER_VIEW index_buffer_view; D3D12_INDEX_BUFFER_VIEW index_buffer_view;
index_buffer_view.BufferLocation =
shared_memory_->GetGPUAddress() + index_base;
index_buffer_view.SizeInBytes = index_buffer_size;
index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32 index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32
? DXGI_FORMAT_R32_UINT ? DXGI_FORMAT_R32_UINT
: DXGI_FORMAT_R16_UINT; : DXGI_FORMAT_R16_UINT;
uint32_t converted_index_count;
PrimitiveConverter::ConversionResult conversion_result =
primitive_converter_->ConvertPrimitives(
primitive_type, index_buffer_info->guest_base,
index_buffer_info->count, index_buffer_info->format,
index_buffer_info->endianness, index_buffer_view.BufferLocation,
converted_index_count);
if (conversion_result == PrimitiveConverter::ConversionResult::kFailed) {
return false;
}
if (conversion_result ==
PrimitiveConverter::ConversionResult::kPrimitiveEmpty) {
return true;
}
if (conversion_result == PrimitiveConverter::ConversionResult::kConverted) {
index_buffer_view.SizeInBytes = converted_index_count * index_size;
} else {
uint32_t index_buffer_size = index_buffer_info->count * index_size;
shared_memory_->RequestRange(index_base, index_buffer_size);
index_buffer_view.BufferLocation =
shared_memory_->GetGPUAddress() + index_base;
index_buffer_view.SizeInBytes = index_buffer_size;
}
shared_memory_->UseForReading();
command_list->IASetIndexBuffer(&index_buffer_view); command_list->IASetIndexBuffer(&index_buffer_view);
SubmitBarriers(); SubmitBarriers();
command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0); command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0);
} else { } else {
// TODO(Triang3l): Get a static index buffer for unsupported primitive types
// from the primitive converter.
shared_memory_->UseForReading(); shared_memory_->UseForReading();
SubmitBarriers(); SubmitBarriers();
command_list->DrawInstanced(index_count, 1, 0, 0); command_list->DrawInstanced(index_count, 1, 0, 0);
@ -1134,6 +1164,8 @@ bool D3D12CommandProcessor::BeginFrame() {
render_target_cache_->BeginFrame(); render_target_cache_->BeginFrame();
primitive_converter_->BeginFrame(GetCurrentCommandList());
return true; return true;
} }
@ -1144,6 +1176,8 @@ bool D3D12CommandProcessor::EndFrame() {
assert_false(scratch_buffer_used_); assert_false(scratch_buffer_used_);
primitive_converter_->EndFrame();
render_target_cache_->EndFrame(); render_target_cache_->EndFrame();
shared_memory_->EndFrame(); shared_memory_->EndFrame();

View File

@ -17,6 +17,7 @@
#include "xenia/gpu/command_processor.h" #include "xenia/gpu/command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/d3d12/pipeline_cache.h" #include "xenia/gpu/d3d12/pipeline_cache.h"
#include "xenia/gpu/d3d12/primitive_converter.h"
#include "xenia/gpu/d3d12/render_target_cache.h" #include "xenia/gpu/d3d12/render_target_cache.h"
#include "xenia/gpu/d3d12/shared_memory.h" #include "xenia/gpu/d3d12/shared_memory.h"
#include "xenia/gpu/d3d12/texture_cache.h" #include "xenia/gpu/d3d12/texture_cache.h"
@ -197,7 +198,7 @@ class D3D12CommandProcessor : public CommandProcessor {
bool cache_clear_requested_ = false; bool cache_clear_requested_ = false;
std::unique_ptr<ui::d3d12::CommandList> std::unique_ptr<ui::d3d12::CommandList>
command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {}; command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {};
std::unique_ptr<SharedMemory> shared_memory_ = nullptr; std::unique_ptr<SharedMemory> shared_memory_ = nullptr;
@ -210,6 +211,8 @@ class D3D12CommandProcessor : public CommandProcessor {
std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr; std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr;
std::unique_ptr<PrimitiveConverter> primitive_converter_ = nullptr;
std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr; std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr; std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr; std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;

View File

@ -0,0 +1,199 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/d3d12/primitive_converter.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/platform.h"
namespace xe {
namespace gpu {
namespace d3d12 {
PrimitiveConverter::PrimitiveConverter(ui::d3d12::D3D12Context* context,
RegisterFile* register_file,
Memory* memory,
SharedMemory* shared_memory)
: context_(context),
register_file_(register_file),
memory_(memory),
shared_memory_(shared_memory) {}
// Destruction goes through Shutdown so that an explicit Shutdown call and the
// destructor release the same things.
PrimitiveConverter::~PrimitiveConverter() {
  Shutdown();
}
// Creates the upload buffer pool that converted index data is written to.
void PrimitiveConverter::Initialize() {
  // A Xenos draw call can have at most 65535 indices, each up to 4 bytes
  // large, and conversion may produce more indices than the source had (for
  // instance, almost three times as many for triangle strips), so pages of
  // 4 MB are used.
  constexpr int kBufferPoolPageSize = 4 * 1024 * 1024;
  buffer_pool_ = std::make_unique<ui::d3d12::UploadBufferPool>(
      context_, kBufferPoolPageSize);
}
// Destroys the upload buffer pool created by Initialize (if there is one).
void PrimitiveConverter::Shutdown() {
  buffer_pool_ = nullptr;
}
// Forwards the cache clearing request to the upload buffer pool. Initialize
// must have been called before, as the pool is dereferenced unconditionally.
void PrimitiveConverter::ClearCache() {
  buffer_pool_->ClearCache();
}
// Starts a new frame in the upload buffer pool. command_list is currently
// unused - it is here for the static index buffer that is yet to be written.
void PrimitiveConverter::BeginFrame(ID3D12GraphicsCommandList* command_list) {
  // TODO(Triang3l): Create the static index buffer for unindexed triangle fans.
  buffer_pool_->BeginFrame();
}
// Ends the current frame in the upload buffer pool.
void PrimitiveConverter::EndFrame() {
  buffer_pool_->EndFrame();
}
// Maps a guest primitive type to the type the draw will actually be issued
// with: triangle fans become triangle lists, every other type is returned
// unchanged.
PrimitiveType PrimitiveConverter::GetReplacementPrimitiveType(
    PrimitiveType type) {
  switch (type) {
    case PrimitiveType::kTriangleFan:
      return PrimitiveType::kTriangleList;
    default:
      return type;
  }
}
// Converts a guest index buffer of a primitive type that can't be drawn
// directly to the type returned by GetReplacementPrimitiveType, writing the
// new indices to a transient upload buffer.
//
// Currently only triangle fans without primitive reset are converted (to
// triangle lists). Indices are copied exactly as they are stored in guest
// memory - no endian swap is performed here.
//
// Parameters:
// - source_type: guest primitive type of the draw.
// - address: guest physical address of the source index buffer.
// - index_count: number of indices in the source buffer.
// - index_format: whether the indices are 16-bit or 32-bit.
// - index_endianness: guest endianness of the indices, used only to bring the
//   reset index register value to the same byte order as the stored indices.
// - gpu_address_out, index_count_out: location and length of the converted
//   buffer, written only when kConverted is returned.
//
// Returns:
// - kConverted if a new index buffer has been written.
// - kConversionNotNeeded if the original index buffer should be used as is.
// - kPrimitiveEmpty if there are too few indices to form a single primitive.
// - kFailed if the conversion is not implemented for this case or the
//   transient buffer could not be allocated.
PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
    PrimitiveType source_type, uint32_t address, uint32_t index_count,
    IndexFormat index_format, Endian index_endianness,
    D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) {
  auto& regs = *register_file_;
  // Bit 21 of PA_SU_SC_MODE_CNTL is read as the primitive reset enable flag.
  bool reset = (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) != 0;
  // Swap the reset index because we will be comparing unswapped values to it.
  uint32_t reset_index = xenos::GpuSwap(
      regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32, index_endianness);
  // If the specified reset index is the same as the one used by Direct3D 12
  // (0xFFFF or 0xFFFFFFFF - in the pipeline cache, we use the former for
  // 16-bit and the latter for 32-bit indices), we can use the buffer directly.
  uint32_t reset_index_host =
      index_format == IndexFormat::kInt32 ? 0xFFFFFFFFu : 0xFFFFu;

  // Check if need to convert at all.
  if (source_type != PrimitiveType::kTriangleFan) {
    if (!reset || reset_index == reset_index_host) {
      return ConversionResult::kConversionNotNeeded;
    }
    // Only strips are affected by the reset index (this must be a conjunction
    // - a disjunction of two inequalities with different values is always
    // true).
    if (source_type != PrimitiveType::kTriangleStrip &&
        source_type != PrimitiveType::kLineStrip) {
      return ConversionResult::kConversionNotNeeded;
    }
    // TODO(Triang3l): Write conversion for triangle and line strip reset index
    // and for indexed line loops.
    return ConversionResult::kConversionNotNeeded;
  }

  // Exit early for clearly empty draws, without even reading the memory.
  // Only triangle fans get here at the moment, the other types are checked so
  // that nothing is forgotten once their conversion is written.
  if (source_type == PrimitiveType::kTriangleFan ||
      source_type == PrimitiveType::kTriangleStrip) {
    if (index_count < 3) {
      return ConversionResult::kPrimitiveEmpty;
    }
  } else if (source_type == PrimitiveType::kLineStrip ||
             source_type == PrimitiveType::kLineLoop) {
    if (index_count < 2) {
      return ConversionResult::kPrimitiveEmpty;
    }
  }

  // TODO(Triang3l): Find the converted data in the cache.

  // Calculate the number of indices that will be written.
  uint32_t converted_index_count = 0;
  // No conversion written so far uses SIMD, so the target doesn't need to
  // have the same alignment within a 16-byte vector as the source.
  const bool simd = false;
  if (source_type == PrimitiveType::kTriangleFan) {
    // Triangle fans are not supported by Direct3D 12 at all.
    if (reset) {
      // TODO(Triang3l): Triangle fans with primitive reset.
      return ConversionResult::kFailed;
    }
    // Every vertex starting from the third one produces one triangle. No
    // underflow here because index_count is at least 3.
    converted_index_count = 3 * (index_count - 2);
  }

  const void* source = memory_->TranslatePhysical(address);

  // The allocation must be large enough for the converted indices, not for
  // the source ones - a fan is expanded to nearly three times its length.
  D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
  void* target = AllocateIndices(index_format, converted_index_count,
                                 simd ? address & 15 : 0, gpu_address);
  if (target == nullptr) {
    return ConversionResult::kFailed;
  }

  if (source_type == PrimitiveType::kTriangleFan) {
    // https://docs.microsoft.com/en-us/windows/desktop/direct3d9/triangle-fans
    // Ordered as (v1, v2, v0), (v2, v3, v0).
    // Primitive reset can't be enabled here - that case has been rejected
    // before allocating.
    if (index_format == IndexFormat::kInt32) {
      const uint32_t* source_32 = reinterpret_cast<const uint32_t*>(source);
      uint32_t* target_32 = reinterpret_cast<uint32_t*>(target);
      for (uint32_t i = 2; i < index_count; ++i) {
        *(target_32++) = source_32[i - 1];
        *(target_32++) = source_32[i];
        *(target_32++) = source_32[0];
      }
    } else {
      const uint16_t* source_16 = reinterpret_cast<const uint16_t*>(source);
      uint16_t* target_16 = reinterpret_cast<uint16_t*>(target);
      for (uint32_t i = 2; i < index_count; ++i) {
        *(target_16++) = source_16[i - 1];
        *(target_16++) = source_16[i];
        *(target_16++) = source_16[0];
      }
    }
  }

  // TODO(Triang3l): Replace primitive reset index in triangle and line strips.
  // TODO(Triang3l): Line loops.

  gpu_address_out = gpu_address;
  index_count_out = converted_index_count;
  return ConversionResult::kConverted;
}
// Requests space for `count` indices of the specified format from the upload
// buffer pool.
//
// simd_offset (only the lower 4 bits are used) is the offset within a 16-byte
// vector that the returned pointer and GPU address will be advanced by, so
// the target can be aligned the same way as the source when SIMD is used.
//
// Returns the CPU pointer to write the indices to, and stores the
// corresponding GPU address in gpu_address_out. Returns nullptr, leaving
// gpu_address_out untouched, if count is 0 or the pool request fails.
void* PrimitiveConverter::AllocateIndices(
    IndexFormat format, uint32_t count, uint32_t simd_offset,
    D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out) {
  if (!count) {
    return nullptr;
  }

  const bool is_32bit = format == IndexFormat::kInt32;
  const uint32_t index_stride =
      is_32bit ? uint32_t(sizeof(uint32_t)) : uint32_t(sizeof(uint16_t));

  // All index data is padded to 16 bytes because SIMD is used to replace the
  // reset index (and even without that, 4-alignment would be needed to mix
  // 16-bit and 32-bit indices in one buffer page).
  uint32_t allocation_size = xe::align(count * index_stride, uint32_t(16));

  // One more vector is needed if the data has to start in the middle of one to
  // match the alignment of the source.
  const uint32_t vector_offset = simd_offset & 15;
  if (vector_offset) {
    allocation_size += 16;
  }

  D3D12_GPU_VIRTUAL_ADDRESS base_gpu_address;
  uint8_t* base_mapping = buffer_pool_->RequestFull(allocation_size, nullptr,
                                                    nullptr, &base_gpu_address);
  if (!base_mapping) {
    XELOGE("Failed to allocate space for %u converted %u-bit vertex indices",
           count, is_32bit ? 32 : 16);
    return nullptr;
  }

  gpu_address_out = base_gpu_address + vector_offset;
  return base_mapping + vector_offset;
}
} // namespace d3d12
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,108 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2018 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_D3D12_PRIMITIVE_CONVERTER_H_
#define XENIA_GPU_D3D12_PRIMITIVE_CONVERTER_H_
#include <atomic>
#include <memory>
#include <unordered_map>
#include "xenia/gpu/d3d12/shared_memory.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/memory.h"
#include "xenia/ui/d3d12/d3d12_context.h"
namespace xe {
namespace gpu {
namespace d3d12 {
// Index buffer cache for primitive types not natively supported by Direct3D 12:
// - Triangle and line strips with non-0xFFFF/0xFFFFFFFF reset index.
// - Triangle fans.
// - Line loops (only indexed ones - non-indexed are better handled in vertex
// shaders, otherwise a whole index buffer would have to be created for every
// vertex count value).
class PrimitiveConverter {
public:
// Only stores the pointers - they are not owned by the converter. Initialize
// must be called before the converter is used.
PrimitiveConverter(ui::d3d12::D3D12Context* context,
RegisterFile* register_file, Memory* memory,
SharedMemory* shared_memory);
// Calls Shutdown.
~PrimitiveConverter();
// Creates the upload buffer pool for the converted index data.
void Initialize();
// Destroys the upload buffer pool.
void Shutdown();
// Forwards the cache clearing request to the upload buffer pool.
void ClearCache();
// Begin and end a frame in the upload buffer pool. command_list is not used
// by the current implementation.
void BeginFrame(ID3D12GraphicsCommandList* command_list);
void EndFrame();
// Returns the primitive type that the original type will be converted to
// (triangle lists for triangle fans, the original type in other cases).
static PrimitiveType GetReplacementPrimitiveType(PrimitiveType type);
enum class ConversionResult {
// Converted to a transient buffer.
kConverted,
// Conversion not required - use the index buffer in shared memory.
kConversionNotNeeded,
// No errors, but nothing to render.
kPrimitiveEmpty,
// Total failure of the draw call.
kFailed
};
// Converts an index buffer to the primitive type returned by
// GetReplacementPrimitiveType. If conversion has been performed, the returned
// buffer will be in the GENERIC_READ state (it's in an upload heap). The
// outputs (gpu_address_out and index_count_out) are written only when
// kConverted is returned. The restart index is handled internally from the
// register values.
ConversionResult ConvertPrimitives(PrimitiveType source_type,
uint32_t address, uint32_t index_count,
IndexFormat index_format,
Endian index_endianness,
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out,
uint32_t& index_count_out);
// TODO(Triang3l): A function that returns a static index buffer for
// non-indexed drawing of unsupported primitives
private:
// Allocates space for count indices of the specified format in the upload
// buffer pool, returning the CPU pointer (nullptr on failure or if count is
// 0) and writing the GPU address to gpu_address_out.
// simd_offset is source address & 15 - if SIMD is used, the source and the
// target must have the same alignment within one register. 0 is optimal when
// not using SIMD.
void* AllocateIndices(IndexFormat format, uint32_t count,
uint32_t simd_offset,
D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out);
// Not owned - passed to the constructor.
ui::d3d12::D3D12Context* context_;
RegisterFile* register_file_;
Memory* memory_;
SharedMemory* shared_memory_;
// Transient storage for the converted indices, created in Initialize.
std::unique_ptr<ui::d3d12::UploadBufferPool> buffer_pool_ = nullptr;
// Description of one already converted index buffer, for the cache.
struct ConvertedIndices {
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
PrimitiveType primitive_type;
uint32_t index_count;
IndexFormat index_format;
// Index pre-swapped - in guest storage endian.
uint32_t reset_index;
bool reset;
};
// Cache for a single frame.
// NOTE(review): the cache lookup is still a TODO in ConvertPrimitives, so
// this is not filled yet; the key is presumably the guest address of the
// source index buffer - confirm when the cache is implemented.
std::unordered_multimap<uint32_t, ConvertedIndices> converted_indices_;
};
} // namespace d3d12
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_D3D12_PRIMITIVE_CONVERTER_H_