[D3D12] Request memory for memexport in shared memory

This commit is contained in:
Triang3l 2018-12-22 00:57:31 +03:00
parent 0aeff797e5
commit 6025599d3b
2 changed files with 208 additions and 27 deletions

View File

@ -1155,12 +1155,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
// Doesn't actually draw. // Doesn't actually draw.
return true; // TODO(Triang3l): Do something so memexport still works in this case maybe?
} // Unlikely that zero would even really be legal though.
if ((regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
primitive_type != PrimitiveType::kPointList &&
primitive_type != PrimitiveType::kRectangleList) {
// Both sides are culled - can't reproduce this with rasterizer state.
return true; return true;
} }
@ -1179,17 +1175,36 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Need a pixel shader in normal color mode. // Need a pixel shader in normal color mode.
return false; return false;
} }
// Translate shaders now because to get the color mask, which is needed by the // Translate the shaders now to get memexport configuration and color mask,
// render target cache. // which is needed by the render target cache, and also to get used textures
// and samplers.
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader, if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader,
primitive_type)) { primitive_type)) {
return false; return false;
} }
// Check if memexport is used. If it is, we can't skip draw calls that have no
// visual effect.
bool memexport_used_vertex =
!vertex_shader->memexport_stream_constants().empty();
bool memexport_used_pixel =
pixel_shader != nullptr &&
!pixel_shader->memexport_stream_constants().empty();
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
if (!memexport_used_vertex &&
(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
primitive_type != PrimitiveType::kPointList &&
primitive_type != PrimitiveType::kRectangleList) {
// Both sides are culled - can't reproduce this with rasterizer state.
return true;
}
uint32_t color_mask = GetCurrentColorMask(pixel_shader); uint32_t color_mask = GetCurrentColorMask(pixel_shader);
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
if (!color_mask && ((rb_depthcontrol & (0x2 | 0x4)) != (0x2 | 0x4)) && if (!memexport_used && !color_mask &&
((rb_depthcontrol & (0x2 | 0x4)) != (0x2 | 0x4)) &&
(!(rb_depthcontrol & 0x1) || !(rb_stencilrefmask & (0xFF << 16)))) { (!(rb_depthcontrol & 0x1) || !(rb_stencilrefmask & (0xFF << 16)))) {
// Not writing to color, depth or stencil, so doesn't draw. // Not writing to color, depth or stencil, so doesn't draw.
return true; return true;
@ -1201,11 +1216,14 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Set up the render targets - this may bind pipelines. // Set up the render targets - this may bind pipelines.
if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) { if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
// Doesn't actually draw. // Doesn't actually draw.
// TODO(Triang3l): Do something so memexport still works in this case maybe?
// Not distinguishing between no operation and a true failure.
return true; return true;
} }
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets = const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
render_target_cache_->GetCurrentPipelineRenderTargets(); render_target_cache_->GetCurrentPipelineRenderTargets();
// Set up primitive topology.
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base; bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
// Adaptive tessellation requires an index buffer, but it contains per-edge // Adaptive tessellation requires an index buffer, but it contains per-edge
// tessellation factors (as floats) in it instead of control point indices. // tessellation factors (as floats) in it instead of control point indices.
@ -1250,14 +1268,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} else { } else {
adaptive_tessellation = false; adaptive_tessellation = false;
} }
// TODO(Triang3l): Non-indexed line loops (by movc'ing zero to the vertex // TODO(Triang3l): Non-indexed line loops (by movc'ing zero to the vertex
// index if it's one beyond the end). // index if it's one beyond the end).
if (primitive_type == PrimitiveType::kLineLoop && !indexed) { if (primitive_type == PrimitiveType::kLineLoop && !indexed) {
return false; return false;
} }
// Set the primitive topology.
PrimitiveType primitive_type_converted = PrimitiveType primitive_type_converted =
PrimitiveConverter::GetReplacementPrimitiveType(primitive_type); PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
D3D_PRIMITIVE_TOPOLOGY primitive_topology; D3D_PRIMITIVE_TOPOLOGY primitive_topology;
@ -1295,7 +1310,12 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
command_list->IASetPrimitiveTopology(primitive_topology); command_list->IASetPrimitiveTopology(primitive_topology);
} }
// Get the pipeline and translate the shaders so used textures are known. // Update the textures - this may bind pipelines.
texture_cache_->RequestTextures(
vertex_shader->GetUsedTextureMask(),
pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
// Create the pipeline if needed and bind it.
ID3D12PipelineState* pipeline; ID3D12PipelineState* pipeline;
ID3D12RootSignature* root_signature; ID3D12RootSignature* root_signature;
auto pipeline_status = pipeline_cache_->ConfigurePipeline( auto pipeline_status = pipeline_cache_->ConfigurePipeline(
@ -1305,21 +1325,14 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
if (pipeline_status == PipelineCache::UpdateStatus::kError) { if (pipeline_status == PipelineCache::UpdateStatus::kError) {
return false; return false;
} }
// Update the textures - this may bind pipelines.
texture_cache_->RequestTextures(
vertex_shader->GetUsedTextureMask(),
pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
// Update viewport, scissor, blend factor and stencil reference.
UpdateFixedFunctionState(command_list);
// Bind the pipeline.
if (current_pipeline_ != pipeline) { if (current_pipeline_ != pipeline) {
GetCurrentCommandList()->SetPipelineState(pipeline); GetCurrentCommandList()->SetPipelineState(pipeline);
current_pipeline_ = pipeline; current_pipeline_ = pipeline;
} }
// Update viewport, scissor, blend factor and stencil reference.
UpdateFixedFunctionState(command_list);
// Update system constants before uploading them. // Update system constants before uploading them.
UpdateSystemConstantValues( UpdateSystemConstantValues(
primitive_type, primitive_type,
@ -1350,16 +1363,139 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
regs[vfetch_constant_index + 1].u32); regs[vfetch_constant_index + 1].u32);
return false; return false;
} }
shared_memory_->RequestRange( if (!shared_memory_->RequestRange(
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC)) {
XELOGE(
"Failed to request vertex buffer at 0x%.8X (size %u) in the shared "
"memory",
regs[vfetch_constant_index].u32 & 0x1FFFFFFC, regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC); regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
return false;
}
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
} }
// Gather memexport ranges and ensure the heaps for them are resident, and
// also load the data surrounding the export to fill the regions that won't
// be modified by the shaders.
struct MemExportRange {
uint32_t base_address_dwords;
uint32_t size_dwords;
};
MemExportRange memexport_ranges[512];
uint32_t memexport_range_count = 0;
if (memexport_used_vertex) {
const std::vector<uint32_t>& memexport_stream_constants_vertex =
vertex_shader->memexport_stream_constants();
for (uint32_t constant_index : memexport_stream_constants_vertex) {
const xenos::xe_gpu_memexport_stream_t* memexport_stream =
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>(
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4]);
if (memexport_stream->index_count == 0) {
continue;
}
uint32_t memexport_format_size =
GetSupportedMemExportFormatSize(memexport_stream->format);
if (memexport_format_size == 0) {
XELOGE(
"Unsupported memexport format %s",
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format)))
->name);
return false;
}
uint32_t memexport_base_address = memexport_stream->base_address;
uint32_t memexport_size_dwords =
memexport_stream->index_count * memexport_format_size;
// Try to reduce the number of shared memory operations when writing
// different elements into the same buffer through different exports
// (happens in Halo 3).
bool memexport_range_reused = false;
for (uint32_t i = 0; i < memexport_range_count; ++i) {
MemExportRange& memexport_range = memexport_ranges[i];
if (memexport_range.base_address_dwords == memexport_base_address) {
memexport_range.size_dwords =
std::max(memexport_range.size_dwords, memexport_size_dwords);
memexport_range_reused = true;
break;
}
}
// Add a new range if haven't expanded an existing one.
if (!memexport_range_reused) {
MemExportRange& memexport_range =
memexport_ranges[memexport_range_count++];
memexport_range.base_address_dwords = memexport_base_address;
memexport_range.size_dwords = memexport_size_dwords;
}
}
}
if (memexport_used_pixel) {
const std::vector<uint32_t>& memexport_stream_constants_pixel =
pixel_shader->memexport_stream_constants();
for (uint32_t constant_index : memexport_stream_constants_pixel) {
const xenos::xe_gpu_memexport_stream_t* memexport_stream =
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>(
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4]);
if (memexport_stream->index_count == 0) {
continue;
}
uint32_t memexport_format_size =
GetSupportedMemExportFormatSize(memexport_stream->format);
if (memexport_format_size == 0) {
XELOGE(
"Unsupported memexport format %s",
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format)))
->name);
return false;
}
uint32_t memexport_base_address = memexport_stream->base_address;
uint32_t memexport_size_dwords =
memexport_stream->index_count * memexport_format_size;
bool memexport_range_reused = false;
for (uint32_t i = 0; i < memexport_range_count; ++i) {
MemExportRange& memexport_range = memexport_ranges[i];
if (memexport_range.base_address_dwords == memexport_base_address) {
memexport_range.size_dwords =
std::max(memexport_range.size_dwords, memexport_size_dwords);
memexport_range_reused = true;
break;
}
}
if (!memexport_range_reused) {
MemExportRange& memexport_range =
memexport_ranges[memexport_range_count++];
memexport_range.base_address_dwords = memexport_base_address;
memexport_range.size_dwords = memexport_size_dwords;
}
}
}
for (uint32_t i = 0; i < memexport_range_count; ++i) {
const MemExportRange& memexport_range = memexport_ranges[i];
if (!shared_memory_->RequestRange(memexport_range.base_address_dwords << 2,
memexport_range.size_dwords << 2)) {
XELOGE(
"Failed to request memexport stream at 0x%.8X (size %u) in the "
"shared memory",
memexport_range.base_address_dwords << 2,
memexport_range.size_dwords << 2);
return false;
}
}
if (IsROVUsedForEDRAM()) { if (IsROVUsedForEDRAM()) {
render_target_cache_->UseEDRAMAsUAV(); render_target_cache_->UseEDRAMAsUAV();
} }
// TODO(Triang3l): Copy the index buffer to a scratch buffer if using
// memexport with an index buffer, because a resource can't be an index buffer
// (read-only) and a UAV (read/write) at once.
// Actually draw.
if (indexed) { if (indexed) {
if (memexport_used) {
// TODO(Triang3l): Index buffer copying for memexport.
return false;
}
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32 uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
? sizeof(uint32_t) ? sizeof(uint32_t)
: sizeof(uint16_t); : sizeof(uint16_t);
@ -1388,7 +1524,13 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
index_count = converted_index_count; index_count = converted_index_count;
} else { } else {
uint32_t index_buffer_size = index_buffer_info->count * index_size; uint32_t index_buffer_size = index_buffer_info->count * index_size;
shared_memory_->RequestRange(index_base, index_buffer_size); if (!shared_memory_->RequestRange(index_base, index_buffer_size)) {
XELOGE(
"Failed to request index buffer at 0x%.8X (size %u) in the shared "
"memory",
index_base, index_buffer_size);
return false;
}
index_buffer_view.BufferLocation = index_buffer_view.BufferLocation =
shared_memory_->GetGPUAddress() + index_base; shared_memory_->GetGPUAddress() + index_base;
index_buffer_view.SizeInBytes = index_buffer_size; index_buffer_view.SizeInBytes = index_buffer_size;
@ -1422,6 +1564,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
} }
} }
// TODO(Triang3l): Read back memexported data if the respective gflag is set.
return true; return true;
} }
@ -2855,6 +2999,36 @@ bool D3D12CommandProcessor::UpdateBindings(
return true; return true;
} }
// Maps a memexport element format to the number of dwords a single element
// occupies. Any format not listed below is treated as unsupported and yields 0.
uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize(
    ColorFormat format) {
  uint32_t dwords_per_element;
  switch (format) {
    // Four dwords per element.
    case ColorFormat::k_32_32_32_32_FLOAT:
      dwords_per_element = 4;
      break;

    // Two dwords per element.
    case ColorFormat::k_32_32_FLOAT:
    case ColorFormat::k_16_16_16_16_FLOAT:
    case ColorFormat::k_16_16_16_16:
      dwords_per_element = 2;
      break;

    // One dword per element.
    // TODO(Triang3l): Investigate how k_8_8_8_8_A works - not supported in the
    // texture cache currently.
    // case ColorFormat::k_8_8_8_8_A:
    case ColorFormat::k_11_11_10_AS_16_16_16_16:
    case ColorFormat::k_10_11_11_AS_16_16_16_16:
    case ColorFormat::k_2_10_10_10_AS_16_16_16_16:
    case ColorFormat::k_8_8_8_8_AS_16_16_16_16:
    case ColorFormat::k_32_FLOAT:
    case ColorFormat::k_16_16_FLOAT:
    case ColorFormat::k_16_16:
    case ColorFormat::k_11_11_10:
    case ColorFormat::k_10_11_11:
    case ColorFormat::k_2_10_10_10:
    case ColorFormat::k_8_8_8_8:
      dwords_per_element = 1;
      break;

    // Everything else is not supported for memexport here.
    default:
      dwords_per_element = 0;
      break;
  }
  return dwords_per_element;
}
} // namespace d3d12 } // namespace d3d12
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -209,6 +209,13 @@ class D3D12CommandProcessor : public CommandProcessor {
const D3D12Shader* pixel_shader, const D3D12Shader* pixel_shader,
ID3D12RootSignature* root_signature); ID3D12RootSignature* root_signature);
// Returns dword count for one element for a memexport format, or 0 if it's
// not supported by the D3D12 command processor (if it's smaller than 1 dword,
// for instance).
// TODO(Triang3l): Check if any game uses memexport with formats smaller than
// 32 bits per element.
static uint32_t GetSupportedMemExportFormatSize(ColorFormat format);
bool cache_clear_requested_ = false; bool cache_clear_requested_ = false;
std::unique_ptr<ui::d3d12::CommandList> std::unique_ptr<ui::d3d12::CommandList>