[D3D12] Request memory for memexport in shared memory
This commit is contained in:
parent
0aeff797e5
commit
6025599d3b
|
@ -1155,12 +1155,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
|
||||
if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
|
||||
// Doesn't actually draw.
|
||||
return true;
|
||||
}
|
||||
if ((regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
|
||||
primitive_type != PrimitiveType::kPointList &&
|
||||
primitive_type != PrimitiveType::kRectangleList) {
|
||||
// Both sides are culled - can't reproduce this with rasterizer state.
|
||||
// TODO(Triang3l): Do something so memexport still works in this case maybe?
|
||||
// Unlikely that zero would even really be legal though.
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1179,17 +1175,36 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// Need a pixel shader in normal color mode.
|
||||
return false;
|
||||
}
|
||||
// Translate shaders now because to get the color mask, which is needed by the
|
||||
// render target cache.
|
||||
// Translate the shaders now to get memexport configuration and color mask,
|
||||
// which is needed by the render target cache, and also to get used textures
|
||||
// and samplers.
|
||||
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader,
|
||||
primitive_type)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if memexport is used. If it is, we can't skip draw calls that have no
|
||||
// visual effect.
|
||||
bool memexport_used_vertex =
|
||||
!vertex_shader->memexport_stream_constants().empty();
|
||||
bool memexport_used_pixel =
|
||||
pixel_shader != nullptr &&
|
||||
!pixel_shader->memexport_stream_constants().empty();
|
||||
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
|
||||
|
||||
if (!memexport_used_vertex &&
|
||||
(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
|
||||
primitive_type != PrimitiveType::kPointList &&
|
||||
primitive_type != PrimitiveType::kRectangleList) {
|
||||
// Both sides are culled - can't reproduce this with rasterizer state.
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t color_mask = GetCurrentColorMask(pixel_shader);
|
||||
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||
uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
|
||||
if (!color_mask && ((rb_depthcontrol & (0x2 | 0x4)) != (0x2 | 0x4)) &&
|
||||
if (!memexport_used && !color_mask &&
|
||||
((rb_depthcontrol & (0x2 | 0x4)) != (0x2 | 0x4)) &&
|
||||
(!(rb_depthcontrol & 0x1) || !(rb_stencilrefmask & (0xFF << 16)))) {
|
||||
// Not writing to color, depth or stencil, so doesn't draw.
|
||||
return true;
|
||||
|
@ -1201,11 +1216,14 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// Set up the render targets - this may bind pipelines.
|
||||
if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
|
||||
// Doesn't actually draw.
|
||||
// TODO(Triang3l): Do something so memexport still works in this case maybe?
|
||||
// Not distingushing between no operation and a true failure.
|
||||
return true;
|
||||
}
|
||||
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
|
||||
render_target_cache_->GetCurrentPipelineRenderTargets();
|
||||
|
||||
// Set up primitive topology.
|
||||
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
|
||||
// Adaptive tessellation requires an index buffer, but it contains per-edge
|
||||
// tessellation factors (as floats) in it instead of control point indices.
|
||||
|
@ -1250,14 +1268,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
} else {
|
||||
adaptive_tessellation = false;
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Non-indexed line loops (by movc'ing zero to the vertex
|
||||
// index if it's one beyond the end).
|
||||
if (primitive_type == PrimitiveType::kLineLoop && !indexed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the primitive topology.
|
||||
PrimitiveType primitive_type_converted =
|
||||
PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
|
||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||
|
@ -1295,7 +1310,12 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
command_list->IASetPrimitiveTopology(primitive_topology);
|
||||
}
|
||||
|
||||
// Get the pipeline and translate the shaders so used textures are known.
|
||||
// Update the textures - this may bind pipelines.
|
||||
texture_cache_->RequestTextures(
|
||||
vertex_shader->GetUsedTextureMask(),
|
||||
pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
||||
|
||||
// Create the pipeline if needed and bind it.
|
||||
ID3D12PipelineState* pipeline;
|
||||
ID3D12RootSignature* root_signature;
|
||||
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
||||
|
@ -1305,21 +1325,14 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update the textures - this may bind pipelines.
|
||||
texture_cache_->RequestTextures(
|
||||
vertex_shader->GetUsedTextureMask(),
|
||||
pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
||||
|
||||
// Update viewport, scissor, blend factor and stencil reference.
|
||||
UpdateFixedFunctionState(command_list);
|
||||
|
||||
// Bind the pipeline.
|
||||
if (current_pipeline_ != pipeline) {
|
||||
GetCurrentCommandList()->SetPipelineState(pipeline);
|
||||
current_pipeline_ = pipeline;
|
||||
}
|
||||
|
||||
// Update viewport, scissor, blend factor and stencil reference.
|
||||
UpdateFixedFunctionState(command_list);
|
||||
|
||||
// Update system constants before uploading them.
|
||||
UpdateSystemConstantValues(
|
||||
primitive_type,
|
||||
|
@ -1350,16 +1363,139 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
regs[vfetch_constant_index + 1].u32);
|
||||
return false;
|
||||
}
|
||||
shared_memory_->RequestRange(
|
||||
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
|
||||
if (!shared_memory_->RequestRange(
|
||||
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC)) {
|
||||
XELOGE(
|
||||
"Failed to request vertex buffer at 0x%.8X (size %u) in the shared "
|
||||
"memory",
|
||||
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
|
||||
return false;
|
||||
}
|
||||
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
||||
}
|
||||
|
||||
// Gather memexport ranges and ensure the heaps for them are resident, and
|
||||
// also load the data surrounding the export and to fill the regions that
|
||||
// won't be modified by the shaders.
|
||||
struct MemExportRange {
|
||||
uint32_t base_address_dwords;
|
||||
uint32_t size_dwords;
|
||||
};
|
||||
MemExportRange memexport_ranges[512];
|
||||
uint32_t memexport_range_count = 0;
|
||||
if (memexport_used_vertex) {
|
||||
const std::vector<uint32_t>& memexport_stream_constants_vertex =
|
||||
vertex_shader->memexport_stream_constants();
|
||||
for (uint32_t constant_index : memexport_stream_constants_vertex) {
|
||||
const xenos::xe_gpu_memexport_stream_t* memexport_stream =
|
||||
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>(
|
||||
®s[XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4]);
|
||||
if (memexport_stream->index_count == 0) {
|
||||
continue;
|
||||
}
|
||||
uint32_t memexport_format_size =
|
||||
GetSupportedMemExportFormatSize(memexport_stream->format);
|
||||
if (memexport_format_size == 0) {
|
||||
XELOGE(
|
||||
"Unsupported memexport format %s",
|
||||
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format)))
|
||||
->name);
|
||||
return false;
|
||||
}
|
||||
uint32_t memexport_base_address = memexport_stream->base_address;
|
||||
uint32_t memexport_size_dwords =
|
||||
memexport_stream->index_count * memexport_format_size;
|
||||
// Try to reduce the number of shared memory operations when writing
|
||||
// different elements into the same buffer through different exports
|
||||
// (happens in Halo 3).
|
||||
bool memexport_range_reused = false;
|
||||
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||
MemExportRange& memexport_range = memexport_ranges[i];
|
||||
if (memexport_range.base_address_dwords == memexport_base_address) {
|
||||
memexport_range.size_dwords =
|
||||
std::max(memexport_range.size_dwords, memexport_size_dwords);
|
||||
memexport_range_reused = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Add a new range if haven't expanded an existing one.
|
||||
if (!memexport_range_reused) {
|
||||
MemExportRange& memexport_range =
|
||||
memexport_ranges[memexport_range_count++];
|
||||
memexport_range.base_address_dwords = memexport_base_address;
|
||||
memexport_range.size_dwords = memexport_size_dwords;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (memexport_used_pixel) {
|
||||
const std::vector<uint32_t>& memexport_stream_constants_pixel =
|
||||
pixel_shader->memexport_stream_constants();
|
||||
for (uint32_t constant_index : memexport_stream_constants_pixel) {
|
||||
const xenos::xe_gpu_memexport_stream_t* memexport_stream =
|
||||
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>(
|
||||
®s[XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4]);
|
||||
if (memexport_stream->index_count == 0) {
|
||||
continue;
|
||||
}
|
||||
uint32_t memexport_format_size =
|
||||
GetSupportedMemExportFormatSize(memexport_stream->format);
|
||||
if (memexport_format_size == 0) {
|
||||
XELOGE(
|
||||
"Unsupported memexport format %s",
|
||||
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format)))
|
||||
->name);
|
||||
return false;
|
||||
}
|
||||
uint32_t memexport_base_address = memexport_stream->base_address;
|
||||
uint32_t memexport_size_dwords =
|
||||
memexport_stream->index_count * memexport_format_size;
|
||||
bool memexport_range_reused = false;
|
||||
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||
MemExportRange& memexport_range = memexport_ranges[i];
|
||||
if (memexport_range.base_address_dwords == memexport_base_address) {
|
||||
memexport_range.size_dwords =
|
||||
std::max(memexport_range.size_dwords, memexport_size_dwords);
|
||||
memexport_range_reused = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!memexport_range_reused) {
|
||||
MemExportRange& memexport_range =
|
||||
memexport_ranges[memexport_range_count++];
|
||||
memexport_range.base_address_dwords = memexport_base_address;
|
||||
memexport_range.size_dwords = memexport_size_dwords;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||
const MemExportRange& memexport_range = memexport_ranges[i];
|
||||
if (!shared_memory_->RequestRange(memexport_range.base_address_dwords << 2,
|
||||
memexport_range.size_dwords << 2)) {
|
||||
XELOGE(
|
||||
"Failed to request memexport stream at 0x%.8X (size %u) in the "
|
||||
"shared memory",
|
||||
memexport_range.base_address_dwords << 2,
|
||||
memexport_range.size_dwords << 2);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (IsROVUsedForEDRAM()) {
|
||||
render_target_cache_->UseEDRAMAsUAV();
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Copy the index buffer to a scratch buffer if using
|
||||
// memexport with an index buffer, because a resource can't be an index buffer
|
||||
// (read-only) and a UAV (read/write) at once.
|
||||
|
||||
// Actually draw.
|
||||
if (indexed) {
|
||||
if (memexport_used) {
|
||||
// TODO(Triang3l): Index buffer copying for memexport.
|
||||
return false;
|
||||
}
|
||||
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
|
||||
? sizeof(uint32_t)
|
||||
: sizeof(uint16_t);
|
||||
|
@ -1388,7 +1524,13 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
index_count = converted_index_count;
|
||||
} else {
|
||||
uint32_t index_buffer_size = index_buffer_info->count * index_size;
|
||||
shared_memory_->RequestRange(index_base, index_buffer_size);
|
||||
if (!shared_memory_->RequestRange(index_base, index_buffer_size)) {
|
||||
XELOGE(
|
||||
"Failed to request index buffer at 0x%.8X (size %u) in the shared "
|
||||
"memory",
|
||||
index_base, index_buffer_size);
|
||||
return false;
|
||||
}
|
||||
index_buffer_view.BufferLocation =
|
||||
shared_memory_->GetGPUAddress() + index_base;
|
||||
index_buffer_view.SizeInBytes = index_buffer_size;
|
||||
|
@ -1422,6 +1564,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
}
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Read back memexported data if the respective gflag is set.
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2855,6 +2999,36 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
return true;
|
||||
}
|
||||
|
||||
uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize(
|
||||
ColorFormat format) {
|
||||
switch (format) {
|
||||
case ColorFormat::k_8_8_8_8:
|
||||
case ColorFormat::k_2_10_10_10:
|
||||
// TODO(Triang3l): Investigate how k_8_8_8_8_A works - not supported in the
|
||||
// texture cache currently.
|
||||
// case ColorFormat::k_8_8_8_8_A:
|
||||
case ColorFormat::k_10_11_11:
|
||||
case ColorFormat::k_11_11_10:
|
||||
case ColorFormat::k_16_16:
|
||||
case ColorFormat::k_16_16_FLOAT:
|
||||
case ColorFormat::k_32_FLOAT:
|
||||
case ColorFormat::k_8_8_8_8_AS_16_16_16_16:
|
||||
case ColorFormat::k_2_10_10_10_AS_16_16_16_16:
|
||||
case ColorFormat::k_10_11_11_AS_16_16_16_16:
|
||||
case ColorFormat::k_11_11_10_AS_16_16_16_16:
|
||||
return 1;
|
||||
case ColorFormat::k_16_16_16_16:
|
||||
case ColorFormat::k_16_16_16_16_FLOAT:
|
||||
case ColorFormat::k_32_32_FLOAT:
|
||||
return 2;
|
||||
case ColorFormat::k_32_32_32_32_FLOAT:
|
||||
return 4;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace d3d12
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -209,6 +209,13 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
const D3D12Shader* pixel_shader,
|
||||
ID3D12RootSignature* root_signature);
|
||||
|
||||
// Returns dword count for one element for a memexport format, or 0 if it's
|
||||
// not supported by the D3D12 command processor (if it's smaller that 1 dword,
|
||||
// for instance).
|
||||
// TODO(Triang3l): Check if any game uses memexport with formats smaller than
|
||||
// 32 bits per element.
|
||||
static uint32_t GetSupportedMemExportFormatSize(ColorFormat format);
|
||||
|
||||
bool cache_clear_requested_ = false;
|
||||
|
||||
std::unique_ptr<ui::d3d12::CommandList>
|
||||
|
|
Loading…
Reference in New Issue