[D3D12] Request memory for memexport in shared memory
This commit is contained in:
parent
0aeff797e5
commit
6025599d3b
|
@ -1155,12 +1155,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
|
|
||||||
if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
|
if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
|
||||||
// Doesn't actually draw.
|
// Doesn't actually draw.
|
||||||
return true;
|
// TODO(Triang3l): Do something so memexport still works in this case maybe?
|
||||||
}
|
// Unlikely that zero would even really be legal though.
|
||||||
if ((regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
|
|
||||||
primitive_type != PrimitiveType::kPointList &&
|
|
||||||
primitive_type != PrimitiveType::kRectangleList) {
|
|
||||||
// Both sides are culled - can't reproduce this with rasterizer state.
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1179,17 +1175,36 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
// Need a pixel shader in normal color mode.
|
// Need a pixel shader in normal color mode.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Translate shaders now because to get the color mask, which is needed by the
|
// Translate the shaders now to get memexport configuration and color mask,
|
||||||
// render target cache.
|
// which is needed by the render target cache, and also to get used textures
|
||||||
|
// and samplers.
|
||||||
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader,
|
if (!pipeline_cache_->EnsureShadersTranslated(vertex_shader, pixel_shader,
|
||||||
primitive_type)) {
|
primitive_type)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if memexport is used. If it is, we can't skip draw calls that have no
|
||||||
|
// visual effect.
|
||||||
|
bool memexport_used_vertex =
|
||||||
|
!vertex_shader->memexport_stream_constants().empty();
|
||||||
|
bool memexport_used_pixel =
|
||||||
|
pixel_shader != nullptr &&
|
||||||
|
!pixel_shader->memexport_stream_constants().empty();
|
||||||
|
bool memexport_used = memexport_used_vertex || memexport_used_pixel;
|
||||||
|
|
||||||
|
if (!memexport_used_vertex &&
|
||||||
|
(regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3 &&
|
||||||
|
primitive_type != PrimitiveType::kPointList &&
|
||||||
|
primitive_type != PrimitiveType::kRectangleList) {
|
||||||
|
// Both sides are culled - can't reproduce this with rasterizer state.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t color_mask = GetCurrentColorMask(pixel_shader);
|
uint32_t color_mask = GetCurrentColorMask(pixel_shader);
|
||||||
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32;
|
||||||
uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
|
uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32;
|
||||||
if (!color_mask && ((rb_depthcontrol & (0x2 | 0x4)) != (0x2 | 0x4)) &&
|
if (!memexport_used && !color_mask &&
|
||||||
|
((rb_depthcontrol & (0x2 | 0x4)) != (0x2 | 0x4)) &&
|
||||||
(!(rb_depthcontrol & 0x1) || !(rb_stencilrefmask & (0xFF << 16)))) {
|
(!(rb_depthcontrol & 0x1) || !(rb_stencilrefmask & (0xFF << 16)))) {
|
||||||
// Not writing to color, depth or stencil, so doesn't draw.
|
// Not writing to color, depth or stencil, so doesn't draw.
|
||||||
return true;
|
return true;
|
||||||
|
@ -1201,11 +1216,14 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
// Set up the render targets - this may bind pipelines.
|
// Set up the render targets - this may bind pipelines.
|
||||||
if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
|
if (!render_target_cache_->UpdateRenderTargets(pixel_shader)) {
|
||||||
// Doesn't actually draw.
|
// Doesn't actually draw.
|
||||||
|
// TODO(Triang3l): Do something so memexport still works in this case maybe?
|
||||||
|
// Not distingushing between no operation and a true failure.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
|
const RenderTargetCache::PipelineRenderTarget* pipeline_render_targets =
|
||||||
render_target_cache_->GetCurrentPipelineRenderTargets();
|
render_target_cache_->GetCurrentPipelineRenderTargets();
|
||||||
|
|
||||||
|
// Set up primitive topology.
|
||||||
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
|
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
|
||||||
// Adaptive tessellation requires an index buffer, but it contains per-edge
|
// Adaptive tessellation requires an index buffer, but it contains per-edge
|
||||||
// tessellation factors (as floats) in it instead of control point indices.
|
// tessellation factors (as floats) in it instead of control point indices.
|
||||||
|
@ -1250,14 +1268,11 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
} else {
|
} else {
|
||||||
adaptive_tessellation = false;
|
adaptive_tessellation = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): Non-indexed line loops (by movc'ing zero to the vertex
|
// TODO(Triang3l): Non-indexed line loops (by movc'ing zero to the vertex
|
||||||
// index if it's one beyond the end).
|
// index if it's one beyond the end).
|
||||||
if (primitive_type == PrimitiveType::kLineLoop && !indexed) {
|
if (primitive_type == PrimitiveType::kLineLoop && !indexed) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the primitive topology.
|
|
||||||
PrimitiveType primitive_type_converted =
|
PrimitiveType primitive_type_converted =
|
||||||
PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
|
PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
|
||||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||||
|
@ -1295,7 +1310,12 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
command_list->IASetPrimitiveTopology(primitive_topology);
|
command_list->IASetPrimitiveTopology(primitive_topology);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the pipeline and translate the shaders so used textures are known.
|
// Update the textures - this may bind pipelines.
|
||||||
|
texture_cache_->RequestTextures(
|
||||||
|
vertex_shader->GetUsedTextureMask(),
|
||||||
|
pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
||||||
|
|
||||||
|
// Create the pipeline if needed and bind it.
|
||||||
ID3D12PipelineState* pipeline;
|
ID3D12PipelineState* pipeline;
|
||||||
ID3D12RootSignature* root_signature;
|
ID3D12RootSignature* root_signature;
|
||||||
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
||||||
|
@ -1305,21 +1325,14 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the textures - this may bind pipelines.
|
|
||||||
texture_cache_->RequestTextures(
|
|
||||||
vertex_shader->GetUsedTextureMask(),
|
|
||||||
pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0);
|
|
||||||
|
|
||||||
// Update viewport, scissor, blend factor and stencil reference.
|
|
||||||
UpdateFixedFunctionState(command_list);
|
|
||||||
|
|
||||||
// Bind the pipeline.
|
|
||||||
if (current_pipeline_ != pipeline) {
|
if (current_pipeline_ != pipeline) {
|
||||||
GetCurrentCommandList()->SetPipelineState(pipeline);
|
GetCurrentCommandList()->SetPipelineState(pipeline);
|
||||||
current_pipeline_ = pipeline;
|
current_pipeline_ = pipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update viewport, scissor, blend factor and stencil reference.
|
||||||
|
UpdateFixedFunctionState(command_list);
|
||||||
|
|
||||||
// Update system constants before uploading them.
|
// Update system constants before uploading them.
|
||||||
UpdateSystemConstantValues(
|
UpdateSystemConstantValues(
|
||||||
primitive_type,
|
primitive_type,
|
||||||
|
@ -1350,16 +1363,139 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
regs[vfetch_constant_index + 1].u32);
|
regs[vfetch_constant_index + 1].u32);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
shared_memory_->RequestRange(
|
if (!shared_memory_->RequestRange(
|
||||||
|
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||||
|
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC)) {
|
||||||
|
XELOGE(
|
||||||
|
"Failed to request vertex buffer at 0x%.8X (size %u) in the shared "
|
||||||
|
"memory",
|
||||||
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
regs[vfetch_constant_index].u32 & 0x1FFFFFFC,
|
||||||
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
|
regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gather memexport ranges and ensure the heaps for them are resident, and
|
||||||
|
// also load the data surrounding the export and to fill the regions that
|
||||||
|
// won't be modified by the shaders.
|
||||||
|
struct MemExportRange {
|
||||||
|
uint32_t base_address_dwords;
|
||||||
|
uint32_t size_dwords;
|
||||||
|
};
|
||||||
|
MemExportRange memexport_ranges[512];
|
||||||
|
uint32_t memexport_range_count = 0;
|
||||||
|
if (memexport_used_vertex) {
|
||||||
|
const std::vector<uint32_t>& memexport_stream_constants_vertex =
|
||||||
|
vertex_shader->memexport_stream_constants();
|
||||||
|
for (uint32_t constant_index : memexport_stream_constants_vertex) {
|
||||||
|
const xenos::xe_gpu_memexport_stream_t* memexport_stream =
|
||||||
|
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>(
|
||||||
|
®s[XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4]);
|
||||||
|
if (memexport_stream->index_count == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uint32_t memexport_format_size =
|
||||||
|
GetSupportedMemExportFormatSize(memexport_stream->format);
|
||||||
|
if (memexport_format_size == 0) {
|
||||||
|
XELOGE(
|
||||||
|
"Unsupported memexport format %s",
|
||||||
|
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format)))
|
||||||
|
->name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
uint32_t memexport_base_address = memexport_stream->base_address;
|
||||||
|
uint32_t memexport_size_dwords =
|
||||||
|
memexport_stream->index_count * memexport_format_size;
|
||||||
|
// Try to reduce the number of shared memory operations when writing
|
||||||
|
// different elements into the same buffer through different exports
|
||||||
|
// (happens in Halo 3).
|
||||||
|
bool memexport_range_reused = false;
|
||||||
|
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||||
|
MemExportRange& memexport_range = memexport_ranges[i];
|
||||||
|
if (memexport_range.base_address_dwords == memexport_base_address) {
|
||||||
|
memexport_range.size_dwords =
|
||||||
|
std::max(memexport_range.size_dwords, memexport_size_dwords);
|
||||||
|
memexport_range_reused = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Add a new range if haven't expanded an existing one.
|
||||||
|
if (!memexport_range_reused) {
|
||||||
|
MemExportRange& memexport_range =
|
||||||
|
memexport_ranges[memexport_range_count++];
|
||||||
|
memexport_range.base_address_dwords = memexport_base_address;
|
||||||
|
memexport_range.size_dwords = memexport_size_dwords;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (memexport_used_pixel) {
|
||||||
|
const std::vector<uint32_t>& memexport_stream_constants_pixel =
|
||||||
|
pixel_shader->memexport_stream_constants();
|
||||||
|
for (uint32_t constant_index : memexport_stream_constants_pixel) {
|
||||||
|
const xenos::xe_gpu_memexport_stream_t* memexport_stream =
|
||||||
|
reinterpret_cast<const xenos::xe_gpu_memexport_stream_t*>(
|
||||||
|
®s[XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4]);
|
||||||
|
if (memexport_stream->index_count == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uint32_t memexport_format_size =
|
||||||
|
GetSupportedMemExportFormatSize(memexport_stream->format);
|
||||||
|
if (memexport_format_size == 0) {
|
||||||
|
XELOGE(
|
||||||
|
"Unsupported memexport format %s",
|
||||||
|
FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format)))
|
||||||
|
->name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
uint32_t memexport_base_address = memexport_stream->base_address;
|
||||||
|
uint32_t memexport_size_dwords =
|
||||||
|
memexport_stream->index_count * memexport_format_size;
|
||||||
|
bool memexport_range_reused = false;
|
||||||
|
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||||
|
MemExportRange& memexport_range = memexport_ranges[i];
|
||||||
|
if (memexport_range.base_address_dwords == memexport_base_address) {
|
||||||
|
memexport_range.size_dwords =
|
||||||
|
std::max(memexport_range.size_dwords, memexport_size_dwords);
|
||||||
|
memexport_range_reused = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!memexport_range_reused) {
|
||||||
|
MemExportRange& memexport_range =
|
||||||
|
memexport_ranges[memexport_range_count++];
|
||||||
|
memexport_range.base_address_dwords = memexport_base_address;
|
||||||
|
memexport_range.size_dwords = memexport_size_dwords;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (uint32_t i = 0; i < memexport_range_count; ++i) {
|
||||||
|
const MemExportRange& memexport_range = memexport_ranges[i];
|
||||||
|
if (!shared_memory_->RequestRange(memexport_range.base_address_dwords << 2,
|
||||||
|
memexport_range.size_dwords << 2)) {
|
||||||
|
XELOGE(
|
||||||
|
"Failed to request memexport stream at 0x%.8X (size %u) in the "
|
||||||
|
"shared memory",
|
||||||
|
memexport_range.base_address_dwords << 2,
|
||||||
|
memexport_range.size_dwords << 2);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (IsROVUsedForEDRAM()) {
|
if (IsROVUsedForEDRAM()) {
|
||||||
render_target_cache_->UseEDRAMAsUAV();
|
render_target_cache_->UseEDRAMAsUAV();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(Triang3l): Copy the index buffer to a scratch buffer if using
|
||||||
|
// memexport with an index buffer, because a resource can't be an index buffer
|
||||||
|
// (read-only) and a UAV (read/write) at once.
|
||||||
|
|
||||||
|
// Actually draw.
|
||||||
if (indexed) {
|
if (indexed) {
|
||||||
|
if (memexport_used) {
|
||||||
|
// TODO(Triang3l): Index buffer copying for memexport.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
|
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
|
||||||
? sizeof(uint32_t)
|
? sizeof(uint32_t)
|
||||||
: sizeof(uint16_t);
|
: sizeof(uint16_t);
|
||||||
|
@ -1388,7 +1524,13 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
index_count = converted_index_count;
|
index_count = converted_index_count;
|
||||||
} else {
|
} else {
|
||||||
uint32_t index_buffer_size = index_buffer_info->count * index_size;
|
uint32_t index_buffer_size = index_buffer_info->count * index_size;
|
||||||
shared_memory_->RequestRange(index_base, index_buffer_size);
|
if (!shared_memory_->RequestRange(index_base, index_buffer_size)) {
|
||||||
|
XELOGE(
|
||||||
|
"Failed to request index buffer at 0x%.8X (size %u) in the shared "
|
||||||
|
"memory",
|
||||||
|
index_base, index_buffer_size);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
index_buffer_view.BufferLocation =
|
index_buffer_view.BufferLocation =
|
||||||
shared_memory_->GetGPUAddress() + index_base;
|
shared_memory_->GetGPUAddress() + index_base;
|
||||||
index_buffer_view.SizeInBytes = index_buffer_size;
|
index_buffer_view.SizeInBytes = index_buffer_size;
|
||||||
|
@ -1422,6 +1564,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO(Triang3l): Read back memexported data if the respective gflag is set.
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2855,6 +2999,36 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize(
|
||||||
|
ColorFormat format) {
|
||||||
|
switch (format) {
|
||||||
|
case ColorFormat::k_8_8_8_8:
|
||||||
|
case ColorFormat::k_2_10_10_10:
|
||||||
|
// TODO(Triang3l): Investigate how k_8_8_8_8_A works - not supported in the
|
||||||
|
// texture cache currently.
|
||||||
|
// case ColorFormat::k_8_8_8_8_A:
|
||||||
|
case ColorFormat::k_10_11_11:
|
||||||
|
case ColorFormat::k_11_11_10:
|
||||||
|
case ColorFormat::k_16_16:
|
||||||
|
case ColorFormat::k_16_16_FLOAT:
|
||||||
|
case ColorFormat::k_32_FLOAT:
|
||||||
|
case ColorFormat::k_8_8_8_8_AS_16_16_16_16:
|
||||||
|
case ColorFormat::k_2_10_10_10_AS_16_16_16_16:
|
||||||
|
case ColorFormat::k_10_11_11_AS_16_16_16_16:
|
||||||
|
case ColorFormat::k_11_11_10_AS_16_16_16_16:
|
||||||
|
return 1;
|
||||||
|
case ColorFormat::k_16_16_16_16:
|
||||||
|
case ColorFormat::k_16_16_16_16_FLOAT:
|
||||||
|
case ColorFormat::k_32_32_FLOAT:
|
||||||
|
return 2;
|
||||||
|
case ColorFormat::k_32_32_32_32_FLOAT:
|
||||||
|
return 4;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace d3d12
|
} // namespace d3d12
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
|
@ -209,6 +209,13 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
const D3D12Shader* pixel_shader,
|
const D3D12Shader* pixel_shader,
|
||||||
ID3D12RootSignature* root_signature);
|
ID3D12RootSignature* root_signature);
|
||||||
|
|
||||||
|
// Returns dword count for one element for a memexport format, or 0 if it's
|
||||||
|
// not supported by the D3D12 command processor (if it's smaller that 1 dword,
|
||||||
|
// for instance).
|
||||||
|
// TODO(Triang3l): Check if any game uses memexport with formats smaller than
|
||||||
|
// 32 bits per element.
|
||||||
|
static uint32_t GetSupportedMemExportFormatSize(ColorFormat format);
|
||||||
|
|
||||||
bool cache_clear_requested_ = false;
|
bool cache_clear_requested_ = false;
|
||||||
|
|
||||||
std::unique_ptr<ui::d3d12::CommandList>
|
std::unique_ptr<ui::d3d12::CommandList>
|
||||||
|
|
Loading…
Reference in New Issue