[D3D12] System constant setting and some cleanup
This commit is contained in:
parent
2183a969af
commit
fb1051b610
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
|
@ -188,7 +189,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
||||||
if (pixel_textures > 0 && vertex_textures > 0) {
|
if (pixel_textures > 0 && vertex_textures > 0) {
|
||||||
assert_true(vertex_samplers > 0);
|
assert_true(vertex_samplers > 0);
|
||||||
|
|
||||||
desc.NumParameters = UINT(kRootParameter_Count_TwoStageTextures);
|
desc.NumParameters = kRootParameter_Count_TwoStageTextures;
|
||||||
|
|
||||||
// Vertex textures.
|
// Vertex textures.
|
||||||
{
|
{
|
||||||
|
@ -479,6 +480,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
|
||||||
|
|
||||||
// Shaders will have already been defined by previous loads.
|
// Shaders will have already been defined by previous loads.
|
||||||
// We need them to do just about anything so validate here.
|
// We need them to do just about anything so validate here.
|
||||||
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
|
auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
|
||||||
|
@ -504,19 +507,25 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
ID3D12RootSignature* root_signature;
|
ID3D12RootSignature* root_signature;
|
||||||
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
auto pipeline_status = pipeline_cache_->ConfigurePipeline(
|
||||||
vertex_shader, pixel_shader, primitive_type,
|
vertex_shader, pixel_shader, primitive_type,
|
||||||
index_buffer_info != nullptr ? index_buffer_info->format
|
indexed ? index_buffer_info->format : IndexFormat::kInt16, &pipeline,
|
||||||
: IndexFormat::kInt16,
|
&root_signature);
|
||||||
&pipeline, &root_signature);
|
|
||||||
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
if (pipeline_status == PipelineCache::UpdateStatus::kError) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update viewport, scissor, blend factor and stencil reference.
|
||||||
|
UpdateFixedFunctionState(command_list);
|
||||||
|
|
||||||
// Bind the pipeline.
|
// Bind the pipeline.
|
||||||
if (current_pipeline_ != pipeline) {
|
if (current_pipeline_ != pipeline) {
|
||||||
current_pipeline_ = pipeline;
|
current_pipeline_ = pipeline;
|
||||||
command_list->SetPipelineState(pipeline);
|
command_list->SetPipelineState(pipeline);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update system constants before uploading them.
|
||||||
|
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
|
||||||
|
: Endian::kUnspecified);
|
||||||
|
|
||||||
// Update constant buffers, descriptors and root parameters.
|
// Update constant buffers, descriptors and root parameters.
|
||||||
if (!UpdateBindings(command_list, vertex_shader, pixel_shader,
|
if (!UpdateBindings(command_list, vertex_shader, pixel_shader,
|
||||||
root_signature)) {
|
root_signature)) {
|
||||||
|
@ -524,7 +533,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shared memory test.
|
// Shared memory test.
|
||||||
if (index_buffer_info != nullptr && index_buffer_info->guest_base != 0) {
|
if (indexed) {
|
||||||
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
|
uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
|
||||||
? sizeof(uint32_t)
|
? sizeof(uint32_t)
|
||||||
: sizeof(uint16_t);
|
: sizeof(uint16_t);
|
||||||
|
@ -546,6 +555,12 @@ bool D3D12CommandProcessor::BeginFrame() {
|
||||||
context->BeginSwap();
|
context->BeginSwap();
|
||||||
current_queue_frame_ = context->GetCurrentQueueFrame();
|
current_queue_frame_ = context->GetCurrentQueueFrame();
|
||||||
|
|
||||||
|
// Reset fixed-function state.
|
||||||
|
ff_viewport_update_needed_ = true;
|
||||||
|
ff_scissor_update_needed_ = true;
|
||||||
|
ff_blend_factor_update_needed_ = true;
|
||||||
|
ff_stencil_ref_update_needed_ = true;
|
||||||
|
|
||||||
// Reset bindings, particularly because the buffers backing them are recycled.
|
// Reset bindings, particularly because the buffers backing them are recycled.
|
||||||
current_pipeline_ = nullptr;
|
current_pipeline_ = nullptr;
|
||||||
current_graphics_root_signature_ = nullptr;
|
current_graphics_root_signature_ = nullptr;
|
||||||
|
@ -594,6 +609,265 @@ bool D3D12CommandProcessor::EndFrame() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void D3D12CommandProcessor::UpdateFixedFunctionState(
|
||||||
|
ID3D12GraphicsCommandList* command_list) {
|
||||||
|
auto& regs = *register_file_;
|
||||||
|
|
||||||
|
// Window parameters.
|
||||||
|
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
|
||||||
|
// See r200UpdateWindow:
|
||||||
|
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
|
||||||
|
uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
|
||||||
|
int16_t window_offset_x = pa_sc_window_offset & 0x7FFF;
|
||||||
|
int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF;
|
||||||
|
if (window_offset_x & 0x4000) {
|
||||||
|
window_offset_x |= 0x8000;
|
||||||
|
}
|
||||||
|
if (window_offset_y & 0x4000) {
|
||||||
|
window_offset_y |= 0x8000;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Supersampling replacing multisampling due to difficulties of emulating
|
||||||
|
// EDRAM with multisampling.
|
||||||
|
MsaaSamples msaa_samples =
|
||||||
|
MsaaSamples((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3);
|
||||||
|
uint32_t ssaa_scale_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1;
|
||||||
|
uint32_t ssaa_scale_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1;
|
||||||
|
|
||||||
|
// Viewport.
|
||||||
|
// PA_CL_VTE_CNTL contains whether offsets and scales are enabled.
|
||||||
|
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||||
|
// In games, either all are enabled (for regular drawing) or none are (for
|
||||||
|
// rectangle lists usually).
|
||||||
|
//
|
||||||
|
// If scale/offset is enabled, the Xenos shader is writing (neglecting W
|
||||||
|
// division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1)
|
||||||
|
// box. If it's not, the position is in screen space. Since we can only use
|
||||||
|
// the NDC in PC APIs, we use a viewport of the largest possible size, and
|
||||||
|
// divide the position by it in translated shaders.
|
||||||
|
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||||
|
float viewport_scale_x = (pa_cl_vte_cntl & (1 << 0))
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32
|
||||||
|
: 1280.0f;
|
||||||
|
float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2))
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
|
||||||
|
: 1280.0f;
|
||||||
|
// TODO(Triang3l): Investigate how unnormalized coordinates should work when
|
||||||
|
// using a D24FS8 depth buffer. A 20e4 buffer can store values up to
|
||||||
|
// 511.99985, however, in the depth buffer, something like 1/z is stored, and
|
||||||
|
// if the shader writes 1/511.99985, it probably won't become 1 in the depth
|
||||||
|
// buffer. Unnormalized coordinates are mostly used when clearing both depth
|
||||||
|
// and color to 0 though.
|
||||||
|
float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4))
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
|
||||||
|
: 1.0f;
|
||||||
|
float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1))
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
|
||||||
|
: viewport_scale_x;
|
||||||
|
float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3))
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
|
||||||
|
: viewport_scale_y;
|
||||||
|
float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5))
|
||||||
|
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
|
||||||
|
: 0.0f;
|
||||||
|
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) {
|
||||||
|
viewport_offset_x += float(window_offset_x);
|
||||||
|
viewport_offset_y += float(window_offset_y);
|
||||||
|
}
|
||||||
|
D3D12_VIEWPORT viewport;
|
||||||
|
viewport.TopLeftX =
|
||||||
|
(viewport_offset_x - viewport_scale_x) * float(ssaa_scale_x);
|
||||||
|
viewport.TopLeftY =
|
||||||
|
(viewport_offset_y - viewport_scale_y) * float(ssaa_scale_y);
|
||||||
|
viewport.Width = viewport_scale_x * 2.0f * float(ssaa_scale_x);
|
||||||
|
viewport.Height = viewport_scale_y * 2.0f * float(ssaa_scale_y);
|
||||||
|
viewport.MinDepth = viewport_offset_z;
|
||||||
|
viewport.MaxDepth = viewport_offset_z + viewport_scale_z;
|
||||||
|
ff_viewport_update_needed_ |= ff_viewport_.TopLeftX != viewport.TopLeftX;
|
||||||
|
ff_viewport_update_needed_ |= ff_viewport_.TopLeftY != viewport.TopLeftY;
|
||||||
|
ff_viewport_update_needed_ |= ff_viewport_.Width != viewport.Width;
|
||||||
|
ff_viewport_update_needed_ |= ff_viewport_.Height != viewport.Height;
|
||||||
|
ff_viewport_update_needed_ |= ff_viewport_.MinDepth != viewport.MinDepth;
|
||||||
|
ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth;
|
||||||
|
if (ff_viewport_update_needed_) {
|
||||||
|
ff_viewport_ = viewport;
|
||||||
|
command_list->RSSetViewports(1, &viewport);
|
||||||
|
ff_viewport_update_needed_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scissor.
|
||||||
|
uint32_t pa_sc_window_scissor_tl =
|
||||||
|
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
|
||||||
|
uint32_t pa_sc_window_scissor_br =
|
||||||
|
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
|
||||||
|
D3D12_RECT scissor;
|
||||||
|
scissor.left = pa_sc_window_scissor_tl & 0x7FFF;
|
||||||
|
scissor.top = (pa_sc_window_scissor_tl >> 16) & 0x7FFF;
|
||||||
|
scissor.right = pa_sc_window_scissor_br & 0x7FFF;
|
||||||
|
scissor.bottom = (pa_sc_window_scissor_br >> 16) & 0x7FFF;
|
||||||
|
if (!(pa_sc_window_scissor_tl & (1u << 31))) {
|
||||||
|
// !WINDOW_OFFSET_DISABLE.
|
||||||
|
scissor.left = std::max(scissor.left + window_offset_x, LONG(0));
|
||||||
|
scissor.top = std::max(scissor.top + window_offset_y, LONG(0));
|
||||||
|
scissor.right = std::max(scissor.right + window_offset_x, LONG(0));
|
||||||
|
scissor.bottom = std::max(scissor.bottom + window_offset_y, LONG(0));
|
||||||
|
}
|
||||||
|
scissor.left *= ssaa_scale_x;
|
||||||
|
scissor.top *= ssaa_scale_y;
|
||||||
|
scissor.right *= ssaa_scale_x;
|
||||||
|
scissor.bottom *= ssaa_scale_y;
|
||||||
|
ff_scissor_update_needed_ |= ff_scissor_.left != scissor.left;
|
||||||
|
ff_scissor_update_needed_ |= ff_scissor_.top != scissor.top;
|
||||||
|
ff_scissor_update_needed_ |= ff_scissor_.right != scissor.right;
|
||||||
|
ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor.bottom;
|
||||||
|
if (ff_scissor_update_needed_) {
|
||||||
|
ff_scissor_ = scissor;
|
||||||
|
command_list->RSSetScissorRects(1, &scissor);
|
||||||
|
ff_scissor_update_needed_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Blend factor.
|
||||||
|
ff_blend_factor_update_needed_ |=
|
||||||
|
ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
||||||
|
ff_blend_factor_update_needed_ |=
|
||||||
|
ff_blend_factor_[1] != regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
||||||
|
ff_blend_factor_update_needed_ |=
|
||||||
|
ff_blend_factor_[2] != regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
||||||
|
ff_blend_factor_update_needed_ |=
|
||||||
|
ff_blend_factor_[3] != regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
||||||
|
if (ff_blend_factor_update_needed_) {
|
||||||
|
ff_blend_factor_[0] = regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
||||||
|
ff_blend_factor_[1] = regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
||||||
|
ff_blend_factor_[2] = regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
||||||
|
ff_blend_factor_[3] = regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
||||||
|
command_list->OMSetBlendFactor(ff_blend_factor_);
|
||||||
|
ff_blend_factor_update_needed_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stencil reference value.
|
||||||
|
uint32_t stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK].u32 & 0xFF;
|
||||||
|
ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref;
|
||||||
|
if (ff_stencil_ref_update_needed_) {
|
||||||
|
ff_stencil_ref_ = stencil_ref;
|
||||||
|
command_list->OMSetStencilRef(stencil_ref);
|
||||||
|
ff_stencil_ref_update_needed_ = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) {
|
||||||
|
auto& regs = *register_file_;
|
||||||
|
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||||
|
uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
|
||||||
|
uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32;
|
||||||
|
uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
|
||||||
|
uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32;
|
||||||
|
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||||
|
|
||||||
|
bool dirty = false;
|
||||||
|
|
||||||
|
// Index buffer endianness.
|
||||||
|
dirty |= system_constants_.vertex_index_endian != uint32_t(index_endian);
|
||||||
|
system_constants_.vertex_index_endian = uint32_t(index_endian);
|
||||||
|
|
||||||
|
// W0 division control.
|
||||||
|
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
||||||
|
// VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
|
||||||
|
// = false: multiply the X, Y coordinates by 1/W0.
|
||||||
|
// VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
|
||||||
|
// = false: multiply the Z coordinate by 1/W0.
|
||||||
|
// VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
|
||||||
|
// get 1/W0.
|
||||||
|
float vtx_xy_fmt = (pa_cl_vte_cntl & (1 << 8)) ? 1.0f : 0.0f;
|
||||||
|
float vtx_z_fmt = (pa_cl_vte_cntl & (1 << 9)) ? 1.0f : 0.0f;
|
||||||
|
float vtx_w0_fmt = (pa_cl_vte_cntl & (1 << 10)) ? 1.0f : 0.0f;
|
||||||
|
dirty |= system_constants_.mul_rcp_w[0] != vtx_xy_fmt;
|
||||||
|
dirty |= system_constants_.mul_rcp_w[1] != vtx_z_fmt;
|
||||||
|
dirty |= system_constants_.mul_rcp_w[2] != vtx_w0_fmt;
|
||||||
|
system_constants_.mul_rcp_w[0] = vtx_xy_fmt;
|
||||||
|
system_constants_.mul_rcp_w[1] = vtx_z_fmt;
|
||||||
|
system_constants_.mul_rcp_w[2] = vtx_w0_fmt;
|
||||||
|
|
||||||
|
// Conversion to Direct3D 12 normalized device coordinates.
|
||||||
|
// See viewport configuration in UpdateFixedFunctionState for explanations.
|
||||||
|
// X and Y scale/offset is to convert unnormalized coordinates generated by
|
||||||
|
// shaders (for rectangle list drawing, for instance) to the 2560x2560
|
||||||
|
// viewport that is used to emulate unnormalized coordinates.
|
||||||
|
// Z scale/offset is to convert from OpenGL NDC to Direct3D NDC if needed.
|
||||||
|
bool gl_clip_space_def =
|
||||||
|
!(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4));
|
||||||
|
float ndc_scale_x = (pa_cl_vte_cntl & (1 << 0)) ? 1.0f / 1280.0f : 1.0f;
|
||||||
|
float ndc_scale_y = (pa_cl_vte_cntl & (1 << 2)) ? 1.0f / 1280.0f : 1.0f;
|
||||||
|
float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f;
|
||||||
|
float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? -1.0f : 0.0f;
|
||||||
|
float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? -1.0f : 0.0f;
|
||||||
|
float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f;
|
||||||
|
dirty |= system_constants_.ndc_scale[0] != ndc_scale_x;
|
||||||
|
dirty |= system_constants_.ndc_scale[1] != ndc_scale_y;
|
||||||
|
dirty |= system_constants_.ndc_scale[2] != ndc_scale_z;
|
||||||
|
dirty |= system_constants_.ndc_offset[0] != ndc_offset_x;
|
||||||
|
dirty |= system_constants_.ndc_offset[1] != ndc_offset_y;
|
||||||
|
dirty |= system_constants_.ndc_offset[2] != ndc_offset_z;
|
||||||
|
system_constants_.ndc_scale[0] = ndc_scale_x;
|
||||||
|
system_constants_.ndc_scale[1] = ndc_scale_y;
|
||||||
|
system_constants_.ndc_scale[2] = ndc_scale_z;
|
||||||
|
system_constants_.ndc_offset[0] = ndc_offset_x;
|
||||||
|
system_constants_.ndc_offset[1] = ndc_offset_y;
|
||||||
|
system_constants_.ndc_offset[2] = ndc_offset_z;
|
||||||
|
|
||||||
|
// Half-pixel offset for vertex and pixel coordinates.
|
||||||
|
// TODO(Triang3l): Check if pixel coordinates need to offset depending on a
|
||||||
|
// different register.
|
||||||
|
float vertex_half_pixel_offset[2], pixel_half_pixel_offset;
|
||||||
|
if (pa_su_vtx_cntl & (1 << 0)) {
|
||||||
|
if (pa_cl_vte_cntl & (1 << 0)) {
|
||||||
|
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32;
|
||||||
|
vertex_half_pixel_offset[0] =
|
||||||
|
viewport_scale_x != 0.0f ? -0.5f / viewport_scale_x : 0.0f;
|
||||||
|
} else {
|
||||||
|
vertex_half_pixel_offset[0] = -1.0f / 2560.0f;
|
||||||
|
}
|
||||||
|
if (pa_cl_vte_cntl & (1 << 2)) {
|
||||||
|
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
|
||||||
|
vertex_half_pixel_offset[1] =
|
||||||
|
viewport_scale_y != 0.0f ? -0.5f / viewport_scale_y : 0.0f;
|
||||||
|
} else {
|
||||||
|
vertex_half_pixel_offset[1] = -1.0f / 2560.0f;
|
||||||
|
}
|
||||||
|
pixel_half_pixel_offset = -0.5f;
|
||||||
|
} else {
|
||||||
|
vertex_half_pixel_offset[0] = 0.0f;
|
||||||
|
vertex_half_pixel_offset[1] = 0.0f;
|
||||||
|
pixel_half_pixel_offset = 0.0f;
|
||||||
|
}
|
||||||
|
dirty |= system_constants_.vertex_half_pixel_offset[0] !=
|
||||||
|
vertex_half_pixel_offset[0];
|
||||||
|
dirty |= system_constants_.vertex_half_pixel_offset[1] !=
|
||||||
|
vertex_half_pixel_offset[1];
|
||||||
|
dirty |= system_constants_.pixel_half_pixel_offset != pixel_half_pixel_offset;
|
||||||
|
system_constants_.vertex_half_pixel_offset[0] = vertex_half_pixel_offset[0];
|
||||||
|
system_constants_.vertex_half_pixel_offset[1] = vertex_half_pixel_offset[1];
|
||||||
|
system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset;
|
||||||
|
|
||||||
|
// Pixel position register.
|
||||||
|
uint32_t pixel_pos_reg =
|
||||||
|
(sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX;
|
||||||
|
dirty |= system_constants_.pixel_pos_reg != pixel_pos_reg;
|
||||||
|
system_constants_.pixel_pos_reg = pixel_pos_reg;
|
||||||
|
|
||||||
|
// Supersampling anti-aliasing pixel scale inverse for pixel positions.
|
||||||
|
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
||||||
|
float ssaa_inv_scale_x = msaa_samples >= MsaaSamples::k4X ? 0.5f : 1.0f;
|
||||||
|
float ssaa_inv_scale_y = msaa_samples >= MsaaSamples::k2X ? 0.5f : 1.0f;
|
||||||
|
dirty |= system_constants_.ssaa_inv_scale[0] != ssaa_inv_scale_x;
|
||||||
|
dirty |= system_constants_.ssaa_inv_scale[1] != ssaa_inv_scale_y;
|
||||||
|
system_constants_.ssaa_inv_scale[0] = ssaa_inv_scale_x;
|
||||||
|
system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y;
|
||||||
|
|
||||||
|
// TODO(Triang3l): Whether textures are 3D or stacked.
|
||||||
|
|
||||||
|
cbuffer_bindings_system_.up_to_date &= dirty;
|
||||||
|
}
|
||||||
|
|
||||||
bool D3D12CommandProcessor::UpdateBindings(
|
bool D3D12CommandProcessor::UpdateBindings(
|
||||||
ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader,
|
ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader,
|
||||||
const D3D12Shader* pixel_shader, ID3D12RootSignature* root_signature) {
|
const D3D12Shader* pixel_shader, ID3D12RootSignature* root_signature) {
|
||||||
|
@ -617,32 +891,27 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
|
|
||||||
// Update constant buffers.
|
// Update constant buffers.
|
||||||
// TODO(Triang3l): Update the system constant buffer - will crash without it.
|
// TODO(Triang3l): Update the system constant buffer - will crash without it.
|
||||||
ID3D12Resource* constant_buffer;
|
|
||||||
uint32_t constant_buffer_offset;
|
|
||||||
if (!cbuffer_bindings_system_.up_to_date) {
|
if (!cbuffer_bindings_system_.up_to_date) {
|
||||||
uint8_t* system_constants = constant_buffer_pool_->RequestFull(
|
uint8_t* system_constants = constant_buffer_pool_->RequestFull(
|
||||||
xe::align(uint32_t(sizeof(cbuffer_system_)), 256u), constant_buffer,
|
xe::align(uint32_t(sizeof(system_constants_)), 256u), nullptr, nullptr,
|
||||||
constant_buffer_offset);
|
&cbuffer_bindings_system_.buffer_address);
|
||||||
if (system_constants == nullptr) {
|
if (system_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::memcpy(system_constants, &cbuffer_system_, sizeof(cbuffer_system_));
|
std::memcpy(system_constants, &system_constants_,
|
||||||
cbuffer_bindings_system_.buffer_address =
|
sizeof(system_constants_));
|
||||||
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset;
|
|
||||||
cbuffer_bindings_system_.up_to_date = true;
|
cbuffer_bindings_system_.up_to_date = true;
|
||||||
write_common_constant_views = true;
|
write_common_constant_views = true;
|
||||||
}
|
}
|
||||||
if (!cbuffer_bindings_bool_loop_.up_to_date) {
|
if (!cbuffer_bindings_bool_loop_.up_to_date) {
|
||||||
uint8_t* bool_loop_constants = constant_buffer_pool_->RequestFull(
|
uint8_t* bool_loop_constants = constant_buffer_pool_->RequestFull(
|
||||||
256, constant_buffer, constant_buffer_offset);
|
256, nullptr, nullptr, &cbuffer_bindings_bool_loop_.buffer_address);
|
||||||
if (bool_loop_constants == nullptr) {
|
if (bool_loop_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::memcpy(bool_loop_constants,
|
std::memcpy(bool_loop_constants,
|
||||||
®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
|
®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
|
||||||
40 * sizeof(uint32_t));
|
40 * sizeof(uint32_t));
|
||||||
cbuffer_bindings_bool_loop_.buffer_address =
|
|
||||||
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset;
|
|
||||||
cbuffer_bindings_bool_loop_.up_to_date = true;
|
cbuffer_bindings_bool_loop_.up_to_date = true;
|
||||||
write_common_constant_views = true;
|
write_common_constant_views = true;
|
||||||
}
|
}
|
||||||
|
@ -652,15 +921,13 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
|
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
|
||||||
512, constant_buffer, constant_buffer_offset);
|
512, nullptr, nullptr, &float_binding.buffer_address);
|
||||||
if (float_constants == nullptr) {
|
if (float_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::memcpy(float_constants,
|
std::memcpy(float_constants,
|
||||||
®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 7)].f32,
|
®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 7)].f32,
|
||||||
32 * 4 * sizeof(uint32_t));
|
32 * 4 * sizeof(uint32_t));
|
||||||
float_binding.buffer_address =
|
|
||||||
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset;
|
|
||||||
float_binding.up_to_date = true;
|
float_binding.up_to_date = true;
|
||||||
if (i < 8) {
|
if (i < 8) {
|
||||||
write_vertex_float_constant_views = true;
|
write_vertex_float_constant_views = true;
|
||||||
|
@ -670,15 +937,13 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
}
|
}
|
||||||
if (!cbuffer_bindings_fetch_.up_to_date) {
|
if (!cbuffer_bindings_fetch_.up_to_date) {
|
||||||
uint8_t* fetch_constants = constant_buffer_pool_->RequestFull(
|
uint8_t* fetch_constants = constant_buffer_pool_->RequestFull(
|
||||||
768, constant_buffer, constant_buffer_offset);
|
768, nullptr, nullptr, &cbuffer_bindings_fetch_.buffer_address);
|
||||||
if (fetch_constants == nullptr) {
|
if (fetch_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::memcpy(fetch_constants,
|
std::memcpy(fetch_constants,
|
||||||
®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
|
®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
|
||||||
32 * 6 * sizeof(uint32_t));
|
32 * 6 * sizeof(uint32_t));
|
||||||
cbuffer_bindings_fetch_.buffer_address =
|
|
||||||
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset;
|
|
||||||
cbuffer_bindings_fetch_.up_to_date = true;
|
cbuffer_bindings_fetch_.up_to_date = true;
|
||||||
write_fetch_constant_view = true;
|
write_fetch_constant_view = true;
|
||||||
}
|
}
|
||||||
|
@ -733,7 +998,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
constant_buffer_desc.BufferLocation =
|
constant_buffer_desc.BufferLocation =
|
||||||
cbuffer_bindings_system_.buffer_address;
|
cbuffer_bindings_system_.buffer_address;
|
||||||
constant_buffer_desc.SizeInBytes =
|
constant_buffer_desc.SizeInBytes =
|
||||||
xe::align(uint32_t(sizeof(cbuffer_system_)), 256u);
|
xe::align(uint32_t(sizeof(system_constants_)), 256u);
|
||||||
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
|
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
|
||||||
view_cpu_handle.ptr += view_handle_size;
|
view_cpu_handle.ptr += view_handle_size;
|
||||||
view_gpu_handle.ptr += view_handle_size;
|
view_gpu_handle.ptr += view_handle_size;
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
||||||
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
||||||
#include "xenia/gpu/d3d12/shared_memory.h"
|
#include "xenia/gpu/d3d12/shared_memory.h"
|
||||||
|
#include "xenia/gpu/hlsl_shader_translator.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/kernel/kernel_state.h"
|
#include "xenia/kernel/kernel_state.h"
|
||||||
#include "xenia/ui/d3d12/command_list.h"
|
#include "xenia/ui/d3d12/command_list.h"
|
||||||
|
@ -125,6 +126,8 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
// Returns true if an open frame was ended.
|
// Returns true if an open frame was ended.
|
||||||
bool EndFrame();
|
bool EndFrame();
|
||||||
|
|
||||||
|
void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list);
|
||||||
|
void UpdateSystemConstantValues(Endian index_endian);
|
||||||
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
|
bool UpdateBindings(ID3D12GraphicsCommandList* command_list,
|
||||||
const D3D12Shader* vertex_shader,
|
const D3D12Shader* vertex_shader,
|
||||||
const D3D12Shader* pixel_shader,
|
const D3D12Shader* pixel_shader,
|
||||||
|
@ -150,6 +153,16 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
uint32_t current_queue_frame_ = UINT32_MAX;
|
uint32_t current_queue_frame_ = UINT32_MAX;
|
||||||
|
|
||||||
|
// The current fixed-function drawing state.
|
||||||
|
D3D12_VIEWPORT ff_viewport_;
|
||||||
|
D3D12_RECT ff_scissor_;
|
||||||
|
float ff_blend_factor_[4];
|
||||||
|
uint32_t ff_stencil_ref_;
|
||||||
|
bool ff_viewport_update_needed_;
|
||||||
|
bool ff_scissor_update_needed_;
|
||||||
|
bool ff_blend_factor_update_needed_;
|
||||||
|
bool ff_stencil_ref_update_needed_;
|
||||||
|
|
||||||
// Currently bound graphics or compute pipeline.
|
// Currently bound graphics or compute pipeline.
|
||||||
ID3D12PipelineState* current_pipeline_;
|
ID3D12PipelineState* current_pipeline_;
|
||||||
// Currently bound graphics root signature.
|
// Currently bound graphics root signature.
|
||||||
|
@ -163,12 +176,7 @@ class D3D12CommandProcessor : public CommandProcessor {
|
||||||
ID3D12DescriptorHeap* current_sampler_heap_;
|
ID3D12DescriptorHeap* current_sampler_heap_;
|
||||||
|
|
||||||
// System shader constants.
|
// System shader constants.
|
||||||
struct SystemConstants {
|
HlslShaderTranslator::SystemConstants system_constants_;
|
||||||
float viewport_inv_scale_x;
|
|
||||||
float viewport_inv_scale_y;
|
|
||||||
uint32_t vertex_index_endian;
|
|
||||||
uint32_t textures_are_3d;
|
|
||||||
} cbuffer_system_;
|
|
||||||
|
|
||||||
// Constant buffer bindings.
|
// Constant buffer bindings.
|
||||||
struct ConstantBufferBinding {
|
struct ConstantBufferBinding {
|
||||||
|
|
|
@ -165,8 +165,8 @@ bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup,
|
||||||
ID3D12Resource* upload_buffer;
|
ID3D12Resource* upload_buffer;
|
||||||
uint32_t upload_buffer_offset, upload_buffer_size;
|
uint32_t upload_buffer_offset, upload_buffer_size;
|
||||||
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
|
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
|
||||||
upload_range_length << page_size_log2_, upload_buffer,
|
upload_range_length << page_size_log2_, &upload_buffer,
|
||||||
upload_buffer_offset, upload_buffer_size);
|
&upload_buffer_offset, &upload_buffer_size, nullptr);
|
||||||
if (upload_buffer_mapping == nullptr) {
|
if (upload_buffer_mapping == nullptr) {
|
||||||
XELOGE("Shared memory: Failed to get an upload buffer");
|
XELOGE("Shared memory: Failed to get an upload buffer");
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -834,7 +834,7 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction(
|
||||||
}
|
}
|
||||||
EmitSourceDepth("xe_vertex_element%s = XeByteSwap(xe_shared_memory.Load%s(\n",
|
EmitSourceDepth("xe_vertex_element%s = XeByteSwap(xe_shared_memory.Load%s(\n",
|
||||||
load_swizzle, load_function_suffix);
|
load_swizzle, load_function_suffix);
|
||||||
EmitSourceDepth(" (xe_vertex_fetch[%uu].x & 0x1FFFFFFCu)",
|
EmitSourceDepth(" ((xe_vertex_fetch[%uu].x << 2u) & 0x1FFFFFFCu)",
|
||||||
instr.operands[1].storage_index);
|
instr.operands[1].storage_index);
|
||||||
if (instr.attributes.stride != 0) {
|
if (instr.attributes.stride != 0) {
|
||||||
EmitSource(" + uint(xe_src0.x) * %uu", instr.attributes.stride * 4);
|
EmitSource(" + uint(xe_src0.x) * %uu", instr.attributes.stride * 4);
|
||||||
|
|
|
@ -24,6 +24,23 @@ class HlslShaderTranslator : public ShaderTranslator {
|
||||||
HlslShaderTranslator();
|
HlslShaderTranslator();
|
||||||
~HlslShaderTranslator() override;
|
~HlslShaderTranslator() override;
|
||||||
|
|
||||||
|
struct SystemConstants {
|
||||||
|
// vec4 0
|
||||||
|
float mul_rcp_w[3];
|
||||||
|
uint32_t vertex_index_endian;
|
||||||
|
// vec4 1
|
||||||
|
float ndc_scale[3];
|
||||||
|
uint32_t textures_are_3d;
|
||||||
|
// vec4 2
|
||||||
|
float ndc_offset[3];
|
||||||
|
float pixel_half_pixel_offset;
|
||||||
|
// vec4 3
|
||||||
|
float vertex_half_pixel_offset[2];
|
||||||
|
uint32_t pixel_pos_reg;
|
||||||
|
// vec4 4
|
||||||
|
float ssaa_inv_scale[2];
|
||||||
|
};
|
||||||
|
|
||||||
enum class SRVType : uint32_t {
|
enum class SRVType : uint32_t {
|
||||||
// 1D, 2D or stacked texture bound as a 2D array texture.
|
// 1D, 2D or stacked texture bound as a 2D array texture.
|
||||||
Texture2DArray,
|
Texture2DArray,
|
||||||
|
|
|
@ -65,9 +65,9 @@ void UploadBufferPool::ClearCache() {
|
||||||
sent_last_ = nullptr;
|
sent_last_ = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t* UploadBufferPool::RequestFull(uint32_t size,
|
uint8_t* UploadBufferPool::RequestFull(
|
||||||
ID3D12Resource*& buffer_out,
|
uint32_t size, ID3D12Resource** buffer_out, uint32_t* offset_out,
|
||||||
uint32_t& offset_out) {
|
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) {
|
||||||
assert_true(size <= page_size_);
|
assert_true(size <= page_size_);
|
||||||
if (size > page_size_) {
|
if (size > page_size_) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -78,17 +78,26 @@ uint8_t* UploadBufferPool::RequestFull(uint32_t size,
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
buffer_out = unsent_->buffer;
|
if (buffer_out != nullptr) {
|
||||||
offset_out = current_size_;
|
*buffer_out = unsent_->buffer;
|
||||||
|
}
|
||||||
|
if (offset_out != nullptr) {
|
||||||
|
*offset_out = current_size_;
|
||||||
|
}
|
||||||
|
if (gpu_address_out != nullptr) {
|
||||||
|
if (current_gpu_address_ == 0) {
|
||||||
|
current_gpu_address_ = unsent_->buffer->GetGPUVirtualAddress();
|
||||||
|
}
|
||||||
|
*gpu_address_out = current_gpu_address_ = current_size_;
|
||||||
|
}
|
||||||
uint8_t* mapping = current_mapping_ + current_size_;
|
uint8_t* mapping = current_mapping_ + current_size_;
|
||||||
current_size_ += size;
|
current_size_ += size;
|
||||||
return mapping;
|
return mapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t* UploadBufferPool::RequestPartial(uint32_t size,
|
uint8_t* UploadBufferPool::RequestPartial(
|
||||||
ID3D12Resource*& buffer_out,
|
uint32_t size, ID3D12Resource** buffer_out, uint32_t* offset_out,
|
||||||
uint32_t& offset_out,
|
uint32_t* size_out, D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) {
|
||||||
uint32_t& size_out) {
|
|
||||||
if (current_size_ == page_size_ || current_mapping_ == nullptr) {
|
if (current_size_ == page_size_ || current_mapping_ == nullptr) {
|
||||||
// Start a new page if can't fit any bytes or don't have an open page.
|
// Start a new page if can't fit any bytes or don't have an open page.
|
||||||
if (!BeginNextPage()) {
|
if (!BeginNextPage()) {
|
||||||
|
@ -96,9 +105,21 @@ uint8_t* UploadBufferPool::RequestPartial(uint32_t size,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
size = std::min(size, page_size_ - current_size_);
|
size = std::min(size, page_size_ - current_size_);
|
||||||
buffer_out = unsent_->buffer;
|
if (buffer_out != nullptr) {
|
||||||
offset_out = current_size_;
|
*buffer_out = unsent_->buffer;
|
||||||
size_out = size;
|
}
|
||||||
|
if (offset_out != nullptr) {
|
||||||
|
*offset_out = current_size_;
|
||||||
|
}
|
||||||
|
if (size_out != nullptr) {
|
||||||
|
*size_out = size;
|
||||||
|
}
|
||||||
|
if (gpu_address_out != nullptr) {
|
||||||
|
if (current_gpu_address_ == 0) {
|
||||||
|
current_gpu_address_ = unsent_->buffer->GetGPUVirtualAddress();
|
||||||
|
}
|
||||||
|
*gpu_address_out = current_gpu_address_ = current_size_;
|
||||||
|
}
|
||||||
uint8_t* mapping = current_mapping_ + current_size_;
|
uint8_t* mapping = current_mapping_ + current_size_;
|
||||||
current_size_ += size;
|
current_size_ += size;
|
||||||
return mapping;
|
return mapping;
|
||||||
|
@ -174,6 +195,7 @@ bool UploadBufferPool::BeginNextPage() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
current_mapping_ = reinterpret_cast<uint8_t*>(mapping);
|
current_mapping_ = reinterpret_cast<uint8_t*>(mapping);
|
||||||
|
current_gpu_address_ = 0;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,11 +28,13 @@ class UploadBufferPool {
|
||||||
|
|
||||||
// Request to write data in a single piece, creating a new page if the current
|
// Request to write data in a single piece, creating a new page if the current
|
||||||
// one doesn't have enough free space.
|
// one doesn't have enough free space.
|
||||||
uint8_t* RequestFull(uint32_t size, ID3D12Resource*& buffer_out,
|
uint8_t* RequestFull(uint32_t size, ID3D12Resource** buffer_out,
|
||||||
uint32_t& offset_out);
|
uint32_t* offset_out,
|
||||||
|
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out);
|
||||||
// Request to write data in multiple parts, filling the buffer entirely.
|
// Request to write data in multiple parts, filling the buffer entirely.
|
||||||
uint8_t* RequestPartial(uint32_t size, ID3D12Resource*& buffer_out,
|
uint8_t* RequestPartial(uint32_t size, ID3D12Resource** buffer_out,
|
||||||
uint32_t& offset_out, uint32_t& size_out);
|
uint32_t* offset_out, uint32_t* size_out,
|
||||||
|
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
D3D12Context* context_;
|
D3D12Context* context_;
|
||||||
|
@ -55,6 +57,8 @@ class UploadBufferPool {
|
||||||
|
|
||||||
uint32_t current_size_ = 0;
|
uint32_t current_size_ = 0;
|
||||||
uint8_t* current_mapping_ = nullptr;
|
uint8_t* current_mapping_ = nullptr;
|
||||||
|
// Not updated until actually requested.
|
||||||
|
D3D12_GPU_VIRTUAL_ADDRESS current_gpu_address_ = 0;
|
||||||
|
|
||||||
// Reset in the beginning of a frame - don't try and fail to create a new page
|
// Reset in the beginning of a frame - don't try and fail to create a new page
|
||||||
// if failed to create one in the current frame.
|
// if failed to create one in the current frame.
|
||||||
|
|
Loading…
Reference in New Issue