diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 84c7d2395..aefe4b9dd 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -9,6 +9,7 @@ #include "xenia/gpu/d3d12/d3d12_command_processor.h" +#include <algorithm> #include <cstring> #include "xenia/base/assert.h" @@ -188,7 +189,7 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( if (pixel_textures > 0 && vertex_textures > 0) { assert_true(vertex_samplers > 0); - desc.NumParameters = UINT(kRootParameter_Count_TwoStageTextures); + desc.NumParameters = kRootParameter_Count_TwoStageTextures; // Vertex textures. { @@ -479,6 +480,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return true; } + bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base; + // Shaders will have already been defined by previous loads. // We need them to do just about anything so validate here. auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader()); @@ -504,19 +507,25 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, ID3D12RootSignature* root_signature; auto pipeline_status = pipeline_cache_->ConfigurePipeline( vertex_shader, pixel_shader, primitive_type, - index_buffer_info != nullptr ? index_buffer_info->format - : IndexFormat::kInt16, - &pipeline, &root_signature); + indexed ? index_buffer_info->format : IndexFormat::kInt16, &pipeline, + &root_signature); if (pipeline_status == PipelineCache::UpdateStatus::kError) { return false; } + // Update viewport, scissor, blend factor and stencil reference. + UpdateFixedFunctionState(command_list); + // Bind the pipeline. if (current_pipeline_ != pipeline) { current_pipeline_ = pipeline; command_list->SetPipelineState(pipeline); } + // Update system constants before uploading them. + UpdateSystemConstantValues(indexed ?
index_buffer_info->endianness + : Endian::kUnspecified); + // Update constant buffers, descriptors and root parameters. if (!UpdateBindings(command_list, vertex_shader, pixel_shader, root_signature)) { @@ -524,7 +533,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, } // Shared memory test. - if (index_buffer_info != nullptr && index_buffer_info->guest_base != 0) { + if (indexed) { uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32 ? sizeof(uint32_t) : sizeof(uint16_t); @@ -546,6 +555,12 @@ bool D3D12CommandProcessor::BeginFrame() { context->BeginSwap(); current_queue_frame_ = context->GetCurrentQueueFrame(); + // Reset fixed-function state. + ff_viewport_update_needed_ = true; + ff_scissor_update_needed_ = true; + ff_blend_factor_update_needed_ = true; + ff_stencil_ref_update_needed_ = true; + // Reset bindings, particularly because the buffers backing them are recycled. current_pipeline_ = nullptr; current_graphics_root_signature_ = nullptr; @@ -594,6 +609,265 @@ bool D3D12CommandProcessor::EndFrame() { return true; } +void D3D12CommandProcessor::UpdateFixedFunctionState( + ID3D12GraphicsCommandList* command_list) { + auto& regs = *register_file_; + + // Window parameters. + // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h + // See r200UpdateWindow: + // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; + int16_t window_offset_x = pa_sc_window_offset & 0x7FFF; + int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF; + if (window_offset_x & 0x4000) { + window_offset_x |= 0x8000; + } + if (window_offset_y & 0x4000) { + window_offset_y |= 0x8000; + } + + // Supersampling replacing multisampling due to difficulties of emulating + // EDRAM with multisampling. 
+ MsaaSamples msaa_samples = + MsaaSamples((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3); + uint32_t ssaa_scale_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; + uint32_t ssaa_scale_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1; + + // Viewport. + // PA_CL_VTE_CNTL contains whether offsets and scales are enabled. + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // In games, either all are enabled (for regular drawing) or none are (for + // rectangle lists usually). + // + // If scale/offset is enabled, the Xenos shader is writing (neglecting W + // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1) + // box. If it's not, the position is in screen space. Since we can only use + // the NDC in PC APIs, we use a viewport of the largest possible size, and + // divide the position by it in translated shaders. + uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + float viewport_scale_x = (pa_cl_vte_cntl & (1 << 0)) + ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 + : 1280.0f; + float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2)) + ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 + : 1280.0f; + // TODO(Triang3l): Investigate how unnormalized coordinates should work when + // using a D24FS8 depth buffer. A 20e4 buffer can store values up to + // 511.99985, however, in the depth buffer, something like 1/z is stored, and + // if the shader writes 1/511.99985, it probably won't become 1 in the depth + // buffer. Unnormalized coordinates are mostly used when clearing both depth + // and color to 0 though. + float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) + ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 + : 1.0f; + float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1)) + ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 + : viewport_scale_x; + float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) + ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 + : viewport_scale_y; + float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5)) + ? 
regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 + : 0.0f; + if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { + viewport_offset_x += float(window_offset_x); + viewport_offset_y += float(window_offset_y); + } + D3D12_VIEWPORT viewport; + viewport.TopLeftX = + (viewport_offset_x - viewport_scale_x) * float(ssaa_scale_x); + viewport.TopLeftY = + (viewport_offset_y - viewport_scale_y) * float(ssaa_scale_y); + viewport.Width = viewport_scale_x * 2.0f * float(ssaa_scale_x); + viewport.Height = viewport_scale_y * 2.0f * float(ssaa_scale_y); + viewport.MinDepth = viewport_offset_z; + viewport.MaxDepth = viewport_offset_z + viewport_scale_z; + ff_viewport_update_needed_ |= ff_viewport_.TopLeftX != viewport.TopLeftX; + ff_viewport_update_needed_ |= ff_viewport_.TopLeftY != viewport.TopLeftY; + ff_viewport_update_needed_ |= ff_viewport_.Width != viewport.Width; + ff_viewport_update_needed_ |= ff_viewport_.Height != viewport.Height; + ff_viewport_update_needed_ |= ff_viewport_.MinDepth != viewport.MinDepth; + ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth; + if (ff_viewport_update_needed_) { + ff_viewport_ = viewport; + command_list->RSSetViewports(1, &viewport); + ff_viewport_update_needed_ = false; + } + + // Scissor. + uint32_t pa_sc_window_scissor_tl = + regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; + uint32_t pa_sc_window_scissor_br = + regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + D3D12_RECT scissor; + scissor.left = pa_sc_window_scissor_tl & 0x7FFF; + scissor.top = (pa_sc_window_scissor_tl >> 16) & 0x7FFF; + scissor.right = pa_sc_window_scissor_br & 0x7FFF; + scissor.bottom = (pa_sc_window_scissor_br >> 16) & 0x7FFF; + if (!(pa_sc_window_scissor_tl & (1u << 31))) { + // !WINDOW_OFFSET_DISABLE. 
+ scissor.left = std::max(scissor.left + window_offset_x, LONG(0)); + scissor.top = std::max(scissor.top + window_offset_y, LONG(0)); + scissor.right = std::max(scissor.right + window_offset_x, LONG(0)); + scissor.bottom = std::max(scissor.bottom + window_offset_y, LONG(0)); + } + scissor.left *= ssaa_scale_x; + scissor.top *= ssaa_scale_y; + scissor.right *= ssaa_scale_x; + scissor.bottom *= ssaa_scale_y; + ff_scissor_update_needed_ |= ff_scissor_.left != scissor.left; + ff_scissor_update_needed_ |= ff_scissor_.top != scissor.top; + ff_scissor_update_needed_ |= ff_scissor_.right != scissor.right; + ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor.bottom; + if (ff_scissor_update_needed_) { + ff_scissor_ = scissor; + command_list->RSSetScissorRects(1, &scissor); + ff_scissor_update_needed_ = false; + } + + // Blend factor. + ff_blend_factor_update_needed_ |= + ff_blend_factor_[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32; + ff_blend_factor_update_needed_ |= + ff_blend_factor_[1] != regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + ff_blend_factor_update_needed_ |= + ff_blend_factor_[2] != regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + ff_blend_factor_update_needed_ |= + ff_blend_factor_[3] != regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + if (ff_blend_factor_update_needed_) { + ff_blend_factor_[0] = regs[XE_GPU_REG_RB_BLEND_RED].f32; + ff_blend_factor_[1] = regs[XE_GPU_REG_RB_BLEND_GREEN].f32; + ff_blend_factor_[2] = regs[XE_GPU_REG_RB_BLEND_BLUE].f32; + ff_blend_factor_[3] = regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; + command_list->OMSetBlendFactor(ff_blend_factor_); + ff_blend_factor_update_needed_ = false; + } + + // Stencil reference value. 
+ uint32_t stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK].u32 & 0xFF; + ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref; + if (ff_stencil_ref_update_needed_) { + ff_stencil_ref_ = stencil_ref; + command_list->OMSetStencilRef(stencil_ref); + ff_stencil_ref_update_needed_ = false; + } +} + +void D3D12CommandProcessor::UpdateSystemConstantValues(Endian index_endian) { + auto& regs = *register_file_; + uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; + uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32; + uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; + uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32; + uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; + + bool dirty = false; + + // Index buffer endianness. + dirty |= system_constants_.vertex_index_endian != uint32_t(index_endian); + system_constants_.vertex_index_endian = uint32_t(index_endian); + + // W0 division control. + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + // VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0. + // = false: multiply the X, Y coordinates by 1/W0. + // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. + // = false: multiply the Z coordinate by 1/W0. + // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to + // get 1/W0. + float vtx_xy_fmt = (pa_cl_vte_cntl & (1 << 8)) ? 1.0f : 0.0f; + float vtx_z_fmt = (pa_cl_vte_cntl & (1 << 9)) ? 1.0f : 0.0f; + float vtx_w0_fmt = (pa_cl_vte_cntl & (1 << 10)) ? 
1.0f : 0.0f; + dirty |= system_constants_.mul_rcp_w[0] != vtx_xy_fmt; + dirty |= system_constants_.mul_rcp_w[1] != vtx_z_fmt; + dirty |= system_constants_.mul_rcp_w[2] != vtx_w0_fmt; + system_constants_.mul_rcp_w[0] = vtx_xy_fmt; + system_constants_.mul_rcp_w[1] = vtx_z_fmt; + system_constants_.mul_rcp_w[2] = vtx_w0_fmt; + + // Conversion to Direct3D 12 normalized device coordinates. + // See viewport configuration in UpdateFixedFunctionState for explanations. + // X and Y scale/offset is to convert unnormalized coordinates generated by + // shaders (for rectangle list drawing, for instance) to the 2560x2560 + // viewport that is used to emulate unnormalized coordinates. + // Z scale/offset is to convert from OpenGL NDC to Direct3D NDC if needed. + bool gl_clip_space_def = + !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); + float ndc_scale_x = (pa_cl_vte_cntl & (1 << 0)) ? 1.0f : 1.0f / 1280.0f; + float ndc_scale_y = (pa_cl_vte_cntl & (1 << 2)) ? 1.0f : 1.0f / 1280.0f; + float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; + float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; + float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : -1.0f; + float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; + dirty |= system_constants_.ndc_scale[0] != ndc_scale_x; + dirty |= system_constants_.ndc_scale[1] != ndc_scale_y; + dirty |= system_constants_.ndc_scale[2] != ndc_scale_z; + dirty |= system_constants_.ndc_offset[0] != ndc_offset_x; + dirty |= system_constants_.ndc_offset[1] != ndc_offset_y; + dirty |= system_constants_.ndc_offset[2] != ndc_offset_z; + system_constants_.ndc_scale[0] = ndc_scale_x; + system_constants_.ndc_scale[1] = ndc_scale_y; + system_constants_.ndc_scale[2] = ndc_scale_z; + system_constants_.ndc_offset[0] = ndc_offset_x; + system_constants_.ndc_offset[1] = ndc_offset_y; + system_constants_.ndc_offset[2] = ndc_offset_z; + + // Half-pixel offset for vertex and pixel coordinates.
+ // TODO(Triang3l): Check if pixel coordinates need to offset depending on a + // different register. + float vertex_half_pixel_offset[2], pixel_half_pixel_offset; + if (pa_su_vtx_cntl & (1 << 0)) { + if (pa_cl_vte_cntl & (1 << 0)) { + float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; + vertex_half_pixel_offset[0] = + viewport_scale_x != 0.0f ? -0.5f / viewport_scale_x : 0.0f; + } else { + vertex_half_pixel_offset[0] = -1.0f / 2560.0f; + } + if (pa_cl_vte_cntl & (1 << 2)) { + float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; + vertex_half_pixel_offset[1] = + viewport_scale_y != 0.0f ? -0.5f / viewport_scale_y : 0.0f; + } else { + vertex_half_pixel_offset[1] = -1.0f / 2560.0f; + } + pixel_half_pixel_offset = -0.5f; + } else { + vertex_half_pixel_offset[0] = 0.0f; + vertex_half_pixel_offset[1] = 0.0f; + pixel_half_pixel_offset = 0.0f; + } + dirty |= system_constants_.vertex_half_pixel_offset[0] != + vertex_half_pixel_offset[0]; + dirty |= system_constants_.vertex_half_pixel_offset[1] != + vertex_half_pixel_offset[1]; + dirty |= system_constants_.pixel_half_pixel_offset != pixel_half_pixel_offset; + system_constants_.vertex_half_pixel_offset[0] = vertex_half_pixel_offset[0]; + system_constants_.vertex_half_pixel_offset[1] = vertex_half_pixel_offset[1]; + system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset; + + // Pixel position register. + uint32_t pixel_pos_reg = + (sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX; + dirty |= system_constants_.pixel_pos_reg != pixel_pos_reg; + system_constants_.pixel_pos_reg = pixel_pos_reg; + + // Supersampling anti-aliasing pixel scale inverse for pixel positions. + MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); + float ssaa_inv_scale_x = msaa_samples >= MsaaSamples::k4X ? 0.5f : 1.0f; + float ssaa_inv_scale_y = msaa_samples >= MsaaSamples::k2X ? 
0.5f : 1.0f; + dirty |= system_constants_.ssaa_inv_scale[0] != ssaa_inv_scale_x; + dirty |= system_constants_.ssaa_inv_scale[1] != ssaa_inv_scale_y; + system_constants_.ssaa_inv_scale[0] = ssaa_inv_scale_x; + system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y; + + // TODO(Triang3l): Whether textures are 3D or stacked. + + cbuffer_bindings_system_.up_to_date &= !dirty; +} + bool D3D12CommandProcessor::UpdateBindings( ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, ID3D12RootSignature* root_signature) { @@ -617,32 +891,27 @@ bool D3D12CommandProcessor::UpdateBindings( // Update constant buffers. // TODO(Triang3l): Update the system constant buffer - will crash without it. - ID3D12Resource* constant_buffer; - uint32_t constant_buffer_offset; if (!cbuffer_bindings_system_.up_to_date) { uint8_t* system_constants = constant_buffer_pool_->RequestFull( - xe::align(uint32_t(sizeof(cbuffer_system_)), 256u), constant_buffer, - constant_buffer_offset); + xe::align(uint32_t(sizeof(system_constants_)), 256u), nullptr, nullptr, + &cbuffer_bindings_system_.buffer_address); if (system_constants == nullptr) { return false; } - std::memcpy(system_constants, &cbuffer_system_, sizeof(cbuffer_system_)); - cbuffer_bindings_system_.buffer_address = - constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset; + std::memcpy(system_constants, &system_constants_, + sizeof(system_constants_)); cbuffer_bindings_system_.up_to_date = true; write_common_constant_views = true; } if (!cbuffer_bindings_bool_loop_.up_to_date) { uint8_t* bool_loop_constants = constant_buffer_pool_->RequestFull( - 256, constant_buffer, constant_buffer_offset); + 256, nullptr, nullptr, &cbuffer_bindings_bool_loop_.buffer_address); if (bool_loop_constants == nullptr) { return false; } std::memcpy(bool_loop_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, 40 * sizeof(uint32_t)); - cbuffer_bindings_bool_loop_.buffer_address = -
constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset; cbuffer_bindings_bool_loop_.up_to_date = true; write_common_constant_views = true; } @@ -652,15 +921,13 @@ bool D3D12CommandProcessor::UpdateBindings( continue; } uint8_t* float_constants = constant_buffer_pool_->RequestFull( - 512, constant_buffer, constant_buffer_offset); + 512, nullptr, nullptr, &float_binding.buffer_address); if (float_constants == nullptr) { return false; } std::memcpy(float_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 7)].f32, 32 * 4 * sizeof(uint32_t)); - float_binding.buffer_address = - constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset; float_binding.up_to_date = true; if (i < 8) { write_vertex_float_constant_views = true; @@ -670,15 +937,13 @@ bool D3D12CommandProcessor::UpdateBindings( } if (!cbuffer_bindings_fetch_.up_to_date) { uint8_t* fetch_constants = constant_buffer_pool_->RequestFull( - 768, constant_buffer, constant_buffer_offset); + 768, nullptr, nullptr, &cbuffer_bindings_fetch_.buffer_address); if (fetch_constants == nullptr) { return false; } std::memcpy(fetch_constants, &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, 32 * 6 * sizeof(uint32_t)); - cbuffer_bindings_fetch_.buffer_address = - constant_buffer->GetGPUVirtualAddress() + constant_buffer_offset; cbuffer_bindings_fetch_.up_to_date = true; write_fetch_constant_view = true; } @@ -733,7 +998,7 @@ bool D3D12CommandProcessor::UpdateBindings( constant_buffer_desc.BufferLocation = cbuffer_bindings_system_.buffer_address; constant_buffer_desc.SizeInBytes = - xe::align(uint32_t(sizeof(cbuffer_system_)), 256u); + xe::align(uint32_t(sizeof(system_constants_)), 256u); device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle); view_cpu_handle.ptr += view_handle_size; view_gpu_handle.ptr += view_handle_size; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 3796450c1..987937f70 100644 ---
a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -17,6 +17,7 @@ #include "xenia/gpu/d3d12/d3d12_graphics_system.h" #include "xenia/gpu/d3d12/pipeline_cache.h" #include "xenia/gpu/d3d12/shared_memory.h" +#include "xenia/gpu/hlsl_shader_translator.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/d3d12/command_list.h" @@ -125,6 +126,8 @@ class D3D12CommandProcessor : public CommandProcessor { // Returns true if an open frame was ended. bool EndFrame(); + void UpdateFixedFunctionState(ID3D12GraphicsCommandList* command_list); + void UpdateSystemConstantValues(Endian index_endian); bool UpdateBindings(ID3D12GraphicsCommandList* command_list, const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader, @@ -150,6 +153,16 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t current_queue_frame_ = UINT32_MAX; + // The current fixed-function drawing state. + D3D12_VIEWPORT ff_viewport_; + D3D12_RECT ff_scissor_; + float ff_blend_factor_[4]; + uint32_t ff_stencil_ref_; + bool ff_viewport_update_needed_; + bool ff_scissor_update_needed_; + bool ff_blend_factor_update_needed_; + bool ff_stencil_ref_update_needed_; + // Currently bound graphics or compute pipeline. ID3D12PipelineState* current_pipeline_; // Currently bound graphics root signature. @@ -163,12 +176,7 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12DescriptorHeap* current_sampler_heap_; // System shader constants. - struct SystemConstants { - float viewport_inv_scale_x; - float viewport_inv_scale_y; - uint32_t vertex_index_endian; - uint32_t textures_are_3d; - } cbuffer_system_; + HlslShaderTranslator::SystemConstants system_constants_; // Constant buffer bindings. 
struct ConstantBufferBinding { diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index 33f6ebbb7..e64851226 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -165,8 +165,8 @@ bool SharedMemory::EndFrame(ID3D12GraphicsCommandList* command_list_setup, ID3D12Resource* upload_buffer; uint32_t upload_buffer_offset, upload_buffer_size; uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial( - upload_range_length << page_size_log2_, upload_buffer, - upload_buffer_offset, upload_buffer_size); + upload_range_length << page_size_log2_, &upload_buffer, + &upload_buffer_offset, &upload_buffer_size, nullptr); if (upload_buffer_mapping == nullptr) { XELOGE("Shared memory: Failed to get an upload buffer"); break; diff --git a/src/xenia/gpu/hlsl_shader_translator.cc b/src/xenia/gpu/hlsl_shader_translator.cc index 8e8ad632f..22c8f3a7f 100644 --- a/src/xenia/gpu/hlsl_shader_translator.cc +++ b/src/xenia/gpu/hlsl_shader_translator.cc @@ -834,7 +834,7 @@ void HlslShaderTranslator::ProcessVertexFetchInstruction( } EmitSourceDepth("xe_vertex_element%s = XeByteSwap(xe_shared_memory.Load%s(\n", load_swizzle, load_function_suffix); - EmitSourceDepth(" (xe_vertex_fetch[%uu].x & 0x1FFFFFFCu)", + EmitSourceDepth(" ((xe_vertex_fetch[%uu].x << 2u) & 0x1FFFFFFCu)", instr.operands[1].storage_index); if (instr.attributes.stride != 0) { EmitSource(" + uint(xe_src0.x) * %uu", instr.attributes.stride * 4); diff --git a/src/xenia/gpu/hlsl_shader_translator.h b/src/xenia/gpu/hlsl_shader_translator.h index b244b9ebe..ba485d60e 100644 --- a/src/xenia/gpu/hlsl_shader_translator.h +++ b/src/xenia/gpu/hlsl_shader_translator.h @@ -24,6 +24,23 @@ class HlslShaderTranslator : public ShaderTranslator { HlslShaderTranslator(); ~HlslShaderTranslator() override; + struct SystemConstants { + // vec4 0 + float mul_rcp_w[3]; + uint32_t vertex_index_endian; + // vec4 1 + float ndc_scale[3]; + uint32_t textures_are_3d; + 
// vec4 2 + float ndc_offset[3]; + float pixel_half_pixel_offset; + // vec4 3 + float vertex_half_pixel_offset[2]; + uint32_t pixel_pos_reg; + // vec4 4 + float ssaa_inv_scale[2]; + }; + enum class SRVType : uint32_t { // 1D, 2D or stacked texture bound as a 2D array texture. Texture2DArray, diff --git a/src/xenia/ui/d3d12/pools.cc b/src/xenia/ui/d3d12/pools.cc index 703b3279f..394019083 100644 --- a/src/xenia/ui/d3d12/pools.cc +++ b/src/xenia/ui/d3d12/pools.cc @@ -65,9 +65,9 @@ void UploadBufferPool::ClearCache() { sent_last_ = nullptr; } -uint8_t* UploadBufferPool::RequestFull(uint32_t size, - ID3D12Resource*& buffer_out, - uint32_t& offset_out) { +uint8_t* UploadBufferPool::RequestFull( + uint32_t size, ID3D12Resource** buffer_out, uint32_t* offset_out, + D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) { assert_true(size <= page_size_); if (size > page_size_) { return nullptr; @@ -78,17 +78,26 @@ uint8_t* UploadBufferPool::RequestFull(uint32_t size, return nullptr; } } - buffer_out = unsent_->buffer; - offset_out = current_size_; + if (buffer_out != nullptr) { + *buffer_out = unsent_->buffer; + } + if (offset_out != nullptr) { + *offset_out = current_size_; + } + if (gpu_address_out != nullptr) { + if (current_gpu_address_ == 0) { + current_gpu_address_ = unsent_->buffer->GetGPUVirtualAddress(); + } + *gpu_address_out = current_gpu_address_ + current_size_; + } uint8_t* mapping = current_mapping_ + current_size_; current_size_ += size; return mapping; } -uint8_t* UploadBufferPool::RequestPartial(uint32_t size, - ID3D12Resource*& buffer_out, - uint32_t& offset_out, - uint32_t& size_out) { +uint8_t* UploadBufferPool::RequestPartial( + uint32_t size, ID3D12Resource** buffer_out, uint32_t* offset_out, + uint32_t* size_out, D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) { if (current_size_ == page_size_ || current_mapping_ == nullptr) { // Start a new page if can't fit any bytes or don't have an open page.
if (!BeginNextPage()) { @@ -96,9 +105,21 @@ uint8_t* UploadBufferPool::RequestPartial(uint32_t size, } } size = std::min(size, page_size_ - current_size_); - buffer_out = unsent_->buffer; - offset_out = current_size_; - size_out = size; + if (buffer_out != nullptr) { + *buffer_out = unsent_->buffer; + } + if (offset_out != nullptr) { + *offset_out = current_size_; + } + if (size_out != nullptr) { + *size_out = size; + } + if (gpu_address_out != nullptr) { + if (current_gpu_address_ == 0) { + current_gpu_address_ = unsent_->buffer->GetGPUVirtualAddress(); + } + *gpu_address_out = current_gpu_address_ + current_size_; + } uint8_t* mapping = current_mapping_ + current_size_; current_size_ += size; return mapping; @@ -174,6 +195,7 @@ bool UploadBufferPool::BeginNextPage() { return false; } current_mapping_ = reinterpret_cast<uint8_t*>(mapping); + current_gpu_address_ = 0; return true; } diff --git a/src/xenia/ui/d3d12/pools.h b/src/xenia/ui/d3d12/pools.h index 9954035af..de40cbedd 100644 --- a/src/xenia/ui/d3d12/pools.h +++ b/src/xenia/ui/d3d12/pools.h @@ -28,11 +28,13 @@ class UploadBufferPool { // Request to write data in a single piece, creating a new page if the current // one doesn't have enough free space. - uint8_t* RequestFull(uint32_t size, ID3D12Resource*& buffer_out, - uint32_t& offset_out); + uint8_t* RequestFull(uint32_t size, ID3D12Resource** buffer_out, + uint32_t* offset_out, + D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); // Request to write data in multiple parts, filling the buffer entirely. - uint8_t* RequestPartial(uint32_t size, ID3D12Resource*& buffer_out, - uint32_t& offset_out, uint32_t& size_out); + uint8_t* RequestPartial(uint32_t size, ID3D12Resource** buffer_out, + uint32_t* offset_out, uint32_t* size_out, + D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); private: D3D12Context* context_; @@ -55,6 +57,8 @@ class UploadBufferPool { uint32_t current_size_ = 0; uint8_t* current_mapping_ = nullptr; + // Not updated until actually requested.
+ D3D12_GPU_VIRTUAL_ADDRESS current_gpu_address_ = 0; // Reset in the beginning of a frame - don't try and fail to create a new page // if failed to create one in the current frame.