From ba4eeb5e946976356b3318613239e60c607f577d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 2 Aug 2023 01:51:26 +1000 Subject: [PATCH] Move over all except VRAM write --- src/core/gpu.cpp | 2 - src/core/gpu.h | 5 +- src/core/gpu/d3d11_device.cpp | 115 +++-- src/core/gpu/d3d11_device.h | 8 +- src/core/gpu/d3d11_texture.cpp | 38 +- src/core/gpu/d3d11_texture.h | 1 + src/core/gpu/gpu_device.cpp | 20 + src/core/gpu/gpu_device.h | 3 + src/core/gpu/gpu_texture.cpp | 1 + src/core/gpu_hw.cpp | 629 +++++++++++++++++------ src/core/gpu_hw.h | 56 +- src/core/gpu_hw_d3d11.cpp | 758 +--------------------------- src/core/gpu_hw_d3d11.h | 100 +--- src/core/gpu_hw_d3d12.cpp | 40 +- src/core/gpu_hw_d3d12.h | 8 - src/core/gpu_hw_opengl.cpp | 71 +-- src/core/gpu_hw_opengl.h | 8 - src/core/gpu_hw_vulkan.cpp | 65 +-- src/core/gpu_hw_vulkan.h | 8 - src/core/system.cpp | 24 +- src/frontend-common/common_host.cpp | 4 - 21 files changed, 707 insertions(+), 1257 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 2e4ef286b..b42d49a10 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -293,8 +293,6 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ return !sw.HasError(); } -void GPU::ResetGraphicsAPIState() {} - void GPU::RestoreGraphicsAPIState() {} void GPU::UpdateDMARequest() diff --git a/src/core/gpu.h b/src/core/gpu.h index 89d4a9bdb..5fb66e060 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -88,7 +88,6 @@ public: virtual bool DoState(StateWrapper& sw, GPUTexture** save_to_texture, bool update_display); // Graphics API state reset/restore - call when drawing the UI etc. - virtual void ResetGraphicsAPIState(); virtual void RestoreGraphicsAPIState(); // Render statistics debug window. @@ -192,6 +191,9 @@ public: // Dumps raw VRAM to a file. bool DumpVRAMToFile(const char* filename); + // Ensures all buffered vertices are drawn. + virtual void FlushRender(); + protected: TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const; TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const; @@ -291,7 +293,6 @@ protected: virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); virtual void DispatchRenderCommand(); - virtual void FlushRender(); virtual void ClearDisplay(); virtual void UpdateDisplay(); virtual void DrawRendererStats(bool is_idle_frame); diff --git a/src/core/gpu/d3d11_device.cpp b/src/core/gpu/d3d11_device.cpp index 354905bcd..f1f677de2 100644 --- a/src/core/gpu/d3d11_device.cpp +++ b/src/core/gpu/d3d11_device.cpp @@ -110,35 +110,6 @@ bool D3D11Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, return true; } -void D3D11Device::CommitClear(GPUTexture* t) -{ - D3D11Texture* T = static_cast(t); - if (T->GetState() == GPUTexture::State::Dirty) - return; - - // TODO: 11.1 - if (T->IsDepthStencil()) - { - if (T->GetState() == GPUTexture::State::Invalidated) - ; // m_context->DiscardView(T->GetD3DDSV()); - else - m_context->ClearDepthStencilView(T->GetD3DDSV(), D3D11_CLEAR_DEPTH, T->GetClearDepth(), 0); - } - else if (T->IsRenderTarget()) - { - if (T->GetState() == GPUTexture::State::Invalidated) - ; // m_context->DiscardView(T->GetD3DRTV()); - else - m_context->ClearRenderTargetView(T->GetD3DRTV(), T->GetUNormClearColor().data()); - } - else - { - return; - } - - T->SetState(GPUTexture::State::Dirty); -} - bool D3D11Device::CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format) { if (m_readback_staging_texture_width >= width && m_readback_staging_texture_width >= height && @@ -191,19 +162,29 @@ void D3D11Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d D3D11Texture* dst11 = static_cast(dst); D3D11Texture* src11 = static_cast(src); - if (src11->GetState() == GPUTexture::State::Cleared) + if (dst11->IsRenderTargetOrDepthStencil()) { - if (src11->GetWidth() == dst11->GetWidth() && src11->GetHeight() == dst11->GetHeight()) + if (src11->GetState() == GPUTexture::State::Cleared) { - // pass clear through - dst11->m_state = src11->m_state; - dst11->m_clear_value = src11->m_clear_value; - return; + if (src11->GetWidth() == dst11->GetWidth() && src11->GetHeight() == dst11->GetHeight()) + { + // pass clear through + dst11->m_state = src11->m_state; + dst11->m_clear_value = src11->m_clear_value; + return; + } } + else if (dst_x == 0 && dst_y == 0 && width == dst11->GetMipWidth(dst_level) && + height == dst11->GetMipHeight(dst_level)) + { + // TODO: 11.1 discard + dst11->SetState(GPUTexture::State::Dirty); + } + + dst11->CommitClear(m_context.Get()); } - CommitClear(src11); - CommitClear(dst11); + src11->CommitClear(m_context.Get()); const CD3D11_BOX src_box(static_cast(src_x), static_cast(src_y), 0, static_cast(src_x + width), static_cast(src_y + height), 1); @@ -227,13 +208,15 @@ void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 // DX11 can't resolve partial rects. Assert(src_x == dst_x && src_y == dst_y); - CommitClear(src); - CommitClear(dst); + D3D11Texture* dst11 = static_cast(dst); + D3D11Texture* src11 = static_cast(src); - m_context->ResolveSubresource( - static_cast(dst)->GetD3DTexture(), D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), - static_cast(src)->GetD3DTexture(), D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), - static_cast(dst)->GetDXGIFormat()); + src11->CommitClear(m_context.Get()); + dst11->CommitClear(m_context.Get()); + + m_context->ResolveSubresource(dst11->GetD3DTexture(), D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), + src11->GetD3DTexture(), D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), + dst11->GetDXGIFormat()); } bool D3D11Device::GetHostRefreshRate(float* refresh_rate) @@ -645,7 +628,7 @@ bool D3D11Device::CreateBuffers() { if (!m_vertex_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE) || !m_index_buffer.Create(m_device.Get(), D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE) || - !m_push_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, PUSH_UNIFORM_BUFFER_SIZE)) + !m_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, MAX_UNIFORM_BUFFER_SIZE)) { Log_ErrorPrintf("Failed to create vertex/index/uniform buffers."); return false; @@ -656,7 +639,7 @@ bool D3D11Device::CreateBuffers() void D3D11Device::DestroyBuffers() { - m_push_uniform_buffer.Release(); + m_uniform_buffer.Release(); m_vertex_buffer.Release(); m_index_buffer.Release(); } @@ -678,6 +661,7 @@ bool D3D11Device::Render(bool skip_present) m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data()); m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); + m_current_framebuffer = nullptr; RenderDisplay(); @@ -695,7 +679,7 @@ bool D3D11Device::Render(bool skip_present) if (m_gpu_timing_enabled) KickTimestampQuery(); - + return true; } @@ -1571,20 +1555,45 @@ void D3D11Device::UnmapIndexBuffer(u32 used_index_count) void D3D11Device::PushUniformBuffer(const void* data, u32 data_size) { - Assert(data_size <= PUSH_UNIFORM_BUFFER_SIZE); + Assert(data_size <= MAX_UNIFORM_BUFFER_SIZE); - const auto res = m_push_uniform_buffer.Map(m_context.Get(), PUSH_UNIFORM_BUFFER_SIZE, PUSH_UNIFORM_BUFFER_SIZE); + const auto res = m_uniform_buffer.Map(m_context.Get(), MAX_UNIFORM_BUFFER_SIZE, MAX_UNIFORM_BUFFER_SIZE); std::memcpy(res.pointer, data, data_size); - m_push_uniform_buffer.Unmap(m_context.Get(), data_size); + m_uniform_buffer.Unmap(m_context.Get(), data_size); - m_context->VSSetConstantBuffers(0, 1, m_push_uniform_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_push_uniform_buffer.GetD3DBufferArray()); + m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); +} + +void* D3D11Device::MapUniformBuffer(u32 size) +{ + Assert(size <= MAX_UNIFORM_BUFFER_SIZE); + + const auto res = m_uniform_buffer.Map(m_context.Get(), MAX_UNIFORM_BUFFER_SIZE, MAX_UNIFORM_BUFFER_SIZE); + return res.pointer; +} + +void D3D11Device::UnmapUniformBuffer(u32 size) +{ + m_uniform_buffer.Unmap(m_context.Get(), size); + m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); + m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray()); } void D3D11Device::SetFramebuffer(GPUFramebuffer* fb) { - D3D11Framebuffer* FB = static_cast(fb); - m_context->OMSetRenderTargets(FB->GetNumRTVs(), FB->GetRTVArray(), FB->GetDSV()); + if (m_current_framebuffer == fb) + return; + + m_current_framebuffer = static_cast(fb); + if (!m_current_framebuffer) + { + m_context->OMSetRenderTargets(0, nullptr, nullptr); + return; + } + + m_context->OMSetRenderTargets(m_current_framebuffer->GetNumRTVs(), m_current_framebuffer->GetRTVArray(), + m_current_framebuffer->GetDSV()); } void D3D11Device::UnbindFramebuffer(D3D11Framebuffer* fb) diff --git a/src/core/gpu/d3d11_device.h b/src/core/gpu/d3d11_device.h index 6d52bbdbc..c9f6f95ef 100644 --- a/src/core/gpu/d3d11_device.h +++ b/src/core/gpu/d3d11_device.h @@ -195,6 +195,8 @@ public: void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; void UnmapIndexBuffer(u32 used_index_count) override; void PushUniformBuffer(const void* data, u32 data_size) override; + void* MapUniformBuffer(u32 size) override; + void UnmapUniformBuffer(u32 size); void SetFramebuffer(GPUFramebuffer* fb) override; void SetPipeline(GPUPipeline* pipeline) override; void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; @@ -225,14 +227,14 @@ private: using InputLayoutMap = std::unordered_map, GPUPipeline::InputLayoutHash>; - static constexpr u32 PUSH_UNIFORM_BUFFER_SIZE = 64; + // Currently we don't stream uniforms, instead just re-map the buffer every time and let the driver take care of it. + static constexpr u32 MAX_UNIFORM_BUFFER_SIZE = 64; static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; static AdapterAndModeList GetAdapterAndModeList(IDXGIFactory* dxgi_factory); - void CommitClear(GPUTexture* t); void PreDrawCheck(); bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format); @@ -278,7 +280,7 @@ private: D3D11::StreamBuffer m_vertex_buffer; D3D11::StreamBuffer m_index_buffer; - D3D11::StreamBuffer m_push_uniform_buffer; + D3D11::StreamBuffer m_uniform_buffer; D3D11Framebuffer* m_current_framebuffer = nullptr; D3D11Pipeline* m_current_pipeline = nullptr; diff --git a/src/core/gpu/d3d11_texture.cpp b/src/core/gpu/d3d11_texture.cpp index cac81bbf7..9236d6cae 100644 --- a/src/core/gpu/d3d11_texture.cpp +++ b/src/core/gpu/d3d11_texture.cpp @@ -56,6 +56,30 @@ D3D11_TEXTURE2D_DESC D3D11Texture::GetDesc() const return desc; } +void D3D11Texture::CommitClear(ID3D11DeviceContext* context) +{ + if (m_state == GPUTexture::State::Dirty) + return; + + // TODO: 11.1 + if (IsDepthStencil()) + { + if (m_state == GPUTexture::State::Invalidated) + ; // context->DiscardView(GetD3DDSV()); + else + context->ClearDepthStencilView(GetD3DDSV(), D3D11_CLEAR_DEPTH, GetClearDepth(), 0); + } + else if (IsRenderTarget()) + { + if (m_state == GPUTexture::State::Invalidated) + ; // context->DiscardView(GetD3DRTV()); + else + context->ClearRenderTargetView(GetD3DRTV(), GetUNormClearColor().data()); + } + + m_state = GPUTexture::State::Dirty; +} + bool D3D11Texture::IsValid() const { return static_cast(m_texture); @@ -80,7 +104,10 @@ bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, static_cast(y + height), 1); const u32 srnum = D3D11CalcSubresource(level, layer, m_levels); - D3D11Device::GetD3DContext()->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0); + ID3D11DeviceContext* context = D3D11Device::GetD3DContext(); + CommitClear(context); + context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0); + m_state = GPUTexture::State::Dirty; return true; } @@ -92,9 +119,12 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 const bool discard = (width == m_width && height == m_height); const u32 srnum = D3D11CalcSubresource(level, layer, m_levels); + + ID3D11DeviceContext* context = D3D11Device::GetD3DContext(); + CommitClear(context); + D3D11_MAPPED_SUBRESOURCE sr; - HRESULT hr = D3D11Device::GetD3DContext()->Map(m_texture.Get(), srnum, - discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_READ_WRITE, 0, &sr); + HRESULT hr = context->Map(m_texture.Get(), srnum, discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_READ_WRITE, 0, &sr); if (FAILED(hr)) { Log_ErrorPrintf("Map pixels texture failed: %08X", hr); @@ -104,6 +134,7 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 *map = static_cast(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize()); *map_stride = sr.RowPitch; m_mapped_subresource = srnum; + m_state = GPUTexture::State::Dirty; return true; } @@ -284,6 +315,7 @@ bool D3D11Texture::Adopt(ID3D11Device* device, ComPtr texture) m_levels = static_cast(desc.MipLevels); m_samples = static_cast(desc.SampleDesc.Count); m_dynamic = (desc.Usage == D3D11_USAGE_DYNAMIC); + m_state = GPUTexture::State::Dirty; return true; } diff --git a/src/core/gpu/d3d11_texture.h b/src/core/gpu/d3d11_texture.h index 321f7bfd3..5667d624c 100644 --- a/src/core/gpu/d3d11_texture.h +++ b/src/core/gpu/d3d11_texture.h @@ -61,6 +61,7 @@ public: void Destroy(); D3D11_TEXTURE2D_DESC GetDesc() const; + void CommitClear(ID3D11DeviceContext* context); bool IsValid() const override; diff --git a/src/core/gpu/gpu_device.cpp b/src/core/gpu/gpu_device.cpp index c9017a3bc..49325aa7a 100644 --- a/src/core/gpu/gpu_device.cpp +++ b/src/core/gpu/gpu_device.cpp @@ -388,6 +388,26 @@ void GPUDevice::PushUniformBuffer(const void* data, u32 data_size) UnreachableCode(); } +void* GPUDevice::MapUniformBuffer(u32 size) +{ + // TODO: REMOVE ME + UnreachableCode(); + return nullptr; +} + +void GPUDevice::UnmapUniformBuffer(u32 size) +{ + // TODO: REMOVE ME + UnreachableCode(); +} + +void GPUDevice::UploadUniformBuffer(const void* data, u32 data_size) +{ + void* map = MapUniformBuffer(data_size); + std::memcpy(map, data, data_size); + UnmapUniformBuffer(data_size); +} + void GPUDevice::SetFramebuffer(GPUFramebuffer* fb) { // TODO: REMOVE ME diff --git a/src/core/gpu/gpu_device.h b/src/core/gpu/gpu_device.h index 47e01c3ee..cd6c4a301 100644 --- a/src/core/gpu/gpu_device.h +++ b/src/core/gpu/gpu_device.h @@ -457,6 +457,9 @@ public: /// Uniform buffer abstraction. virtual void PushUniformBuffer(const void* data, u32 data_size); + virtual void* MapUniformBuffer(u32 size); + virtual void UnmapUniformBuffer(u32 size); + void UploadUniformBuffer(const void* data, u32 data_size); /// Drawing setup abstraction. virtual void SetFramebuffer(GPUFramebuffer* fb); diff --git a/src/core/gpu/gpu_texture.cpp b/src/core/gpu/gpu_texture.cpp index 6e7a59f15..180747797 100644 --- a/src/core/gpu/gpu_texture.cpp +++ b/src/core/gpu/gpu_texture.cpp @@ -23,6 +23,7 @@ void GPUTexture::ClearBaseProperties() m_levels = 0; m_samples = 0; m_format = GPUTexture::Format::Unknown; + m_state = State::Dirty; } u32 GPUTexture::GPUTexture::GetPixelSize(GPUTexture::Format format) diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 7387611a3..919e43f58 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -21,6 +21,11 @@ #include Log_SetChannel(GPU_HW); +// TODO: instead of full state restore, only restore what changed + +static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8; +static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16; + template ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2) { @@ -48,6 +53,8 @@ GPU_HW::GPU_HW() : GPU() GPU_HW::~GPU_HW() { + g_host_display->ClearDisplayTexture(); + if (m_sw_renderer) { m_sw_renderer->Shutdown(); @@ -110,6 +117,19 @@ bool GPU_HW::Initialize() UpdateSoftwareRenderer(false); PrintSettingsToLog(); + + if (!CompilePipelines()) + { + Log_ErrorPrintf("Failed to compile pipelines"); + return false; + } + + if (!CreateFramebuffer()) + { + Log_ErrorPrintf("Failed to create framebuffer"); + return false; + } + return true; } @@ -128,7 +148,8 @@ void GPU_HW::Reset(bool clear_vram) m_batch_ubo_dirty = true; m_current_depth = 1; - SetFullVRAMDirtyRectangle(); + if (clear_vram) + ClearFramebuffer(); } bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) @@ -183,6 +204,15 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di return true; } +void GPU_HW::RestoreGraphicsAPIState() +{ + g_host_display->SetTextureSampler(0, m_vram_read_texture.get(), g_host_display->GetPointSampler()); + g_host_display->SetFramebuffer(m_vram_framebuffer.get()); + g_host_display->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + SetScissor(); + m_batch_ubo_dirty = true; +} + void GPU_HW::UpdateSettings() { // TODO: Merge UpdateHWSettings() into here. @@ -193,7 +223,6 @@ void GPU_HW::UpdateSettings() { RestoreGraphicsAPIState(); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); g_host_display->ClearDisplayTexture(); CreateFramebuffer(); } @@ -211,7 +240,6 @@ void GPU_HW::UpdateSettings() UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); UpdateDepthBufferFromMaskBit(); UpdateDisplay(); - ResetGraphicsAPIState(); } } @@ -382,21 +410,19 @@ bool GPU_HW::CreateFramebuffer() const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; const u8 samples = static_cast(m_multisamples); - const GPUTexture::Format texture_format = GPUTexture::Format::RGBA8; - const GPUTexture::Format depth_format = GPUTexture::Format::D16; if (!(m_vram_texture = g_host_display->CreateTexture(texture_width, texture_height, 1, 1, samples, - GPUTexture::Type::RenderTarget, texture_format)) || + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || !(m_vram_depth_texture = g_host_display->CreateTexture(texture_width, texture_height, 1, 1, samples, - GPUTexture::Type::DepthStencil, depth_format)) || + GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT)) || !(m_vram_read_texture = g_host_display->CreateTexture(texture_width, texture_height, 1, 1, 1, - GPUTexture::Type::Texture, texture_format)) || + GPUTexture::Type::Texture, VRAM_RT_FORMAT)) || !(m_display_texture = g_host_display->CreateTexture( ((m_downsample_mode == GPUDownsampleMode::Adaptive) ? VRAM_WIDTH : GPU_MAX_DISPLAY_WIDTH) * m_resolution_scale, - GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, 1, GPUTexture::Type::RenderTarget, texture_format)) || + GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || !(m_vram_readback_texture = g_host_display->CreateTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, - GPUTexture::Type::RenderTarget, texture_format))) + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT))) { return false; } @@ -419,9 +445,64 @@ bool GPU_HW::CreateFramebuffer() GL_OBJECT_NAME(m_display_framebuffer, "Display Framebuffer"); Log_InfoPrintf("Created HW framebuffer of %ux%u", texture_width, texture_height); + +#if 0 + if (m_downsample_mode == GPUDownsampleMode::Adaptive) + { + const u32 levels = GetAdaptiveDownsamplingMipLevels(); + + if (!m_downsample_texture.Create(m_device.Get(), texture_width, texture_height, 1, static_cast(levels), 1, + GPUTexture::Type::RenderTarget, texture_format) || + !m_downsample_weight_texture.Create(m_device.Get(), texture_width >> (levels - 1), + texture_height >> (levels - 1), 1, 1, 1, GPUTexture::Type::RenderTarget, + GPUTexture::Format::R8)) + { + return false; + } + + m_downsample_mip_views.resize(levels); + for (u32 i = 0; i < levels; i++) + { + const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_downsample_texture, D3D11_SRV_DIMENSION_TEXTURE2D, + m_downsample_texture.GetDXGIFormat(), i, 1); + const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D, + m_downsample_texture.GetDXGIFormat(), i, 1); + + HRESULT hr = m_device->CreateShaderResourceView(m_downsample_texture, &srv_desc, + m_downsample_mip_views[i].first.GetAddressOf()); + if (FAILED(hr)) + return false; + + hr = m_device->CreateRenderTargetView(m_downsample_texture, &rtv_desc, + m_downsample_mip_views[i].second.GetAddressOf()); + if (FAILED(hr)) + return false; + } + } + else if (m_downsample_mode == GPUDownsampleMode::Box) + { + if (!m_downsample_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Type::RenderTarget, + texture_format)) + { + return false; + } + } +#endif + + g_host_display->SetFramebuffer(m_vram_framebuffer.get()); + SetFullVRAMDirtyRectangle(); return true; } +void GPU_HW::ClearFramebuffer() +{ + g_host_display->ClearRenderTarget(m_vram_texture.get(), 0); + g_host_display->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); + g_host_display->ClearRenderTarget(m_display_texture.get(), 0); + ClearVRAMDirtyRectangle(); + m_last_depth_z = 1.0f; +} + void GPU_HW::DestroyFramebuffer() { m_display_framebuffer.reset(); @@ -514,8 +595,8 @@ bool GPU_HW::CompilePipelines() plconfig.input_layout.vertex_stride = sizeof(BatchVertex); plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.primitive = GPUPipeline::Primitive::Triangles; - plconfig.color_format = GPUTexture::Format::RGBA8; - plconfig.depth_format = GPUTexture::Format::D16; + plconfig.color_format = VRAM_RT_FORMAT; + plconfig.depth_format = VRAM_DS_FORMAT; plconfig.samples = m_multisamples; plconfig.per_sample_shading = m_per_sample_shading; @@ -661,6 +742,8 @@ bool GPU_HW::CompilePipelines() if (!(m_vram_copy_pipelines[depth_test] = g_host_display->CreatePipeline(plconfig))) return false; + GL_OBJECT_NAME(m_vram_copy_pipelines[depth_test], "VRAM Write Pipeline, depth=%u", depth_test); + progress.Increment(); } } @@ -683,38 +766,35 @@ bool GPU_HW::CompilePipelines() if (!(m_vram_write_pipelines[depth_test] = g_host_display->CreatePipeline(plconfig))) return false; + GL_OBJECT_NAME(m_vram_write_pipelines[depth_test], "VRAM Write Pipeline, depth=%u", depth_test); + progress.Increment(); } } -#if 0 // VRAM update depth - // TODO { - std::unique_ptr fs = g_host_display->CreateShader( - GPUShader::Stage::Pixel, shadergen.GenerateVRAMUpdateDepthFragmentShader()); + std::unique_ptr fs = + g_host_display->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMUpdateDepthFragmentShader()); if (!fs) return false; - gpbuilder.SetRenderPass(m_vram_update_depth_render_pass, 0); - gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); - gpbuilder.SetFragmentShader(fs); - gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS); - gpbuilder.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, - VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, 0); - // COLOR MASK ZERO + plconfig.pixel_shader = fs.get(); + plconfig.color_format = GPUTexture::Format::Unknown; + plconfig.depth_format = VRAM_DS_FORMAT; + plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState(); + plconfig.blend.write_mask = 0; - m_vram_update_depth_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(device, fs, nullptr); - if (m_vram_update_depth_pipeline == VK_NULL_HANDLE) + if (!(m_vram_update_depth_pipeline = g_host_display->CreatePipeline(plconfig))) return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_update_depth_pipeline, - "VRAM Update Depth Pipeline"); + + GL_OBJECT_NAME(m_vram_update_depth_pipeline, "VRAM Update Depth Pipeline"); progress.Increment(); } -#endif + plconfig.color_format = VRAM_RT_FORMAT; + plconfig.depth_format = GPUTexture::Format::Unknown; plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); @@ -757,6 +837,17 @@ bool GPU_HW::CompilePipelines() } } + { + std::unique_ptr fs = + g_host_display->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateCopyFragmentShader()); + if (!fs) + return false; + + plconfig.pixel_shader = fs.get(); + if (!(m_copy_pipeline = g_host_display->CreatePipeline(plconfig))) + return false; + } + #if 0 if (m_downsample_mode == GPUDownsampleMode::Adaptive) { @@ -905,6 +996,42 @@ void GPU_HW::UpdateVRAMReadTexture() ClearVRAMDirtyRectangle(); } +void GPU_HW::UpdateDepthBufferFromMaskBit() +{ + if (m_pgxp_depth_buffer) + return; + + // Viewport should already be set full, only need to fudge the scissor. + g_host_display->SetScissor(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); + g_host_display->SetFramebuffer(m_vram_update_depth_framebuffer.get()); + g_host_display->SetPipeline(m_vram_update_depth_pipeline.get()); + g_host_display->SetTextureSampler(0, m_vram_texture.get(), g_host_display->GetPointSampler()); + g_host_display->Draw(3, 0); + + // Restore. + g_host_display->SetTextureSampler(0, m_vram_read_texture.get(), g_host_display->GetPointSampler()); + g_host_display->SetFramebuffer(m_vram_framebuffer.get()); + SetScissor(); +} + +void GPU_HW::ClearDepthBuffer() +{ + DebugAssert(m_pgxp_depth_buffer); + + g_host_display->ClearDepth(m_vram_depth_texture.get(), 1.0f); + m_last_depth_z = 1.0f; +} + +void GPU_HW::SetScissor() +{ + const s32 left = m_drawing_area.left * m_resolution_scale; + const s32 right = std::max((m_drawing_area.right + 1) * m_resolution_scale, left + 1); + const s32 top = m_drawing_area.top * m_resolution_scale; + const s32 bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, top + 1); + + g_host_display->SetScissor(left, top, right - left, bottom - top); +} + void GPU_HW::MapBatchVertexPointer(u32 required_vertices) { DebugAssert(!m_batch_start_vertex_ptr); @@ -940,111 +1067,8 @@ void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 void GPU_HW::ClearDisplay() { - Panic("Not implemented"); -} - -void GPU_HW::UpdateDisplay() -{ - FlushRender(); - - if (g_settings.debugging.show_vram) - { - if (IsUsingMultisampling()) - { - UpdateVRAMReadTexture(); - g_host_display->SetDisplayTexture(m_vram_read_texture.get(), 0, 0, m_vram_read_texture->GetWidth(), - m_vram_read_texture->GetHeight()); - } - else - { - g_host_display->SetDisplayTexture(m_vram_texture.get(), 0, 0, m_vram_texture->GetWidth(), - m_vram_texture->GetHeight()); - } - - g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); - } - else - { - g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - GetDisplayAspectRatio()); - - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; - const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; - const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 scaled_display_width = display_width * resolution_scale; - const u32 scaled_display_height = display_height * resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); - - if (IsDisplayDisabled()) - { - g_host_display->ClearDisplayTexture(); - } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && - !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && - (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight()) - { - - if (IsUsingDownsampling()) - { -#if 0 - DownsampleFramebuffer(GetVRAMTexture(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - scaled_display_height); -#else - Panic("Fixme"); -#endif - } - else - { - g_host_display->SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, - scaled_display_width, scaled_display_height); - } - } - else - { - // TODO: discard vs load for interlaced - if (interlaced == InterlacedRenderMode::None) - g_host_display->InvalidateRenderTarget(m_display_texture.get()); - - g_host_display->SetPipeline( - m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].get()); - g_host_display->SetFramebuffer(m_display_framebuffer.get()); - - const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; - const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, - reinterpret_crop_left, reinterpret_field_offset}; - g_host_display->PushUniformBuffer(uniforms, sizeof(uniforms)); - - Assert(scaled_display_width <= m_display_texture->GetWidth() && - scaled_display_height <= m_display_texture->GetHeight()); - - g_host_display->SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); - g_host_display->Draw(3, 0); - - if (IsUsingDownsampling()) - { -#if 0 - DownsampleFramebuffer(GetDisplayTexture(), 0, 0, scaled_display_width, scaled_display_height); -#else - Panic("Fixme"); -#endif - } - else - { - g_host_display->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height); - } - - RestoreGraphicsAPIState(); - } - } + g_host_display->ClearDisplayTexture(); + g_host_display->ClearRenderTarget(m_display_texture.get(), 0xFF000000u); } void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) @@ -1720,14 +1744,6 @@ void GPU_HW::LoadVertices() } } -void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) -{ - *left = m_drawing_area.left * m_resolution_scale; - *right = std::max((m_drawing_area.right + 1) * m_resolution_scale, *left + 1); - *top = m_drawing_area.top * m_resolution_scale; - *bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1); -} - GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const { // drop precision unless true colour is enabled @@ -1798,6 +1814,40 @@ GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst return uniforms; } +bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, + u32 height) +{ + if (!m_vram_replacement_texture || m_vram_replacement_texture->GetWidth() < tex->GetWidth() || + m_vram_replacement_texture->GetHeight() < tex->GetHeight()) + { + m_vram_replacement_texture.reset(); + + if (!(m_vram_replacement_texture = + g_host_display->CreateTexture(tex->GetWidth(), tex->GetHeight(), 1, 1, 1, GPUTexture::Type::Texture, + GPUTexture::Format::RGBA8, tex->GetPixels(), tex->GetPitch(), true))) + { + return false; + } + } + else + { + if (!m_vram_replacement_texture->Update(0, 0, width, height, tex->GetPixels(), tex->GetPitch())) + { + Log_ErrorPrintf("Update %ux%u texture failed.", width, height); + return false; + } + } + + g_host_display->SetFramebuffer(m_vram_framebuffer.get()); // TODO: needed? + g_host_display->SetTextureSampler(0, m_vram_replacement_texture.get(), g_host_display->GetLinearSampler()); + g_host_display->SetPipeline(m_copy_pipeline.get()); + g_host_display->SetViewportAndScissor(dst_x, dst_y, width, height); + g_host_display->Draw(3, 0); + + RestoreGraphicsAPIState(); + return true; +} + void GPU_HW::IncludeVRAMDirtyRectangle(const Common::Rectangle& rect) { m_vram_dirty_rect.Include(rect); @@ -1976,8 +2026,56 @@ void GPU_HW::CopySoftwareRendererVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { + if (IsUsingSoftwareRendererForReadbacks()) + FillSoftwareRendererVRAM(x, y, width, height, color); + IncludeVRAMDirtyRectangle( Common::Rectangle::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); + + g_host_display->SetPipeline(m_vram_fill_pipelines[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))] + [BoolToUInt8(IsInterlacedRenderingEnabled())] + .get()); + + const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); + g_host_display->SetViewportAndScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, + bounds.GetWidth() * m_resolution_scale, + bounds.GetHeight() * m_resolution_scale); + + const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); + g_host_display->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_host_display->Draw(3, 0); + + RestoreGraphicsAPIState(); +} + +void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) +{ + if (IsUsingSoftwareRendererForReadbacks()) + { + ReadSoftwareRendererVRAM(x, y, width, height); + return; + } + + // Get bounds with wrap-around handled. + const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; + const u32 encoded_height = copy_rect.GetHeight(); + + // Encode the 24-bit texture as 16-bit. + const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + g_host_display->SetPipeline(m_vram_readback_pipeline.get()); + g_host_display->SetFramebuffer(m_vram_readback_framebuffer.get()); + g_host_display->SetTextureSampler(0, m_vram_texture.get(), g_host_display->GetPointSampler()); + g_host_display->SetViewportAndScissor(0, 0, encoded_width, encoded_height); + g_host_display->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_host_display->Draw(3, 0); + + // Stage the readback and copy it into our shadow buffer. + g_host_display->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height, + reinterpret_cast(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]), + VRAM_WIDTH * sizeof(u16)); + + RestoreGraphicsAPIState(); } void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) @@ -1994,6 +2092,44 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) { + if (IsUsingSoftwareRendererForReadbacks()) + CopySoftwareRendererVRAM(src_x, src_y, dst_x, dst_y, width, height); + + if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) + { + const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); + const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); + if (m_vram_dirty_rect.Intersects(src_bounds)) + UpdateVRAMReadTexture(); + IncludeVRAMDirtyRectangle(dst_bounds); + + const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); + + // VRAM read texture should already be bound. + const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); + g_host_display->SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), + dst_bounds_scaled.GetHeight()); + g_host_display->SetPipeline( + m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer)].get()); + g_host_display->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_host_display->Draw(3, 0); + RestoreGraphicsAPIState(); + + if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) + m_current_depth++; + + return; + } + + // We can't CopySubresourceRegion to the same resource. So use the shadow texture if we can, but that may need to be + // updated first. Copying to the same resource seemed to work on Windows 10, but breaks on Windows 7. But, it's + // against the API spec, so better to be safe than sorry. + + // TODO: make this an optional feature, DX12 can do it + + if (m_vram_dirty_rect.Intersects(Common::Rectangle::FromExtents(src_x, src_y, width, height))) + UpdateVRAMReadTexture(); + IncludeVRAMDirtyRectangle( Common::Rectangle::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); @@ -2002,6 +2138,10 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; } + + g_host_display->CopyTextureRegion(m_vram_texture.get(), dst_x * m_resolution_scale, dst_y * m_resolution_scale, 0, 0, + m_vram_read_texture.get(), src_x * m_resolution_scale, src_y * m_resolution_scale, + 0, 0, width * m_resolution_scale, height * m_resolution_scale); } void GPU_HW::DispatchRenderCommand() @@ -2102,7 +2242,7 @@ void GPU_HW::DispatchRenderCommand() if (m_drawing_area_changed) { m_drawing_area_changed = false; - SetScissorFromDrawingArea(); + SetScissor(); if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) ClearDepthBuffer(); @@ -2131,7 +2271,8 @@ void GPU_HW::FlushRender() if (m_batch_ubo_dirty) { - UploadUniformBuffer(&m_batch_ubo_data, sizeof(m_batch_ubo_data)); + g_host_display->UploadUniformBuffer(&m_batch_ubo_data, sizeof(m_batch_ubo_data)); + m_renderer_stats.num_uniform_buffer_updates++; m_batch_ubo_dirty = false; } @@ -2148,6 +2289,206 @@ void GPU_HW::FlushRender() } } +void GPU_HW::UpdateDisplay() +{ + FlushRender(); + + if (g_settings.debugging.show_vram) + { + if (IsUsingMultisampling()) + { + UpdateVRAMReadTexture(); + g_host_display->SetDisplayTexture(m_vram_read_texture.get(), 0, 0, m_vram_read_texture->GetWidth(), + m_vram_read_texture->GetHeight()); + } + else + { + g_host_display->SetDisplayTexture(m_vram_texture.get(), 0, 0, m_vram_texture->GetWidth(), + m_vram_texture->GetHeight()); + } + + g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + } + else + { + g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, + m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, + m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, + GetDisplayAspectRatio()); + + const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; + const u32 vram_offset_x = m_crtc_state.display_vram_left; + const u32 vram_offset_y = m_crtc_state.display_vram_top; + const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; + const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; + const u32 display_width = m_crtc_state.display_vram_width; + const u32 display_height = m_crtc_state.display_vram_height; + const u32 scaled_display_width = display_width * resolution_scale; + const u32 scaled_display_height = display_height * resolution_scale; + const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); + + if (IsDisplayDisabled()) + { + g_host_display->ClearDisplayTexture(); + } + else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && + !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && + (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight()) + { + + if (IsUsingDownsampling()) + { + DownsampleFramebuffer(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, + scaled_display_height); + } + else + { + g_host_display->SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, + scaled_display_width, scaled_display_height); + } + } + else + { + // TODO: discard vs load for interlaced + if (interlaced == InterlacedRenderMode::None) + g_host_display->InvalidateRenderTarget(m_display_texture.get()); + + g_host_display->SetPipeline( + m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].get()); + g_host_display->SetFramebuffer(m_display_framebuffer.get()); + + const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; + const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; + const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; + const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, + reinterpret_crop_left, reinterpret_field_offset}; + g_host_display->PushUniformBuffer(uniforms, sizeof(uniforms)); + + Assert(scaled_display_width <= m_display_texture->GetWidth() && + scaled_display_height <= m_display_texture->GetHeight()); + + g_host_display->SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); + g_host_display->Draw(3, 0); + + if (IsUsingDownsampling()) + DownsampleFramebuffer(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height); + else + g_host_display->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height); + + RestoreGraphicsAPIState(); + } + } +} + +void GPU_HW::DownsampleFramebuffer(const GPUTexture* source, u32 left, u32 top, u32 width, u32 height) +{ + if (m_downsample_mode == GPUDownsampleMode::Adaptive) + DownsampleFramebufferAdaptive(source, left, top, width, height); + else + DownsampleFramebufferBoxFilter(source, left, top, width, height); +} + +void GPU_HW::DownsampleFramebufferAdaptive(const GPUTexture* source, u32 left, u32 top, u32 width, u32 height) +{ +#if 0 + CD3D11_BOX src_box(left, top, 0, left + width, top + height, 1); + m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); + m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); + m_context->CopySubresourceRegion(m_downsample_texture, 0, left, top, 0, source->GetD3DTexture(), 0, &src_box); + m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf()); + m_context->VSSetShader(m_uv_quad_vertex_shader.Get(), nullptr, 0); + + // create mip chain + const u32 levels = m_downsample_texture.GetLevels(); + for (u32 level = 1; level < levels; level++) + { + static constexpr float clear_color[4] = {}; + + SetViewportAndScissor(left >> level, top >> level, width >> level, height >> level); + m_context->ClearRenderTargetView(m_downsample_mip_views[level].second.Get(), clear_color); + m_context->OMSetRenderTargets(1, m_downsample_mip_views[level].second.GetAddressOf(), nullptr); + m_context->PSSetShaderResources(0, 1, m_downsample_mip_views[level - 1].first.GetAddressOf()); + + const SmoothingUBOData ubo = GetSmoothingUBO(level, left, top, width, height, m_downsample_texture.GetWidth(), + m_downsample_texture.GetHeight()); + m_context->PSSetShader( + (level == 1) ? m_downsample_first_pass_pixel_shader.Get() : m_downsample_mid_pass_pixel_shader.Get(), nullptr, 0); + UploadUniformBuffer(&ubo, sizeof(ubo)); + m_context->Draw(3, 0); + } + + // blur pass at lowest level + { + const u32 last_level = levels - 1; + static constexpr float clear_color[4] = {}; + + SetViewportAndScissor(left >> last_level, top >> last_level, width >> last_level, height >> last_level); + m_context->ClearRenderTargetView(m_downsample_weight_texture.GetD3DRTV(), clear_color); + m_context->OMSetRenderTargets(1, m_downsample_weight_texture.GetD3DRTVArray(), nullptr); + m_context->PSSetShaderResources(0, 1, m_downsample_mip_views.back().first.GetAddressOf()); + m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0); + + const SmoothingUBOData ubo = GetSmoothingUBO(last_level, left, top, width, height, m_downsample_texture.GetWidth(), + m_downsample_texture.GetHeight()); + m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0); + UploadUniformBuffer(&ubo, sizeof(ubo)); + m_context->Draw(3, 0); + } + + // composite downsampled and upsampled images together + { + SetViewportAndScissor(left, top, width, height); + m_context->OMSetRenderTargets(1, GetDisplayTexture()->GetD3DRTVArray(), nullptr); + + ID3D11ShaderResourceView* const srvs[2] = {m_downsample_texture.GetD3DSRV(), + m_downsample_weight_texture.GetD3DSRV()}; + ID3D11SamplerState* const samplers[2] = {m_trilinear_sampler_state.Get(), m_linear_sampler_state.Get()}; + m_context->PSSetShaderResources(0, countof(srvs), srvs); + m_context->PSSetSamplers(0, countof(samplers), samplers); + m_context->PSSetShader(m_downsample_composite_pixel_shader.Get(), nullptr, 0); + m_context->Draw(3, 0); + } + + ID3D11ShaderResourceView* const null_srvs[2] = {}; + m_context->PSSetShaderResources(0, countof(null_srvs), null_srvs); + m_batch_ubo_dirty = true; + + RestoreGraphicsAPIState(); + + g_host_display->SetDisplayTexture(m_display_texture.get(), left, top, width, height); +#else + Panic("Not implemented"); +#endif +} + +void GPU_HW::DownsampleFramebufferBoxFilter(const GPUTexture* source, u32 left, u32 top, u32 width, u32 height) +{ +#if 0 + const u32 ds_left = left / m_resolution_scale; + const u32 ds_top = top / m_resolution_scale; + const u32 ds_width = width / m_resolution_scale; + const u32 ds_height = height / m_resolution_scale; + static constexpr float clear_color[4] = {}; + + m_context->ClearRenderTargetView(m_downsample_texture.GetD3DRTV(), clear_color); + m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); + m_context->OMSetRenderTargets(1, m_downsample_texture.GetD3DRTVArray(), nullptr); + m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); + m_context->VSSetShader(m_screen_quad_vertex_shader.Get(), nullptr, 0); + m_context->PSSetShader(m_downsample_first_pass_pixel_shader.Get(), nullptr, 0); + m_context->PSSetShaderResources(0, 1, source->GetD3DSRVArray()); + SetViewportAndScissor(ds_left, ds_top, ds_width, ds_height); + m_context->Draw(3, 0); + + RestoreGraphicsAPIState(); + + g_host_display->SetDisplayTexture(&m_downsample_texture, ds_left, ds_top, ds_width, ds_height); +#else + Panic("Not implemented"); +#endif +} + void GPU_HW::DrawRendererStats(bool is_idle_frame) { if (!is_idle_frame) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 436f18584..29eb329a9 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -2,10 +2,14 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once -#include "common/dimensional_array.h" -#include "common/heap_array.h" + #include "gpu.h" #include "gpu/gpu_device.h" +#include "texture_replacements.h" + +#include "common/dimensional_array.h" +#include "common/heap_array.h" + #include #include #include @@ -35,15 +39,17 @@ public: }; GPU_HW(); - virtual ~GPU_HW(); + ~GPU_HW() override; const Threading::Thread* GetSWThread() const override; - virtual bool Initialize() override; - virtual void Reset(bool clear_vram) override; - virtual bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; + bool Initialize() override; + void Reset(bool clear_vram) override; + bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; - virtual void UpdateSettings() override; + void RestoreGraphicsAPIState() override; + + void UpdateSettings() override; void UpdateResolutionScale() override final; std::tuple GetEffectiveDisplayResolution(bool scaled = true) override final; std::tuple GetFullDisplayResolution(bool scaled = true) override final; @@ -131,6 +137,7 @@ protected: u32 u_set_mask_while_drawing; }; + // TODO: move all these to source struct VRAMFillUBOData { u32 u_dst_x; @@ -202,22 +209,22 @@ protected: void UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed); - virtual bool CreateFramebuffer(); - virtual void DestroyFramebuffer(); + bool CreateFramebuffer(); + void ClearFramebuffer(); + void DestroyFramebuffer(); bool CompilePipelines(); void DestroyPipelines(); void UpdateVRAMReadTexture(); - virtual void UpdateDepthBufferFromMaskBit() = 0; - virtual void ClearDepthBuffer() = 0; - virtual void SetScissorFromDrawingArea() = 0; - virtual void MapBatchVertexPointer(u32 required_vertices); - virtual void UnmapBatchVertexPointer(u32 used_vertices); - virtual void UploadUniformBuffer(const void* uniforms, u32 uniforms_size) = 0; - virtual void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices); - virtual void ClearDisplay(); - virtual void UpdateDisplay(); + void UpdateDepthBufferFromMaskBit(); + void ClearDepthBuffer(); + void SetScissor(); + void MapBatchVertexPointer(u32 required_vertices); + void UnmapBatchVertexPointer(u32 used_vertices); + void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices); + void ClearDisplay(); + void UpdateDisplay(); u32 CalculateResolutionScale() const; GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; @@ -313,14 +320,13 @@ protected: void CopySoftwareRendererVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void DispatchRenderCommand() override; void FlushRender() override; void DrawRendererStats(bool is_idle_frame) override; - void CalcScissorRect(int* left, int* top, int* right, int* bottom); - std::tuple ScaleVRAMCoordinates(s32 x, s32 y) const { return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale)); @@ -337,6 +343,8 @@ protected: bool check_mask) const; VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; + bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); + /// Expands a line into two triangles. void DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth); @@ -365,11 +373,16 @@ protected: SmoothingUBOData GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width, u32 tex_height) const; + void DownsampleFramebuffer(const GPUTexture* source, u32 left, u32 top, u32 width, u32 height); + void DownsampleFramebufferAdaptive(const GPUTexture* source, u32 left, u32 top, u32 width, u32 height); + void DownsampleFramebufferBoxFilter(const GPUTexture* source, u32 left, u32 top, u32 width, u32 height); + std::unique_ptr m_vram_texture; std::unique_ptr m_vram_depth_texture; std::unique_ptr m_vram_depth_view; std::unique_ptr m_vram_read_texture; std::unique_ptr m_vram_readback_texture; + std::unique_ptr m_vram_replacement_texture; std::unique_ptr m_display_texture; std::unique_ptr m_vram_framebuffer; @@ -439,6 +452,9 @@ protected: // [depth_24][interlace_mode] DimensionalArray, 3, 2> m_display_pipelines{}; + // TODO: get rid of this, and use image blits instead where supported + std::unique_ptr m_copy_pipeline; + std::unique_ptr m_downsample_first_pass_pipeline; std::unique_ptr m_downsample_mid_pass_pipeline; std::unique_ptr m_downsample_blur_pass_pipeline; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index b7df66ccc..7aebf2204 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -5,7 +5,7 @@ #include "common/assert.h" #include "common/log.h" #include "common/timer.h" -#include "gpu/d3d11/shader_compiler.h" +#include "gpu/d3d11_device.h" #include "gpu/gpu_device.h" #include "gpu_hw_shadergen.h" #include "gpu_sw_backend.h" @@ -14,15 +14,9 @@ #include "util/state_wrapper.h" Log_SetChannel(GPU_HW_D3D11); -GPU_HW_D3D11::GPU_HW_D3D11(ID3D11Device* device, ID3D11DeviceContext* context) : m_device(device), m_context(context) {} +GPU_HW_D3D11::GPU_HW_D3D11() = default; -GPU_HW_D3D11::~GPU_HW_D3D11() -{ - g_host_display->ClearDisplayTexture(); - - DestroyShaders(); - DestroyStateObjects(); -} +GPU_HW_D3D11::~GPU_HW_D3D11() = default; GPURenderer GPU_HW_D3D11::GetRendererType() const { @@ -36,76 +30,26 @@ bool GPU_HW_D3D11::Initialize() if (!GPU_HW::Initialize()) return false; - if (!CreateFramebuffer()) - { - Log_ErrorPrintf("Failed to create framebuffer"); - return false; - } - - if (!CreateUniformBuffer()) - { - Log_ErrorPrintf("Failed to create uniform buffer"); - return false; - } - if (!CreateTextureBuffer()) { Log_ErrorPrintf("Failed to create texture buffer"); return false; } - if (!CreateStateObjects()) - { - Log_ErrorPrintf("Failed to create state objects"); - return false; - } - - if (!CompileShaders()) - { - Log_ErrorPrintf("Failed to compile shaders"); - return false; - } - RestoreGraphicsAPIState(); return true; } -void GPU_HW_D3D11::Reset(bool clear_vram) -{ - GPU_HW::Reset(clear_vram); - - if (clear_vram) - ClearFramebuffer(); -} - -void GPU_HW_D3D11::ResetGraphicsAPIState() -{ - GPU_HW::ResetGraphicsAPIState(); - - // In D3D11 we can't leave a buffer mapped across a Present() call. - FlushRender(); -} - -void GPU_HW_D3D11::RestoreGraphicsAPIState() -{ - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->PSSetShaderResources(0, 1, GetVRAMReadTexture()->GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf()); - m_context->OMSetRenderTargets(1, GetVRAMTexture()->GetD3DRTVArray(), GetVRAMDepthTexture()->GetD3DDSV()); - m_context->RSSetState(m_cull_none_rasterizer_state.Get()); - SetViewport(0, 0, GetVRAMTexture()->GetWidth(), GetVRAMTexture()->GetHeight()); - SetScissorFromDrawingArea(); - m_batch_ubo_dirty = true; -} - void GPU_HW_D3D11::SetCapabilities() { const u32 max_texture_size = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; const u32 max_texture_scale = max_texture_size / VRAM_WIDTH; + ID3D11Device* device = D3D11Device::GetD3DDevice(); + m_max_resolution_scale = max_texture_scale; m_supports_dual_source_blend = true; - m_supports_per_sample_shading = (m_device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_10_1); + m_supports_per_sample_shading = (device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_10_1); m_supports_adaptive_downsampling = true; m_supports_disable_color_perspective = true; @@ -114,7 +58,7 @@ void GPU_HW_D3D11::SetCapabilities() { UINT num_quality_levels; if (SUCCEEDED( - m_device->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, multisamples, &num_quality_levels)) && + device->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, multisamples, &num_quality_levels)) && num_quality_levels > 0) { m_max_multisamples = multisamples; @@ -122,95 +66,17 @@ void GPU_HW_D3D11::SetCapabilities() } } -bool GPU_HW_D3D11::CreateFramebuffer() -{ - if (!GPU_HW::CreateFramebuffer()) - return false; - - const u32 texture_width = m_vram_texture->GetWidth(); - const u32 texture_height = m_vram_texture->GetHeight(); - const GPUTexture::Format texture_format = m_vram_texture->GetFormat(); - - if (m_downsample_mode == GPUDownsampleMode::Adaptive) - { - const u32 levels = GetAdaptiveDownsamplingMipLevels(); - - if (!m_downsample_texture.Create(m_device.Get(), texture_width, texture_height, 1, static_cast(levels), 1, - GPUTexture::Type::RenderTarget, texture_format) || - !m_downsample_weight_texture.Create(m_device.Get(), texture_width >> (levels - 1), - texture_height >> (levels - 1), 1, 1, 1, GPUTexture::Type::RenderTarget, - GPUTexture::Format::R8)) - { - return false; - } - - m_downsample_mip_views.resize(levels); - for (u32 i = 0; i < levels; i++) - { - const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_downsample_texture, D3D11_SRV_DIMENSION_TEXTURE2D, - m_downsample_texture.GetDXGIFormat(), i, 1); - const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D, - m_downsample_texture.GetDXGIFormat(), i, 1); - - HRESULT hr = m_device->CreateShaderResourceView(m_downsample_texture, &srv_desc, - m_downsample_mip_views[i].first.GetAddressOf()); - if (FAILED(hr)) - return false; - - hr = m_device->CreateRenderTargetView(m_downsample_texture, &rtv_desc, - m_downsample_mip_views[i].second.GetAddressOf()); - if (FAILED(hr)) - return false; - } - } - else if (m_downsample_mode == GPUDownsampleMode::Box) - { - if (!m_downsample_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Type::RenderTarget, - texture_format)) - { - return false; - } - } - - m_context->OMSetRenderTargets(1, GetVRAMTexture()->GetD3DRTVArray(), nullptr); - SetFullVRAMDirtyRectangle(); - return true; -} - -void GPU_HW_D3D11::ClearFramebuffer() -{ - static constexpr std::array color = {}; - m_context->ClearRenderTargetView(GetVRAMTexture()->GetD3DRTV(), color.data()); - m_context->ClearDepthStencilView(GetVRAMDepthTexture()->GetD3DDSV(), D3D11_CLEAR_DEPTH, - m_pgxp_depth_buffer ? 1.0f : 0.0f, 0); - m_context->ClearRenderTargetView(GetDisplayTexture()->GetD3DRTV(), color.data()); - SetFullVRAMDirtyRectangle(); - m_last_depth_z = 1.0f; -} - -void GPU_HW_D3D11::DestroyFramebuffer() -{ - m_downsample_mip_views.clear(); - m_downsample_weight_texture.Destroy(); - m_downsample_texture.Destroy(); - - GPU_HW::DestroyFramebuffer(); -} - -bool GPU_HW_D3D11::CreateUniformBuffer() -{ - return m_uniform_stream_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, MAX_UNIFORM_BUFFER_SIZE); -} - bool GPU_HW_D3D11::CreateTextureBuffer() { - if (!m_texture_stream_buffer.Create(m_device.Get(), D3D11_BIND_SHADER_RESOURCE, VRAM_UPDATE_TEXTURE_BUFFER_SIZE)) + ID3D11Device* device = D3D11Device::GetD3DDevice(); + + if (!m_texture_stream_buffer.Create(device, D3D11_BIND_SHADER_RESOURCE, VRAM_UPDATE_TEXTURE_BUFFER_SIZE)) return false; const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_R16_UINT, 0, VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16)); - const HRESULT hr = m_device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc, - m_texture_stream_buffer_srv_r16ui.ReleaseAndGetAddressOf()); + const HRESULT hr = device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc, + m_texture_stream_buffer_srv_r16ui.ReleaseAndGetAddressOf()); if (FAILED(hr)) { Log_ErrorPrintf("Creation of texture buffer SRV failed: 0x%08X", hr); @@ -220,404 +86,6 @@ bool GPU_HW_D3D11::CreateTextureBuffer() return true; } -bool GPU_HW_D3D11::CreateStateObjects() -{ - HRESULT hr; - - CD3D11_RASTERIZER_DESC rs_desc = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); - rs_desc.CullMode = D3D11_CULL_NONE; - rs_desc.ScissorEnable = TRUE; - rs_desc.MultisampleEnable = IsUsingMultisampling(); - rs_desc.DepthClipEnable = FALSE; - hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - if (IsUsingMultisampling()) - { - rs_desc.MultisampleEnable = FALSE; - hr = m_device->CreateRasterizerState(&rs_desc, m_cull_none_rasterizer_state_no_msaa.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - } - else - { - m_cull_none_rasterizer_state_no_msaa = m_cull_none_rasterizer_state; - } - - CD3D11_DEPTH_STENCIL_DESC ds_desc = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); - ds_desc.DepthEnable = FALSE; - ds_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_disabled_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - ds_desc.DepthEnable = TRUE; - ds_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; - ds_desc.DepthFunc = D3D11_COMPARISON_ALWAYS; - hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_always_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - ds_desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL; - hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_less_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - ds_desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL; - hr = m_device->CreateDepthStencilState(&ds_desc, m_depth_test_greater_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_BLEND_DESC bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - hr = m_device->CreateBlendState(&bl_desc, m_blend_disabled_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - bl_desc.RenderTarget[0].RenderTargetWriteMask = 0; - hr = m_device->CreateBlendState(&bl_desc, m_blend_no_color_writes_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_SAMPLER_DESC sampler_desc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; - sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; - hr = m_device->CreateSamplerState(&sampler_desc, m_point_sampler_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; - hr = m_device->CreateSamplerState(&sampler_desc, m_linear_sampler_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - hr = m_device->CreateSamplerState(&sampler_desc, m_trilinear_sampler_state.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - - return true; -} - -void GPU_HW_D3D11::DestroyStateObjects() -{ - m_linear_sampler_state.Reset(); - m_point_sampler_state.Reset(); - m_trilinear_sampler_state.Reset(); - m_blend_no_color_writes_state.Reset(); - m_blend_disabled_state.Reset(); - m_depth_test_greater_state.Reset(); - m_depth_test_less_state.Reset(); - m_depth_test_always_state.Reset(); - m_depth_disabled_state.Reset(); - m_cull_none_rasterizer_state.Reset(); - m_cull_none_rasterizer_state_no_msaa.Reset(); -} - -bool GPU_HW_D3D11::CompileShaders() -{ - if (!GPU_HW::CompilePipelines()) - return false; - - D3D11::ShaderCache shader_cache; -#if 0 - shader_cache.Open(EmuFolders::Cache, m_device->GetFeatureLevel(), SHADER_CACHE_VERSION, - g_settings.gpu_use_debug_device); -#endif - - GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, - m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, - m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); - - ShaderCompileProgressTracker progress("Compiling Shaders", - 1 + 1 + 2 + (4 * 9 * 2 * 2) + 1 + (2 * 2) + 4 + (2 * 3) + 1); - - m_screen_quad_vertex_shader = - shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateScreenQuadVertexShader()); - m_uv_quad_vertex_shader = shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateUVQuadVertexShader()); - if (!m_screen_quad_vertex_shader || !m_uv_quad_vertex_shader) - return false; - - progress.Increment(); - - m_copy_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateCopyFragmentShader()); - if (!m_copy_pixel_shader) - return false; - - progress.Increment(); - - for (u8 wrapped = 0; wrapped < 2; wrapped++) - { - for (u8 interlaced = 0; interlaced < 2; interlaced++) - { - const std::string ps = - shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)); - m_vram_fill_pixel_shaders[wrapped][interlaced] = shader_cache.GetPixelShader(m_device.Get(), ps); - if (!m_vram_fill_pixel_shaders[wrapped][interlaced]) - return false; - - progress.Increment(); - } - } - - m_vram_read_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMReadFragmentShader()); - if (!m_vram_read_pixel_shader) - return false; - - progress.Increment(); - - m_vram_write_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(false)); - if (!m_vram_write_pixel_shader) - return false; - - progress.Increment(); - - m_vram_copy_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMCopyFragmentShader()); - if (!m_vram_copy_pixel_shader) - return false; - - progress.Increment(); - - m_vram_update_depth_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMUpdateDepthFragmentShader()); - if (!m_vram_update_depth_pixel_shader) - return false; - - progress.Increment(); - - for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) - { - for (u8 interlacing = 0; interlacing < 3; interlacing++) - { - const std::string ps = shadergen.GenerateDisplayFragmentShader( - ConvertToBoolUnchecked(depth_24bit), static_cast(interlacing), - ConvertToBoolUnchecked(depth_24bit) && m_chroma_smoothing); - m_display_pixel_shaders[depth_24bit][interlacing] = shader_cache.GetPixelShader(m_device.Get(), ps); - if (!m_display_pixel_shaders[depth_24bit][interlacing]) - return false; - - progress.Increment(); - } - } - - if (m_downsample_mode == GPUDownsampleMode::Adaptive) - { - m_downsample_first_pass_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true)); - m_downsample_mid_pass_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false)); - m_downsample_blur_pass_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateAdaptiveDownsampleBlurFragmentShader()); - m_downsample_composite_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader()); - - if (!m_downsample_first_pass_pixel_shader || !m_downsample_mid_pass_pixel_shader || - !m_downsample_blur_pass_pixel_shader || !m_downsample_composite_pixel_shader) - { - return false; - } - } - else if (m_downsample_mode == GPUDownsampleMode::Box) - { - m_downsample_first_pass_pixel_shader = - shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateBoxSampleDownsampleFragmentShader()); - if (!m_downsample_first_pass_pixel_shader) - return false; - } - - progress.Increment(); - -#undef UPDATE_PROGRESS - - return true; -} - -void GPU_HW_D3D11::DestroyShaders() -{ - m_downsample_composite_pixel_shader.Reset(); - m_downsample_blur_pass_pixel_shader.Reset(); - m_downsample_mid_pass_pixel_shader.Reset(); - m_downsample_first_pass_pixel_shader.Reset(); - m_display_pixel_shaders = {}; - m_vram_update_depth_pixel_shader.Reset(); - m_vram_copy_pixel_shader.Reset(); - m_vram_write_pixel_shader.Reset(); - m_vram_read_pixel_shader.Reset(); - m_vram_fill_pixel_shaders = {}; - m_copy_pixel_shader.Reset(); - m_uv_quad_vertex_shader.Reset(); - m_screen_quad_vertex_shader.Reset(); - - GPU_HW::DestroyPipelines(); -} - -void GPU_HW_D3D11::UploadUniformBuffer(const void* data, u32 data_size) -{ - Assert(data_size <= MAX_UNIFORM_BUFFER_SIZE); - - const auto res = m_uniform_stream_buffer.Map(m_context.Get(), MAX_UNIFORM_BUFFER_SIZE, data_size); - std::memcpy(res.pointer, data, data_size); - m_uniform_stream_buffer.Unmap(m_context.Get(), data_size); - - m_context->VSSetConstantBuffers(0, 1, m_uniform_stream_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_uniform_stream_buffer.GetD3DBufferArray()); - - m_renderer_stats.num_uniform_buffer_updates++; -} - -void GPU_HW_D3D11::SetViewport(u32 x, u32 y, u32 width, u32 height) -{ - const CD3D11_VIEWPORT vp(static_cast(x), static_cast(y), static_cast(width), - static_cast(height)); - m_context->RSSetViewports(1, &vp); -} - -void GPU_HW_D3D11::SetScissor(u32 x, u32 y, u32 width, u32 height) -{ - const CD3D11_RECT rc(x, y, x + width, y + height); - m_context->RSSetScissorRects(1, &rc); -} - -void GPU_HW_D3D11::SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height) -{ - SetViewport(x, y, width, height); - SetScissor(x, y, width, height); -} - -void GPU_HW_D3D11::DrawUtilityShader(ID3D11PixelShader* shader, const void* uniforms, u32 uniforms_size) -{ - if (uniforms) - { - UploadUniformBuffer(uniforms, uniforms_size); - m_batch_ubo_dirty = true; - } - - m_context->VSSetShader(m_screen_quad_vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(shader, nullptr, 0); - m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); - - m_context->Draw(3, 0); -} - -bool GPU_HW_D3D11::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, - u32 height) -{ - if (m_vram_replacement_texture.GetWidth() < tex->GetWidth() || - m_vram_replacement_texture.GetHeight() < tex->GetHeight()) - { - if (!m_vram_replacement_texture.Create(m_device.Get(), tex->GetWidth(), tex->GetHeight(), 1, 1, 1, - GPUTexture::Type::Texture, GPUTexture::Format::RGBA8, tex->GetPixels(), - tex->GetPitch(), true)) - { - return false; - } - } - else - { - D3D11_MAPPED_SUBRESOURCE sr; - HRESULT hr = m_context->Map(m_vram_replacement_texture, 0, D3D11_MAP_WRITE_DISCARD, 0, &sr); - if (FAILED(hr)) - { - Log_ErrorPrintf("Texture map failed: %08X", hr); - return false; - } - - const u32 copy_size = std::min(tex->GetPitch(), sr.RowPitch); - const u8* src_ptr = reinterpret_cast(tex->GetPixels()); - u8* dst_ptr = static_cast(sr.pData); - for (u32 i = 0; i < tex->GetHeight(); i++) - { - std::memcpy(dst_ptr, src_ptr, copy_size); - src_ptr += tex->GetPitch(); - dst_ptr += sr.RowPitch; - } - - m_context->Unmap(m_vram_replacement_texture, 0); - } - - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, m_vram_replacement_texture.GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, m_linear_sampler_state.GetAddressOf()); - SetViewportAndScissor(dst_x, dst_y, width, height); - - const float uniforms[] = {0.0f, 0.0f, 1.0f, 1.0f}; - DrawUtilityShader(m_copy_pixel_shader.Get(), uniforms, sizeof(uniforms)); - RestoreGraphicsAPIState(); - return true; -} - -void GPU_HW_D3D11::SetScissorFromDrawingArea() -{ - int left, top, right, bottom; - CalcScissorRect(&left, &top, &right, &bottom); - - CD3D11_RECT rc(left, top, right, bottom); - m_context->RSSetScissorRects(1, &rc); -} - -void GPU_HW_D3D11::ClearDisplay() -{ - GPU_HW::ClearDisplay(); - - g_host_display->ClearDisplayTexture(); - - static constexpr std::array clear_color = {0.0f, 0.0f, 0.0f, 1.0f}; - m_context->ClearRenderTargetView(GetDisplayTexture()->GetD3DRTV(), clear_color.data()); -} - -void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - { - ReadSoftwareRendererVRAM(x, y, width, height); - return; - } - - // Get bounds with wrap-around handled. - const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); - const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2; - const u32 encoded_height = copy_rect.GetHeight(); - - // Encode the 24-bit texture as 16-bit. - const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); - m_context->OMSetRenderTargets(1, GetVRAMEncodingTexture()->GetD3DRTVArray(), nullptr); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, GetVRAMTexture()->GetD3DSRVArray()); - SetViewportAndScissor(0, 0, encoded_width, encoded_height); - DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms)); - - // Stage the readback and copy it into our shadow buffer. - g_host_display->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height, - reinterpret_cast(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]), - VRAM_WIDTH * sizeof(u16)); - - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - if (IsUsingSoftwareRendererForReadbacks()) - FillSoftwareRendererVRAM(x, y, width, height, color); - - GPU_HW::FillVRAM(x, y, width, height, color); - - m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0); - - const Common::Rectangle bounds(GetVRAMTransferBounds(x, y, width, height)); - SetViewportAndScissor(bounds.left * m_resolution_scale, bounds.top * m_resolution_scale, - bounds.GetWidth() * m_resolution_scale, bounds.GetHeight() * m_resolution_scale); - - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); - DrawUtilityShader(m_vram_fill_pixel_shaders[BoolToUInt8(IsVRAMFillOversized(x, y, width, height))] - [BoolToUInt8(IsInterlacedRenderingEnabled())] - .Get(), - &uniforms, sizeof(uniforms)); - - RestoreGraphicsAPIState(); -} - void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) { if (IsUsingSoftwareRendererForReadbacks()) @@ -637,203 +105,26 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d } const u32 num_pixels = width * height; - const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16)); + const auto map_result = + m_texture_stream_buffer.Map(D3D11Device::GetD3DContext(), sizeof(u16), num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); - m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16)); + m_texture_stream_buffer.Unmap(D3D11Device::GetD3DContext(), num_pixels * sizeof(u16)); const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, set_mask, check_mask); - m_context->OMSetDepthStencilState( - (check_mask && !m_pgxp_depth_buffer) ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf()); // the viewport should already be set to the full vram, so just adjust the scissor const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; - SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); - - DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms)); + g_host_display->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), + scaled_bounds.GetHeight()); + g_host_display->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer)].get()); + g_host_display->PushUniformBuffer(&uniforms, sizeof(uniforms)); + D3D11Device::GetD3DContext()->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf()); + g_host_display->Draw(3, 0); RestoreGraphicsAPIState(); } -void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - if (IsUsingSoftwareRendererForReadbacks()) - CopySoftwareRendererVRAM(src_x, src_y, dst_x, dst_y, width, height); - - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height) || IsUsingMultisampling()) - { - const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); - const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); - if (m_vram_dirty_rect.Intersects(src_bounds)) - UpdateVRAMReadTexture(); - IncludeVRAMDirtyRectangle(dst_bounds); - - const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); - - const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); - SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), - dst_bounds_scaled.GetHeight()); - m_context->OMSetDepthStencilState((m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) ? - m_depth_test_greater_state.Get() : - m_depth_test_always_state.Get(), - 0); - m_context->PSSetShaderResources(0, 1, GetVRAMReadTexture()->GetD3DSRVArray()); - DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms)); - RestoreGraphicsAPIState(); - - if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) - m_current_depth++; - - return; - } - - // We can't CopySubresourceRegion to the same resource. So use the shadow texture if we can, but that may need to be - // updated first. Copying to the same resource seemed to work on Windows 10, but breaks on Windows 7. But, it's - // against the API spec, so better to be safe than sorry. - if (m_vram_dirty_rect.Intersects(Common::Rectangle::FromExtents(src_x, src_y, width, height))) - UpdateVRAMReadTexture(); - - GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); - - src_x *= m_resolution_scale; - src_y *= m_resolution_scale; - dst_x *= m_resolution_scale; - dst_y *= m_resolution_scale; - width *= m_resolution_scale; - height *= m_resolution_scale; - - const CD3D11_BOX src_box(src_x, src_y, 0, src_x + width, src_y + height, 1); - m_context->CopySubresourceRegion(GetVRAMTexture()->GetD3DTexture(), 0, dst_x, dst_y, 0, - GetVRAMReadTexture()->GetD3DTexture(), 0, &src_box); -} - -void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit() -{ - if (m_pgxp_depth_buffer) - return; - - SetViewportAndScissor(0, 0, GetVRAMTexture()->GetWidth(), GetVRAMTexture()->GetHeight()); - - m_context->OMSetRenderTargets(0, nullptr, GetVRAMDepthTexture()->GetD3DDSV()); - m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0); - m_context->OMSetBlendState(m_blend_no_color_writes_state.Get(), nullptr, 0xFFFFFFFFu); - - m_context->PSSetShaderResources(0, 1, GetVRAMTexture()->GetD3DSRVArray()); - DrawUtilityShader(m_vram_update_depth_pixel_shader.Get(), nullptr, 0); - - m_context->PSSetShaderResources(0, 1, GetVRAMReadTexture()->GetD3DSRVArray()); - RestoreGraphicsAPIState(); -} - -void GPU_HW_D3D11::ClearDepthBuffer() -{ - DebugAssert(m_pgxp_depth_buffer); - - m_context->ClearDepthStencilView(GetVRAMDepthTexture()->GetD3DDSV(), D3D11_CLEAR_DEPTH, 1.0f, 0); - m_last_depth_z = 1.0f; -} - -void GPU_HW_D3D11::DownsampleFramebuffer(const D3D11Texture* source, u32 left, u32 top, u32 width, u32 height) -{ - if (m_downsample_mode == GPUDownsampleMode::Adaptive) - DownsampleFramebufferAdaptive(source, left, top, width, height); - else - DownsampleFramebufferBoxFilter(source, left, top, width, height); -} - -void GPU_HW_D3D11::DownsampleFramebufferAdaptive(const D3D11Texture* source, u32 left, u32 top, u32 width, u32 height) -{ - CD3D11_BOX src_box(left, top, 0, left + width, top + height, 1); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); - m_context->CopySubresourceRegion(m_downsample_texture, 0, left, top, 0, source->GetD3DTexture(), 0, &src_box); - m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf()); - m_context->VSSetShader(m_uv_quad_vertex_shader.Get(), nullptr, 0); - - // create mip chain - const u32 levels = m_downsample_texture.GetLevels(); - for (u32 level = 1; level < levels; level++) - { - static constexpr float clear_color[4] = {}; - - SetViewportAndScissor(left >> level, top >> level, width >> level, height >> level); - m_context->ClearRenderTargetView(m_downsample_mip_views[level].second.Get(), clear_color); - m_context->OMSetRenderTargets(1, m_downsample_mip_views[level].second.GetAddressOf(), nullptr); - m_context->PSSetShaderResources(0, 1, m_downsample_mip_views[level - 1].first.GetAddressOf()); - - const SmoothingUBOData ubo = GetSmoothingUBO(level, left, top, width, height, m_downsample_texture.GetWidth(), - m_downsample_texture.GetHeight()); - m_context->PSSetShader( - (level == 1) ? m_downsample_first_pass_pixel_shader.Get() : m_downsample_mid_pass_pixel_shader.Get(), nullptr, 0); - UploadUniformBuffer(&ubo, sizeof(ubo)); - m_context->Draw(3, 0); - } - - // blur pass at lowest level - { - const u32 last_level = levels - 1; - static constexpr float clear_color[4] = {}; - - SetViewportAndScissor(left >> last_level, top >> last_level, width >> last_level, height >> last_level); - m_context->ClearRenderTargetView(m_downsample_weight_texture.GetD3DRTV(), clear_color); - m_context->OMSetRenderTargets(1, m_downsample_weight_texture.GetD3DRTVArray(), nullptr); - m_context->PSSetShaderResources(0, 1, m_downsample_mip_views.back().first.GetAddressOf()); - m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0); - - const SmoothingUBOData ubo = GetSmoothingUBO(last_level, left, top, width, height, m_downsample_texture.GetWidth(), - m_downsample_texture.GetHeight()); - m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0); - UploadUniformBuffer(&ubo, sizeof(ubo)); - m_context->Draw(3, 0); - } - - // composite downsampled and upsampled images together - { - SetViewportAndScissor(left, top, width, height); - m_context->OMSetRenderTargets(1, GetDisplayTexture()->GetD3DRTVArray(), nullptr); - - ID3D11ShaderResourceView* const srvs[2] = {m_downsample_texture.GetD3DSRV(), - m_downsample_weight_texture.GetD3DSRV()}; - ID3D11SamplerState* const samplers[2] = {m_trilinear_sampler_state.Get(), m_linear_sampler_state.Get()}; - m_context->PSSetShaderResources(0, countof(srvs), srvs); - m_context->PSSetSamplers(0, countof(samplers), samplers); - m_context->PSSetShader(m_downsample_composite_pixel_shader.Get(), nullptr, 0); - m_context->Draw(3, 0); - } - - ID3D11ShaderResourceView* const null_srvs[2] = {}; - m_context->PSSetShaderResources(0, countof(null_srvs), null_srvs); - m_batch_ubo_dirty = true; - - RestoreGraphicsAPIState(); - - g_host_display->SetDisplayTexture(m_display_texture.get(), left, top, width, height); -} - -void GPU_HW_D3D11::DownsampleFramebufferBoxFilter(const D3D11Texture* source, u32 left, u32 top, u32 width, u32 height) -{ - const u32 ds_left = left / m_resolution_scale; - const u32 ds_top = top / m_resolution_scale; - const u32 ds_width = width / m_resolution_scale; - const u32 ds_height = height / m_resolution_scale; - static constexpr float clear_color[4] = {}; - - m_context->ClearRenderTargetView(m_downsample_texture.GetD3DRTV(), clear_color); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->OMSetRenderTargets(1, m_downsample_texture.GetD3DRTVArray(), nullptr); - m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); - m_context->VSSetShader(m_screen_quad_vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(m_downsample_first_pass_pixel_shader.Get(), nullptr, 0); - m_context->PSSetShaderResources(0, 1, source->GetD3DSRVArray()); - SetViewportAndScissor(ds_left, ds_top, ds_width, ds_height); - m_context->Draw(3, 0); - - RestoreGraphicsAPIState(); - - g_host_display->SetDisplayTexture(&m_downsample_texture, ds_left, ds_top, ds_width, ds_height); -} - std::unique_ptr GPU::CreateHardwareD3D11Renderer() { if (!Host::AcquireHostDisplay(RenderAPI::D3D11)) @@ -842,12 +133,7 @@ std::unique_ptr GPU::CreateHardwareD3D11Renderer() return nullptr; } - ID3D11Device* device = static_cast(g_host_display->GetDevice()); - ID3D11DeviceContext* context = static_cast(g_host_display->GetContext()); - if (!device || !context) - return nullptr; - - std::unique_ptr gpu(std::make_unique(device, context)); + std::unique_ptr gpu(std::make_unique()); if (!gpu->Initialize()) return nullptr; diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index 5a65523fc..2d6b3d4de 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -6,7 +6,6 @@ #include "gpu/d3d11/stream_buffer.h" #include "gpu/d3d11_texture.h" #include "gpu_hw.h" -#include "texture_replacements.h" #include #include #include @@ -19,119 +18,22 @@ public: template using ComPtr = Microsoft::WRL::ComPtr; - GPU_HW_D3D11(ID3D11Device* device, ID3D11DeviceContext* context); + GPU_HW_D3D11(); ~GPU_HW_D3D11() override; GPURenderer GetRendererType() const override; bool Initialize() override; - void Reset(bool clear_vram) override; - - void ResetGraphicsAPIState() override; - void RestoreGraphicsAPIState() override; protected: - void ClearDisplay() override; - void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateDepthBufferFromMaskBit() override; - void ClearDepthBuffer() override; - void SetScissorFromDrawingArea() override; - void UploadUniformBuffer(const void* data, u32 data_size) override; private: - enum : u32 - { - // Currently we don't stream uniforms, instead just re-map the buffer every time and let the driver take care of it. - MAX_UNIFORM_BUFFER_SIZE = 64 - }; - - ALWAYS_INLINE D3D11Texture* GetVRAMTexture() const { return static_cast(m_vram_texture.get()); } - ALWAYS_INLINE D3D11Texture* GetVRAMDepthTexture() const - { - return static_cast(m_vram_depth_texture.get()); - } - ALWAYS_INLINE D3D11Texture* GetVRAMReadTexture() const - { - return static_cast(m_vram_read_texture.get()); - } - ALWAYS_INLINE D3D11Texture* GetVRAMEncodingTexture() const - { - return static_cast(m_vram_readback_texture.get()); - } - ALWAYS_INLINE D3D11Texture* GetDisplayTexture() const - { - return static_cast(m_display_texture.get()); - } - void SetCapabilities(); - bool CreateFramebuffer() override; - void ClearFramebuffer(); - void DestroyFramebuffer() override; - bool CreateUniformBuffer(); bool CreateTextureBuffer(); - bool CreateStateObjects(); - void DestroyStateObjects(); - - bool CompileShaders(); - void DestroyShaders(); - void SetViewport(u32 x, u32 y, u32 width, u32 height); - void SetScissor(u32 x, u32 y, u32 width, u32 height); - void SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height); - - void DrawUtilityShader(ID3D11PixelShader* shader, const void* uniforms, u32 uniforms_size); - - bool BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height); - - void DownsampleFramebuffer(const D3D11Texture* source, u32 left, u32 top, u32 width, u32 height); - void DownsampleFramebufferAdaptive(const D3D11Texture* source, u32 left, u32 top, u32 width, u32 height); - void DownsampleFramebufferBoxFilter(const D3D11Texture* source, u32 left, u32 top, u32 width, u32 height); - - ComPtr m_device; - ComPtr m_context; - - D3D11::StreamBuffer m_uniform_stream_buffer; D3D11::StreamBuffer m_texture_stream_buffer; ComPtr m_texture_stream_buffer_srv_r16ui; - - ComPtr m_cull_none_rasterizer_state; - ComPtr m_cull_none_rasterizer_state_no_msaa; - - ComPtr m_depth_disabled_state; - ComPtr m_depth_test_always_state; - ComPtr m_depth_test_less_state; - ComPtr m_depth_test_greater_state; - - ComPtr m_blend_disabled_state; - ComPtr m_blend_no_color_writes_state; - - ComPtr m_point_sampler_state; - ComPtr m_linear_sampler_state; - ComPtr m_trilinear_sampler_state; - - ComPtr m_screen_quad_vertex_shader; - ComPtr m_uv_quad_vertex_shader; - ComPtr m_copy_pixel_shader; - std::array, 2>, 2> m_vram_fill_pixel_shaders; // [wrapped][interlaced] - ComPtr m_vram_read_pixel_shader; - ComPtr m_vram_write_pixel_shader; - ComPtr m_vram_copy_pixel_shader; - ComPtr m_vram_update_depth_pixel_shader; - std::array, 3>, 2> m_display_pixel_shaders; // [depth_24][interlaced] - - D3D11Texture m_vram_replacement_texture; - - // downsampling - ComPtr m_downsample_first_pass_pixel_shader; - ComPtr m_downsample_mid_pass_pixel_shader; - ComPtr m_downsample_blur_pass_pixel_shader; - ComPtr m_downsample_composite_pixel_shader; - D3D11Texture m_downsample_texture; - D3D11Texture m_downsample_weight_texture; - std::vector, ComPtr>> m_downsample_mip_views; }; diff --git a/src/core/gpu_hw_d3d12.cpp b/src/core/gpu_hw_d3d12.cpp index ee50d4051..4ceff89c2 100644 --- a/src/core/gpu_hw_d3d12.cpp +++ b/src/core/gpu_hw_d3d12.cpp @@ -93,11 +93,6 @@ void GPU_HW_D3D12::Reset(bool clear_vram) ClearFramebuffer(); } -void GPU_HW_D3D12::ResetGraphicsAPIState() -{ - GPU_HW::ResetGraphicsAPIState(); -} - void GPU_HW_D3D12::RestoreGraphicsAPIState() { ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); @@ -118,7 +113,7 @@ void GPU_HW_D3D12::RestoreGraphicsAPIState() D3D12::SetViewport(cmdlist, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - SetScissorFromDrawingArea(); + SetScissor(); } void GPU_HW_D3D12::UpdateSettings() @@ -132,7 +127,6 @@ void GPU_HW_D3D12::UpdateSettings() { RestoreGraphicsAPIState(); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); } // Everything should be finished executing before recreating resources. @@ -156,10 +150,10 @@ void GPU_HW_D3D12::UpdateSettings() UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); UpdateDepthBufferFromMaskBit(); UpdateDisplay(); - ResetGraphicsAPIState(); } } +#if 0 void GPU_HW_D3D12::MapBatchVertexPointer(u32 required_vertices) { DebugAssert(!m_batch_start_vertex_ptr); @@ -209,6 +203,7 @@ void GPU_HW_D3D12::UploadUniformBuffer(const void* data, u32 data_size) g_d3d12_context->GetCommandList()->SetGraphicsRootConstantBufferView(0, m_uniform_stream_buffer.GetGPUPointer() + m_current_uniform_buffer_offset); } +#endif void GPU_HW_D3D12::SetCapabilities() { @@ -817,6 +812,7 @@ bool GPU_HW_D3D12::BlitVRAMReplacementTexture(const TextureReplacementTexture* t return true; } +#if 0 void GPU_HW_D3D12::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) { ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); @@ -831,14 +827,7 @@ void GPU_HW_D3D12::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte cmdlist->SetPipelineState(pipeline); cmdlist->DrawInstanced(num_vertices, 1, base_vertex, 0); } - -void GPU_HW_D3D12::SetScissorFromDrawingArea() -{ - int left, top, right, bottom; - CalcScissorRect(&left, &top, &right, &bottom); - - D3D12::SetScissor(g_d3d12_context->GetCommandList(), left, top, right - left, bottom - top); -} +#endif void GPU_HW_D3D12::ClearDisplay() { @@ -1159,24 +1148,6 @@ void GPU_HW_D3D12::UpdateVRAMReadTexture() GPU_HW::UpdateVRAMReadTexture(); } -#endif - -void GPU_HW_D3D12::UpdateDepthBufferFromMaskBit() -{ - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - - cmdlist->OMSetRenderTargets(0, nullptr, FALSE, &m_vram_depth_texture.GetRTVOrDSVDescriptor().cpu_handle); - cmdlist->SetGraphicsRootDescriptorTable(1, m_vram_texture.GetSRVDescriptor()); - cmdlist->SetPipelineState(m_vram_update_depth_pipeline.Get()); - D3D12::SetViewportAndScissor(cmdlist, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - cmdlist->DrawInstanced(3, 1, 0, 0); - - m_vram_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - - RestoreGraphicsAPIState(); -} void GPU_HW_D3D12::ClearDepthBuffer() { @@ -1184,6 +1155,7 @@ void GPU_HW_D3D12::ClearDepthBuffer() cmdlist->ClearDepthStencilView(m_vram_depth_texture.GetRTVOrDSVDescriptor(), D3D12_CLEAR_FLAG_DEPTH, m_pgxp_depth_buffer ? 1.0f : 0.0f, 0, 0, nullptr); } +#endif std::unique_ptr GPU::CreateHardwareD3D12Renderer() { diff --git a/src/core/gpu_hw_d3d12.h b/src/core/gpu_hw_d3d12.h index a5af21513..d8bf6e093 100644 --- a/src/core/gpu_hw_d3d12.h +++ b/src/core/gpu_hw_d3d12.h @@ -26,7 +26,6 @@ public: bool Initialize() override; void Reset(bool clear_vram) override; - void ResetGraphicsAPIState() override; void RestoreGraphicsAPIState() override; void UpdateSettings() override; @@ -37,13 +36,6 @@ protected: void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateDepthBufferFromMaskBit() override; - void ClearDepthBuffer() override; - void SetScissorFromDrawingArea() override; - void MapBatchVertexPointer(u32 required_vertices) override; - void UnmapBatchVertexPointer(u32 used_vertices) override; - void UploadUniformBuffer(const void* data, u32 data_size) override; - void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; private: enum : u32 diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 28f20f897..66b3ec8ac 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -150,17 +150,6 @@ void GPU_HW_OpenGL::CopyFramebufferForState(GLenum target, GLuint src_texture, u } #endif -void GPU_HW_OpenGL::ResetGraphicsAPIState() -{ - GPU_HW::ResetGraphicsAPIState(); - - glEnable(GL_CULL_FACE); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - glBindVertexArray(0); - m_uniform_stream_buffer->Unbind(); -} - void GPU_HW_OpenGL::RestoreGraphicsAPIState() { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); @@ -176,7 +165,7 @@ void GPU_HW_OpenGL::RestoreGraphicsAPIState() SetBlendMode(); m_current_depth_test = 0; SetDepthFunc(); - SetScissorFromDrawingArea(); + SetScissor(); m_batch_ubo_dirty = true; } @@ -191,7 +180,6 @@ void GPU_HW_OpenGL::UpdateSettings() { RestoreGraphicsAPIState(); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); g_host_display->ClearDisplayTexture(); CreateFramebuffer(); } @@ -204,10 +192,10 @@ void GPU_HW_OpenGL::UpdateSettings() UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); UpdateDepthBufferFromMaskBit(); UpdateDisplay(); - ResetGraphicsAPIState(); } } +#if 0 void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices) { DebugAssert(!m_batch_start_vertex_ptr); @@ -230,6 +218,7 @@ void GPU_HW_OpenGL::UnmapBatchVertexPointer(u32 used_vertices) m_batch_end_vertex_ptr = nullptr; m_batch_current_vertex_ptr = nullptr; } +#endif void GPU_HW_OpenGL::SetCapabilities() { @@ -664,6 +653,7 @@ bool GPU_HW_OpenGL::CompilePrograms() return true; } +#if 0 void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) { const GL::Program& prog = m_render_programs[static_cast(render_mode)][static_cast(m_batch.texture_mode)] @@ -681,6 +671,7 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices); } +#endif void GPU_HW_OpenGL::SetBlendMode() { @@ -753,20 +744,7 @@ void GPU_HW_OpenGL::SetDepthFunc(GLenum func) m_current_depth_test = func; } -void GPU_HW_OpenGL::SetScissorFromDrawingArea() -{ - int left, top, right, bottom; - CalcScissorRect(&left, &top, &right, &bottom); - - const int width = right - left; - const int height = bottom - top; - const int x = left; - const int y = top; - - Log_DebugPrintf("SetScissor: (%d-%d, %d-%d)", x, x + width, y, y + height); - glScissor(x, y, width, height); -} - +#if 0 void GPU_HW_OpenGL::UploadUniformBuffer(const void* data, u32 data_size) { const GL::StreamBuffer::MappingResult res = m_uniform_stream_buffer->Map(m_uniform_buffer_alignment, data_size); @@ -777,6 +755,7 @@ void GPU_HW_OpenGL::UploadUniformBuffer(const void* data, u32 data_size) m_renderer_stats.num_uniform_buffer_updates++; } +#endif void GPU_HW_OpenGL::ClearDisplay() { @@ -871,7 +850,7 @@ void GPU_HW_OpenGL::UpdateDisplay() const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, reinterpret_crop_left, reinterpret_field_offset}; - UploadUniformBuffer(uniforms, sizeof(uniforms)); + g_host_display->UploadUniformBuffer(uniforms, sizeof(uniforms)); m_batch_ubo_dirty = true; Assert(scaled_display_width <= m_display_texture.GetWidth() && @@ -921,7 +900,7 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) m_vram_readback_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_vram_texture.Bind(); m_vram_read_program.Bind(); - UploadUniformBuffer(uniforms, sizeof(uniforms)); + g_host_display->UploadUniformBuffer(uniforms, sizeof(uniforms)); glDisable(GL_BLEND); glDisable(GL_SCISSOR_TEST); glViewport(0, 0, encoded_width, encoded_height); @@ -960,14 +939,14 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) glClearColor(r, g, b, a); IsGLES() ? glClearDepthf(a) : glClearDepth(a); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - SetScissorFromDrawingArea(); + SetScissor(); } else { const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); m_vram_fill_programs[BoolToUInt8(wrapped)][BoolToUInt8(interlaced)].Bind(); - UploadUniformBuffer(&uniforms, sizeof(uniforms)); + g_host_display->UploadUniformBuffer(&uniforms, sizeof(uniforms)); glDisable(GL_BLEND); SetDepthFunc(GL_ALWAYS); glBindVertexArray(m_attributeless_vao_id); @@ -1014,7 +993,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, set_mask, check_mask); - UploadUniformBuffer(&uniforms, sizeof(uniforms)); + g_host_display->UploadUniformBuffer(&uniforms, sizeof(uniforms)); // the viewport should already be set to the full vram, so just adjust the scissor const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; @@ -1105,7 +1084,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid IncludeVRAMDirtyRectangle(dst_bounds); const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); - UploadUniformBuffer(&uniforms, sizeof(uniforms)); + g_host_display->UploadUniformBuffer(&uniforms, sizeof(uniforms)); glDisable(GL_SCISSOR_TEST); glDisable(GL_BLEND); @@ -1205,29 +1184,6 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture() GPU_HW::UpdateVRAMReadTexture(); } -#endif - -void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit() -{ - if (m_pgxp_depth_buffer) - return; - - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - glDepthFunc(GL_ALWAYS); - - m_vram_texture.Bind(); - m_vram_update_depth_program.Bind(); - glBindVertexArray(m_attributeless_vao_id); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindVertexArray(m_vao_id); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glEnable(GL_SCISSOR_TEST); - - m_vram_read_texture.Bind(); -} void GPU_HW_OpenGL::ClearDepthBuffer() { @@ -1237,6 +1193,7 @@ void GPU_HW_OpenGL::ClearDepthBuffer() glEnable(GL_SCISSOR_TEST); m_last_depth_z = 1.0f; } +#endif void GPU_HW_OpenGL::DownsampleFramebuffer(GL::Texture& source, u32 left, u32 top, u32 width, u32 height) { diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index a0be544dd..28cf26483 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -24,7 +24,6 @@ public: bool Initialize() override; void Reset(bool clear_vram) override; - void ResetGraphicsAPIState() override; void RestoreGraphicsAPIState() override; void UpdateSettings() override; @@ -35,13 +34,6 @@ protected: void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateDepthBufferFromMaskBit() override; - void ClearDepthBuffer() override; - void SetScissorFromDrawingArea() override; - void MapBatchVertexPointer(u32 required_vertices) override; - void UnmapBatchVertexPointer(u32 used_vertices) override; - void UploadUniformBuffer(const void* data, u32 data_size) override; - void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; private: struct GLStats diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index 29328b5c9..b5f0672f8 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -93,22 +93,6 @@ void GPU_HW_Vulkan::Reset(bool clear_vram) ClearFramebuffer(); } -void GPU_HW_Vulkan::ResetGraphicsAPIState() -{ - GPU_HW::ResetGraphicsAPIState(); - - EndRenderPass(); - - if (g_host_display->GetDisplayTextureHandle() == &m_vram_texture) - { - m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - - // this is called at the end of the frame, so the UBO is associated with the previous command buffer. - m_batch_ubo_dirty = true; -} - void GPU_HW_Vulkan::RestoreGraphicsAPIState() { VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); @@ -120,7 +104,10 @@ void GPU_HW_Vulkan::RestoreGraphicsAPIState() Vulkan::Util::SetViewport(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_batch_pipeline_layout, 0, 1, &m_batch_descriptor_set, 1, &m_current_uniform_buffer_offset); - SetScissorFromDrawingArea(); + SetScissor(); + + // FIXME this is called at the end of the frame, so the UBO is associated with the previous command buffer. + m_batch_ubo_dirty = true; } void GPU_HW_Vulkan::UpdateSettings() @@ -134,7 +121,6 @@ void GPU_HW_Vulkan::UpdateSettings() { RestoreGraphicsAPIState(); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); } // Everything should be finished executing before recreating resources. @@ -158,10 +144,10 @@ void GPU_HW_Vulkan::UpdateSettings() UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); UpdateDepthBufferFromMaskBit(); UpdateDisplay(); - ResetGraphicsAPIState(); } } +#if 0 void GPU_HW_Vulkan::MapBatchVertexPointer(u32 required_vertices) { DebugAssert(!m_batch_start_vertex_ptr); @@ -210,6 +196,7 @@ void GPU_HW_Vulkan::UploadUniformBuffer(const void* data, u32 data_size) vkCmdBindDescriptorSets(g_vulkan_context->GetCurrentCommandBuffer(), VK_PIPELINE_BIND_POINT_GRAPHICS, m_batch_pipeline_layout, 0, 1, &m_batch_descriptor_set, 1, &m_current_uniform_buffer_offset); } +#endif void GPU_HW_Vulkan::SetCapabilities() { @@ -1293,6 +1280,7 @@ void GPU_HW_Vulkan::DestroyPipelines() m_display_pipelines.enumerate(Vulkan::Util::SafeDestroyPipeline); } +#if 0 void GPU_HW_Vulkan::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) { BeginVRAMRenderPass(); @@ -1310,16 +1298,7 @@ void GPU_HW_Vulkan::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); vkCmdDraw(cmdbuf, num_vertices, 1, base_vertex, 0); } - -void GPU_HW_Vulkan::SetScissorFromDrawingArea() -{ - int left, top, right, bottom; - CalcScissorRect(&left, &top, &right, &bottom); - const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(), - "GPU_HW_Vulkan::SetScissorFromDrawingArea: {%u,%u} {%u,%u}", left, top, - right, bottom); - Vulkan::Util::SetScissor(g_vulkan_context->GetCurrentCommandBuffer(), left, top, right - left, bottom - top); -} +#endif void GPU_HW_Vulkan::ClearDisplay() { @@ -1688,33 +1667,6 @@ void GPU_HW_Vulkan::UpdateVRAMReadTexture() m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); GPU_HW::UpdateVRAMReadTexture(); } -#endif - -void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit() -{ - if (m_pgxp_depth_buffer) - return; - - EndRenderPass(); - VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const Vulkan::Util::DebugScope debugScope(cmdbuf, "GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit"); - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - BeginRenderPass(m_vram_update_depth_render_pass, m_vram_update_depth_framebuffer, 0, 0, m_vram_texture.GetWidth(), - m_vram_texture.GetHeight()); - - vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_update_depth_pipeline); - vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, - &m_vram_read_descriptor_set, 0, nullptr); - Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - vkCmdDraw(cmdbuf, 3, 1, 0, 0); - - EndRenderPass(); - - m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - RestoreGraphicsAPIState(); -} void GPU_HW_Vulkan::ClearDepthBuffer() { @@ -1732,6 +1684,7 @@ void GPU_HW_Vulkan::ClearDepthBuffer() m_vram_depth_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); m_last_depth_z = 1.0f; } +#endif bool GPU_HW_Vulkan::BlitVRAMReplacementTexture(const TextureReplacementTexture* tex, u32 dst_x, u32 dst_y, u32 width, u32 height) diff --git a/src/core/gpu_hw_vulkan.h b/src/core/gpu_hw_vulkan.h index ad9b55991..f1ef84682 100644 --- a/src/core/gpu_hw_vulkan.h +++ b/src/core/gpu_hw_vulkan.h @@ -22,7 +22,6 @@ public: bool Initialize() override; void Reset(bool clear_vram) override; - void ResetGraphicsAPIState() override; void RestoreGraphicsAPIState() override; void UpdateSettings() override; @@ -33,13 +32,6 @@ protected: void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void UpdateDepthBufferFromMaskBit() override; - void ClearDepthBuffer() override; - void SetScissorFromDrawingArea() override; - void MapBatchVertexPointer(u32 required_vertices) override; - void UnmapBatchVertexPointer(u32 used_vertices) override; - void UploadUniformBuffer(const void* data, u32 data_size) override; - void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; private: enum : u32 diff --git a/src/core/system.cpp b/src/core/system.cpp index 1e50a4f9f..84ac4b3ea 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -799,8 +799,6 @@ bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_display, bool if (!state_valid) Log_ErrorPrintf("Failed to save old GPU state when switching renderers"); - g_gpu->ResetGraphicsAPIState(); - // create new renderer g_gpu.reset(); if (force_recreate_display) @@ -822,7 +820,6 @@ bool System::RecreateGPU(GPURenderer renderer, bool force_recreate_display, bool g_gpu->RestoreGraphicsAPIState(); g_gpu->DoState(sw, nullptr, update_display); TimingEvents::DoState(sw); - g_gpu->ResetGraphicsAPIState(); } // fix up vsync etc @@ -1507,8 +1504,6 @@ void System::DestroySystem() Timers::Shutdown(); Pad::Shutdown(); CDROM::Shutdown(); - if (g_gpu) - g_gpu->ResetGraphicsAPIState(); g_gpu.reset(); InterruptController::Shutdown(); DMA::Shutdown(); @@ -1727,9 +1722,7 @@ bool System::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di return false; g_gpu->RestoreGraphicsAPIState(); - const bool gpu_result = sw.DoMarker("GPU") && g_gpu->DoState(sw, host_texture, update_display); - g_gpu->ResetGraphicsAPIState(); - if (!gpu_result) + if (!sw.DoMarker("GPU") || !g_gpu->DoState(sw, host_texture, update_display)) return false; if (!sw.DoMarker("CDROM") || !CDROM::DoState(sw)) @@ -1864,8 +1857,6 @@ void System::InternalReset() #ifdef WITH_CHEEVOS Achievements::ResetRuntime(); #endif - - g_gpu->ResetGraphicsAPIState(); } std::string System::GetMediaPathFromSaveState(const char* path) @@ -2147,8 +2138,6 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 * header.data_compressed_size = static_cast(state->GetPosition() - header.offset_to_data); } - g_gpu->ResetGraphicsAPIState(); - if (!result) return false; } @@ -2175,11 +2164,10 @@ void System::SingleStepCPU() CPU::SingleStep(); SPU::GeneratePendingSamples(); + g_gpu->FlushRender(); if (s_frame_number != old_frame_number && s_cheat_list) s_cheat_list->Apply(); - - g_gpu->ResetGraphicsAPIState(); } void System::DoRunFrame() @@ -2215,11 +2203,10 @@ void System::DoRunFrame() // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. SPU::GeneratePendingSamples(); + g_gpu->FlushRender(); if (s_cheat_list) s_cheat_list->Apply(); - - g_gpu->ResetGraphicsAPIState(); } void System::RunFrame() @@ -2986,10 +2973,7 @@ bool System::DumpVRAM(const char* filename) return false; g_gpu->RestoreGraphicsAPIState(); - const bool result = g_gpu->DumpVRAMToFile(filename); - g_gpu->ResetGraphicsAPIState(); - - return result; + return g_gpu->DumpVRAMToFile(filename); } bool System::DumpSPURAM(const char* filename) diff --git a/src/frontend-common/common_host.cpp b/src/frontend-common/common_host.cpp index 445ede799..24ba62ad7 100644 --- a/src/frontend-common/common_host.cpp +++ b/src/frontend-common/common_host.cpp @@ -623,7 +623,6 @@ static void HotkeyModifyResolutionScale(s32 increment) { g_gpu->RestoreGraphicsAPIState(); g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); System::ClearMemorySaveStates(); Host::InvalidateDisplay(); } @@ -883,7 +882,6 @@ DEFINE_HOTKEY("TogglePGXP", TRANSLATABLE("Hotkeys", "Graphics"), TRANSLATABLE("H g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; g_gpu->RestoreGraphicsAPIState(); g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage("TogglePGXP", g_settings.gpu_pgxp_enable ? @@ -953,7 +951,6 @@ DEFINE_HOTKEY("TogglePGXPDepth", TRANSLATABLE("Hotkeys", "Graphics"), g_gpu->RestoreGraphicsAPIState(); g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage( "TogglePGXPDepth", @@ -974,7 +971,6 @@ DEFINE_HOTKEY("TogglePGXPCPU", TRANSLATABLE("Hotkeys", "Graphics"), TRANSLATABLE g_gpu->RestoreGraphicsAPIState(); g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); System::ClearMemorySaveStates(); Host::AddKeyedOSDMessage("TogglePGXPCPU", g_settings.gpu_pgxp_cpu ?