From ac73e7b050462374387a0887e623717e644ae0bb Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 1 Aug 2023 23:49:15 +1000 Subject: [PATCH] More conversion --- .clang-format | 2 +- src/core/core.vcxproj | 2 + src/core/core.vcxproj.filters | 6 + src/core/gpu/d3d11_device.cpp | 1215 +++++++++++++++------------- src/core/gpu/d3d11_device.h | 117 ++- src/core/gpu/d3d11_texture.cpp | 2 +- src/core/gpu/d3d11_texture.h | 4 + src/core/gpu/d3d12_gpu_device.cpp | 50 -- src/core/gpu/d3d12_gpu_device.h | 2 - src/core/gpu/gpu_device.cpp | 519 ++++++++++-- src/core/gpu/gpu_device.h | 279 ++++--- src/core/gpu/gpu_shader_cache.cpp | 290 +++++++ src/core/gpu/gpu_shader_cache.h | 76 ++ src/core/gpu/gpu_texture.cpp | 6 +- src/core/gpu/gpu_texture.h | 57 +- src/core/gpu/opengl_gpu_device.cpp | 43 - src/core/gpu/opengl_gpu_device.h | 2 - src/core/gpu/vulkan/entry_points.h | 207 +---- src/core/gpu/vulkan/loader.cpp | 12 +- src/core/gpu/vulkan_gpu_device.cpp | 96 --- src/core/gpu/vulkan_gpu_device.h | 2 - src/core/gpu_hw.cpp | 650 ++++++++++++++- src/core/gpu_hw.h | 49 +- src/core/gpu_hw_d3d11.cpp | 282 +------ src/core/gpu_hw_d3d11.h | 16 +- src/core/gpu_hw_opengl.cpp | 12 +- src/core/gpu_hw_opengl.h | 2 +- src/core/shader_cache_version.h | 2 +- src/core/shadergen.cpp | 68 +- src/core/shadergen.h | 6 +- 30 files changed, 2519 insertions(+), 1557 deletions(-) create mode 100644 src/core/gpu/gpu_shader_cache.cpp create mode 100644 src/core/gpu/gpu_shader_cache.h diff --git a/.clang-format b/.clang-format index 3b7f8f2de..59cf6e97d 100644 --- a/.clang-format +++ b/.clang-format @@ -10,7 +10,7 @@ AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Inline +AllowShortFunctionsOnASingleLine: InlineOnly AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None diff --git a/src/core/core.vcxproj 
b/src/core/core.vcxproj index 22fe45004..4e322932a 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -76,6 +76,7 @@ + @@ -205,6 +206,7 @@ + diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 3109cb210..ff0ba7e45 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -188,6 +188,9 @@ gpu + + gpu + @@ -395,6 +398,9 @@ gpu + + gpu + diff --git a/src/core/gpu/d3d11_device.cpp b/src/core/gpu/d3d11_device.cpp index a167e4e9f..354905bcd 100644 --- a/src/core/gpu/d3d11_device.cpp +++ b/src/core/gpu/d3d11_device.cpp @@ -5,29 +5,45 @@ #include "../host_settings.h" #include "../settings.h" #include "../shader_cache_version.h" + #include "common/assert.h" +#include "common/file_system.h" #include "common/log.h" +#include "common/path.h" #include "common/string_util.h" -#include "d3d11/shader_cache.h" -#include "d3d11/shader_compiler.h" -#include "d3d_shaders.h" + #include "imgui.h" -#include "postprocessing_shadergen.h" + +#include "fmt/format.h" + #include +#include #include + Log_SetChannel(D3D11Device); #pragma comment(lib, "d3d11.lib") #pragma comment(lib, "dxgi.lib") static constexpr std::array s_clear_color = {}; +static unsigned s_next_bad_shader_id = 1; + +static void SetD3DDebugObjectName(ID3D11DeviceChild* obj, const std::string_view& name) +{ + // WKPDID_D3DDebugObjectName + static constexpr GUID guid = {0x429b8c22, 0x9188, 0x4b0c, 0x87, 0x42, 0xac, 0xb0, 0xbf, 0x85, 0xc2, 0x00}; + const std::wstring wname = StringUtil::UTF8StringToWideString(name); + obj->SetPrivateData(guid, static_cast(wname.length()) * 2u, wname.c_str()); +} D3D11Device::D3D11Device() = default; D3D11Device::~D3D11Device() { + // TODO: Make virtual Destroy() method instead due to order of shit.. 
DestroyStagingBuffer(); DestroyResources(); + DestroyBuffers(); DestroySurface(); m_context.Reset(); m_device.Reset(); @@ -94,6 +110,35 @@ bool D3D11Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, return true; } +void D3D11Device::CommitClear(GPUTexture* t) +{ + D3D11Texture* T = static_cast(t); + if (T->GetState() == GPUTexture::State::Dirty) + return; + + // TODO: 11.1 + if (T->IsDepthStencil()) + { + if (T->GetState() == GPUTexture::State::Invalidated) + ; // m_context->DiscardView(T->GetD3DDSV()); + else + m_context->ClearDepthStencilView(T->GetD3DDSV(), D3D11_CLEAR_DEPTH, T->GetClearDepth(), 0); + } + else if (T->IsRenderTarget()) + { + if (T->GetState() == GPUTexture::State::Invalidated) + ; // m_context->DiscardView(T->GetD3DRTV()); + else + m_context->ClearRenderTargetView(T->GetD3DRTV(), T->GetUNormClearColor().data()); + } + else + { + return; + } + + T->SetState(GPUTexture::State::Dirty); +} + bool D3D11Device::CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format) { if (m_readback_staging_texture_width >= width && m_readback_staging_texture_width >= height && @@ -143,11 +188,27 @@ void D3D11Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 d DebugAssert((dst_x + width) <= dst->GetMipWidth(dst_level)); DebugAssert((dst_y + height) <= dst->GetMipWidth(dst_level)); + D3D11Texture* dst11 = static_cast(dst); + D3D11Texture* src11 = static_cast(src); + + if (src11->GetState() == GPUTexture::State::Cleared) + { + if (src11->GetWidth() == dst11->GetWidth() && src11->GetHeight() == dst11->GetHeight()) + { + // pass clear through + dst11->m_state = src11->m_state; + dst11->m_clear_value = src11->m_clear_value; + return; + } + } + + CommitClear(src11); + CommitClear(dst11); + const CD3D11_BOX src_box(static_cast(src_x), static_cast(src_y), 0, static_cast(src_x + width), static_cast(src_y + height), 1); - m_context->CopySubresourceRegion(static_cast(dst)->GetD3DTexture(), - D3D11CalcSubresource(dst_level, 
dst_layer, dst->GetLevels()), dst_x, dst_y, 0, - static_cast(src)->GetD3DTexture(), + m_context->CopySubresourceRegion(dst11->GetD3DTexture(), D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), + dst_x, dst_y, 0, src11->GetD3DTexture(), D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), &src_box); } @@ -166,6 +227,9 @@ void D3D11Device::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u3 // DX11 can't resolve partial rects. Assert(src_x == dst_x && src_y == dst_y); + CommitClear(src); + CommitClear(dst); + m_context->ResolveSubresource( static_cast(dst)->GetD3DTexture(), D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), static_cast(src)->GetD3DTexture(), D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), @@ -266,6 +330,9 @@ bool D3D11Device::CreateDevice(const WindowInfo& wi, bool vsync) } } + if (g_settings.gpu_use_debug_device) + m_context.As(&m_annotation); + // we need the specific factory for the device, otherwise MakeWindowAssociation() is flaky. ComPtr dxgi_device; if (FAILED(m_device.As(&dxgi_device)) || FAILED(dxgi_device->GetParent(IID_PPV_ARGS(dxgi_adapter.GetAddressOf()))) || @@ -318,7 +385,10 @@ bool D3D11Device::CreateDevice(const WindowInfo& wi, bool vsync) bool D3D11Device::SetupDevice() { - if (!CreateResources()) + if (!GPUDevice::SetupDevice()) + return false; + + if (!CreateBuffers() || !CreateResources()) return false; return true; @@ -467,6 +537,21 @@ void D3D11Device::DestroySurface() m_swap_chain.Reset(); } +std::string D3D11Device::GetShaderCacheBaseName(const std::string_view& type, bool debug) const +{ + std::string_view flname; + switch (m_device->GetFeatureLevel()) + { + // clang-format off + case D3D_FEATURE_LEVEL_10_0: flname = "sm40"; break; + case D3D_FEATURE_LEVEL_10_1: flname = "sm41"; break; + case D3D_FEATURE_LEVEL_11_0: default: flname = "sm50"; break; + // clang-format on + } + + return fmt::format("d3d_{}_{}{}", type, flname, debug ? 
"_debug" : ""); +} + void D3D11Device::ResizeWindow(s32 new_window_width, s32 new_window_height) { if (!m_swap_chain) @@ -556,238 +641,24 @@ bool D3D11Device::SetFullscreen(bool fullscreen, u32 width, u32 height, float re return true; } -bool D3D11Device::CreateResources() +bool D3D11Device::CreateBuffers() { - if (!GPUDevice::CreateResources()) - return false; - - HRESULT hr; - - m_display_vertex_shader = - D3D11::ShaderCompiler::CreateVertexShader(m_device.Get(), s_display_vs_bytecode, sizeof(s_display_vs_bytecode)); - m_display_pixel_shader = - D3D11::ShaderCompiler::CreatePixelShader(m_device.Get(), s_display_ps_bytecode, sizeof(s_display_ps_bytecode)); - m_display_alpha_pixel_shader = D3D11::ShaderCompiler::CreatePixelShader(m_device.Get(), s_display_ps_alpha_bytecode, - sizeof(s_display_ps_alpha_bytecode)); - if (!m_display_vertex_shader || !m_display_pixel_shader || !m_display_alpha_pixel_shader) - return false; - - if (!m_display_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, DISPLAY_UNIFORM_BUFFER_SIZE)) - return false; - - CD3D11_RASTERIZER_DESC rasterizer_desc = CD3D11_RASTERIZER_DESC(CD3D11_DEFAULT()); - rasterizer_desc.CullMode = D3D11_CULL_NONE; - hr = m_device->CreateRasterizerState(&rasterizer_desc, m_display_rasterizer_state.GetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_DEPTH_STENCIL_DESC depth_stencil_desc = CD3D11_DEPTH_STENCIL_DESC(CD3D11_DEFAULT()); - depth_stencil_desc.DepthEnable = FALSE; - depth_stencil_desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - hr = m_device->CreateDepthStencilState(&depth_stencil_desc, m_display_depth_stencil_state.GetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_BLEND_DESC blend_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - hr = m_device->CreateBlendState(&blend_desc, m_display_blend_state.GetAddressOf()); - if (FAILED(hr)) - return false; - - blend_desc.RenderTarget[0] = {TRUE, - D3D11_BLEND_SRC_ALPHA, - D3D11_BLEND_INV_SRC_ALPHA, - D3D11_BLEND_OP_ADD, - 
D3D11_BLEND_ONE, - D3D11_BLEND_ZERO, - D3D11_BLEND_OP_ADD, - D3D11_COLOR_WRITE_ENABLE_ALL}; - hr = m_device->CreateBlendState(&blend_desc, m_software_cursor_blend_state.GetAddressOf()); - if (FAILED(hr)) - return false; - - CD3D11_SAMPLER_DESC sampler_desc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT()); - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - hr = m_device->CreateSamplerState(&sampler_desc, m_point_sampler.GetAddressOf()); - if (FAILED(hr)) - return false; - - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT; - hr = m_device->CreateSamplerState(&sampler_desc, m_linear_sampler.GetAddressOf()); - if (FAILED(hr)) - return false; - - sampler_desc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; - sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; - sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; - sampler_desc.BorderColor[0] = 0.0f; - sampler_desc.BorderColor[1] = 0.0f; - sampler_desc.BorderColor[2] = 0.0f; - sampler_desc.BorderColor[3] = 1.0f; - hr = m_device->CreateSamplerState(&sampler_desc, m_border_sampler.GetAddressOf()); - if (FAILED(hr)) - return false; - - if (!CreateImGuiResources()) - return false; - - return true; -} - -void D3D11Device::DestroyResources() -{ - GPUDevice::DestroyResources(); - - DestroyImGuiResources(); - - m_post_processing_chain.ClearStages(); - m_post_processing_input_texture.Destroy(); - m_post_processing_stages.clear(); - - m_display_uniform_buffer.Release(); - m_border_sampler.Reset(); - m_linear_sampler.Reset(); - m_point_sampler.Reset(); - m_display_alpha_pixel_shader.Reset(); - m_display_pixel_shader.Reset(); - m_display_vertex_shader.Reset(); - m_display_blend_state.Reset(); - m_display_depth_stencil_state.Reset(); - m_display_rasterizer_state.Reset(); -} - -bool D3D11Device::CreateImGuiResources() -{ - if (!m_imgui_vertex_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, IMGUI_VERTEX_BUFFER_SIZE)) + if (!m_vertex_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE) || + 
!m_index_buffer.Create(m_device.Get(), D3D11_BIND_INDEX_BUFFER, INDEX_BUFFER_SIZE) || + !m_push_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, PUSH_UNIFORM_BUFFER_SIZE)) { - Log_ErrorPrintf("Failed to create ImGui vertex buffer."); - return false; - } - - if (!m_imgui_index_buffer.Create(m_device.Get(), D3D11_BIND_INDEX_BUFFER, IMGUI_INDEX_BUFFER_SIZE)) - { - Log_ErrorPrintf("Failed to create ImGui index buffer."); - return false; - } - - HRESULT hr; - D3D11_BLEND_DESC blend_desc = {}; - blend_desc.RenderTarget[0].BlendEnable = true; - blend_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; - blend_desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; - blend_desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; - blend_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - blend_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; - blend_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; - blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - if (FAILED(hr = m_device->CreateBlendState(&blend_desc, m_imgui_blend_state.ReleaseAndGetAddressOf()))) - { - Log_ErrorPrintf("Failed to create ImGui blend state: %08X", hr); - return false; - } - - m_imgui_vertex_shader = - D3D11::ShaderCompiler::CreateVertexShader(m_device.Get(), s_imgui_vs_bytecode, sizeof(s_imgui_vs_bytecode)); - m_imgui_pixel_shader = - D3D11::ShaderCompiler::CreatePixelShader(m_device.Get(), s_imgui_ps_bytecode, sizeof(s_imgui_ps_bytecode)); - if (!m_imgui_vertex_shader || !m_imgui_pixel_shader) - return false; - - static constexpr D3D11_INPUT_ELEMENT_DESC layout[] = { - {"POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, pos), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)IM_OFFSETOF(ImDrawVert, uv), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, (UINT)IM_OFFSETOF(ImDrawVert, col), D3D11_INPUT_PER_VERTEX_DATA, 0}, - }; - if (FAILED(hr 
= - m_device->CreateInputLayout(layout, static_cast(std::size(layout)), s_imgui_vs_bytecode, - sizeof(s_imgui_vs_bytecode), m_imgui_input_layout.ReleaseAndGetAddressOf()))) - { - Log_ErrorPrintf("Failed to create ImGui input layout: %08X", hr); + Log_ErrorPrintf("Failed to create vertex/index/uniform buffers."); return false; } return true; } -void D3D11Device::DestroyImGuiResources() +void D3D11Device::DestroyBuffers() { - m_imgui_blend_state.Reset(); - m_imgui_index_buffer.Release(); - m_imgui_vertex_buffer.Release(); - m_imgui_texture.Destroy(); -} - -void D3D11Device::RenderImGui() -{ - ImGui::Render(); - - const ImDrawData* draw_data = ImGui::GetDrawData(); - if (draw_data->CmdListsCount == 0) - return; - - m_context->IASetInputLayout(m_imgui_input_layout.Get()); - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->VSSetShader(m_imgui_vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(m_imgui_pixel_shader.Get(), nullptr, 0); - m_context->PSSetSamplers(0, 1, m_linear_sampler.GetAddressOf()); - m_context->OMSetBlendState(m_imgui_blend_state.Get(), nullptr, 0xFFFFFFFFu); - m_context->OMSetDepthStencilState(m_display_depth_stencil_state.Get(), 0); - - { - const float L = 0.0f; - const float R = static_cast(m_window_info.surface_width); - const float T = 0.0f; - const float B = static_cast(m_window_info.surface_height); - - const float ortho_projection[4][4] = { - {2.0f / (R - L), 0.0f, 0.0f, 0.0f}, - {0.0f, 2.0f / (T - B), 0.0f, 0.0f}, - {0.0f, 0.0f, 0.5f, 0.0f}, - {(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f}, - }; - - auto res = m_display_uniform_buffer.Map(m_context.Get(), DISPLAY_UNIFORM_BUFFER_SIZE, DISPLAY_UNIFORM_BUFFER_SIZE); - std::memcpy(res.pointer, ortho_projection, sizeof(ortho_projection)); - m_display_uniform_buffer.Unmap(m_context.Get(), sizeof(ortho_projection)); - m_context->VSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - } - - const UINT vb_stride = sizeof(ImDrawVert); 
- const UINT vb_offset = 0; - static_assert(sizeof(ImDrawIdx) == sizeof(u16)); - m_context->IASetVertexBuffers(0, 1, m_imgui_vertex_buffer.GetD3DBufferArray(), &vb_stride, &vb_offset); - m_context->IASetIndexBuffer(m_imgui_index_buffer.GetD3DBuffer(), DXGI_FORMAT_R16_UINT, 0); - - // Render command lists - for (int n = 0; n < draw_data->CmdListsCount; n++) - { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - - const u32 vert_size = cmd_list->VtxBuffer.Size * sizeof(ImDrawVert); - const auto vb_map = m_imgui_vertex_buffer.Map(m_context.Get(), sizeof(ImDrawVert), vert_size); - std::memcpy(vb_map.pointer, cmd_list->VtxBuffer.Data, vert_size); - m_imgui_vertex_buffer.Unmap(m_context.Get(), vert_size); - - const u32 idx_size = cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx); - const auto ib_map = m_imgui_index_buffer.Map(m_context.Get(), sizeof(ImDrawIdx), idx_size); - std::memcpy(ib_map.pointer, cmd_list->IdxBuffer.Data, idx_size); - m_imgui_index_buffer.Unmap(m_context.Get(), idx_size); - - for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) - { - const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; - DebugAssert(!pcmd->UserCallback); - - if (pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.x) - continue; - - const CD3D11_RECT rc(static_cast(pcmd->ClipRect.x), static_cast(pcmd->ClipRect.y), - static_cast(pcmd->ClipRect.z), static_cast(pcmd->ClipRect.w)); - m_context->RSSetScissorRects(1, &rc); - m_context->PSSetShaderResources(0, 1, reinterpret_cast(pcmd->TextureId)->GetD3DSRVArray()); - m_context->DrawIndexed(pcmd->ElemCount, ib_map.index_aligned + pcmd->IdxOffset, - vb_map.index_aligned + pcmd->VtxOffset); - } - } + m_push_uniform_buffer.Release(); + m_vertex_buffer.Release(); + m_index_buffer.Release(); } bool D3D11Device::Render(bool skip_present) @@ -805,12 +676,10 @@ bool D3D11Device::Render(bool skip_present) if (m_vsync_enabled && m_gpu_timing_enabled) PopTimestampQuery(); - RenderDisplay(); + 
m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data()); + m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); - // TODO: move up... - const CD3D11_VIEWPORT vp(0.0f, 0.0f, static_cast(m_window_info.surface_width), - static_cast(m_window_info.surface_height), 0.0f, 1.0f); - m_context->RSSetViewports(1, &vp); + RenderDisplay(); RenderImGui(); @@ -830,137 +699,6 @@ bool D3D11Device::Render(bool skip_present) return true; } -bool D3D11Device::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) -{ - static constexpr GPUTexture::Format hdformat = GPUTexture::Format::RGBA8; - - D3D11Texture render_texture; - if (!render_texture.Create(m_device.Get(), width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat)) - return false; - - static constexpr std::array clear_color = {}; - m_context->ClearRenderTargetView(render_texture.GetD3DRTV(), clear_color.data()); - m_context->OMSetRenderTargets(1, render_texture.GetD3DRTVArray(), nullptr); - - if (HasDisplayTexture()) - { - if (!m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(render_texture.GetD3DRTV(), draw_rect.left, draw_rect.top, draw_rect.GetWidth(), - draw_rect.GetHeight(), static_cast(m_display_texture), - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, width, height); - } - else - { - RenderDisplay(draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), - static_cast(m_display_texture), m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); - } - } - - m_context->OMSetRenderTargets(0, nullptr, nullptr); - - const u32 stride = GPUTexture::GetPixelSize(hdformat) * width; - out_pixels->resize(width * height); - if (!DownloadTexture(&render_texture, 0, 0, width, height, 
out_pixels->data(), stride)) - return false; - - *out_stride = stride; - *out_format = hdformat; - return true; -} - -void D3D11Device::RenderDisplay() -{ - const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight()); - - if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(m_swap_chain_rtv.Get(), left, top, width, height, - static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - GetWindowWidth(), GetWindowHeight()); - return; - } - - m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data()); - m_context->OMSetRenderTargets(1, m_swap_chain_rtv.GetAddressOf(), nullptr); - - if (!HasDisplayTexture()) - return; - - RenderDisplay(left, top, width, height, static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - IsUsingLinearFiltering()); -} - -void D3D11Device::RenderDisplay(s32 left, s32 top, s32 width, s32 height, D3D11Texture* texture, s32 texture_view_x, - s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter) -{ - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->VSSetShader(m_display_vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(m_display_pixel_shader.Get(), nullptr, 0); - m_context->PSSetShaderResources(0, 1, texture->GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, linear_filter ? m_linear_sampler.GetAddressOf() : m_point_sampler.GetAddressOf()); - - const bool linear = IsUsingLinearFiltering(); - const float position_adjust = linear ? 0.5f : 0.0f; - const float size_adjust = linear ? 
1.0f : 0.0f; - const float uniforms[4] = { - (static_cast(texture_view_x) + position_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_y) + position_adjust) / static_cast(texture->GetHeight()), - (static_cast(texture_view_width) - size_adjust) / static_cast(texture->GetWidth()), - (static_cast(texture_view_height) - size_adjust) / static_cast(texture->GetHeight())}; - const auto map = m_display_uniform_buffer.Map(m_context.Get(), m_display_uniform_buffer.GetSize(), sizeof(uniforms)); - std::memcpy(map.pointer, uniforms, sizeof(uniforms)); - m_display_uniform_buffer.Unmap(m_context.Get(), sizeof(uniforms)); - m_context->VSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - - const CD3D11_VIEWPORT vp(static_cast(left), static_cast(top), static_cast(width), - static_cast(height)); - m_context->RSSetViewports(1, &vp); - m_context->RSSetState(m_display_rasterizer_state.Get()); - m_context->OMSetDepthStencilState(m_display_depth_stencil_state.Get(), 0); - m_context->OMSetBlendState(m_display_blend_state.Get(), nullptr, 0xFFFFFFFFu); - - m_context->Draw(3, 0); -} - -void D3D11Device::RenderSoftwareCursor() -{ - if (!HasSoftwareCursor()) - return; - - const auto [left, top, width, height] = CalculateSoftwareCursorDrawRect(); - RenderSoftwareCursor(left, top, width, height, m_cursor_texture.get()); -} - -void D3D11Device::RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture_handle) -{ - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->VSSetShader(m_display_vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(m_display_alpha_pixel_shader.Get(), nullptr, 0); - m_context->PSSetShaderResources(0, 1, static_cast(texture_handle)->GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, m_linear_sampler.GetAddressOf()); - - const float uniforms[4] = {0.0f, 0.0f, 1.0f, 1.0f}; - const auto map = m_display_uniform_buffer.Map(m_context.Get(), 
m_display_uniform_buffer.GetSize(), sizeof(uniforms)); - std::memcpy(map.pointer, uniforms, sizeof(uniforms)); - m_display_uniform_buffer.Unmap(m_context.Get(), sizeof(uniforms)); - m_context->VSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - - const CD3D11_VIEWPORT vp(static_cast(left), static_cast(top), static_cast(width), - static_cast(height)); - m_context->RSSetViewports(1, &vp); - m_context->RSSetState(m_display_rasterizer_state.Get()); - m_context->OMSetDepthStencilState(m_display_depth_stencil_state.Get(), 0); - m_context->OMSetBlendState(m_software_cursor_blend_state.Get(), nullptr, 0xFFFFFFFFu); - - m_context->Draw(3, 0); -} - GPUDevice::AdapterAndModeList D3D11Device::StaticGetAdapterAndModeList() { ComPtr dxgi_factory; @@ -1044,151 +782,6 @@ GPUDevice::AdapterAndModeList D3D11Device::GetAdapterAndModeList() return GetAdapterAndModeList(m_dxgi_factory.Get()); } -bool D3D11Device::SetPostProcessingChain(const std::string_view& config) -{ - if (config.empty()) - { - m_post_processing_input_texture.Destroy(); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return true; - } - - if (!m_post_processing_chain.CreateFromString(config)) - return false; - - m_post_processing_stages.clear(); - - D3D11::ShaderCache shader_cache; - shader_cache.Open(EmuFolders::Cache, m_device->GetFeatureLevel(), SHADER_CACHE_VERSION, - g_settings.gpu_use_debug_device); - - FrontendCommon::PostProcessingShaderGen shadergen(RenderAPI::D3D11, true); - u32 max_ubo_size = 0; - - for (u32 i = 0; i < m_post_processing_chain.GetStageCount(); i++) - { - const FrontendCommon::PostProcessingShader& shader = m_post_processing_chain.GetShaderStage(i); - const std::string vs = shadergen.GeneratePostProcessingVertexShader(shader); - const std::string ps = shadergen.GeneratePostProcessingFragmentShader(shader); - - PostProcessingStage stage; - stage.uniforms_size = shader.GetUniformsSize(); - stage.vertex_shader = 
shader_cache.GetVertexShader(m_device.Get(), vs); - stage.pixel_shader = shader_cache.GetPixelShader(m_device.Get(), ps); - if (!stage.vertex_shader || !stage.pixel_shader) - { - Log_ErrorPrintf("Failed to compile one or more post-processing shaders, disabling."); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - max_ubo_size = std::max(max_ubo_size, stage.uniforms_size); - m_post_processing_stages.push_back(std::move(stage)); - } - - if (m_display_uniform_buffer.GetSize() < max_ubo_size && - !m_display_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, max_ubo_size)) - { - Log_ErrorPrintf("Failed to allocate %u byte constant buffer for postprocessing", max_ubo_size); - m_post_processing_stages.clear(); - m_post_processing_chain.ClearStages(); - return false; - } - - m_post_processing_timer.Reset(); - return true; -} - -bool D3D11Device::CheckPostProcessingRenderTargets(u32 target_width, u32 target_height) -{ - DebugAssert(!m_post_processing_stages.empty()); - - const GPUTexture::Type type = GPUTexture::Type::RenderTarget; - const GPUTexture::Format format = GPUTexture::Format::RGBA8; - - if (m_post_processing_input_texture.GetWidth() != target_width || - m_post_processing_input_texture.GetHeight() != target_height) - { - if (!m_post_processing_input_texture.Create(m_device.Get(), target_width, target_height, 1, 1, 1, type, format)) - return false; - } - - const u32 target_count = (static_cast(m_post_processing_stages.size()) - 1); - for (u32 i = 0; i < target_count; i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - if (pps.output_texture.GetWidth() != target_width || pps.output_texture.GetHeight() != target_height) - { - if (!pps.output_texture.Create(m_device.Get(), target_width, target_height, 1, 1, 1, type, format)) - return false; - } - } - - return true; -} - -void D3D11Device::ApplyPostProcessingChain(ID3D11RenderTargetView* final_target, s32 final_left, s32 final_top, - s32 
final_width, s32 final_height, D3D11Texture* texture, s32 texture_view_x, - s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, - u32 target_width, u32 target_height) -{ - if (!CheckPostProcessingRenderTargets(target_width, target_height)) - { - RenderDisplay(final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, - texture_view_width, texture_view_height, IsUsingLinearFiltering()); - return; - } - - // downsample/upsample - use same viewport for remainder - m_context->ClearRenderTargetView(m_post_processing_input_texture.GetD3DRTV(), s_clear_color.data()); - m_context->OMSetRenderTargets(1, m_post_processing_input_texture.GetD3DRTVArray(), nullptr); - RenderDisplay(final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, - texture_view_width, texture_view_height, IsUsingLinearFiltering()); - - const s32 orig_texture_width = texture_view_width; - const s32 orig_texture_height = texture_view_height; - texture = &m_post_processing_input_texture; - texture_view_x = final_left; - texture_view_y = final_top; - texture_view_width = final_width; - texture_view_height = final_height; - - const u32 final_stage = static_cast(m_post_processing_stages.size()) - 1u; - for (u32 i = 0; i < static_cast(m_post_processing_stages.size()); i++) - { - PostProcessingStage& pps = m_post_processing_stages[i]; - ID3D11RenderTargetView* rtv = (i == final_stage) ? 
final_target : pps.output_texture.GetD3DRTV(); - m_context->ClearRenderTargetView(rtv, s_clear_color.data()); - m_context->OMSetRenderTargets(1, &rtv, nullptr); - - m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->VSSetShader(pps.vertex_shader.Get(), nullptr, 0); - m_context->PSSetShader(pps.pixel_shader.Get(), nullptr, 0); - m_context->PSSetShaderResources(0, 1, texture->GetD3DSRVArray()); - m_context->PSSetSamplers(0, 1, m_border_sampler.GetAddressOf()); - - const auto map = - m_display_uniform_buffer.Map(m_context.Get(), m_display_uniform_buffer.GetSize(), pps.uniforms_size); - m_post_processing_chain.GetShaderStage(i).FillUniformBuffer( - map.pointer, texture->GetWidth(), texture->GetHeight(), texture_view_x, texture_view_y, texture_view_width, - texture_view_height, GetWindowWidth(), GetWindowHeight(), orig_texture_width, orig_texture_height, - static_cast(m_post_processing_timer.GetTimeSeconds())); - m_display_uniform_buffer.Unmap(m_context.Get(), pps.uniforms_size); - m_context->VSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - m_context->PSSetConstantBuffers(0, 1, m_display_uniform_buffer.GetD3DBufferArray()); - - m_context->Draw(3, 0); - - if (i != final_stage) - texture = &pps.output_texture; - } - - ID3D11ShaderResourceView* null_srv = nullptr; - m_context->PSSetShaderResources(0, 1, &null_srv); -} - bool D3D11Device::CreateTimestampQueries() { for (u32 i = 0; i < NUM_TIMESTAMP_QUERIES; i++) @@ -1308,8 +901,9 @@ float D3D11Device::GetAndResetAccumulatedGPUTime() return value; } -D3D11Framebuffer::D3D11Framebuffer(ComPtr rtv, ComPtr dsv) - : m_rtv(std::move(rtv)), m_dsv(std::move(dsv)) +D3D11Framebuffer::D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, + ComPtr rtv, ComPtr dsv) + : GPUFramebuffer(rt, ds, width, height), m_rtv(std::move(rtv)), m_dsv(std::move(dsv)) { } @@ -1317,7 +911,33 @@ D3D11Framebuffer::~D3D11Framebuffer() = default; void 
D3D11Framebuffer::SetDebugName(const std::string_view& name) { - Panic("Implement me"); + if (m_rtv) + SetD3DDebugObjectName(m_rtv.Get(), name); + if (m_dsv) + SetD3DDebugObjectName(m_dsv.Get(), name); +} + +void D3D11Framebuffer::CommitClear(ID3D11DeviceContext* context) +{ + if (UNLIKELY(m_rt && m_rt->GetState() != GPUTexture::State::Dirty)) + { + if (m_rt->GetState() == GPUTexture::State::Invalidated) + ; // m_context->DiscardView(m_rtv.Get()); + else + context->ClearRenderTargetView(m_rtv.Get(), m_rt->GetUNormClearColor().data()); + + m_rt->SetState(GPUTexture::State::Dirty); + } + + if (UNLIKELY(m_ds && m_ds->GetState() != GPUTexture::State::Dirty)) + { + if (m_ds->GetState() == GPUTexture::State::Invalidated) + ; // m_context->DiscardView(m_dsv.Get()); + else + context->ClearDepthStencilView(m_dsv.Get(), D3D11_CLEAR_DEPTH, m_ds->GetClearDepth(), 0); + + m_ds->SetState(GPUTexture::State::Dirty); + } } std::unique_ptr D3D11Device::CreateFramebuffer(GPUTexture* rt, u32 rt_layer, u32 rt_level, @@ -1327,6 +947,13 @@ std::unique_ptr D3D11Device::CreateFramebuffer(GPUTexture* rt, u ComPtr dsv; HRESULT hr; + Assert(rt || ds); + Assert(!rt || (rt_layer < rt->GetLayers() && rt_level < rt->GetLevels())); + Assert(!ds || (ds_layer < ds->GetLevels() && ds_level < ds->GetLevels())); + Assert(!rt || !ds || + (rt->GetMipWidth(rt_level) == ds->GetMipWidth(ds_level) && + rt->GetMipHeight(rt_level) == ds->GetMipHeight(ds_level))); + if (rt) { D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {}; @@ -1411,16 +1038,20 @@ std::unique_ptr D3D11Device::CreateFramebuffer(GPUTexture* rt, u } } - return std::unique_ptr(new D3D11Framebuffer(std::move(rtv), std::move(dsv))); + return std::unique_ptr( + new D3D11Framebuffer(rt, ds, rt ? rt->GetMipWidth(rt_level) : ds->GetMipWidth(ds_level), + rt ? 
rt->GetMipHeight(rt_level) : ds->GetMipHeight(ds_level), std::move(rtv), std::move(dsv))); } -D3D11Sampler::D3D11Sampler(ComPtr ss) : m_ss(std::move(ss)) {} +D3D11Sampler::D3D11Sampler(ComPtr ss) : m_ss(std::move(ss)) +{ +} D3D11Sampler::~D3D11Sampler() = default; void D3D11Sampler::SetDebugName(const std::string_view& name) { - Panic("Not implemented"); + SetD3DDebugObjectName(m_ss.Get(), name); } std::unique_ptr D3D11Device::CreateSampler(const GPUSampler::Config& config) @@ -1477,7 +1108,8 @@ std::unique_ptr D3D11Device::CreateSampler(const GPUSampler::Config& return std::unique_ptr(new D3D11Sampler(std::move(ss))); } -D3D11Shader::D3D11Shader(Stage stage, Microsoft::WRL::ComPtr shader, std::vector bytecode) +D3D11Shader::D3D11Shader(GPUShaderStage stage, Microsoft::WRL::ComPtr shader, + std::vector bytecode) : GPUShader(stage), m_shader(std::move(shader)), m_bytecode(std::move(bytecode)) { } @@ -1486,45 +1118,49 @@ D3D11Shader::~D3D11Shader() = default; ID3D11VertexShader* D3D11Shader::GetVertexShader() const { - DebugAssert(m_stage == Stage::Vertex); + DebugAssert(m_stage == GPUShaderStage::Vertex); return static_cast(m_shader.Get()); } ID3D11PixelShader* D3D11Shader::GetPixelShader() const { - DebugAssert(m_stage == Stage::Pixel); + DebugAssert(m_stage == GPUShaderStage::Fragment); return static_cast(m_shader.Get()); } ID3D11ComputeShader* D3D11Shader::GetComputeShader() const { - DebugAssert(m_stage == Stage::Compute); + DebugAssert(m_stage == GPUShaderStage::Compute); return static_cast(m_shader.Get()); } void D3D11Shader::SetDebugName(const std::string_view& name) { - Panic("Implement me"); + SetD3DDebugObjectName(m_shader.Get(), name); } -std::unique_ptr D3D11Device::CreateShaderFromBinary(GPUShader::Stage stage, gsl::span data) +std::unique_ptr D3D11Device::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) { ComPtr shader; std::vector bytecode; + HRESULT hr; switch (stage) { - case GPUShader::Stage::Vertex: - shader = 
D3D11::ShaderCompiler::CreateVertexShader(D3D11Device::GetD3DDevice(), data.data(), data.size()); + case GPUShaderStage::Vertex: + hr = m_device->CreateVertexShader(data.data(), data.size(), nullptr, + reinterpret_cast(shader.GetAddressOf())); bytecode.resize(data.size()); std::memcpy(bytecode.data(), data.data(), data.size()); break; - case GPUShader::Stage::Pixel: - shader = D3D11::ShaderCompiler::CreatePixelShader(D3D11Device::GetD3DDevice(), data.data(), data.size()); + case GPUShaderStage::Fragment: + hr = m_device->CreatePixelShader(data.data(), data.size(), nullptr, + reinterpret_cast(shader.GetAddressOf())); break; - case GPUShader::Stage::Compute: - shader = D3D11::ShaderCompiler::CreateComputeShader(D3D11Device::GetD3DDevice(), data.data(), data.size()); + case GPUShaderStage::Compute: + hr = m_device->CreateComputeShader(data.data(), data.size(), nullptr, + reinterpret_cast(shader.GetAddressOf())); break; default: @@ -1538,7 +1174,7 @@ std::unique_ptr D3D11Device::CreateShaderFromBinary(GPUShader::Stage return std::unique_ptr(new D3D11Shader(stage, std::move(shader), std::move(bytecode))); } -std::unique_ptr D3D11Device::CreateShaderFromSource(GPUShader::Stage stage, const std::string_view& source, +std::unique_ptr D3D11Device::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, std::vector* out_binary /* = nullptr */) { // TODO: This shouldn't be dependent on build type. 
@@ -1548,29 +1184,74 @@ std::unique_ptr D3D11Device::CreateShaderFromSource(GPUShader::Stage constexpr bool debug = false; #endif - ComPtr blob; - switch (stage) + const char* target; + switch (m_device->GetFeatureLevel()) { - case GPUShader::Stage::Vertex: - blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Vertex, m_device->GetFeatureLevel(), - source, debug); - break; + case D3D_FEATURE_LEVEL_10_0: + { + static constexpr std::array targets = {{"vs_4_0", "ps_4_0", "cs_4_0"}}; + target = targets[static_cast(stage)]; + } + break; - case GPUShader::Stage::Pixel: - blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Pixel, m_device->GetFeatureLevel(), - source, debug); - break; + case D3D_FEATURE_LEVEL_10_1: + { + static constexpr std::array targets = {{"vs_4_1", "ps_4_1", "cs_4_1"}}; + target = targets[static_cast(stage)]; + } + break; - case GPUShader::Stage::Compute: - blob = D3D11::ShaderCompiler::CompileShader(D3D11::ShaderCompiler::Type::Compute, m_device->GetFeatureLevel(), - source, debug); - break; + case D3D_FEATURE_LEVEL_11_0: + { + static constexpr std::array targets = {{"vs_5_0", "ps_5_0", "cs_5_0"}}; + target = targets[static_cast(stage)]; + } + break; + case D3D_FEATURE_LEVEL_11_1: default: - UnreachableCode(); - break; + { + static constexpr std::array targets = {{"vs_5_1", "ps_5_1", "cs_5_1"}}; + target = targets[static_cast(stage)]; + } + break; } + static constexpr UINT flags_non_debug = D3DCOMPILE_OPTIMIZATION_LEVEL3; + static constexpr UINT flags_debug = D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG; + + ComPtr blob; + ComPtr error_blob; + const HRESULT hr = + D3DCompile(source.data(), source.size(), "0", nullptr, nullptr, "main", target, + debug ? 
flags_debug : flags_non_debug, 0, blob.GetAddressOf(), error_blob.GetAddressOf()); + + std::string error_string; + if (error_blob) + { + error_string.append(static_cast(error_blob->GetBufferPointer()), error_blob->GetBufferSize()); + error_blob.Reset(); + } + + if (FAILED(hr)) + { + Log_ErrorPrintf("Failed to compile '%s':\n%s", target, error_string.c_str()); + + auto fp = FileSystem::OpenManagedCFile( + Path::Combine(EmuFolders::DataRoot, fmt::format("bad_shader_{}.txt", s_next_bad_shader_id++)).c_str(), "wb"); + if (fp) + { + std::fwrite(source.data(), source.size(), 1, fp.get()); + std::fprintf(fp.get(), "\n\nCompile as %s failed: %08X\n", target, hr); + std::fwrite(error_string.c_str(), error_string.size(), 1, fp.get()); + } + + return {}; + } + + if (!error_string.empty()) + Log_WarningPrintf("'%s' compiled with warnings:\n%s", target, error_string.c_str()); + if (out_binary) { const size_t size = blob->GetBufferSize(); @@ -1594,11 +1275,12 @@ D3D11Pipeline::~D3D11Pipeline() = default; void D3D11Pipeline::SetDebugName(const std::string_view& name) { - UnreachableCode(); + // can't label this directly } void D3D11Pipeline::Bind(ID3D11DeviceContext* context) { + // TODO: constant blend factor context->IASetInputLayout(GetInputLayout()); context->IASetPrimitiveTopology(GetPrimitiveTopology()); context->RSSetState(GetRasterizerState()); @@ -1686,18 +1368,20 @@ D3D11Device::ComPtr D3D11Device::GetBlendState(const GPUPipeli } static constexpr std::array(GPUPipeline::BlendFunc::MaxCount)> blend_mapping = {{ - D3D11_BLEND_ZERO, // Zero - D3D11_BLEND_ONE, // One - D3D11_BLEND_SRC_COLOR, // SrcColor - D3D11_BLEND_INV_SRC_COLOR, // InvSrcColor - D3D11_BLEND_DEST_COLOR, // DstColor - D3D11_BLEND_INV_DEST_COLOR, // InvDstColor - D3D11_BLEND_SRC_ALPHA, // SrcAlpha - D3D11_BLEND_INV_SRC_ALPHA, // InvSrcAlpha - D3D11_BLEND_SRC1_ALPHA, // SrcAlpha1 - D3D11_BLEND_INV_SRC1_ALPHA, // InvSrcAlpha1 - D3D11_BLEND_DEST_ALPHA, // DstAlpha - D3D11_BLEND_INV_DEST_ALPHA, // InvDstAlpha + 
D3D11_BLEND_ZERO, // Zero + D3D11_BLEND_ONE, // One + D3D11_BLEND_SRC_COLOR, // SrcColor + D3D11_BLEND_INV_SRC_COLOR, // InvSrcColor + D3D11_BLEND_DEST_COLOR, // DstColor + D3D11_BLEND_INV_DEST_COLOR, // InvDstColor + D3D11_BLEND_SRC_ALPHA, // SrcAlpha + D3D11_BLEND_INV_SRC_ALPHA, // InvSrcAlpha + D3D11_BLEND_SRC1_ALPHA, // SrcAlpha1 + D3D11_BLEND_INV_SRC1_ALPHA, // InvSrcAlpha1 + D3D11_BLEND_DEST_ALPHA, // DstAlpha + D3D11_BLEND_INV_DEST_ALPHA, // InvDstAlpha + D3D11_BLEND_BLEND_FACTOR, // ConstantColor + D3D11_BLEND_INV_BLEND_FACTOR, // InvConstantColor }}; static constexpr std::array(GPUPipeline::BlendOp::MaxCount)> op_mapping = {{ @@ -1741,12 +1425,11 @@ D3D11Device::ComPtr D3D11Device::GetInputLayout(const GPUPipe return dil; } - static constexpr std::array(GPUPipeline::VertexAttribute::Semantic::MaxCount)> - semantics = {{ - "POSITION", // Position - "TEXCOORD", // Texcoord - "COLOR", // Color - }}; +#if 0 + static constexpr std::array(GPUPipeline::VertexAttribute::MaxAttributes)> semantics = { + {"ATTR0", "ATTR1", "ATTR2", "ATTR3", "ATTR4", "ATTR5", "ATTR6", "ATTR7", "ATTR8", "ATTR9", "ATTR10", "ATTR11", + "ATTR12", "ATTR13", "ATTR14", "ATTR15"}}; +#endif static constexpr u32 MAX_COMPONENTS = 4; static constexpr const DXGI_FORMAT @@ -1768,11 +1451,11 @@ D3D11Device::ComPtr D3D11Device::GetInputLayout(const GPUPipe for (size_t i = 0; i < il.vertex_attributes.size(); i++) { const GPUPipeline::VertexAttribute& va = il.vertex_attributes[i]; - Assert(va.components > 0 && va.components < MAX_COMPONENTS); + Assert(va.components > 0 && va.components <= MAX_COMPONENTS); D3D11_INPUT_ELEMENT_DESC& elem = elems[i]; - elem.SemanticName = semantics[static_cast(va.semantic.GetValue())]; - elem.SemanticIndex = va.semantic_index; + elem.SemanticName = "ATTR"; + elem.SemanticIndex = va.index; elem.Format = format_mapping[static_cast(va.type.GetValue())][va.components - 1]; elem.InputSlot = 0; elem.AlignedByteOffset = va.offset; @@ -1794,6 +1477,16 @@ std::unique_ptr 
D3D11Device::CreatePipeline(const GPUPipeline::Grap ComPtr rs = GetRasterizationState(config.rasterization); ComPtr ds = GetDepthState(config.depth); ComPtr bs = GetBlendState(config.blend); + if (!rs || !ds || !bs) + return {}; + + ComPtr il; + if (!config.input_layout.vertex_attributes.empty()) + { + il = GetInputLayout(config.input_layout, static_cast(config.vertex_shader)); + if (!il) + return {}; + } static constexpr std::array(GPUPipeline::Primitive::MaxCount)> primitives = {{ @@ -1804,8 +1497,390 @@ std::unique_ptr D3D11Device::CreatePipeline(const GPUPipeline::Grap }}; return std::unique_ptr( - new D3D11Pipeline(std::move(rs), std::move(ds), std::move(bs), nullptr, + new D3D11Pipeline(std::move(rs), std::move(ds), std::move(bs), std::move(il), static_cast(config.vertex_shader)->GetVertexShader(), static_cast(config.pixel_shader)->GetPixelShader(), primitives[static_cast(config.primitive)])); } + +void D3D11Device::PushDebugGroup(const char* fmt, ...) +{ + if (!m_annotation) + return; + + std::va_list ap; + va_start(ap, fmt); + std::string str(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + m_annotation->BeginEvent(StringUtil::UTF8StringToWideString(str).c_str()); +} + +void D3D11Device::PopDebugGroup() +{ + if (!m_annotation) + return; + + m_annotation->EndEvent(); +} + +void D3D11Device::InsertDebugMessage(const char* fmt, ...) 
+{ + if (!m_annotation) + return; + + std::va_list ap; + va_start(ap, fmt); + std::string str(StringUtil::StdStringFromFormatV(fmt, ap)); + va_end(ap); + + m_annotation->SetMarker(StringUtil::UTF8StringToWideString(str).c_str()); +} + +void D3D11Device::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) +{ + const auto res = m_vertex_buffer.Map(m_context.Get(), vertex_size, vertex_size * vertex_count); + *map_ptr = res.pointer; + *map_space = res.space_aligned; + *map_base_vertex = res.index_aligned; +} + +void D3D11Device::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) +{ + m_vertex_buffer.Unmap(m_context.Get(), vertex_size * vertex_count); + + // TODO: cache - should come from pipeline + const UINT offset = 0; + m_context->IASetVertexBuffers(0, 1, m_vertex_buffer.GetD3DBufferArray(), &vertex_size, &offset); +} + +void D3D11Device::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) +{ + const auto res = m_index_buffer.Map(m_context.Get(), sizeof(DrawIndex), sizeof(DrawIndex) * index_count); + *map_ptr = static_cast(res.pointer); + *map_space = res.space_aligned; + *map_base_index = res.index_aligned; +} + +void D3D11Device::UnmapIndexBuffer(u32 used_index_count) +{ + m_index_buffer.Unmap(m_context.Get(), sizeof(DrawIndex) * used_index_count); + m_context->IASetIndexBuffer(m_index_buffer.GetD3DBuffer(), DXGI_FORMAT_R16_UINT, 0); +} + +void D3D11Device::PushUniformBuffer(const void* data, u32 data_size) +{ + Assert(data_size <= PUSH_UNIFORM_BUFFER_SIZE); + + const auto res = m_push_uniform_buffer.Map(m_context.Get(), PUSH_UNIFORM_BUFFER_SIZE, PUSH_UNIFORM_BUFFER_SIZE); + std::memcpy(res.pointer, data, data_size); + m_push_uniform_buffer.Unmap(m_context.Get(), data_size); + + m_context->VSSetConstantBuffers(0, 1, m_push_uniform_buffer.GetD3DBufferArray()); + m_context->PSSetConstantBuffers(0, 1, m_push_uniform_buffer.GetD3DBufferArray()); +} + +void 
D3D11Device::SetFramebuffer(GPUFramebuffer* fb) +{ + D3D11Framebuffer* FB = static_cast(fb); + m_context->OMSetRenderTargets(FB->GetNumRTVs(), FB->GetRTVArray(), FB->GetDSV()); +} + +void D3D11Device::UnbindFramebuffer(D3D11Framebuffer* fb) +{ + if (m_current_framebuffer != fb) + return; + + m_current_framebuffer = nullptr; + m_context->OMSetRenderTargets(0, nullptr, nullptr); +} + +void D3D11Device::SetPipeline(GPUPipeline* pipeline) +{ + D3D11Pipeline* PL = static_cast(pipeline); + + // TODO: cache + PL->Bind(m_context.Get()); +} + +void D3D11Device::UnbindPipeline(D3D11Pipeline* pl) +{ + if (m_current_pipeline != pl) + return; + + m_current_pipeline = nullptr; +} + +void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) +{ + // TODO: cache when old rt == tex + D3D11Texture* T = static_cast(texture); + D3D11Sampler* S = static_cast(sampler); + m_context->PSSetShaderResources(0, 1, T->GetD3DSRVArray()); + m_context->PSSetSamplers(0, 1, S->GetSamplerStateArray()); +} + +void D3D11Device::UnbindTexture(D3D11Texture* tex) +{ + // TODO +} + +void D3D11Device::SetViewport(s32 x, s32 y, s32 width, s32 height) +{ + const CD3D11_VIEWPORT vp(static_cast(x), static_cast(y), static_cast(width), + static_cast(height), 0.0f, 1.0f); + m_context->RSSetViewports(1, &vp); +} + +void D3D11Device::SetScissor(s32 x, s32 y, s32 width, s32 height) +{ + const CD3D11_RECT rc(x, y, x + width, y + height); + m_context->RSSetScissorRects(1, &rc); +} + +void D3D11Device::PreDrawCheck() +{ + if (m_current_framebuffer) + m_current_framebuffer->CommitClear(m_context.Get()); +} + +void D3D11Device::Draw(u32 vertex_count, u32 base_vertex) +{ + PreDrawCheck(); + m_context->Draw(vertex_count, base_vertex); +} + +void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) +{ + PreDrawCheck(); + m_context->DrawIndexed(index_count, base_index, base_vertex); +} + +#if 0 + struct PostProcessingStage + { + ComPtr vertex_shader; + ComPtr pixel_shader; + 
D3D11Texture output_texture; + u32 uniforms_size; + }; + + bool CheckPostProcessingRenderTargets(u32 target_width, u32 target_height); + void ApplyPostProcessingChain(ID3D11RenderTargetView* final_target, s32 final_left, s32 final_top, s32 final_width, + s32 final_height, D3D11Texture* texture, s32 texture_view_x, s32 texture_view_y, + s32 texture_view_width, s32 texture_view_height, u32 target_width, u32 target_height); + FrontendCommon::PostProcessingChain m_post_processing_chain; + D3D11Texture m_post_processing_input_texture; + std::vector m_post_processing_stages; + Common::Timer m_post_processing_timer; + + bool D3D11Device::SetPostProcessingChain(const std::string_view& config) +{ + if (config.empty()) + { + m_post_processing_input_texture.Destroy(); + m_post_processing_stages.clear(); + m_post_processing_chain.ClearStages(); + return true; + } + + if (!m_post_processing_chain.CreateFromString(config)) + return false; + + m_post_processing_stages.clear(); + + D3D11::ShaderCache shader_cache; + shader_cache.Open(EmuFolders::Cache, m_device->GetFeatureLevel(), SHADER_CACHE_VERSION, + g_settings.gpu_use_debug_device); + + FrontendCommon::PostProcessingShaderGen shadergen(RenderAPI::D3D11, true); + u32 max_ubo_size = 0; + + for (u32 i = 0; i < m_post_processing_chain.GetStageCount(); i++) + { + const FrontendCommon::PostProcessingShader& shader = m_post_processing_chain.GetShaderStage(i); + const std::string vs = shadergen.GeneratePostProcessingVertexShader(shader); + const std::string ps = shadergen.GeneratePostProcessingFragmentShader(shader); + + PostProcessingStage stage; + stage.uniforms_size = shader.GetUniformsSize(); + stage.vertex_shader = shader_cache.GetVertexShader(m_device.Get(), vs); + stage.pixel_shader = shader_cache.GetPixelShader(m_device.Get(), ps); + if (!stage.vertex_shader || !stage.pixel_shader) + { + Log_ErrorPrintf("Failed to compile one or more post-processing shaders, disabling."); + m_post_processing_stages.clear(); + 
m_post_processing_chain.ClearStages(); + return false; + } + + max_ubo_size = std::max(max_ubo_size, stage.uniforms_size); + m_post_processing_stages.push_back(std::move(stage)); + } + + if (m_push_uniform_buffer.GetSize() < max_ubo_size && + !m_push_uniform_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, max_ubo_size)) + { + Log_ErrorPrintf("Failed to allocate %u byte constant buffer for postprocessing", max_ubo_size); + m_post_processing_stages.clear(); + m_post_processing_chain.ClearStages(); + return false; + } + + m_post_processing_timer.Reset(); + return true; +} + +bool D3D11Device::CheckPostProcessingRenderTargets(u32 target_width, u32 target_height) +{ + DebugAssert(!m_post_processing_stages.empty()); + + const GPUTexture::Type type = GPUTexture::Type::RenderTarget; + const GPUTexture::Format format = GPUTexture::Format::RGBA8; + + if (m_post_processing_input_texture.GetWidth() != target_width || + m_post_processing_input_texture.GetHeight() != target_height) + { + if (!m_post_processing_input_texture.Create(m_device.Get(), target_width, target_height, 1, 1, 1, type, format)) + return false; + } + + const u32 target_count = (static_cast(m_post_processing_stages.size()) - 1); + for (u32 i = 0; i < target_count; i++) + { + PostProcessingStage& pps = m_post_processing_stages[i]; + if (pps.output_texture.GetWidth() != target_width || pps.output_texture.GetHeight() != target_height) + { + if (!pps.output_texture.Create(m_device.Get(), target_width, target_height, 1, 1, 1, type, format)) + return false; + } + } + + return true; +} + +void D3D11Device::ApplyPostProcessingChain(ID3D11RenderTargetView* final_target, s32 final_left, s32 final_top, + s32 final_width, s32 final_height, D3D11Texture* texture, s32 texture_view_x, + s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, + u32 target_width, u32 target_height) +{ + if (!CheckPostProcessingRenderTargets(target_width, target_height)) + { + RenderDisplay(final_left, final_top, 
final_width, final_height, texture, texture_view_x, texture_view_y, + texture_view_width, texture_view_height, IsUsingLinearFiltering()); + return; + } + + // downsample/upsample - use same viewport for remainder + m_context->ClearRenderTargetView(m_post_processing_input_texture.GetD3DRTV(), s_clear_color.data()); + m_context->OMSetRenderTargets(1, m_post_processing_input_texture.GetD3DRTVArray(), nullptr); + RenderDisplay(final_left, final_top, final_width, final_height, texture, texture_view_x, texture_view_y, + texture_view_width, texture_view_height, IsUsingLinearFiltering()); + + const s32 orig_texture_width = texture_view_width; + const s32 orig_texture_height = texture_view_height; + texture = &m_post_processing_input_texture; + texture_view_x = final_left; + texture_view_y = final_top; + texture_view_width = final_width; + texture_view_height = final_height; + + const u32 final_stage = static_cast(m_post_processing_stages.size()) - 1u; + for (u32 i = 0; i < static_cast(m_post_processing_stages.size()); i++) + { + PostProcessingStage& pps = m_post_processing_stages[i]; + ID3D11RenderTargetView* rtv = (i == final_stage) ? 
final_target : pps.output_texture.GetD3DRTV(); + m_context->ClearRenderTargetView(rtv, s_clear_color.data()); + m_context->OMSetRenderTargets(1, &rtv, nullptr); + + m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + m_context->VSSetShader(pps.vertex_shader.Get(), nullptr, 0); + m_context->PSSetShader(pps.pixel_shader.Get(), nullptr, 0); + m_context->PSSetShaderResources(0, 1, texture->GetD3DSRVArray()); + m_context->PSSetSamplers(0, 1, m_border_sampler.GetAddressOf()); + + const auto map = m_push_uniform_buffer.Map(m_context.Get(), m_push_uniform_buffer.GetSize(), pps.uniforms_size); + m_post_processing_chain.GetShaderStage(i).FillUniformBuffer( + map.pointer, texture->GetWidth(), texture->GetHeight(), texture_view_x, texture_view_y, texture_view_width, + texture_view_height, GetWindowWidth(), GetWindowHeight(), orig_texture_width, orig_texture_height, + static_cast(m_post_processing_timer.GetTimeSeconds())); + m_push_uniform_buffer.Unmap(m_context.Get(), pps.uniforms_size); + m_context->VSSetConstantBuffers(0, 1, m_push_uniform_buffer.GetD3DBufferArray()); + m_context->PSSetConstantBuffers(0, 1, m_push_uniform_buffer.GetD3DBufferArray()); + + m_context->Draw(3, 0); + + if (i != final_stage) + texture = &pps.output_texture; + } + + ID3D11ShaderResourceView* null_srv = nullptr; + m_context->PSSetShaderResources(0, 1, &null_srv); +} +void D3D11Device::RenderDisplay() +{ + const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight()); + + if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) + { + ApplyPostProcessingChain(m_swap_chain_rtv.Get(), left, top, width, height, + static_cast(m_display_texture), m_display_texture_view_x, + m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, + GetWindowWidth(), GetWindowHeight()); + return; + } + + m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data()); + m_context->OMSetRenderTargets(1, 
m_swap_chain_rtv.GetAddressOf(), nullptr); + + if (!HasDisplayTexture()) + return; + + RenderDisplay(left, top, width, height, static_cast(m_display_texture), m_display_texture_view_x, + m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, + IsUsingLinearFiltering()); +} +bool D3D11Device::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, + std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) +{ + static constexpr GPUTexture::Format hdformat = GPUTexture::Format::RGBA8; + + D3D11Texture render_texture; + if (!render_texture.Create(m_device.Get(), width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat)) + return false; + + static constexpr std::array clear_color = {}; + m_context->ClearRenderTargetView(render_texture.GetD3DRTV(), clear_color.data()); + m_context->OMSetRenderTargets(1, render_texture.GetD3DRTVArray(), nullptr); + + if (HasDisplayTexture()) + { + if (!m_post_processing_chain.IsEmpty()) + { + ApplyPostProcessingChain(render_texture.GetD3DRTV(), draw_rect.left, draw_rect.top, draw_rect.GetWidth(), + draw_rect.GetHeight(), static_cast(m_display_texture), + m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, + m_display_texture_view_height, width, height); + } + else + { + RenderDisplay(draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), + static_cast(m_display_texture), m_display_texture_view_x, m_display_texture_view_y, + m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); + } + } + + m_context->OMSetRenderTargets(0, nullptr, nullptr); + + const u32 stride = GPUTexture::GetPixelSize(hdformat) * width; + out_pixels->resize(width * height); + if (!DownloadTexture(&render_texture, 0, 0, width, height, out_pixels->data(), stride)) + return false; + + *out_stride = stride; + *out_format = hdformat; + return true; +} + +#endif \ No newline at end of file diff --git 
a/src/core/gpu/d3d11_device.h b/src/core/gpu/d3d11_device.h index 63de54a52..6d52bbdbc 100644 --- a/src/core/gpu/d3d11_device.h +++ b/src/core/gpu/d3d11_device.h @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once @@ -9,7 +10,7 @@ #include "d3d11_texture.h" #include "gpu_device.h" #include "postprocessing_chain.h" -#include +#include #include #include #include @@ -30,13 +31,17 @@ class D3D11Framebuffer final : public GPUFramebuffer public: ~D3D11Framebuffer() override; + ALWAYS_INLINE u32 GetNumRTVs() const { return m_rtv ? 1 : 0; } ALWAYS_INLINE ID3D11RenderTargetView* GetRTV() const { return m_rtv.Get(); } + ALWAYS_INLINE ID3D11RenderTargetView* const* GetRTVArray() const { return m_rtv.GetAddressOf(); } ALWAYS_INLINE ID3D11DepthStencilView* GetDSV() const { return m_dsv.Get(); } void SetDebugName(const std::string_view& name) override; + void CommitClear(ID3D11DeviceContext* context); private: - D3D11Framebuffer(ComPtr rtv, ComPtr dsv); + D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, ComPtr rtv, + ComPtr dsv); ComPtr m_rtv; ComPtr m_dsv; @@ -53,6 +58,7 @@ public: ~D3D11Sampler() override; ALWAYS_INLINE ID3D11SamplerState* GetSamplerState() const { return m_ss.Get(); } + ALWAYS_INLINE ID3D11SamplerState* const* GetSamplerStateArray() const { return m_ss.GetAddressOf(); } void SetDebugName(const std::string_view& name) override; @@ -78,7 +84,7 @@ public: void SetDebugName(const std::string_view& name) override; private: - D3D11Shader(Stage stage, Microsoft::WRL::ComPtr shader, std::vector bytecode); + D3D11Shader(GPUShaderStage stage, Microsoft::WRL::ComPtr shader, std::vector bytecode); Microsoft::WRL::ComPtr m_shader; std::vector m_bytecode; // only for VS @@ -154,7 +160,7 @@ public: AdapterAndModeList GetAdapterAndModeList() override; void DestroySurface() override; - bool 
SetPostProcessingChain(const std::string_view& config) override; + std::string GetShaderCacheBaseName(const std::string_view& type, bool debug) const override; std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, @@ -170,14 +176,33 @@ public: void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override; - std::unique_ptr CreateFramebuffer(GPUTexture* rt, u32 rt_layer, u32 rt_level, GPUTexture* ds, - u32 ds_layer, u32 ds_level) override; + std::unique_ptr CreateFramebuffer(GPUTexture* rt = nullptr, u32 rt_layer = 0, u32 rt_level = 0, + GPUTexture* ds = nullptr, u32 ds_layer = 0, + u32 ds_level = 0) override; - std::unique_ptr CreateShaderFromBinary(GPUShader::Stage stage, gsl::span data) override; - std::unique_ptr CreateShaderFromSource(GPUShader::Stage stage, const std::string_view& source, + std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) override; + std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, std::vector* out_binary = nullptr) override; std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config) override; + void PushDebugGroup(const char* fmt, ...) override; + void PopDebugGroup() override; + void InsertDebugMessage(const char* fmt, ...) 
override; + + void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, + u32* map_base_vertex) override; + void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override; + void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override; + void UnmapIndexBuffer(u32 used_index_count) override; + void PushUniformBuffer(const void* data, u32 data_size) override; + void SetFramebuffer(GPUFramebuffer* fb) override; + void SetPipeline(GPUPipeline* pipeline) override; + void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override; + void SetViewport(s32 x, s32 y, s32 width, s32 height) override; + void SetScissor(s32 x, s32 y, s32 width, s32 height) override; + void Draw(u32 vertex_count, u32 base_vertex) override; + void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override; + bool GetHostRefreshRate(float* refresh_rate) override; bool SetGPUTimingEnabled(bool enabled) override; @@ -186,63 +211,44 @@ public: void SetVSync(bool enabled) override; bool Render(bool skip_present) override; - bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, - u32* out_stride, GPUTexture::Format* out_format) override; + + void UnbindFramebuffer(D3D11Framebuffer* fb); + void UnbindPipeline(D3D11Pipeline* pl); + void UnbindTexture(D3D11Texture* tex); static AdapterAndModeList StaticGetAdapterAndModeList(); private: using RasterizationStateMap = std::unordered_map>; using DepthStateMap = std::unordered_map>; - using BlendStateMap = std::unordered_map>; + using BlendStateMap = std::unordered_map>; using InputLayoutMap = std::unordered_map, GPUPipeline::InputLayoutHash>; - static constexpr u32 DISPLAY_UNIFORM_BUFFER_SIZE = 64; - static constexpr u32 IMGUI_VERTEX_BUFFER_SIZE = 4 * 1024 * 1024; - static constexpr u32 IMGUI_INDEX_BUFFER_SIZE = 2 * 1024 * 1024; + static constexpr u32 PUSH_UNIFORM_BUFFER_SIZE = 64; + static constexpr 
u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; + static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024; static constexpr u8 NUM_TIMESTAMP_QUERIES = 3; static AdapterAndModeList GetAdapterAndModeList(IDXGIFactory* dxgi_factory); + void CommitClear(GPUTexture* t); + void PreDrawCheck(); + bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format); void DestroyStagingBuffer(); - bool CreateResources() override; - void DestroyResources() override; - - bool CreateImGuiResources(); - void DestroyImGuiResources(); - bool CreateSwapChain(const DXGI_MODE_DESC* fullscreen_mode); bool CreateSwapChainRTV(); + bool CreateBuffers(); + void DestroyBuffers(); + ComPtr GetRasterizationState(const GPUPipeline::RasterizationState& rs); ComPtr GetDepthState(const GPUPipeline::DepthState& ds); ComPtr GetBlendState(const GPUPipeline::BlendState& bs); ComPtr GetInputLayout(const GPUPipeline::InputLayout& il, const D3D11Shader* vs); - void RenderDisplay(); - void RenderSoftwareCursor(); - void RenderImGui(); - - void RenderDisplay(s32 left, s32 top, s32 width, s32 height, D3D11Texture* texture, s32 texture_view_x, - s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter); - void RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture_handle); - - struct PostProcessingStage - { - ComPtr vertex_shader; - ComPtr pixel_shader; - D3D11Texture output_texture; - u32 uniforms_size; - }; - - bool CheckPostProcessingRenderTargets(u32 target_width, u32 target_height); - void ApplyPostProcessingChain(ID3D11RenderTargetView* final_target, s32 final_left, s32 final_top, s32 final_width, - s32 final_height, D3D11Texture* texture, s32 texture_view_x, s32 texture_view_y, - s32 texture_view_width, s32 texture_view_height, u32 target_width, u32 target_height); - bool CreateTimestampQueries(); void DestroyTimestampQueries(); void PopTimestampQuery(); @@ -250,28 +256,17 @@ private: ComPtr m_device; ComPtr m_context; + ComPtr m_annotation; 
ComPtr m_dxgi_factory; ComPtr m_swap_chain; ComPtr m_swap_chain_rtv; - ComPtr m_display_rasterizer_state; - ComPtr m_display_depth_stencil_state; - ComPtr m_display_blend_state; - ComPtr m_software_cursor_blend_state; - ComPtr m_display_vertex_shader; - ComPtr m_display_pixel_shader; - ComPtr m_display_alpha_pixel_shader; - ComPtr m_point_sampler; - ComPtr m_linear_sampler; - ComPtr m_border_sampler; - RasterizationStateMap m_rasterization_states; DepthStateMap m_depth_states; BlendStateMap m_blend_states; InputLayoutMap m_input_layouts; - D3D11::StreamBuffer m_display_uniform_buffer; ComPtr m_readback_staging_texture; DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN; u32 m_readback_staging_texture_width = 0; @@ -281,18 +276,12 @@ private: bool m_using_flip_model_swap_chain = true; bool m_using_allow_tearing = false; - D3D11Texture m_imgui_texture; - D3D11::StreamBuffer m_imgui_vertex_buffer; - D3D11::StreamBuffer m_imgui_index_buffer; - ComPtr m_imgui_input_layout; - ComPtr m_imgui_vertex_shader; - ComPtr m_imgui_pixel_shader; - ComPtr m_imgui_blend_state; + D3D11::StreamBuffer m_vertex_buffer; + D3D11::StreamBuffer m_index_buffer; + D3D11::StreamBuffer m_push_uniform_buffer; - FrontendCommon::PostProcessingChain m_post_processing_chain; - D3D11Texture m_post_processing_input_texture; - std::vector m_post_processing_stages; - Common::Timer m_post_processing_timer; + D3D11Framebuffer* m_current_framebuffer = nullptr; + D3D11Pipeline* m_current_pipeline = nullptr; std::array, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {}; u8 m_read_timestamp_query = 0; diff --git a/src/core/gpu/d3d11_texture.cpp b/src/core/gpu/d3d11_texture.cpp index 7ffbe6506..cac81bbf7 100644 --- a/src/core/gpu/d3d11_texture.cpp +++ b/src/core/gpu/d3d11_texture.cpp @@ -94,7 +94,7 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 const u32 srnum = D3D11CalcSubresource(level, layer, m_levels); D3D11_MAPPED_SUBRESOURCE sr; HRESULT hr = 
D3D11Device::GetD3DContext()->Map(m_texture.Get(), srnum, - discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE, 0, &sr); + discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_READ_WRITE, 0, &sr); if (FAILED(hr)) { Log_ErrorPrintf("Map pixels texture failed: %08X", hr); diff --git a/src/core/gpu/d3d11_texture.h b/src/core/gpu/d3d11_texture.h index a33a03f70..321f7bfd3 100644 --- a/src/core/gpu/d3d11_texture.h +++ b/src/core/gpu/d3d11_texture.h @@ -7,8 +7,12 @@ #include #include +class D3D11Device; + class D3D11Texture final : public GPUTexture { + friend D3D11Device; + public: template using ComPtr = Microsoft::WRL::ComPtr; diff --git a/src/core/gpu/d3d12_gpu_device.cpp b/src/core/gpu/d3d12_gpu_device.cpp index e7d9de229..8b1caf9d5 100644 --- a/src/core/gpu/d3d12_gpu_device.cpp +++ b/src/core/gpu/d3d12_gpu_device.cpp @@ -617,56 +617,6 @@ bool D3D12GPUDevice::Render(bool skip_present) return true; } -bool D3D12GPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) -{ - static constexpr DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM; - static constexpr GPUTexture::Format hdformat = GPUTexture::Format::RGBA8; - - D3D12::Texture render_texture; - if (!render_texture.Create(width, height, 1, 1, 1, format, DXGI_FORMAT_UNKNOWN, format, DXGI_FORMAT_UNKNOWN, - D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || - !m_readback_staging_texture.EnsureSize(width, height, format, false)) - { - return false; - } - - ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList(); - - if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(cmdlist, &render_texture, draw_rect.left, draw_rect.top, draw_rect.GetWidth(), - draw_rect.GetHeight(), static_cast(m_display_texture), - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, width, height); - } - else - { - 
render_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET); - cmdlist->ClearRenderTargetView(render_texture.GetRTVOrDSVDescriptor(), s_clear_color.data(), 0, nullptr); - cmdlist->OMSetRenderTargets(1, &render_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr); - - if (HasDisplayTexture()) - { - RenderDisplay(cmdlist, draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), - static_cast(m_display_texture), m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); - } - } - - cmdlist->OMSetRenderTargets(0, nullptr, FALSE, nullptr); - - render_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE); - m_readback_staging_texture.CopyFromTexture(render_texture, 0, 0, 0, 0, 0, width, height); - - const u32 stride = sizeof(u32) * width; - out_pixels->resize(width * height); - *out_stride = stride; - *out_format = hdformat; - - return m_readback_staging_texture.ReadPixels(0, 0, width, height, out_pixels->data(), stride); -} - bool D3D12GPUDevice::SetGPUTimingEnabled(bool enabled) { g_d3d12_context->SetEnableGPUTiming(enabled); diff --git a/src/core/gpu/d3d12_gpu_device.h b/src/core/gpu/d3d12_gpu_device.h index f7494f679..505b578d1 100644 --- a/src/core/gpu/d3d12_gpu_device.h +++ b/src/core/gpu/d3d12_gpu_device.h @@ -63,8 +63,6 @@ public: void SetVSync(bool enabled) override; bool Render(bool skip_present) override; - bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, - u32* out_stride, GPUTexture::Format* out_format) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; diff --git a/src/core/gpu/gpu_device.cpp b/src/core/gpu/gpu_device.cpp index fb0e69627..c9017a3bc 100644 --- a/src/core/gpu/gpu_device.cpp +++ b/src/core/gpu/gpu_device.cpp @@ -3,11 +3,13 @@ #include "gpu_device.h" #include "../settings.h" +#include "../shadergen.h" #include 
"common/align.h" #include "common/assert.h" #include "common/file_system.h" #include "common/hash_combine.h" #include "common/log.h" +#include "common/path.h" #include "common/string_util.h" #include "common/timer.h" #include "imgui.h" @@ -23,10 +25,50 @@ Log_SetChannel(GPUDevice); // FIXME #include "common/windows_headers.h" -#include "d3d_shaders.h" + +// TODO: default sampler mode, create a persistent descriptor set in Vulkan for textures +// TODO: input layout => VAO in GL, buffer might change +// TODO: one big lookup table for render passes, or dynamic rendering std::unique_ptr g_host_display; +GPUFramebuffer::GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height) + : m_rt(rt), m_ds(ds), m_width(width), m_height(height) +{ +} + +GPUFramebuffer::~GPUFramebuffer() = default; + +GPUSampler::GPUSampler() = default; + +GPUSampler::~GPUSampler() = default; + +GPUShader::GPUShader(GPUShaderStage stage) : m_stage(stage) +{ +} + +GPUShader::~GPUShader() = default; + +const char* GPUShader::GetStageName(GPUShaderStage stage) +{ + switch (stage) + { + case GPUShaderStage::Vertex: + return "Vertex"; + case GPUShaderStage::Fragment: + return "Fragment"; + case GPUShaderStage::Compute: + return "Compute"; + default: + UnreachableCode(); + return ""; + } +} + +GPUPipeline::GPUPipeline() = default; + +GPUPipeline::~GPUPipeline() = default; + size_t GPUPipeline::InputLayoutHash::operator()(const InputLayout& il) const { std::size_t h = 0; @@ -96,7 +138,11 @@ GPUPipeline::BlendState GPUPipeline::BlendState::GetAlphaBlendingState() return ret; } -GPUDevice::~GPUDevice() = default; +GPUDevice::~GPUDevice() +{ + // TODO: move to Destroy() method + m_shader_cache.Close(); +} RenderAPI GPUDevice::GetPreferredAPI() { @@ -107,84 +153,191 @@ RenderAPI GPUDevice::GetPreferredAPI() #endif } +bool GPUDevice::SetupDevice() +{ + // TODO: option to disable shader cache + if (true) + { + const std::string basename = GetShaderCacheBaseName("shaders", 
g_settings.gpu_use_debug_device); + const std::string filename = Path::Combine(EmuFolders::Cache, basename); + if (!m_shader_cache.Open(filename.c_str())) + Log_WarningPrintf("Failed to open shader cache."); + } + else + { + Log_WarningPrintf("Shader cache is disabled."); + } + + return true; +} + bool GPUDevice::CreateResources() { - GPUSampler::Config config = {}; - config.address_u = GPUSampler::AddressMode::ClampToEdge; - config.address_v = GPUSampler::AddressMode::ClampToEdge; - config.address_w = GPUSampler::AddressMode::ClampToEdge; - config.min_filter = GPUSampler::Filter::Nearest; - config.mag_filter = GPUSampler::Filter::Nearest; - if (!(m_point_sampler = CreateSampler(config))) + GPUSampler::Config spconfig = {}; + spconfig.address_u = GPUSampler::AddressMode::ClampToEdge; + spconfig.address_v = GPUSampler::AddressMode::ClampToEdge; + spconfig.address_w = GPUSampler::AddressMode::ClampToEdge; + spconfig.min_filter = GPUSampler::Filter::Nearest; + spconfig.mag_filter = GPUSampler::Filter::Nearest; + if (!(m_point_sampler = CreateSampler(spconfig))) return false; - config.min_filter = GPUSampler::Filter::Linear; - config.mag_filter = GPUSampler::Filter::Linear; - if (!(m_linear_sampler = CreateSampler(config))) + spconfig.min_filter = GPUSampler::Filter::Linear; + spconfig.mag_filter = GPUSampler::Filter::Linear; + if (!(m_linear_sampler = CreateSampler(spconfig))) return false; - if (!CreateImGuiResources()) + spconfig.min_filter = GPUSampler::Filter::Nearest; + spconfig.mag_filter = GPUSampler::Filter::Nearest; + spconfig.address_u = GPUSampler::AddressMode::ClampToBorder; + spconfig.address_v = GPUSampler::AddressMode::ClampToBorder; + spconfig.border_color = 0xFF000000u; + if (!(m_border_sampler = CreateSampler(spconfig))) return false; + ShaderGen shadergen(GetRenderAPI(), /*FIXME DSB*/ true); + + GPUPipeline::GraphicsConfig plconfig; + plconfig.layout = GPUPipeline::Layout::SingleTexture; + plconfig.primitive = GPUPipeline::Primitive::Triangles; + 
plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.color_format = GPUTexture::Format::RGBA8; // FIXME m_window_info.surface_format; + plconfig.depth_format = GPUTexture::Format::Unknown; + plconfig.samples = 1; + plconfig.per_sample_shading = false; + + std::unique_ptr display_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GenerateDisplayVertexShader()); + std::unique_ptr display_fs = + CreateShader(GPUShaderStage::Fragment, shadergen.GenerateDisplayFragmentShader(true)); + std::unique_ptr cursor_fs = + CreateShader(GPUShaderStage::Fragment, shadergen.GenerateDisplayFragmentShader(false)); + if (!display_vs || !display_fs || !cursor_fs) + return false; + GL_OBJECT_NAME(display_vs, "Display Vertex Shader"); + GL_OBJECT_NAME(display_fs, "Display Fragment Shader"); + GL_OBJECT_NAME(cursor_fs, "Cursor Fragment Shader"); + + plconfig.vertex_shader = display_vs.get(); + plconfig.pixel_shader = display_fs.get(); + if (!(m_display_pipeline = CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_display_pipeline, "Display Pipeline"); + + plconfig.blend = GPUPipeline::BlendState::GetAlphaBlendingState(); + plconfig.pixel_shader = cursor_fs.get(); + if (!(m_cursor_pipeline = CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_cursor_pipeline, "Cursor Pipeline"); + + std::unique_ptr imgui_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GenerateImGuiVertexShader()); + std::unique_ptr imgui_fs = CreateShader(GPUShaderStage::Fragment, shadergen.GenerateImGuiFragmentShader()); + if (!imgui_vs || !imgui_fs) + return false; + GL_OBJECT_NAME(imgui_vs, "ImGui Vertex Shader"); + GL_OBJECT_NAME(imgui_fs, "ImGui Fragment Shader"); + + static constexpr GPUPipeline::VertexAttribute imgui_attributes[] = { + GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Type::Float, 2, 
offsetof(ImDrawVert, pos)), + GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(ImDrawVert, uv)), + GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(ImDrawVert, col)), + }; + + plconfig.input_layout.vertex_attributes = imgui_attributes; + plconfig.input_layout.vertex_stride = sizeof(ImDrawVert); + plconfig.vertex_shader = imgui_vs.get(); + plconfig.pixel_shader = imgui_fs.get(); + + m_imgui_pipeline = CreatePipeline(plconfig); + if (!m_imgui_pipeline) + { + Log_ErrorPrintf("Failed to compile ImGui pipeline."); + return false; + } + GL_OBJECT_NAME(m_imgui_pipeline, "ImGui Pipeline"); + return true; } void GPUDevice::DestroyResources() { - DestroyImGuiResources(); - m_cursor_texture.reset(); + + m_imgui_font_texture.reset(); + m_imgui_pipeline.reset(); + + m_cursor_pipeline.reset(); + m_display_pipeline.reset(); + m_border_sampler.reset(); + m_linear_sampler.reset(); m_point_sampler.reset(); + + m_shader_cache.Close(); } -bool GPUDevice::CreateImGuiResources() +bool GPUDevice::SetPostProcessingChain(const std::string_view& config) { - std::unique_ptr imgui_vs = CreateShaderFromBinary(GPUShader::Stage::Vertex, s_imgui_vs_bytecode); - std::unique_ptr imgui_ps = CreateShaderFromBinary(GPUShader::Stage::Pixel, s_imgui_ps_bytecode); - if (!imgui_vs || !imgui_ps) - { - Log_ErrorPrintf("Failed to create ImGui shaders."); - return false; - } + return false; +} - static constexpr GPUPipeline::VertexAttribute attributes[] = { - GPUPipeline::VertexAttribute::Make(GPUPipeline::VertexAttribute::Semantic::Position, 0, - GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(ImDrawVert, pos)), - GPUPipeline::VertexAttribute::Make(GPUPipeline::VertexAttribute::Semantic::Texcoord, 0, - GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(ImDrawVert, uv)), - GPUPipeline::VertexAttribute::Make(GPUPipeline::VertexAttribute::Semantic::Color, 0, - GPUPipeline::VertexAttribute::Type::UNorm8, 4, 
offsetof(ImDrawVert, col)), +std::string GPUDevice::GetShaderCacheBaseName(const std::string_view& type, bool debug) const +{ + Panic("Not implemented"); + return {}; +} + +void GPUDevice::RenderImGui() +{ + GL_SCOPE("RenderImGui"); + + ImGui::Render(); + + const ImDrawData* draw_data = ImGui::GetDrawData(); + if (draw_data->CmdListsCount == 0) + return; + + SetPipeline(m_imgui_pipeline.get()); + SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height); + + const float L = 0.0f; + const float R = static_cast(m_window_info.surface_width); + const float T = 0.0f; + const float B = static_cast(m_window_info.surface_height); + const float ortho_projection[4][4] = { + {2.0f / (R - L), 0.0f, 0.0f, 0.0f}, + {0.0f, 2.0f / (T - B), 0.0f, 0.0f}, + {0.0f, 0.0f, 0.5f, 0.0f}, + {(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f}, }; + PushUniformBuffer(ortho_projection, sizeof(ortho_projection)); - GPUPipeline::GraphicsConfig config; - config.layout = GPUPipeline::Layout::SingleTexture; - config.primitive = GPUPipeline::Primitive::Triangles; - config.input_layout.vertex_attributes = attributes; - config.input_layout.vertex_stride = sizeof(ImDrawVert); - config.vertex_shader = imgui_vs.get(); - config.pixel_shader = imgui_ps.get(); - config.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); - config.depth = GPUPipeline::DepthState::GetNoTestsState(); - config.blend = GPUPipeline::BlendState::GetAlphaBlendingState(); - config.color_format = GPUTexture::Format::RGBA8; // FIXME m_window_info.surface_format; - config.depth_format = GPUTexture::Format::Unknown; - config.samples = 1; - config.per_sample_shading = false; - - m_imgui_pipeline = CreatePipeline(config); - if (!m_imgui_pipeline) + // Render command lists + for (int n = 0; n < draw_data->CmdListsCount; n++) { - Log_ErrorPrintf("Failed to compile ImGui pipeline."); - return false; + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + static_assert(sizeof(ImDrawIdx) == 
sizeof(DrawIndex)); + + u32 base_vertex, base_index; + UploadVertexBuffer(cmd_list->VtxBuffer.Data, sizeof(ImDrawVert), cmd_list->VtxBuffer.Size, &base_vertex); + UploadIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size, &base_index); + + for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) + { + const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; + DebugAssert(!pcmd->UserCallback); + + if (pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y) + continue; + + SetScissor(static_cast(pcmd->ClipRect.x), static_cast(pcmd->ClipRect.y), + static_cast(pcmd->ClipRect.z - pcmd->ClipRect.x), + static_cast(pcmd->ClipRect.w - pcmd->ClipRect.y)); + SetTextureSampler(0, reinterpret_cast(pcmd->TextureId), m_linear_sampler.get()); + DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset); + } } - - return true; -} - -void GPUDevice::DestroyImGuiResources() -{ - m_imgui_font_texture.reset(); } void GPUDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex) @@ -193,13 +346,13 @@ void GPUDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_pt UnreachableCode(); } -void GPUDevice::UnmapVertexBuffer(u32 used_vertex_count) +void GPUDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) { // TODO: REMOVE ME UnreachableCode(); } -void GPUDevice::MapIndexBuffer(u32 index_count, u16** map_ptr, u32* map_space, u32* map_base_index) +void GPUDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) { // TODO: REMOVE ME UnreachableCode(); @@ -217,7 +370,7 @@ void GPUDevice::UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 ve u32 space; MapVertexBuffer(vertex_size, vertex_count, &map, &space, base_vertex); std::memcpy(map, vertices, vertex_size * vertex_count); - UnmapVertexBuffer(vertex_count); + UnmapVertexBuffer(vertex_size, vertex_count); } void GPUDevice::UploadIndexBuffer(const 
u16* indices, u32 index_count, u32* base_index) @@ -253,22 +406,23 @@ void GPUDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sam UnreachableCode(); } -void GPUDevice::SetViewport(u32 x, u32 y, u32 width, u32 height) +void GPUDevice::SetViewport(s32 x, s32 y, s32 width, s32 height) +{ + // TODO: REMOVE ME + // GL needs to invert if writing to the window framebuffer + UnreachableCode(); +} + +void GPUDevice::SetScissor(s32 x, s32 y, s32 width, s32 height) { // TODO: REMOVE ME UnreachableCode(); } -void GPUDevice::SetScissor(u32 x, u32 y, u32 width, u32 height) +void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height) { - // TODO: REMOVE ME - UnreachableCode(); -} - -void GPUDevice::SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height) -{ - // TODO: REMOVE ME - UnreachableCode(); + SetViewport(x, y, width, height); + SetScissor(x, y, width, height); } void GPUDevice::Draw(u32 base_vertex, u32 vertex_count) @@ -298,14 +452,29 @@ void GPUDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 UnreachableCode(); } -std::unique_ptr GPUDevice::CreateShaderFromBinary(GPUShader::Stage stage, gsl::span data) +void GPUDevice::ClearRenderTarget(GPUTexture* t, u32 c) +{ + t->SetClearColor(c); +} + +void GPUDevice::ClearDepth(GPUTexture* t, float d) +{ + t->SetClearDepth(d); +} + +void GPUDevice::InvalidateRenderTarget(GPUTexture* t) +{ + t->SetState(GPUTexture::State::Invalidated); +} + +std::unique_ptr GPUDevice::CreateShaderFromBinary(GPUShaderStage stage, gsl::span data) { // TODO: REMOVE ME UnreachableCode(); return {}; } -std::unique_ptr GPUDevice::CreateShaderFromSource(GPUShader::Stage stage, const std::string_view& source, +std::unique_ptr GPUDevice::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, std::vector* out_binary /* = nullptr */) { // TODO: REMOVE ME @@ -320,6 +489,18 @@ std::unique_ptr GPUDevice::CreatePipeline(const GPUPipeline::Graphi return {}; } +void 
GPUDevice::PushDebugGroup(const char* fmt, ...) +{ +} + +void GPUDevice::PopDebugGroup() +{ +} + +void GPUDevice::InsertDebugMessage(const char* fmt, ...) +{ +} + std::unique_ptr GPUDevice::CreateSampler(const GPUSampler::Config& config) { // TODO: REMOVE ME @@ -335,6 +516,42 @@ std::unique_ptr GPUDevice::CreateFramebuffer(GPUTexture* rt, u32 return {}; } +std::unique_ptr GPUDevice::CreateShader(GPUShaderStage stage, const std::string_view& source) +{ + std::unique_ptr shader; + if (!m_shader_cache.IsOpen()) + { + shader = CreateShaderFromSource(stage, source); + return shader; + } + + const GPUShaderCache::CacheIndexKey key = m_shader_cache.GetCacheKey(stage, source, "main"); + std::vector binary; + if (m_shader_cache.Lookup(key, &binary)) + { + shader = CreateShaderFromBinary(stage, binary); + if (shader) + return shader; + + Log_ErrorPrintf("Failed to create shader from binary (driver changed?). Clearing cache."); + m_shader_cache.Clear(); + } + + binary.clear(); + shader = CreateShaderFromSource(stage, source, &binary); + if (!shader) + return shader; + + // Don't insert empty shaders into the cache... 
+ if (!binary.empty()) + { + if (!m_shader_cache.Insert(key, binary.data(), static_cast(binary.size()))) + m_shader_cache.Close(); + } + + return shader; +} + bool GPUDevice::ParseFullscreenMode(const std::string_view& mode, u32* width, u32* height, float* refresh_rate) { if (!mode.empty()) @@ -457,6 +674,50 @@ void GPUDevice::ThrottlePresentation() Common::Timer::SleepUntil(m_last_frame_displayed_time, false); } +void GPUDevice::ClearDisplayTexture() +{ + m_display_texture = nullptr; + m_display_texture_view_x = 0; + m_display_texture_view_y = 0; + m_display_texture_view_width = 0; + m_display_texture_view_height = 0; + m_display_changed = true; +} + +void GPUDevice::SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height) +{ + DebugAssert(texture); + texture->MakeReadyForSampling(); + m_display_texture = texture; + m_display_texture_view_x = view_x; + m_display_texture_view_y = view_y; + m_display_texture_view_width = view_width; + m_display_texture_view_height = view_height; + m_display_changed = true; +} + +void GPUDevice::SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height) +{ + m_display_texture_view_x = view_x; + m_display_texture_view_y = view_y; + m_display_texture_view_width = view_width; + m_display_texture_view_height = view_height; + m_display_changed = true; +} + +void GPUDevice::SetDisplayParameters(s32 display_width, s32 display_height, s32 active_left, s32 active_top, + s32 active_width, s32 active_height, float display_aspect_ratio) +{ + m_display_width = display_width; + m_display_height = display_height; + m_display_active_left = active_left; + m_display_active_top = active_top; + m_display_active_width = active_width; + m_display_active_height = active_height; + m_display_aspect_ratio = display_aspect_ratio; + m_display_changed = true; +} + bool GPUDevice::GetHostRefreshRate(float* refresh_rate) { if (m_window_info.surface_refresh_rate > 0.0f) @@ -480,6 +741,9 @@ float 
GPUDevice::GetAndResetAccumulatedGPUTime() void GPUDevice::SetSoftwareCursor(std::unique_ptr texture, float scale /*= 1.0f*/) { + if (texture) + texture->MakeReadyForSampling(); + m_cursor_texture = std::move(texture); m_cursor_texture_scale = scale; } @@ -535,6 +799,119 @@ bool GPUDevice::IsUsingLinearFiltering() const return g_settings.display_linear_filtering; } +bool GPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, + std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) +{ + static constexpr GPUTexture::Format hdformat = GPUTexture::Format::RGBA8; // TODO FIXME m_window_info.surface_format + + std::unique_ptr render_texture = + CreateTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat); + if (!render_texture) + return false; + + std::unique_ptr render_fb = CreateFramebuffer(render_texture.get()); + if (!render_fb) + return false; + + ClearRenderTarget(render_texture.get(), 0); + SetFramebuffer(render_fb.get()); + + if (HasDisplayTexture()) + { +#if 0 + if (!m_post_processing_chain.IsEmpty()) + { + ApplyPostProcessingChain(render_texture.GetD3DRTV(), draw_rect.left, draw_rect.top, draw_rect.GetWidth(), + draw_rect.GetHeight(), static_cast(m_display_texture), + m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, + m_display_texture_view_height, width, height); + } + else +#endif + { + RenderDisplay(draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), m_display_texture, + m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, + m_display_texture_view_height, IsUsingLinearFiltering()); + } + } + + SetFramebuffer(nullptr); + + const u32 stride = GPUTexture::GetPixelSize(hdformat) * width; + out_pixels->resize(width * height); + if (!DownloadTexture(render_texture.get(), 0, 0, width, height, out_pixels->data(), stride)) + return false; + + *out_stride = stride; + *out_format = hdformat; + return true; +} + 
+void GPUDevice::RenderDisplay() +{ + const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight()); + + GL_SCOPE("RenderDisplay: %dx%d at %d,%d", width, height, left, top); + +#if 0 + if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) + { + ApplyPostProcessingChain(m_swap_chain_rtv.Get(), left, top, width, height, + static_cast(m_display_texture), m_display_texture_view_x, + m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, + GetWindowWidth(), GetWindowHeight()); + return; + } +#endif + + if (!HasDisplayTexture()) + return; + + RenderDisplay(left, top, width, height, m_display_texture, m_display_texture_view_x, m_display_texture_view_y, + m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); +} + +void GPUDevice::RenderDisplay(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture, s32 texture_view_x, + s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter) +{ + SetPipeline(m_display_pipeline.get()); + SetTextureSampler(0, texture, linear_filter ? m_linear_sampler.get() : m_point_sampler.get()); + + const bool linear = linear_filter; + const float position_adjust = linear ? 0.5f : 0.0f; + const float size_adjust = linear ? 
1.0f : 0.0f; + const float uniforms[4] = { + (static_cast(texture_view_x) + position_adjust) / static_cast(texture->GetWidth()), + (static_cast(texture_view_y) + position_adjust) / static_cast(texture->GetHeight()), + (static_cast(texture_view_width) - size_adjust) / static_cast(texture->GetWidth()), + (static_cast(texture_view_height) - size_adjust) / static_cast(texture->GetHeight())}; + PushUniformBuffer(uniforms, sizeof(uniforms)); + + SetViewportAndScissor(left, top, width, height); + Draw(3, 0); +} + +void GPUDevice::RenderSoftwareCursor() +{ + if (!HasSoftwareCursor()) + return; + + const auto [left, top, width, height] = CalculateSoftwareCursorDrawRect(); + RenderSoftwareCursor(left, top, width, height, m_cursor_texture.get()); +} + +void GPUDevice::RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture) +{ + SetPipeline(m_cursor_pipeline.get()); + SetTextureSampler(0, texture, m_linear_sampler.get()); + + const float uniforms[4] = {0.0f, 0.0f, 1.0f, 1.0f}; + PushUniformBuffer(uniforms, sizeof(uniforms)); + + SetViewportAndScissor(left, top, width, height); + Draw(3, 0); +} + void GPUDevice::CalculateDrawRect(s32 window_width, s32 window_height, float* out_left, float* out_top, float* out_width, float* out_height, float* out_left_padding, float* out_top_padding, float* out_scale, float* out_x_scale, bool apply_aspect_ratio /* = true */) const diff --git a/src/core/gpu/gpu_device.h b/src/core/gpu/gpu_device.h index 55bbf7e4a..47e01c3ee 100644 --- a/src/core/gpu/gpu_device.h +++ b/src/core/gpu/gpu_device.h @@ -3,6 +3,7 @@ #pragma once +#include "gpu_shader_cache.h" #include "gpu_texture.h" #include "common/bitfield.h" @@ -31,10 +32,22 @@ enum class RenderAPI : u32 class GPUFramebuffer { public: - GPUFramebuffer() = default; - virtual ~GPUFramebuffer() = default; + GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height); + virtual ~GPUFramebuffer(); + + ALWAYS_INLINE GPUTexture* GetRT() const { return m_rt; } + 
ALWAYS_INLINE GPUTexture* GetDS() const { return m_ds; } + + ALWAYS_INLINE u32 GetWidth() const { return m_width; } + ALWAYS_INLINE u32 GetHeight() const { return m_height; } virtual void SetDebugName(const std::string_view& name) = 0; + +protected: + GPUTexture* m_rt; + GPUTexture* m_ds; + u32 m_width; + u32 m_height; }; class GPUSampler @@ -57,7 +70,7 @@ public: MaxCount }; - struct Config + union Config { BitField min_filter; BitField mag_filter; @@ -72,31 +85,33 @@ public: u64 key; }; - GPUSampler() = default; - virtual ~GPUSampler() = default; + GPUSampler(); + virtual ~GPUSampler(); virtual void SetDebugName(const std::string_view& name) = 0; }; +enum class GPUShaderStage : u8 +{ + Vertex, + Fragment, + Compute +}; + class GPUShader { public: - enum class Stage - { - Vertex, - Pixel, - Compute - }; + GPUShader(GPUShaderStage stage); + virtual ~GPUShader(); - GPUShader(Stage stage) : m_stage(stage) {} - virtual ~GPUShader() = default; + static const char* GetStageName(GPUShaderStage stage); - ALWAYS_INLINE Stage GetStage() const { return m_stage; } + ALWAYS_INLINE GPUShaderStage GetStage() const { return m_stage; } virtual void SetDebugName(const std::string_view& name) = 0; protected: - Stage m_stage; + GPUShaderStage m_stage; }; class GPUPipeline @@ -107,6 +122,9 @@ public: // 128 byte UBO via push constants, 1 texture. SingleTexture, + // 1 streamed UBO, 1 texture in PS. 
+ HWBatch, + MaxCount }; @@ -122,14 +140,7 @@ public: union VertexAttribute { - enum class Semantic : u8 - { - Position, - Texcoord, - Color, - - MaxCount - }; + static constexpr u32 MaxAttributes = 16; enum class Type : u8 { @@ -146,11 +157,10 @@ public: MaxCount }; - BitField semantic; - BitField semantic_index; - BitField type; - BitField components; - BitField offset; + BitField index; + BitField type; + BitField components; + BitField offset; u32 key; // clang-format off @@ -160,20 +170,18 @@ public: ALWAYS_INLINE bool operator<(const VertexAttribute& rhs) const { return key < rhs.key; } // clang-format on - static constexpr VertexAttribute Make(Semantic semantic, u8 semantic_index, Type type, u8 components, u8 offset) + static constexpr VertexAttribute Make(u8 index, Type type, u8 components, u8 offset) { VertexAttribute ret = {}; #if 0 - ret.semantic = semantic; - ret.semantic_index = semantic_index; + ret.index = index; ret.type = type; ret.components = components; ret.offset = offset; #else // Nasty :/ can't access an inactive element of a union here.. 
- ret.key = (static_cast(semantic) & 0x7) | ((static_cast(semantic_index) & 0xff) << 4) | - ((static_cast(type) & 0xf) << 12) | ((static_cast(components) & 0x7) << 16) | - ((static_cast(offset) & 0xff) << 19); + ret.key = (static_cast(index) & 0xf) | ((static_cast(type) & 0xf) << 4) | + ((static_cast(components) & 0x7) << 8) | ((static_cast(offset) & 0xffff) << 16); #endif return ret; } @@ -229,6 +237,8 @@ public: InvSrcAlpha1, DstAlpha, InvDstAlpha, + ConstantColor, + InvConstantColor, MaxCount }; @@ -259,7 +269,7 @@ public: static RasterizationState GetNoCullState(); }; - struct DepthState + union DepthState { BitField depth_test; BitField depth_write; @@ -276,21 +286,22 @@ public: static DepthState GetAlwaysWriteState(); }; - struct BlendState + union BlendState { - BitField enable; - BitField src_blend; - BitField src_alpha_blend; - BitField dst_blend; - BitField dst_alpha_blend; - BitField blend_op; - BitField alpha_blend_op; - BitField write_r; - BitField write_g; - BitField write_b; - BitField write_a; - BitField write_mask; - u32 key; + BitField enable; + BitField src_blend; + BitField src_alpha_blend; + BitField dst_blend; + BitField dst_alpha_blend; + BitField blend_op; + BitField alpha_blend_op; + BitField write_r; + BitField write_g; + BitField write_b; + BitField write_a; + BitField write_mask; + BitField constant; + u64 key; // clang-format off ALWAYS_INLINE BlendState& operator=(const BlendState& rhs) { key = rhs.key; return *this; } @@ -323,8 +334,8 @@ public: bool per_sample_shading; }; - GPUPipeline() = default; - virtual ~GPUPipeline() = default; + GPUPipeline(); + virtual ~GPUPipeline(); virtual void SetDebugName(const std::string_view& name) = 0; }; @@ -332,6 +343,9 @@ public: class GPUDevice { public: + // TODO: drop virtuals + using DrawIndex = u16; + struct AdapterAndModeList { std::vector adapter_names; @@ -354,6 +368,9 @@ public: ALWAYS_INLINE s32 GetWindowHeight() const { return static_cast(m_window_info.surface_height); } ALWAYS_INLINE 
float GetWindowScale() const { return m_window_info.surface_scale; } + ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); } + ALWAYS_INLINE GPUSampler* GetPointSampler() const { return m_point_sampler.get(); } + // Position is relative to the top-left corner of the window. ALWAYS_INLINE s32 GetMousePositionX() const { return m_mouse_position_x; } ALWAYS_INLINE s32 GetMousePositionY() const { return m_mouse_position_y; } @@ -377,7 +394,7 @@ public: virtual bool HasSurface() const = 0; virtual bool CreateDevice(const WindowInfo& wi, bool vsync) = 0; - virtual bool SetupDevice() = 0; + virtual bool SetupDevice(); virtual bool MakeCurrent() = 0; virtual bool DoneCurrent() = 0; virtual void DestroySurface() = 0; @@ -389,35 +406,13 @@ public: virtual bool CreateResources(); virtual void DestroyResources(); - virtual bool SetPostProcessingChain(const std::string_view& config) = 0; + virtual bool SetPostProcessingChain(const std::string_view& config); + + virtual std::string GetShaderCacheBaseName(const std::string_view& type, bool debug) const; /// Call when the window size changes externally to recreate any resources. virtual void ResizeWindow(s32 new_window_width, s32 new_window_height) = 0; - /// Vertex/index buffer abstraction. - virtual void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex); - virtual void UnmapVertexBuffer(u32 used_vertex_count); - virtual void MapIndexBuffer(u32 index_count, u16** map_ptr, u32* map_space, u32* map_base_index); - virtual void UnmapIndexBuffer(u32 used_index_count); - - void UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex); - void UploadIndexBuffer(const u16* indices, u32 index_count, u32* base_index); - - /// Uniform buffer abstraction. - virtual void PushUniformBuffer(const void* data, u32 data_size); - - /// Drawing setup abstraction. 
- virtual void SetFramebuffer(GPUFramebuffer* fb); - virtual void SetPipeline(GPUPipeline* pipeline); - virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler); - virtual void SetViewport(u32 x, u32 y, u32 width, u32 height); - virtual void SetScissor(u32 x, u32 y, u32 width, u32 height); - void SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height); - - // Drawing abstraction. - virtual void Draw(u32 base_vertex, u32 vertex_count); - virtual void DrawIndexed(u32 base_index, u32 index_count, u32 base_vertex); - /// Creates an abstracted RGBA8 texture. If dynamic, the texture can be updated with UpdateTexture() below. virtual std::unique_ptr CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, GPUTexture::Type type, GPUTexture::Format format, @@ -432,23 +427,55 @@ public: virtual void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height); + void ClearRenderTarget(GPUTexture* t, u32 c); + void ClearDepth(GPUTexture* t, float d); + void InvalidateRenderTarget(GPUTexture* t); /// Framebuffer abstraction. - virtual std::unique_ptr CreateFramebuffer(GPUTexture* rt, u32 rt_layer, u32 rt_level, GPUTexture* ds, - u32 ds_layer, u32 ds_level); + virtual std::unique_ptr CreateFramebuffer(GPUTexture* rt = nullptr, u32 rt_layer = 0, + u32 rt_level = 0, GPUTexture* ds = nullptr, + u32 ds_layer = 0, u32 ds_level = 0); /// Shader abstraction. - virtual std::unique_ptr CreateShaderFromBinary(GPUShader::Stage stage, gsl::span data); - virtual std::unique_ptr CreateShaderFromSource(GPUShader::Stage stage, const std::string_view& source, - std::vector* out_binary = nullptr); + // TODO: entry point? + std::unique_ptr CreateShader(GPUShaderStage stage, const std::string_view& source); virtual std::unique_ptr CreatePipeline(const GPUPipeline::GraphicsConfig& config); + /// Debug messaging. 
+ virtual void PushDebugGroup(const char* fmt, ...); + virtual void PopDebugGroup(); + virtual void InsertDebugMessage(const char* fmt, ...); + + /// Vertex/index buffer abstraction. + virtual void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex); + virtual void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count); + virtual void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index); + virtual void UnmapIndexBuffer(u32 used_size); + + void UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex); + void UploadIndexBuffer(const DrawIndex* indices, u32 index_count, u32* base_index); + + /// Uniform buffer abstraction. + virtual void PushUniformBuffer(const void* data, u32 data_size); + + /// Drawing setup abstraction. + virtual void SetFramebuffer(GPUFramebuffer* fb); + virtual void SetPipeline(GPUPipeline* pipeline); + virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler); + virtual void SetViewport(s32 x, s32 y, s32 width, s32 height); + virtual void SetScissor(s32 x, s32 y, s32 width, s32 height); + void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height); + + // Drawing abstraction. + virtual void Draw(u32 vertex_count, u32 base_vertex); + virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex); + /// Returns false if the window was completely occluded. virtual bool Render(bool skip_present) = 0; /// Renders the display with postprocessing to the specified image. 
- virtual bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) = 0; + bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, + u32* out_stride, GPUTexture::Format* out_format); ALWAYS_INLINE bool IsVsyncEnabled() const { return m_vsync_enabled; } virtual void SetVSync(bool enabled) = 0; @@ -459,47 +486,11 @@ public: bool ShouldSkipDisplayingFrame(); void ThrottlePresentation(); - void ClearDisplayTexture() - { - m_display_texture = nullptr; - m_display_texture_view_x = 0; - m_display_texture_view_y = 0; - m_display_texture_view_width = 0; - m_display_texture_view_height = 0; - m_display_changed = true; - } - - void SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height) - { - m_display_texture = texture; - m_display_texture_view_x = view_x; - m_display_texture_view_y = view_y; - m_display_texture_view_width = view_width; - m_display_texture_view_height = view_height; - m_display_changed = true; - } - - void SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height) - { - m_display_texture_view_x = view_x; - m_display_texture_view_y = view_y; - m_display_texture_view_width = view_width; - m_display_texture_view_height = view_height; - m_display_changed = true; - } - + void ClearDisplayTexture(); + void SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height); + void SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height); void SetDisplayParameters(s32 display_width, s32 display_height, s32 active_left, s32 active_top, s32 active_width, - s32 active_height, float display_aspect_ratio) - { - m_display_width = display_width; - m_display_height = display_height; - m_display_active_left = active_left; - m_display_active_top = active_top; - m_display_active_width = active_width; - 
m_display_active_height = active_height; - m_display_aspect_ratio = display_aspect_ratio; - m_display_changed = true; - } + s32 active_height, float display_aspect_ratio); virtual bool SupportsTextureFormat(GPUTexture::Format format) const = 0; @@ -548,6 +539,10 @@ public: bool WriteScreenshotToFile(std::string filename, bool internal_resolution = false, bool compress_on_thread = false); protected: + virtual std::unique_ptr CreateShaderFromBinary(GPUShaderStage stage, gsl::span data); + virtual std::unique_ptr CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source, + std::vector* out_binary = nullptr); + ALWAYS_INLINE bool HasSoftwareCursor() const { return static_cast(m_cursor_texture); } ALWAYS_INLINE bool HasDisplayTexture() const { return (m_display_texture != nullptr); } @@ -560,13 +555,22 @@ protected: std::tuple CalculateSoftwareCursorDrawRect() const; std::tuple CalculateSoftwareCursorDrawRect(s32 cursor_x, s32 cursor_y) const; - bool CreateImGuiResources(); - void DestroyImGuiResources(); + void RenderImGui(); + + void RenderDisplay(); + void RenderSoftwareCursor(); + + void RenderDisplay(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture, s32 texture_view_x, + s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter); + void RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture); WindowInfo m_window_info; + GPUShaderCache m_shader_cache; + std::unique_ptr m_point_sampler; std::unique_ptr m_linear_sampler; + std::unique_ptr m_border_sampler; u64 m_last_frame_displayed_time = 0; @@ -582,6 +586,7 @@ protected: float m_display_aspect_ratio = 1.0f; float m_display_frame_interval = 0.0f; + std::unique_ptr m_display_pipeline; GPUTexture* m_display_texture = nullptr; s32 m_display_texture_view_x = 0; s32 m_display_texture_view_y = 0; @@ -591,6 +596,7 @@ protected: std::unique_ptr m_imgui_pipeline; std::unique_ptr m_imgui_font_texture; + std::unique_ptr m_cursor_pipeline; 
std::unique_ptr m_cursor_texture; float m_cursor_texture_scale = 1.0f; @@ -622,3 +628,24 @@ void ReleaseHostDisplay(); void RenderDisplay(bool skip_present); void InvalidateDisplay(); } // namespace Host + +// Macros for debug messages. +#ifdef _DEBUG +struct GLAutoPop +{ + GLAutoPop(int dummy) {} + ~GLAutoPop() { g_host_display->PopDebugGroup(); } +}; + +#define GL_SCOPE(...) GLAutoPop gl_auto_pop((g_host_display->PushDebugGroup(__VA_ARGS__), 0)) +#define GL_PUSH(...) g_host_display->PushDebugGroup(__VA_ARGS__) +#define GL_POP() g_host_display->PopDebugGroup() +#define GL_INS(...) g_host_display->InsertDebugMessage(__VA_ARGS__) +#define GL_OBJECT_NAME(obj, ...) (obj)->SetDebugName(StringUtil::StdStringFromFormat(__VA_ARGS__)) +#else +#define GL_SCOPE(...) (void)0 +#define GL_PUSH(...) (void)0 +#define GL_POP() (void)0 +#define GL_INS(...) (void)0 +#define GL_OBJECT_NAME(...) (void)0 +#endif diff --git a/src/core/gpu/gpu_shader_cache.cpp b/src/core/gpu/gpu_shader_cache.cpp new file mode 100644 index 000000000..ebce0bbec --- /dev/null +++ b/src/core/gpu/gpu_shader_cache.cpp @@ -0,0 +1,290 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "gpu_shader_cache.h" +#include "../shader_cache_version.h" +#include "gpu_device.h" + +#include "common/file_system.h" +#include "common/log.h" +#include "common/md5_digest.h" + +#include "fmt/format.h" + +Log_SetChannel(GPUShaderCache); + +#pragma pack(push, 1) +struct CacheIndexEntry +{ + u32 shader_type; + u32 source_length; + u64 source_hash_low; + u64 source_hash_high; + u64 entry_point_low; + u64 entry_point_high; + u32 file_offset; + u32 blob_size; +}; +#pragma pack(pop) + +GPUShaderCache::GPUShaderCache() = default; + +GPUShaderCache::~GPUShaderCache() +{ + Close(); +} + +bool GPUShaderCache::CacheIndexKey::operator==(const CacheIndexKey& key) const +{ + return (source_hash_low == key.source_hash_low && source_hash_high == key.source_hash_high && + 
entry_point_low == key.entry_point_low && entry_point_high == key.entry_point_high && + shader_type == key.shader_type && source_length == key.source_length); +} + +bool GPUShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const +{ + return (source_hash_low != key.source_hash_low || source_hash_high != key.source_hash_high || + entry_point_low != key.entry_point_low || entry_point_high != key.entry_point_high || + shader_type != key.shader_type || source_length != key.source_length); +} + +bool GPUShaderCache::Open(const std::string_view& base_filename) +{ + if (base_filename.empty()) + return false; + + m_base_filename = base_filename; + const std::string index_filename = fmt::format("{}.idx", m_base_filename); + const std::string blob_filename = fmt::format("{}.bin", m_base_filename); + + if (!ReadExisting(index_filename, blob_filename)) + return CreateNew(index_filename, blob_filename); + + return true; +} + +void GPUShaderCache::Close() +{ + if (m_index_file) + { + std::fclose(m_index_file); + m_index_file = nullptr; + } + if (m_blob_file) + { + std::fclose(m_blob_file); + m_blob_file = nullptr; + } +} + +void GPUShaderCache::Clear() +{ + if (!IsOpen()) + return; + + Close(); + + Log_WarningPrintf("Clearing shader cache at %s.", m_base_filename.c_str()); + + const std::string index_filename = fmt::format("{}.idx", m_base_filename); + const std::string blob_filename = fmt::format("{}.bin", m_base_filename); + CreateNew(index_filename, blob_filename); +} + +bool GPUShaderCache::CreateNew(const std::string& index_filename, const std::string& blob_filename) +{ + if (FileSystem::FileExists(index_filename.c_str())) + { + Log_WarningPrintf("Removing existing index file '%s'", index_filename.c_str()); + FileSystem::DeleteFile(index_filename.c_str()); + } + if (FileSystem::FileExists(blob_filename.c_str())) + { + Log_WarningPrintf("Removing existing blob file '%s'", blob_filename.c_str()); + FileSystem::DeleteFile(blob_filename.c_str()); + } + + 
m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "wb"); + if (!m_index_file) + { + Log_ErrorPrintf("Failed to open index file '%s' for writing", index_filename.c_str()); + return false; + } + + const u32 file_version = SHADER_CACHE_VERSION; + if (std::fwrite(&file_version, sizeof(file_version), 1, m_index_file) != 1) + { + Log_ErrorPrintf("Failed to write version to index file '%s'", index_filename.c_str()); + std::fclose(m_index_file); + m_index_file = nullptr; + FileSystem::DeleteFile(index_filename.c_str()); + return false; + } + + m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "w+b"); + if (!m_blob_file) + { + Log_ErrorPrintf("Failed to open blob file '%s' for writing", blob_filename.c_str()); + std::fclose(m_index_file); + m_index_file = nullptr; + FileSystem::DeleteFile(index_filename.c_str()); + return false; + } + + return true; +} + +bool GPUShaderCache::ReadExisting(const std::string& index_filename, const std::string& blob_filename) +{ + m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "r+b"); + if (!m_index_file) + { + // special case here: when there's a sharing violation (i.e. two instances running), + // we don't want to blow away the cache. so just continue without a cache. 
+ if (errno == EACCES) + { + Log_WarningPrintf("Failed to open shader cache index with EACCES, are you running two instances?"); + return true; + } + + return false; + } + + u32 file_version = 0; + if (std::fread(&file_version, sizeof(file_version), 1, m_index_file) != 1 || file_version != SHADER_CACHE_VERSION) + { + Log_ErrorPrintf("Bad file/data version in '%s'", index_filename.c_str()); + std::fclose(m_index_file); + m_index_file = nullptr; + return false; + } + + m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "a+b"); + if (!m_blob_file) + { + Log_ErrorPrintf("Blob file '%s' is missing", blob_filename.c_str()); + std::fclose(m_index_file); + m_index_file = nullptr; + return false; + } + + std::fseek(m_blob_file, 0, SEEK_END); + const u32 blob_file_size = static_cast(std::ftell(m_blob_file)); + + for (;;) + { + CacheIndexEntry entry; + if (std::fread(&entry, sizeof(entry), 1, m_index_file) != 1 || + (entry.file_offset + entry.blob_size) > blob_file_size) + { + if (std::feof(m_index_file)) + break; + + Log_ErrorPrintf("Failed to read entry from '%s', corrupt file?", index_filename.c_str()); + m_index.clear(); + std::fclose(m_blob_file); + m_blob_file = nullptr; + std::fclose(m_index_file); + m_index_file = nullptr; + return false; + } + + const CacheIndexKey key{static_cast(entry.shader_type), + entry.source_length, + entry.source_hash_low, + entry.source_hash_high, + entry.entry_point_low, + entry.entry_point_high}; + const CacheIndexData data{entry.file_offset, entry.blob_size}; + m_index.emplace(key, data); + } + + // ensure we don't write before seeking + std::fseek(m_index_file, 0, SEEK_END); + + Log_DevPrintf("Read %zu entries from '%s'", m_index.size(), index_filename.c_str()); + return true; +} + +GPUShaderCache::CacheIndexKey GPUShaderCache::GetCacheKey(GPUShaderStage stage, const std::string_view& shader_code, + const std::string_view& entry_point) +{ + union + { + struct + { + u64 hash_low; + u64 hash_high; + }; + u8 hash[16]; + }; + + 
CacheIndexKey key = {}; + key.shader_type = stage; + + MD5Digest digest; + digest.Update(shader_code.data(), static_cast(shader_code.length())); + digest.Final(hash); + key.source_hash_low = hash_low; + key.source_hash_high = hash_high; + key.source_length = static_cast(shader_code.length()); + + digest.Reset(); + digest.Update(entry_point.data(), static_cast(entry_point.length())); + digest.Final(hash); + key.entry_point_low = hash_low; + key.entry_point_high = hash_high; + + return key; +} + +bool GPUShaderCache::Lookup(const CacheIndexKey& key, ShaderBinary* binary) +{ + auto iter = m_index.find(key); + if (iter == m_index.end()) + return false; + + // TODO: extra memset :/ + binary->resize(iter->second.blob_size); + if (std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 || + std::fread(binary->data(), 1, iter->second.blob_size, m_blob_file) != iter->second.blob_size) + { + Log_ErrorPrintf("Read %u byte %s shader from file failed", iter->second.blob_size, + GPUShader::GetStageName(key.shader_type)); + return false; + } + + return true; +} + +bool GPUShaderCache::Insert(const CacheIndexKey& key, const void* data, u32 data_size) +{ + if (!m_blob_file || std::fseek(m_blob_file, 0, SEEK_END) != 0) + return false; + + CacheIndexData idata; + idata.file_offset = static_cast(std::ftell(m_blob_file)); + idata.blob_size = data_size; + + CacheIndexEntry entry = {}; + entry.shader_type = static_cast(key.shader_type); + entry.source_length = key.source_length; + entry.source_hash_low = key.source_hash_low; + entry.source_hash_high = key.source_hash_high; + entry.entry_point_low = key.entry_point_low; + entry.entry_point_high = key.entry_point_high; + entry.blob_size = idata.blob_size; + entry.file_offset = idata.file_offset; + + if (std::fwrite(data, 1, data_size, m_blob_file) != data_size || std::fflush(m_blob_file) != 0 || + std::fwrite(&entry, sizeof(entry), 1, m_index_file) != 1 || std::fflush(m_index_file) != 0) + { + Log_ErrorPrintf("Failed to write %u 
byte %s shader blob to file", data_size, + GPUShader::GetStageName(key.shader_type)); + return false; + } + + Log_DevPrintf("Cached %u byte %s shader", data_size, GPUShader::GetStageName(key.shader_type)); + m_index.emplace(key, idata); + return true; +} diff --git a/src/core/gpu/gpu_shader_cache.h b/src/core/gpu/gpu_shader_cache.h new file mode 100644 index 000000000..fab8c0a23 --- /dev/null +++ b/src/core/gpu/gpu_shader_cache.h @@ -0,0 +1,76 @@ +// SPDX-FileCopyrightText: 2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include "common/hash_combine.h" +#include "common/types.h" + +#include +#include +#include + +enum class GPUShaderStage : u8; + +class GPUShaderCache +{ +public: + using ShaderBinary = std::vector; + + struct CacheIndexKey + { + GPUShaderStage shader_type; + u32 source_length; + u64 source_hash_low; + u64 source_hash_high; + u64 entry_point_low; + u64 entry_point_high; + + bool operator==(const CacheIndexKey& key) const; + bool operator!=(const CacheIndexKey& key) const; + }; + + GPUShaderCache(); + ~GPUShaderCache(); + + bool IsOpen() const { return (m_index_file != nullptr); } + + bool Open(const std::string_view& base_filename); + void Close(); + + static CacheIndexKey GetCacheKey(GPUShaderStage stage, const std::string_view& shader_code, + const std::string_view& entry_point); + + bool Lookup(const CacheIndexKey& key, ShaderBinary* binary); + bool Insert(const CacheIndexKey& key, const void* data, u32 data_size); + void Clear(); + +private: + struct CacheIndexEntryHasher + { + std::size_t operator()(const CacheIndexKey& e) const noexcept + { + std::size_t h = 0; + hash_combine(h, e.entry_point_low, e.entry_point_high, e.source_hash_low, e.source_hash_high, e.source_length, + e.shader_type); + return h; + } + }; + + struct CacheIndexData + { + u32 file_offset; + u32 blob_size; + }; + + using CacheIndex = std::unordered_map; + + bool CreateNew(const std::string& index_filename, const 
std::string& blob_filename); + bool ReadExisting(const std::string& index_filename, const std::string& blob_filename); + + std::string m_base_filename; + std::FILE* m_index_file = nullptr; + std::FILE* m_blob_file = nullptr; + + CacheIndex m_index; +}; diff --git a/src/core/gpu/gpu_texture.cpp b/src/core/gpu/gpu_texture.cpp index a8f6a96a2..6e7a59f15 100644 --- a/src/core/gpu/gpu_texture.cpp +++ b/src/core/gpu/gpu_texture.cpp @@ -145,4 +145,8 @@ void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector& t std::memcpy(top_ptr, bottom_ptr, texture_data_stride); std::memcpy(bottom_ptr, temp.data(), texture_data_stride); } -} \ No newline at end of file +} + +void GPUTexture::MakeReadyForSampling() +{ +} diff --git a/src/core/gpu/gpu_texture.h b/src/core/gpu/gpu_texture.h index f90ac9036..3d59a6989 100644 --- a/src/core/gpu/gpu_texture.h +++ b/src/core/gpu/gpu_texture.h @@ -4,6 +4,7 @@ #pragma once #include "common/types.h" #include +#include #include class GPUTexture @@ -39,6 +40,19 @@ public: Count }; + enum class State : u8 + { + Dirty, + Cleared, + Invalidated + }; + + union ClearValue + { + u32 color; + float depth; + }; + public: virtual ~GPUTexture(); @@ -57,6 +71,38 @@ public: ALWAYS_INLINE u32 GetMipWidth(u32 level) const { return std::max(m_width >> level, 1u); } ALWAYS_INLINE u32 GetMipHeight(u32 level) const { return std::max(m_height >> level, 1u); } + ALWAYS_INLINE State GetState() const { return m_state; } + ALWAYS_INLINE void SetState(State state) { m_state = state; } + + ALWAYS_INLINE bool IsRenderTargetOrDepthStencil() const + { + return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil); + } + ALWAYS_INLINE bool IsRenderTarget() const { return (m_type == Type::RenderTarget); } + ALWAYS_INLINE bool IsDepthStencil() const { return (m_type == Type::DepthStencil); } + ALWAYS_INLINE bool IsTexture() const { return (m_type == Type::Texture); } + + ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; } + 
ALWAYS_INLINE float GetClearDepth() const { return m_clear_value.depth; } + ALWAYS_INLINE std::array GetUNormClearColor() const + { + return std::array{static_cast((m_clear_value.color) & 0xFF) / 255.0f, + static_cast((m_clear_value.color >> 8) & 0xFF) / 255.0f, + static_cast((m_clear_value.color >> 16) & 0xFF) / 255.0f, + static_cast((m_clear_value.color >> 24) & 0xFF) / 255.0f}; + } + + ALWAYS_INLINE void SetClearColor(u32 color) + { + m_state = State::Cleared; + m_clear_value.color = color; + } + ALWAYS_INLINE void SetClearDepth(float depth) + { + m_state = State::Cleared; + m_clear_value.depth = depth; + } + static u32 GetPixelSize(GPUTexture::Format format); static bool IsDepthFormat(GPUTexture::Format format); @@ -71,6 +117,9 @@ public: virtual bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) = 0; virtual void Unmap() = 0; + // Instructs the backend that we're finished rendering to this texture. It may transition it to a new layout. 
+ virtual void MakeReadyForSampling(); + protected: GPUTexture(); GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Format format); @@ -84,11 +133,7 @@ protected: u8 m_samples = 0; Type m_type = Type::Unknown; Format m_format = Format::Unknown; + State m_state = State::Dirty; -// u16 m_map_x = 0; -// u16 m_map_y = 0; -// u16 m_map_width = 0; -// u16 m_map_height = 0; -// u8 m_map_layer = 0; -// u8 m_map_level = 0; + ClearValue m_clear_value = {}; }; diff --git a/src/core/gpu/opengl_gpu_device.cpp b/src/core/gpu/opengl_gpu_device.cpp index 8bdc9235d..8a44ad8b9 100644 --- a/src/core/gpu/opengl_gpu_device.cpp +++ b/src/core/gpu/opengl_gpu_device.cpp @@ -684,49 +684,6 @@ bool OpenGLGPUDevice::Render(bool skip_present) return true; } -bool OpenGLGPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) -{ - GL::Texture texture; - if (!texture.Create(width, height, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0) || !texture.CreateFramebuffer()) - { - return false; - } - - glDisable(GL_SCISSOR_TEST); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - - if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(texture.GetGLFramebufferID(), draw_rect.left, - height - draw_rect.top - draw_rect.GetHeight(), draw_rect.GetWidth(), - draw_rect.GetHeight(), static_cast(m_display_texture), - m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width, - m_display_texture_view_height, width, height); - } - else - { - texture.BindFramebuffer(GL_FRAMEBUFFER); - glClear(GL_COLOR_BUFFER_BIT); - - if (HasDisplayTexture()) - { - RenderDisplay(draw_rect.left, height - draw_rect.top - draw_rect.GetHeight(), draw_rect.GetWidth(), - draw_rect.GetHeight(), static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - 
IsUsingLinearFiltering()); - } - } - - out_pixels->resize(width * height); - *out_stride = sizeof(u32) * width; - *out_format = GPUTexture::Format::RGBA8; - glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, out_pixels->data()); - glBindFramebuffer(GL_FRAMEBUFFER, 0); - - return true; -} - void OpenGLGPUDevice::RenderImGui() { ImGui::Render(); diff --git a/src/core/gpu/opengl_gpu_device.h b/src/core/gpu/opengl_gpu_device.h index d8a244c75..74706494d 100644 --- a/src/core/gpu/opengl_gpu_device.h +++ b/src/core/gpu/opengl_gpu_device.h @@ -52,8 +52,6 @@ public: void SetVSync(bool enabled) override; bool Render(bool skip_present) override; - bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, - u32* out_stride, GPUTexture::Format* out_format) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; diff --git a/src/core/gpu/vulkan/entry_points.h b/src/core/gpu/vulkan/entry_points.h index df2921763..83749d015 100644 --- a/src/core/gpu/vulkan/entry_points.h +++ b/src/core/gpu/vulkan/entry_points.h @@ -7,14 +7,10 @@ extern "C" { #endif -// We abuse the preprocessor here to only need to specify function names once. -// Function names are prefixed so to not conflict with system symbols at runtime. 
-#define VULKAN_MODULE_ENTRY_POINT(name, required) extern PFN_##name ds_##name; -#define VULKAN_INSTANCE_ENTRY_POINT(name, required) extern PFN_##name ds_##name; -#define VULKAN_DEVICE_ENTRY_POINT(name, required) extern PFN_##name ds_##name; -#define VULKAN_DEFINE_NAME_PREFIX ds_ +#define VULKAN_MODULE_ENTRY_POINT(name, required) extern PFN_##name name; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) extern PFN_##name name; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) extern PFN_##name name; #include "entry_points.inl" -#undef VULKAN_DEFINE_NAME_PREFIX #undef VULKAN_DEVICE_ENTRY_POINT #undef VULKAN_INSTANCE_ENTRY_POINT #undef VULKAN_MODULE_ENTRY_POINT @@ -22,200 +18,3 @@ extern "C" { #ifdef __cplusplus } #endif - -#define vkCreateInstance ds_vkCreateInstance -#define vkGetInstanceProcAddr ds_vkGetInstanceProcAddr -#define vkEnumerateInstanceExtensionProperties ds_vkEnumerateInstanceExtensionProperties -#define vkEnumerateInstanceLayerProperties ds_vkEnumerateInstanceLayerProperties -#define vkEnumerateInstanceVersion ds_vkEnumerateInstanceVersion - -#define vkGetDeviceProcAddr ds_vkGetDeviceProcAddr -#define vkDestroyInstance ds_vkDestroyInstance -#define vkEnumeratePhysicalDevices ds_vkEnumeratePhysicalDevices -#define vkGetPhysicalDeviceFeatures ds_vkGetPhysicalDeviceFeatures -#define vkGetPhysicalDeviceFormatProperties ds_vkGetPhysicalDeviceFormatProperties -#define vkGetPhysicalDeviceImageFormatProperties ds_vkGetPhysicalDeviceImageFormatProperties -#define vkGetPhysicalDeviceProperties ds_vkGetPhysicalDeviceProperties -#define vkGetPhysicalDeviceQueueFamilyProperties ds_vkGetPhysicalDeviceQueueFamilyProperties -#define vkGetPhysicalDeviceMemoryProperties ds_vkGetPhysicalDeviceMemoryProperties -#define vkCreateDevice ds_vkCreateDevice -#define vkEnumerateDeviceExtensionProperties ds_vkEnumerateDeviceExtensionProperties -#define vkEnumerateDeviceLayerProperties ds_vkEnumerateDeviceLayerProperties -#define vkGetPhysicalDeviceSparseImageFormatProperties 
ds_vkGetPhysicalDeviceSparseImageFormatProperties -#define vkDestroySurfaceKHR ds_vkDestroySurfaceKHR -#define vkGetPhysicalDeviceSurfaceSupportKHR ds_vkGetPhysicalDeviceSurfaceSupportKHR -#define vkGetPhysicalDeviceSurfaceCapabilitiesKHR ds_vkGetPhysicalDeviceSurfaceCapabilitiesKHR -#define vkGetPhysicalDeviceSurfaceFormatsKHR ds_vkGetPhysicalDeviceSurfaceFormatsKHR -#define vkGetPhysicalDeviceSurfacePresentModesKHR ds_vkGetPhysicalDeviceSurfacePresentModesKHR -#define vkCreateWin32SurfaceKHR ds_vkCreateWin32SurfaceKHR -#define vkGetPhysicalDeviceWin32PresentationSupportKHR ds_vkGetPhysicalDeviceWin32PresentationSupportKHR -#define vkCreateXlibSurfaceKHR ds_vkCreateXlibSurfaceKHR -#define vkGetPhysicalDeviceXlibPresentationSupportKHR ds_vkGetPhysicalDeviceXlibPresentationSupportKHR -#define vkCreateWaylandSurfaceKHR ds_vkCreateWaylandSurfaceKHR -#define vkCreateAndroidSurfaceKHR ds_vkCreateAndroidSurfaceKHR -#define vkCreateMacOSSurfaceMVK ds_vkCreateMacOSSurfaceMVK -#define vkCreateMetalSurfaceEXT ds_vkCreateMetalSurfaceEXT - -// VK_EXT_debug_utils -#define vkCmdBeginDebugUtilsLabelEXT ds_vkCmdBeginDebugUtilsLabelEXT -#define vkCmdEndDebugUtilsLabelEXT ds_vkCmdEndDebugUtilsLabelEXT -#define vkCmdInsertDebugUtilsLabelEXT ds_vkCmdInsertDebugUtilsLabelEXT -#define vkCreateDebugUtilsMessengerEXT ds_vkCreateDebugUtilsMessengerEXT -#define vkDestroyDebugUtilsMessengerEXT ds_vkDestroyDebugUtilsMessengerEXT -#define vkQueueBeginDebugUtilsLabelEXT ds_vkQueueBeginDebugUtilsLabelEXT -#define vkQueueEndDebugUtilsLabelEXT ds_vkQueueEndDebugUtilsLabelEXT -#define vkQueueInsertDebugUtilsLabelEXT ds_vkQueueInsertDebugUtilsLabelEXT -#define vkSetDebugUtilsObjectNameEXT ds_vkSetDebugUtilsObjectNameEXT -#define vkSetDebugUtilsObjectTagEXT ds_vkSetDebugUtilsObjectTagEXT -#define vkSubmitDebugUtilsMessageEXT ds_vkSubmitDebugUtilsMessageEXT - -#define vkGetPhysicalDeviceSurfaceCapabilities2KHR ds_vkGetPhysicalDeviceSurfaceCapabilities2KHR -#define 
vkGetPhysicalDeviceDisplayPropertiesKHR ds_vkGetPhysicalDeviceDisplayPropertiesKHR -#define vkGetPhysicalDeviceDisplayPlanePropertiesKHR ds_vkGetPhysicalDeviceDisplayPlanePropertiesKHR -#define vkGetDisplayPlaneSupportedDisplaysKHR ds_vkGetDisplayPlaneSupportedDisplaysKHR -#define vkGetDisplayModePropertiesKHR ds_vkGetDisplayModePropertiesKHR -#define vkCreateDisplayModeKHR ds_vkCreateDisplayModeKHR -#define vkGetDisplayPlaneCapabilitiesKHR ds_vkGetDisplayPlaneCapabilitiesKHR -#define vkCreateDisplayPlaneSurfaceKHR ds_vkCreateDisplayPlaneSurfaceKHR - -// Vulkan 1.1 functions. -#define vkGetPhysicalDeviceFeatures2 ds_vkGetPhysicalDeviceFeatures2 -#define vkGetPhysicalDeviceProperties2 ds_vkGetPhysicalDeviceProperties2 -#define vkGetPhysicalDeviceMemoryProperties2 ds_vkGetPhysicalDeviceMemoryProperties2 - -#define vkDestroyDevice ds_vkDestroyDevice -#define vkGetDeviceQueue ds_vkGetDeviceQueue -#define vkQueueSubmit ds_vkQueueSubmit -#define vkQueueWaitIdle ds_vkQueueWaitIdle -#define vkDeviceWaitIdle ds_vkDeviceWaitIdle -#define vkAllocateMemory ds_vkAllocateMemory -#define vkFreeMemory ds_vkFreeMemory -#define vkMapMemory ds_vkMapMemory -#define vkUnmapMemory ds_vkUnmapMemory -#define vkFlushMappedMemoryRanges ds_vkFlushMappedMemoryRanges -#define vkInvalidateMappedMemoryRanges ds_vkInvalidateMappedMemoryRanges -#define vkGetDeviceMemoryCommitment ds_vkGetDeviceMemoryCommitment -#define vkBindBufferMemory ds_vkBindBufferMemory -#define vkBindImageMemory ds_vkBindImageMemory -#define vkGetBufferMemoryRequirements ds_vkGetBufferMemoryRequirements -#define vkGetImageMemoryRequirements ds_vkGetImageMemoryRequirements -#define vkGetImageSparseMemoryRequirements ds_vkGetImageSparseMemoryRequirements -#define vkQueueBindSparse ds_vkQueueBindSparse -#define vkCreateFence ds_vkCreateFence -#define vkDestroyFence ds_vkDestroyFence -#define vkResetFences ds_vkResetFences -#define vkGetFenceStatus ds_vkGetFenceStatus -#define vkWaitForFences ds_vkWaitForFences -#define 
vkCreateSemaphore ds_vkCreateSemaphore -#define vkDestroySemaphore ds_vkDestroySemaphore -#define vkCreateEvent ds_vkCreateEvent -#define vkDestroyEvent ds_vkDestroyEvent -#define vkGetEventStatus ds_vkGetEventStatus -#define vkSetEvent ds_vkSetEvent -#define vkResetEvent ds_vkResetEvent -#define vkCreateQueryPool ds_vkCreateQueryPool -#define vkDestroyQueryPool ds_vkDestroyQueryPool -#define vkGetQueryPoolResults ds_vkGetQueryPoolResults -#define vkCreateBuffer ds_vkCreateBuffer -#define vkDestroyBuffer ds_vkDestroyBuffer -#define vkCreateBufferView ds_vkCreateBufferView -#define vkDestroyBufferView ds_vkDestroyBufferView -#define vkCreateImage ds_vkCreateImage -#define vkDestroyImage ds_vkDestroyImage -#define vkGetImageSubresourceLayout ds_vkGetImageSubresourceLayout -#define vkCreateImageView ds_vkCreateImageView -#define vkDestroyImageView ds_vkDestroyImageView -#define vkCreateShaderModule ds_vkCreateShaderModule -#define vkDestroyShaderModule ds_vkDestroyShaderModule -#define vkCreatePipelineCache ds_vkCreatePipelineCache -#define vkDestroyPipelineCache ds_vkDestroyPipelineCache -#define vkGetPipelineCacheData ds_vkGetPipelineCacheData -#define vkMergePipelineCaches ds_vkMergePipelineCaches -#define vkCreateGraphicsPipelines ds_vkCreateGraphicsPipelines -#define vkCreateComputePipelines ds_vkCreateComputePipelines -#define vkDestroyPipeline ds_vkDestroyPipeline -#define vkCreatePipelineLayout ds_vkCreatePipelineLayout -#define vkDestroyPipelineLayout ds_vkDestroyPipelineLayout -#define vkCreateSampler ds_vkCreateSampler -#define vkDestroySampler ds_vkDestroySampler -#define vkCreateDescriptorSetLayout ds_vkCreateDescriptorSetLayout -#define vkDestroyDescriptorSetLayout ds_vkDestroyDescriptorSetLayout -#define vkCreateDescriptorPool ds_vkCreateDescriptorPool -#define vkDestroyDescriptorPool ds_vkDestroyDescriptorPool -#define vkResetDescriptorPool ds_vkResetDescriptorPool -#define vkAllocateDescriptorSets ds_vkAllocateDescriptorSets -#define 
vkFreeDescriptorSets ds_vkFreeDescriptorSets -#define vkUpdateDescriptorSets ds_vkUpdateDescriptorSets -#define vkCreateFramebuffer ds_vkCreateFramebuffer -#define vkDestroyFramebuffer ds_vkDestroyFramebuffer -#define vkCreateRenderPass ds_vkCreateRenderPass -#define vkDestroyRenderPass ds_vkDestroyRenderPass -#define vkGetRenderAreaGranularity ds_vkGetRenderAreaGranularity -#define vkCreateCommandPool ds_vkCreateCommandPool -#define vkDestroyCommandPool ds_vkDestroyCommandPool -#define vkResetCommandPool ds_vkResetCommandPool -#define vkAllocateCommandBuffers ds_vkAllocateCommandBuffers -#define vkFreeCommandBuffers ds_vkFreeCommandBuffers -#define vkBeginCommandBuffer ds_vkBeginCommandBuffer -#define vkEndCommandBuffer ds_vkEndCommandBuffer -#define vkResetCommandBuffer ds_vkResetCommandBuffer -#define vkCmdBindPipeline ds_vkCmdBindPipeline -#define vkCmdSetViewport ds_vkCmdSetViewport -#define vkCmdSetScissor ds_vkCmdSetScissor -#define vkCmdSetLineWidth ds_vkCmdSetLineWidth -#define vkCmdSetDepthBias ds_vkCmdSetDepthBias -#define vkCmdSetBlendConstants ds_vkCmdSetBlendConstants -#define vkCmdSetDepthBounds ds_vkCmdSetDepthBounds -#define vkCmdSetStencilCompareMask ds_vkCmdSetStencilCompareMask -#define vkCmdSetStencilWriteMask ds_vkCmdSetStencilWriteMask -#define vkCmdSetStencilReference ds_vkCmdSetStencilReference -#define vkCmdBindDescriptorSets ds_vkCmdBindDescriptorSets -#define vkCmdBindIndexBuffer ds_vkCmdBindIndexBuffer -#define vkCmdBindVertexBuffers ds_vkCmdBindVertexBuffers -#define vkCmdDraw ds_vkCmdDraw -#define vkCmdDrawIndexed ds_vkCmdDrawIndexed -#define vkCmdDrawIndirect ds_vkCmdDrawIndirect -#define vkCmdDrawIndexedIndirect ds_vkCmdDrawIndexedIndirect -#define vkCmdDispatch ds_vkCmdDispatch -#define vkCmdDispatchIndirect ds_vkCmdDispatchIndirect -#define vkCmdCopyBuffer ds_vkCmdCopyBuffer -#define vkCmdCopyImage ds_vkCmdCopyImage -#define vkCmdBlitImage ds_vkCmdBlitImage -#define vkCmdCopyBufferToImage ds_vkCmdCopyBufferToImage -#define 
vkCmdCopyImageToBuffer ds_vkCmdCopyImageToBuffer -#define vkCmdUpdateBuffer ds_vkCmdUpdateBuffer -#define vkCmdFillBuffer ds_vkCmdFillBuffer -#define vkCmdClearColorImage ds_vkCmdClearColorImage -#define vkCmdClearDepthStencilImage ds_vkCmdClearDepthStencilImage -#define vkCmdClearAttachments ds_vkCmdClearAttachments -#define vkCmdResolveImage ds_vkCmdResolveImage -#define vkCmdSetEvent ds_vkCmdSetEvent -#define vkCmdResetEvent ds_vkCmdResetEvent -#define vkCmdWaitEvents ds_vkCmdWaitEvents -#define vkCmdPipelineBarrier ds_vkCmdPipelineBarrier -#define vkCmdBeginQuery ds_vkCmdBeginQuery -#define vkCmdEndQuery ds_vkCmdEndQuery -#define vkCmdResetQueryPool ds_vkCmdResetQueryPool -#define vkCmdWriteTimestamp ds_vkCmdWriteTimestamp -#define vkCmdCopyQueryPoolResults ds_vkCmdCopyQueryPoolResults -#define vkCmdPushConstants ds_vkCmdPushConstants -#define vkCmdBeginRenderPass ds_vkCmdBeginRenderPass -#define vkCmdNextSubpass ds_vkCmdNextSubpass -#define vkCmdEndRenderPass ds_vkCmdEndRenderPass -#define vkCmdExecuteCommands ds_vkCmdExecuteCommands -#define vkCreateSwapchainKHR ds_vkCreateSwapchainKHR -#define vkDestroySwapchainKHR ds_vkDestroySwapchainKHR -#define vkGetSwapchainImagesKHR ds_vkGetSwapchainImagesKHR -#define vkAcquireNextImageKHR ds_vkAcquireNextImageKHR -#define vkQueuePresentKHR ds_vkQueuePresentKHR - -// Vulkan 1.1 functions. 
-#define vkGetBufferMemoryRequirements2 ds_vkGetBufferMemoryRequirements2 -#define vkGetImageMemoryRequirements2 ds_vkGetImageMemoryRequirements2 -#define vkBindBufferMemory2 ds_vkBindBufferMemory2 -#define vkBindImageMemory2 ds_vkBindImageMemory2 - -#ifdef SUPPORTS_VULKAN_EXCLUSIVE_FULLSCREEN -#define vkAcquireFullScreenExclusiveModeEXT ds_vkAcquireFullScreenExclusiveModeEXT -#define vkReleaseFullScreenExclusiveModeEXT ds_vkReleaseFullScreenExclusiveModeEXT -#endif \ No newline at end of file diff --git a/src/core/gpu/vulkan/loader.cpp b/src/core/gpu/vulkan/loader.cpp index 10718f202..23cb46de2 100644 --- a/src/core/gpu/vulkan/loader.cpp +++ b/src/core/gpu/vulkan/loader.cpp @@ -22,9 +22,9 @@ extern "C" { -#define VULKAN_MODULE_ENTRY_POINT(name, required) PFN_##name ds_##name; -#define VULKAN_INSTANCE_ENTRY_POINT(name, required) PFN_##name ds_##name; -#define VULKAN_DEVICE_ENTRY_POINT(name, required) PFN_##name ds_##name; +#define VULKAN_MODULE_ENTRY_POINT(name, required) PFN_##name name; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) PFN_##name name; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) PFN_##name name; #include "entry_points.inl" #undef VULKAN_DEVICE_ENTRY_POINT #undef VULKAN_INSTANCE_ENTRY_POINT @@ -34,9 +34,9 @@ extern "C" { namespace Vulkan { void ResetVulkanLibraryFunctionPointers() { -#define VULKAN_MODULE_ENTRY_POINT(name, required) ds_##name = nullptr; -#define VULKAN_INSTANCE_ENTRY_POINT(name, required) ds_##name = nullptr; -#define VULKAN_DEVICE_ENTRY_POINT(name, required) ds_##name = nullptr; +#define VULKAN_MODULE_ENTRY_POINT(name, required) name = nullptr; +#define VULKAN_INSTANCE_ENTRY_POINT(name, required) name = nullptr; +#define VULKAN_DEVICE_ENTRY_POINT(name, required) name = nullptr; #include "entry_points.inl" #undef VULKAN_DEVICE_ENTRY_POINT #undef VULKAN_INSTANCE_ENTRY_POINT diff --git a/src/core/gpu/vulkan_gpu_device.cpp b/src/core/gpu/vulkan_gpu_device.cpp index 55da366dd..bd8b3d5b2 100644 --- 
a/src/core/gpu/vulkan_gpu_device.cpp +++ b/src/core/gpu/vulkan_gpu_device.cpp @@ -680,102 +680,6 @@ bool VulkanGPUDevice::Render(bool skip_present) return true; } -bool VulkanGPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, - std::vector* out_pixels, u32* out_stride, GPUTexture::Format* out_format) -{ - // in theory we could do this without a swap chain, but postprocessing assumes it for now... - if (!m_swap_chain) - return false; - - const VkFormat format = m_swap_chain ? m_swap_chain->GetTextureFormat() : VK_FORMAT_R8G8B8A8_UNORM; - switch (format) - { - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R8G8B8A8_SRGB: - *out_format = GPUTexture::Format::RGBA8; - *out_stride = sizeof(u32) * width; - out_pixels->resize(width * height); - break; - - case VK_FORMAT_B8G8R8A8_UNORM: - case VK_FORMAT_B8G8R8A8_SRGB: - *out_format = GPUTexture::Format::BGRA8; - *out_stride = sizeof(u32) * width; - out_pixels->resize(width * height); - break; - - case VK_FORMAT_A1R5G5B5_UNORM_PACK16: - *out_format = GPUTexture::Format::RGBA5551; - *out_stride = sizeof(u16) * width; - out_pixels->resize(((width * height) + 1) / 2); - break; - - case VK_FORMAT_R5G6B5_UNORM_PACK16: - *out_format = GPUTexture::Format::RGB565; - *out_stride = sizeof(u16) * width; - out_pixels->resize(((width * height) + 1) / 2); - break; - - default: - Log_ErrorPrintf("Unhandled swap chain pixel format %u", static_cast(format)); - break; - } - - // if we don't have a texture (display off), then just write out nothing. - if (!HasDisplayTexture()) - { - std::fill(out_pixels->begin(), out_pixels->end(), static_cast(0)); - return true; - } - - Vulkan::Texture tex; - if (!tex.Create(width, height, 1, 1, format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) - { - return false; - } - - const VkRenderPass rp = - m_swap_chain ? 
- m_swap_chain->GetClearRenderPass() : - g_vulkan_context->GetRenderPass(format, VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR); - if (!rp) - return false; - - const VkFramebuffer fb = tex.CreateFramebuffer(rp); - if (!fb) - return false; - const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(), - "VulkanHostDisplay::RenderScreenshot: %ux%u", width, height); - tex.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - if (!m_post_processing_chain.IsEmpty()) - { - ApplyPostProcessingChain(fb, draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), - static_cast(m_display_texture), m_display_texture_view_x, - m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height, - width, height); - } - else - { - BeginSwapChainRenderPass(fb, width, height); - RenderDisplay(draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), - static_cast(m_display_texture), m_display_texture_view_x, m_display_texture_view_y, - m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering()); - } - - vkCmdEndRenderPass(g_vulkan_context->GetCurrentCommandBuffer()); - Vulkan::Util::EndDebugScope(g_vulkan_context->GetCurrentCommandBuffer()); - tex.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - DownloadTexture(&tex, 0, 0, width, height, out_pixels->data(), *out_stride); - - // destroying these immediately should be safe since nothing's going to access them, and it's not part of the command - // stream - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), fb, nullptr); - tex.Destroy(false); - return true; -} - void VulkanGPUDevice::BeginSwapChainRenderPass(VkFramebuffer framebuffer, u32 width, u32 height) { const VkClearValue clear_value = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; diff --git a/src/core/gpu/vulkan_gpu_device.h 
b/src/core/gpu/vulkan_gpu_device.h index d7ff9c492..280cebaa2 100644 --- a/src/core/gpu/vulkan_gpu_device.h +++ b/src/core/gpu/vulkan_gpu_device.h @@ -56,8 +56,6 @@ public: void SetVSync(bool enabled) override; bool Render(bool skip_present) override; - bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle& draw_rect, std::vector* out_pixels, - u32* out_stride, GPUTexture::Format* out_format) override; bool SetGPUTimingEnabled(bool enabled) override; float GetAndResetAccumulatedGPUTime() override; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 52f4f881d..7387611a3 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -5,7 +5,10 @@ #include "common/align.h" #include "common/assert.h" #include "common/log.h" +#include "common/scoped_guard.h" +#include "common/string_util.h" #include "cpu_core.h" +#include "gpu_hw_shadergen.h" #include "gpu_sw_backend.h" #include "host.h" #include "imgui.h" @@ -180,6 +183,38 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di return true; } +void GPU_HW::UpdateSettings() +{ + // TODO: Merge UpdateHWSettings() into here. 
+ bool framebuffer_changed, shaders_changed; + UpdateHWSettings(&framebuffer_changed, &shaders_changed); + + if (framebuffer_changed) + { + RestoreGraphicsAPIState(); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + ResetGraphicsAPIState(); + g_host_display->ClearDisplayTexture(); + CreateFramebuffer(); + } + + if (shaders_changed) + { + DestroyPipelines(); + if (!CompilePipelines()) + Panic("Failed to recompile pipelines."); + } + + if (framebuffer_changed) + { + RestoreGraphicsAPIState(); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); + UpdateDepthBufferFromMaskBit(); + UpdateDisplay(); + ResetGraphicsAPIState(); + } +} + void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) { const u32 resolution_scale = CalculateResolutionScale(); @@ -360,26 +395,495 @@ bool GPU_HW::CreateFramebuffer() ((m_downsample_mode == GPUDownsampleMode::Adaptive) ? VRAM_WIDTH : GPU_MAX_DISPLAY_WIDTH) * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, 1, GPUTexture::Type::RenderTarget, texture_format)) || - !(m_vram_encoding_texture = g_host_display->CreateTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, + !(m_vram_readback_texture = g_host_display->CreateTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Type::RenderTarget, texture_format))) { return false; } + // vram framebuffer has both colour and depth + if (!(m_vram_framebuffer = + g_host_display->CreateFramebuffer(m_vram_texture.get(), 0, 0, m_vram_depth_texture.get(), 0, 0)) || + !(m_vram_update_depth_framebuffer = + g_host_display->CreateFramebuffer(nullptr, 0, 0, m_vram_depth_texture.get(), 0, 0)) || + !(m_vram_readback_framebuffer = + g_host_display->CreateFramebuffer(m_vram_readback_texture.get(), 0, 0, nullptr, 0, 0)) || + !(m_display_framebuffer = g_host_display->CreateFramebuffer(m_display_texture.get(), 0, 0, nullptr, 0, 0))) + { + return false; + } + + GL_OBJECT_NAME(m_vram_framebuffer, "VRAM Framebuffer"); +
GL_OBJECT_NAME(m_vram_update_depth_framebuffer, "VRAM Update Depth Framebuffer"); + GL_OBJECT_NAME(m_vram_readback_framebuffer, "VRAM Readback Framebuffer"); + GL_OBJECT_NAME(m_display_framebuffer, "Display Framebuffer"); + Log_InfoPrintf("Created HW framebuffer of %ux%u", texture_width, texture_height); return true; } void GPU_HW::DestroyFramebuffer() { + m_display_framebuffer.reset(); + m_vram_readback_framebuffer.reset(); + m_vram_update_depth_framebuffer.reset(); + m_vram_framebuffer.reset(); m_vram_read_texture.reset(); m_vram_depth_view.reset(); m_vram_depth_texture.reset(); m_vram_texture.reset(); - m_vram_encoding_texture.reset(); + m_vram_readback_texture.reset(); m_display_texture.reset(); } +bool GPU_HW::CompilePipelines() +{ + GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, + m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, + m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend); + + ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); + + // vertex shaders - [textured] + // fragment shaders - [render_mode][texture_mode][dithering][interlacing] + static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; + DimensionalArray, 2> batch_vertex_shaders{}; + DimensionalArray, 2, 2, 9, 4> batch_fragment_shaders{}; + ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { + batch_vertex_shaders.enumerate(destroy_shader); + batch_fragment_shaders.enumerate(destroy_shader); + }); + + for (u8 textured = 0; textured < 2; textured++) + { + const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); + if (!(batch_vertex_shaders[textured] = g_host_display->CreateShader(GPUShaderStage::Vertex, vs))) + return false; + + progress.Increment(); + } + + for (u8 render_mode = 
0; render_mode < 4; render_mode++) + { + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) + { + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(texture_mode), + ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); + + if (!(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] = + g_host_display->CreateShader(GPUShaderStage::Fragment, fs))) + { + return false; + } + + progress.Increment(); + } + } + } + } + + static constexpr GPUPipeline::VertexAttribute batch_attributes[] = { + GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(BatchVertex, x)), + GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(BatchVertex, color)), + }; + static constexpr GPUPipeline::VertexAttribute textured_batch_attributes[] = { + GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(BatchVertex, x)), + GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(BatchVertex, color)), + GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Type::UInt32, 1, offsetof(BatchVertex, u)), + GPUPipeline::VertexAttribute::Make(3, GPUPipeline::VertexAttribute::Type::UInt32, 1, + offsetof(BatchVertex, texpage)), + }; + static constexpr GPUPipeline::VertexAttribute textured_limits_batch_attributes[] = { + GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(BatchVertex, x)), + GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(BatchVertex, color)), + GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Type::UInt32, 1, offsetof(BatchVertex, u)), + GPUPipeline::VertexAttribute::Make(3, 
GPUPipeline::VertexAttribute::Type::UInt32, 1, + offsetof(BatchVertex, texpage)), + GPUPipeline::VertexAttribute::Make(4, GPUPipeline::VertexAttribute::Type::UNorm8, 4, + offsetof(BatchVertex, uv_limits)), + }; + + GPUPipeline::GraphicsConfig plconfig = {}; + plconfig.layout = GPUPipeline::Layout::HWBatch; + plconfig.input_layout.vertex_stride = sizeof(BatchVertex); + plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); + plconfig.primitive = GPUPipeline::Primitive::Triangles; + plconfig.color_format = GPUTexture::Format::RGBA8; + plconfig.depth_format = GPUTexture::Format::D16; + plconfig.samples = m_multisamples; + plconfig.per_sample_shading = m_per_sample_shading; + + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + for (u8 depth_test = 0; depth_test < 3; depth_test++) + { + for (u8 render_mode = 0; render_mode < 4; render_mode++) + { + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) + { + for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) + { + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) + { + static constexpr std::array depth_test_values = { + GPUPipeline::DepthFunc::Always, GPUPipeline::DepthFunc::GreaterEqual, + GPUPipeline::DepthFunc::LessEqual}; + const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); + + plconfig.input_layout.vertex_attributes = + textured ? + (m_using_uv_limits ? gsl::span(textured_limits_batch_attributes) : + gsl::span(textured_batch_attributes)) : + gsl::span(batch_attributes); + + plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); + plconfig.pixel_shader = batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing].get(); + + // TODO: Depth write always on??? 
+ plconfig.depth.depth_test = depth_test_values[depth_test]; + plconfig.depth.depth_write = true; + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + + if ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && + (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || + m_texture_filtering != GPUTextureFilter::Nearest) + { + plconfig.blend.enable = true; + plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::Zero; + plconfig.blend.alpha_blend_op = GPUPipeline::BlendOp::Add; + + if (m_supports_dual_source_blend) + { + plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::SrcAlpha1; + plconfig.blend.blend_op = + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? + GPUPipeline::BlendOp::ReverseSubtract : + GPUPipeline::BlendOp::Add; + } + else + { + const u32 factor = (static_cast(transparency_mode) == + GPUTransparencyMode::HalfBackgroundPlusHalfForeground) ? + 0xFF808080u : + 0xFFFFFFFFu; + plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::ConstantColor; + plconfig.blend.blend_op = + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? 
+ GPUPipeline::BlendOp::ReverseSubtract : + GPUPipeline::BlendOp::Add; + plconfig.blend.constant = factor; + } + } + + if (!(m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering] + [interlacing] = g_host_display->CreatePipeline(plconfig))) + { + return false; + } + + progress.Increment(); + } + } + } + } + } + } + + batch_shader_guard.Run(); + + std::unique_ptr fullscreen_quad_vertex_shader = + g_host_display->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader()); + std::unique_ptr uv_quad_vertex_shader = + g_host_display->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateUVQuadVertexShader()); + if (!fullscreen_quad_vertex_shader || !uv_quad_vertex_shader) + return false; + + progress.Increment(); + + // common state + plconfig.layout = GPUPipeline::Layout::SingleTexture; + plconfig.per_sample_shading = false; + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + plconfig.vertex_shader = fullscreen_quad_vertex_shader.get(); + + // VRAM fill + for (u8 wrapped = 0; wrapped < 2; wrapped++) + { + for (u8 interlaced = 0; interlaced < 2; interlaced++) + { + std::unique_ptr fs = g_host_display->CreateShader( + GPUShaderStage::Fragment, + shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced))); + if (!fs) + return false; + + plconfig.pixel_shader = fs.get(); + plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState(); + + if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_host_display->CreatePipeline(plconfig))) + return false; + + progress.Increment(); + } + } + + // VRAM copy + { + std::unique_ptr fs = + g_host_display->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMCopyFragmentShader()); + if (!fs) + return false; + + plconfig.pixel_shader = fs.get(); + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + plconfig.depth.depth_write = true; + plconfig.depth.depth_test = + (depth_test != 0) ? 
GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; + + if (!(m_vram_copy_pipelines[depth_test] = g_host_display->CreatePipeline(plconfig))) + return false; + + progress.Increment(); + } + } + + // VRAM write + // TODO: SSBO path here... + { + std::unique_ptr fs = g_host_display->CreateShader( + GPUShaderStage::Fragment, shadergen.GenerateVRAMWriteFragmentShader(false /*m_use_ssbos_for_vram_writes*/)); + if (!fs) + return false; + + plconfig.pixel_shader = fs.get(); + for (u8 depth_test = 0; depth_test < 2; depth_test++) + { + plconfig.depth.depth_write = true; + plconfig.depth.depth_test = + (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; + + if (!(m_vram_write_pipelines[depth_test] = g_host_display->CreatePipeline(plconfig))) + return false; + + progress.Increment(); + } + } + +#if 0 + // VRAM update depth + // TODO + { + std::unique_ptr fs = g_host_display->CreateShader( + GPUShader::Stage::Pixel, shadergen.GenerateVRAMUpdateDepthFragmentShader()); + if (!fs) + return false; + + gpbuilder.SetRenderPass(m_vram_update_depth_render_pass, 0); + gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); + gpbuilder.SetFragmentShader(fs); + gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS); + gpbuilder.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, + VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, 0); + // COLOR MASK ZERO + + m_vram_update_depth_pipeline = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(device, fs, nullptr); + if (m_vram_update_depth_pipeline == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_update_depth_pipeline, + "VRAM Update Depth Pipeline"); + + progress.Increment(); + } +#endif + + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + + // VRAM read + { + 
std::unique_ptr fs = + g_host_display->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMReadFragmentShader()); + if (!fs) + return false; + + plconfig.pixel_shader = fs.get(); + + if (!(m_vram_readback_pipeline = g_host_display->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_vram_readback_pipeline, "VRAM Read Pipeline"); + progress.Increment(); + } + + // Display + { + for (u8 depth_24 = 0; depth_24 < 2; depth_24++) + { + for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) + { + std::unique_ptr fs = g_host_display->CreateShader( + GPUShaderStage::Fragment, + shadergen.GenerateDisplayFragmentShader( + ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode), m_chroma_smoothing)); + if (!fs) + return false; + + plconfig.pixel_shader = fs.get(); + + if (!(m_display_pipelines[depth_24][interlace_mode] = g_host_display->CreatePipeline(plconfig))) + return false; + + progress.Increment(); + } + } + } + +#if 0 + if (m_downsample_mode == GPUDownsampleMode::Adaptive) + { + gpbuilder.Clear(); + gpbuilder.SetRenderPass(m_downsample_render_pass, 0); + gpbuilder.SetPipelineLayout(m_downsample_pipeline_layout); + gpbuilder.SetVertexShader(uv_quad_vertex_shader); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetDynamicViewportAndScissorState(); + + std::unique_ptr fs = g_host_display->CreateShaderFromSource( + GPUShader::Stage::Pixel, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true)); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetFragmentShader(fs); + m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); + if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline, + "Downsample First Pass Pipeline"); + + fs = 
g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, + shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false)); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetFragmentShader(fs); + m_downsample_mid_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); + if (m_downsample_mid_pass_pipeline == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_mid_pass_pipeline, + "Downsample Mid Pass Pipeline"); + + fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, + shadergen.GenerateAdaptiveDownsampleBlurFragmentShader()); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetFragmentShader(fs); + gpbuilder.SetRenderPass(m_downsample_weight_render_pass, 0); + m_downsample_blur_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); + if (m_downsample_blur_pass_pipeline == VK_NULL_HANDLE) + return false; + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_blur_pass_pipeline, + "Downsample Blur Pass Pipeline"); + + fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, + shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader()); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetFragmentShader(fs); + gpbuilder.SetPipelineLayout(m_downsample_composite_pipeline_layout); + gpbuilder.SetRenderPass(m_display_load_render_pass, 0); + m_downsample_composite_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); + if (m_downsample_composite_pass_pipeline == VK_NULL_HANDLE) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_composite_pass_pipeline, + "Downsample Composite Pass Pipeline"); + } + else if (m_downsample_mode == GPUDownsampleMode::Box) + { + 
gpbuilder.Clear(); + gpbuilder.SetRenderPass(m_downsample_render_pass, 0); + gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout); + gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader); + gpbuilder.SetNoCullRasterizationState(); + gpbuilder.SetNoDepthTestState(); + gpbuilder.SetNoBlendingState(); + gpbuilder.SetDynamicViewportAndScissorState(); + + std::unique_ptr fs = g_host_display->CreateShaderFromSource( + GPUShader::Stage::Pixel, shadergen.GenerateBoxSampleDownsampleFragmentShader()); + if (fs == VK_NULL_HANDLE) + return false; + + gpbuilder.SetFragmentShader(fs); + m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); + vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); + if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE) + return false; + + Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline, + "Downsample First Pass Pipeline"); + } +#endif + + progress.Increment(); + +#undef UPDATE_PROGRESS + + return true; +} + +void GPU_HW::DestroyPipelines() +{ + static constexpr auto destroy = [](std::unique_ptr& p) { p.reset(); }; + + m_batch_pipelines.enumerate(destroy); + + m_vram_fill_pipelines.enumerate(destroy); + + for (std::unique_ptr& p : m_vram_write_pipelines) + destroy(p); + + for (std::unique_ptr& p : m_vram_copy_pipelines) + destroy(p); + + destroy(m_vram_readback_pipeline); + destroy(m_vram_update_depth_pipeline); + + destroy(m_downsample_first_pass_pipeline); + destroy(m_downsample_mid_pass_pipeline); + destroy(m_downsample_blur_pass_pipeline); + destroy(m_downsample_composite_pass_pipeline); + + m_display_pipelines.enumerate(destroy); +} + void GPU_HW::UpdateVRAMReadTexture() { const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; @@ -401,6 +905,148 @@ void GPU_HW::UpdateVRAMReadTexture() ClearVRAMDirtyRectangle(); } +void GPU_HW::MapBatchVertexPointer(u32 required_vertices) +{ + DebugAssert(!m_batch_start_vertex_ptr); + + void* map; + u32 
space; + g_host_display->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &map, &space, &m_batch_base_vertex); + + m_batch_start_vertex_ptr = static_cast(map); + m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; + m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + space; +} + +void GPU_HW::UnmapBatchVertexPointer(u32 used_vertices) +{ + DebugAssert(m_batch_start_vertex_ptr); + g_host_display->UnmapVertexBuffer(sizeof(BatchVertex), used_vertices); + m_batch_start_vertex_ptr = nullptr; + m_batch_end_vertex_ptr = nullptr; + m_batch_current_vertex_ptr = nullptr; +} + +void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) +{ + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + const u8 depth_test = m_batch.use_depth_buffer ? static_cast(2) : BoolToUInt8(m_batch.check_mask_before_draw); + g_host_display->SetPipeline( + m_batch_pipelines[depth_test][static_cast(render_mode)][static_cast(m_batch.texture_mode)][static_cast( + m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] + .get()); + g_host_display->Draw(num_vertices, base_vertex); +} + +void GPU_HW::ClearDisplay() +{ + Panic("Not implemented"); +} + +void GPU_HW::UpdateDisplay() +{ + FlushRender(); + + if (g_settings.debugging.show_vram) + { + if (IsUsingMultisampling()) + { + UpdateVRAMReadTexture(); + g_host_display->SetDisplayTexture(m_vram_read_texture.get(), 0, 0, m_vram_read_texture->GetWidth(), + m_vram_read_texture->GetHeight()); + } + else + { + g_host_display->SetDisplayTexture(m_vram_texture.get(), 0, 0, m_vram_texture->GetWidth(), + m_vram_texture->GetHeight()); + } + + g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + } + else + { + g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, + m_crtc_state.display_origin_left, 
m_crtc_state.display_origin_top, + m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, + GetDisplayAspectRatio()); + + const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; + const u32 vram_offset_x = m_crtc_state.display_vram_left; + const u32 vram_offset_y = m_crtc_state.display_vram_top; + const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; + const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; + const u32 display_width = m_crtc_state.display_vram_width; + const u32 display_height = m_crtc_state.display_vram_height; + const u32 scaled_display_width = display_width * resolution_scale; + const u32 scaled_display_height = display_height * resolution_scale; + const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); + + if (IsDisplayDisabled()) + { + g_host_display->ClearDisplayTexture(); + } + else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && + !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && + (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight()) + { + + if (IsUsingDownsampling()) + { +#if 0 + DownsampleFramebuffer(GetVRAMTexture(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, + scaled_display_height); +#else + Panic("Fixme"); +#endif + } + else + { + g_host_display->SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, + scaled_display_width, scaled_display_height); + } + } + else + { + // TODO: discard vs load for interlaced + if (interlaced == InterlacedRenderMode::None) + g_host_display->InvalidateRenderTarget(m_display_texture.get()); + + g_host_display->SetPipeline( + m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].get()); + g_host_display->SetFramebuffer(m_display_framebuffer.get()); + + const u32 reinterpret_field_offset = (interlaced != 
InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; + const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; + const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; + const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, + reinterpret_crop_left, reinterpret_field_offset}; + g_host_display->PushUniformBuffer(uniforms, sizeof(uniforms)); + + Assert(scaled_display_width <= m_display_texture->GetWidth() && + scaled_display_height <= m_display_texture->GetHeight()); + + g_host_display->SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); + g_host_display->Draw(3, 0); + + if (IsUsingDownsampling()) + { +#if 0 + DownsampleFramebuffer(GetDisplayTexture(), 0, 0, scaled_display_width, scaled_display_height); +#else + Panic("Fixme"); +#endif + } + else + { + g_host_display->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height); + } + + RestoreGraphicsAPIState(); + } + } +} + void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) { // Taken from beetle-psx gpu_polygon.cpp diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 71593b1c8..436f18584 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -2,6 +2,7 @@ // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) #pragma once +#include "common/dimensional_array.h" #include "common/heap_array.h" #include "gpu.h" #include "gpu/gpu_device.h" @@ -42,6 +43,7 @@ public: virtual void Reset(bool clear_vram) override; virtual bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override; + virtual void UpdateSettings() override; void UpdateResolutionScale() override final; std::tuple GetEffectiveDisplayResolution(bool scaled = true) override final; std::tuple GetFullDisplayResolution(bool scaled = true) override final; @@ -202,14 +204,20 @@ protected: virtual bool CreateFramebuffer(); virtual void 
DestroyFramebuffer(); + + bool CompilePipelines(); + void DestroyPipelines(); + void UpdateVRAMReadTexture(); virtual void UpdateDepthBufferFromMaskBit() = 0; virtual void ClearDepthBuffer() = 0; virtual void SetScissorFromDrawingArea() = 0; - virtual void MapBatchVertexPointer(u32 required_vertices) = 0; - virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0; + virtual void MapBatchVertexPointer(u32 required_vertices); + virtual void UnmapBatchVertexPointer(u32 used_vertices); virtual void UploadUniformBuffer(const void* uniforms, u32 uniforms_size) = 0; - virtual void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) = 0; + virtual void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices); + virtual void ClearDisplay(); + virtual void UpdateDisplay(); u32 CalculateResolutionScale() const; GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const; @@ -361,8 +369,14 @@ protected: std::unique_ptr m_vram_depth_texture; std::unique_ptr m_vram_depth_view; std::unique_ptr m_vram_read_texture; - std::unique_ptr m_vram_encoding_texture; + std::unique_ptr m_vram_readback_texture; std::unique_ptr m_display_texture; + + std::unique_ptr m_vram_framebuffer; + std::unique_ptr m_vram_update_depth_framebuffer; + std::unique_ptr m_vram_readback_framebuffer; + std::unique_ptr m_display_framebuffer; + HeapArray m_vram_shadow; std::unique_ptr m_sw_renderer; @@ -406,13 +420,34 @@ protected: // Bounding box of VRAM area that the GPU has drawn into. 
Common::Rectangle m_vram_dirty_rect; + // Changed state + bool m_batch_ubo_dirty = true; + + // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] + DimensionalArray, 2, 2, 5, 9, 4, 3> m_batch_pipelines{}; + + // [wrapped][interlaced] + DimensionalArray, 2, 2> m_vram_fill_pipelines{}; + + // [depth_test] + std::array, 2> m_vram_write_pipelines{}; + std::array, 2> m_vram_copy_pipelines{}; + + std::unique_ptr m_vram_readback_pipeline; + std::unique_ptr m_vram_update_depth_pipeline; + + // [depth_24][interlace_mode] + DimensionalArray, 3, 2> m_display_pipelines{}; + + std::unique_ptr m_downsample_first_pass_pipeline; + std::unique_ptr m_downsample_mid_pass_pipeline; + std::unique_ptr m_downsample_blur_pass_pipeline; + std::unique_ptr m_downsample_composite_pass_pipeline; + // Statistics RendererStats m_renderer_stats = {}; RendererStats m_last_renderer_stats = {}; - // Changed state - bool m_batch_ubo_dirty = true; - private: enum : u32 { diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 4c1db8118..b7df66ccc 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -42,12 +42,6 @@ bool GPU_HW_D3D11::Initialize() return false; } - if (!CreateVertexBuffer()) - { - Log_ErrorPrintf("Failed to create vertex buffer"); - return false; - } - if (!CreateUniformBuffer()) { Log_ErrorPrintf("Failed to create uniform buffer"); @@ -94,10 +88,6 @@ void GPU_HW_D3D11::ResetGraphicsAPIState() void GPU_HW_D3D11::RestoreGraphicsAPIState() { - const UINT stride = sizeof(BatchVertex); - const UINT offset = 0; - m_context->IASetVertexBuffers(0, 1, m_vertex_stream_buffer.GetD3DBufferArray(), &stride, &offset); - m_context->IASetInputLayout(m_batch_input_layout.Get()); m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); m_context->PSSetShaderResources(0, 1, GetVRAMReadTexture()->GetD3DSRVArray()); m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf()); @@ -108,62 +98,6 @@ void 
GPU_HW_D3D11::RestoreGraphicsAPIState() m_batch_ubo_dirty = true; } -void GPU_HW_D3D11::UpdateSettings() -{ - GPU_HW::UpdateSettings(); - - bool framebuffer_changed, shaders_changed; - UpdateHWSettings(&framebuffer_changed, &shaders_changed); - - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - ResetGraphicsAPIState(); - g_host_display->ClearDisplayTexture(); - CreateFramebuffer(); - } - - if (shaders_changed) - { - DestroyShaders(); - DestroyStateObjects(); - CreateStateObjects(); - CompileShaders(); - } - - if (framebuffer_changed) - { - RestoreGraphicsAPIState(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); - UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); - ResetGraphicsAPIState(); - } -} - -void GPU_HW_D3D11::MapBatchVertexPointer(u32 required_vertices) -{ - DebugAssert(!m_batch_start_vertex_ptr); - - const D3D11::StreamBuffer::MappingResult res = - m_vertex_stream_buffer.Map(m_context.Get(), sizeof(BatchVertex), required_vertices * sizeof(BatchVertex)); - - m_batch_start_vertex_ptr = static_cast(res.pointer); - m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; - m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned; - m_batch_base_vertex = res.index_aligned; -} - -void GPU_HW_D3D11::UnmapBatchVertexPointer(u32 used_vertices) -{ - DebugAssert(m_batch_start_vertex_ptr); - m_vertex_stream_buffer.Unmap(m_context.Get(), used_vertices * sizeof(BatchVertex)); - m_batch_start_vertex_ptr = nullptr; - m_batch_end_vertex_ptr = nullptr; - m_batch_current_vertex_ptr = nullptr; -} - void GPU_HW_D3D11::SetCapabilities() { const u32 max_texture_size = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; @@ -263,11 +197,6 @@ void GPU_HW_D3D11::DestroyFramebuffer() GPU_HW::DestroyFramebuffer(); } -bool GPU_HW_D3D11::CreateVertexBuffer() -{ - return m_vertex_stream_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE); -} - bool GPU_HW_D3D11::CreateUniformBuffer() { 
return m_uniform_stream_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, MAX_UNIFORM_BUFFER_SIZE); @@ -367,35 +296,11 @@ bool GPU_HW_D3D11::CreateStateObjects() if (FAILED(hr)) return false; - for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) - { - bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - if (transparency_mode != static_cast(GPUTransparencyMode::Disabled) || - m_texture_filtering != GPUTextureFilter::Nearest) - { - bl_desc.RenderTarget[0].BlendEnable = TRUE; - bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; - bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC1_ALPHA; - bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; - bl_desc.RenderTarget[0].BlendOp = - (transparency_mode == static_cast(GPUTransparencyMode::BackgroundMinusForeground)) ? - D3D11_BLEND_OP_REV_SUBTRACT : - D3D11_BLEND_OP_ADD; - bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; - } - - hr = m_device->CreateBlendState(&bl_desc, m_batch_blend_states[transparency_mode].ReleaseAndGetAddressOf()); - if (FAILED(hr)) - return false; - } - return true; } void GPU_HW_D3D11::DestroyStateObjects() { - m_batch_blend_states = {}; m_linear_sampler_state.Reset(); m_point_sampler_state.Reset(); m_trilinear_sampler_state.Reset(); @@ -411,9 +316,14 @@ void GPU_HW_D3D11::DestroyStateObjects() bool GPU_HW_D3D11::CompileShaders() { + if (!GPU_HW::CompilePipelines()) + return false; + D3D11::ShaderCache shader_cache; +#if 0 shader_cache.Open(EmuFolders::Cache, m_device->GetFeatureLevel(), SHADER_CACHE_VERSION, g_settings.gpu_use_debug_device); +#endif GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits, @@ -422,34 +332,6 @@ bool GPU_HW_D3D11::CompileShaders() ShaderCompileProgressTracker progress("Compiling Shaders", 1 + 1 + 2 + (4 * 9 * 2 * 2) + 1 + (2 * 2) + 4 + 
(2 * 3) + 1); - // input layout - { - static constexpr std::array attributes = { - {{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, uv_limits), D3D11_INPUT_PER_VERTEX_DATA, 0}}}; - - // we need a vertex shader... - ComPtr vs_bytecode = - shader_cache.GetShaderBlob(D3D11::ShaderCompiler::Type::Vertex, shadergen.GenerateBatchVertexShader(true)); - if (!vs_bytecode) - return false; - - const UINT num_attributes = static_cast(attributes.size()) - (m_using_uv_limits ? 0 : 1); - const HRESULT hr = - m_device->CreateInputLayout(attributes.data(), num_attributes, vs_bytecode->GetBufferPointer(), - vs_bytecode->GetBufferSize(), m_batch_input_layout.ReleaseAndGetAddressOf()); - if (FAILED(hr)) - { - Log_ErrorPrintf("CreateInputLayout failed: 0x%08X", hr); - return false; - } - } - - progress.Increment(); - m_screen_quad_vertex_shader = shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateScreenQuadVertexShader()); m_uv_quad_vertex_shader = shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateUVQuadVertexShader()); @@ -458,39 +340,6 @@ bool GPU_HW_D3D11::CompileShaders() progress.Increment(); - for (u8 textured = 0; textured < 2; textured++) - { - const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured)); - m_batch_vertex_shaders[textured] = shader_cache.GetVertexShader(m_device.Get(), vs); - if (!m_batch_vertex_shaders[textured]) - return false; - - progress.Increment(); - } - - for (u8 render_mode = 0; render_mode < 4; render_mode++) - { - for (u8 texture_mode = 0; texture_mode < 9; 
texture_mode++) - { - for (u8 dithering = 0; dithering < 2; dithering++) - { - for (u8 interlacing = 0; interlacing < 2; interlacing++) - { - const std::string ps = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), - ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); - - m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing] = - shader_cache.GetPixelShader(m_device.Get(), ps); - if (!m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing]) - return false; - - progress.Increment(); - } - } - } - } - m_copy_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateCopyFragmentShader()); if (!m_copy_pixel_shader) return false; @@ -599,9 +448,8 @@ void GPU_HW_D3D11::DestroyShaders() m_copy_pixel_shader.Reset(); m_uv_quad_vertex_shader.Reset(); m_screen_quad_vertex_shader.Reset(); - m_batch_pixel_shaders = {}; - m_batch_vertex_shaders = {}; - m_batch_input_layout.Reset(); + + GPU_HW::DestroyPipelines(); } void GPU_HW_D3D11::UploadUniformBuffer(const void* data, u32 data_size) @@ -699,30 +547,6 @@ bool GPU_HW_D3D11::BlitVRAMReplacementTexture(const TextureReplacementTexture* t return true; } -void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) -{ - const bool textured = (m_batch.texture_mode != GPUTextureMode::Disabled); - - m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0); - - m_context->PSSetShader(m_batch_pixel_shaders[static_cast(render_mode)][static_cast(m_batch.texture_mode)] - [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] - .Get(), - nullptr, 0); - - const GPUTransparencyMode transparency_mode = - (render_mode == BatchRenderMode::OnlyOpaque) ? 
GPUTransparencyMode::Disabled : m_batch.transparency_mode; - m_context->OMSetBlendState(m_batch_blend_states[static_cast(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu); - - m_context->OMSetDepthStencilState( - (m_batch.use_depth_buffer ? - m_depth_test_less_state.Get() : - (m_batch.check_mask_before_draw ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get())), - 0); - - m_context->Draw(num_vertices, base_vertex); -} - void GPU_HW_D3D11::SetScissorFromDrawingArea() { int left, top, right, bottom; @@ -742,96 +566,6 @@ void GPU_HW_D3D11::ClearDisplay() m_context->ClearRenderTargetView(GetDisplayTexture()->GetD3DRTV(), clear_color.data()); } -void GPU_HW_D3D11::UpdateDisplay() -{ - GPU_HW::UpdateDisplay(); - - if (g_settings.debugging.show_vram) - { - if (IsUsingMultisampling()) - { - UpdateVRAMReadTexture(); - g_host_display->SetDisplayTexture(m_vram_read_texture.get(), 0, 0, GetVRAMReadTexture()->GetWidth(), - GetVRAMReadTexture()->GetHeight()); - } - else - { - g_host_display->SetDisplayTexture(m_vram_texture.get(), 0, 0, GetVRAMTexture()->GetWidth(), - GetVRAMTexture()->GetHeight()); - } - - g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); - } - else - { - g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - GetDisplayAspectRatio()); - - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 
1 : m_resolution_scale; - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; - const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; - const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 scaled_display_width = display_width * resolution_scale; - const u32 scaled_display_height = display_height * resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); - - if (IsDisplayDisabled()) - { - g_host_display->ClearDisplayTexture(); - } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && - !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= GetVRAMTexture()->GetWidth() && - (scaled_vram_offset_y + scaled_display_height) <= GetVRAMTexture()->GetHeight()) - { - - if (IsUsingDownsampling()) - { - DownsampleFramebuffer(GetVRAMTexture(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - scaled_display_height); - } - else - { - g_host_display->SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, - scaled_display_width, scaled_display_height); - } - } - else - { - m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get()); - m_context->OMSetRenderTargets(1, GetDisplayTexture()->GetD3DRTVArray(), nullptr); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->PSSetShaderResources(0, 1, GetVRAMTexture()->GetD3DSRVArray()); - - const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? 
GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; - const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, - reinterpret_crop_left, reinterpret_field_offset}; - ID3D11PixelShader* display_pixel_shader = - m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Get(); - - Assert(scaled_display_width <= m_display_texture->GetWidth() && - scaled_display_height <= m_display_texture->GetHeight()); - - SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); - DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); - - if (IsUsingDownsampling()) - DownsampleFramebuffer(GetDisplayTexture(), 0, 0, scaled_display_width, scaled_display_height); - else - g_host_display->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height); - - RestoreGraphicsAPIState(); - } - } -} - void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { if (IsUsingSoftwareRendererForReadbacks()) @@ -855,7 +589,7 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms)); // Stage the readback and copy it into our shadow buffer. 
- g_host_display->DownloadTexture(m_vram_encoding_texture.get(), 0, 0, encoded_width, encoded_height, + g_host_display->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height, reinterpret_cast(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]), VRAM_WIDTH * sizeof(u16)); diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index 2bbe97405..5a65523fc 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -29,11 +29,9 @@ public: void ResetGraphicsAPIState() override; void RestoreGraphicsAPIState() override; - void UpdateSettings() override; protected: void ClearDisplay() override; - void UpdateDisplay() override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; @@ -41,10 +39,7 @@ protected: void UpdateDepthBufferFromMaskBit() override; void ClearDepthBuffer() override; void SetScissorFromDrawingArea() override; - void MapBatchVertexPointer(u32 required_vertices) override; - void UnmapBatchVertexPointer(u32 used_vertices) override; void UploadUniformBuffer(const void* data, u32 data_size) override; - void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override; private: enum : u32 @@ -64,7 +59,7 @@ private: } ALWAYS_INLINE D3D11Texture* GetVRAMEncodingTexture() const { - return static_cast(m_vram_encoding_texture.get()); + return static_cast(m_vram_readback_texture.get()); } ALWAYS_INLINE D3D11Texture* GetDisplayTexture() const { @@ -76,7 +71,6 @@ private: void ClearFramebuffer(); void DestroyFramebuffer() override; - bool CreateVertexBuffer(); bool CreateUniformBuffer(); bool CreateTextureBuffer(); bool CreateStateObjects(); @@ -99,8 +93,6 @@ private: ComPtr m_device; ComPtr m_context; - D3D11::StreamBuffer m_vertex_stream_buffer; - D3D11::StreamBuffer m_uniform_stream_buffer; 
D3D11::StreamBuffer m_texture_stream_buffer; @@ -122,12 +114,6 @@ private: ComPtr m_linear_sampler_state; ComPtr m_trilinear_sampler_state; - std::array, 5> m_batch_blend_states; // [transparency_mode] - ComPtr m_batch_input_layout; - std::array, 2> m_batch_vertex_shaders; // [textured] - std::array, 2>, 2>, 9>, 4> - m_batch_pixel_shaders; // [render_mode][texture_mode][dithering][interlacing] - ComPtr m_screen_quad_vertex_shader; ComPtr m_uv_quad_vertex_shader; ComPtr m_copy_pixel_shader; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index fe814d7fd..28f20f897 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -341,9 +341,9 @@ bool GPU_HW_OpenGL::CreateFramebuffer() !m_vram_read_texture.Create(texture_width, texture_height, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, false, true) || !m_vram_read_texture.CreateFramebuffer() || - !m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, false, + !m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, false, true) || - !m_vram_encoding_texture.CreateFramebuffer() || + !m_vram_readback_texture.CreateFramebuffer() || !m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, true, true) || !m_display_texture.CreateFramebuffer()) @@ -918,7 +918,7 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) // Encode the 24-bit texture as 16-bit. 
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; - m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_readback_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_vram_texture.Bind(); m_vram_read_program.Bind(); UploadUniformBuffer(uniforms, sizeof(uniforms)); @@ -929,7 +929,7 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) glDrawArrays(GL_TRIANGLES, 0, 3); // Readback encoded texture. - m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); + m_vram_readback_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); glPixelStorei(GL_PACK_ALIGNMENT, 2); glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, @@ -1064,7 +1064,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* // have to write to the 1x texture first if (m_resolution_scale > 1) - m_vram_encoding_texture.Bind(); + m_vram_readback_texture.Bind(); else m_vram_texture.Bind(); @@ -1081,7 +1081,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* const u32 scaled_x = x * m_resolution_scale; const u32 scaled_y = y * m_resolution_scale; glDisable(GL_SCISSOR_TEST); - m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); + m_vram_readback_texture.BindFramebuffer(GL_READ_FRAMEBUFFER); glBlitFramebuffer(x, y, x + width, y + height, scaled_x, scaled_y, scaled_x + scaled_width, scaled_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); glEnable(GL_SCISSOR_TEST); diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index 169794504..a0be544dd 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -78,7 +78,7 @@ private: GL::Texture m_vram_texture; GL::Texture m_vram_depth_texture; GL::Texture m_vram_read_texture; - GL::Texture m_vram_encoding_texture; + GL::Texture m_vram_readback_texture; GL::Texture m_display_texture; GL::Texture 
m_vram_write_replacement_texture; diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index 6fc020391..5b7dbb113 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -4,4 +4,4 @@ #pragma once #include "types.h" -static constexpr u32 SHADER_CACHE_VERSION = 7; \ No newline at end of file +static constexpr u32 SHADER_CACHE_VERSION = 8; \ No newline at end of file diff --git a/src/core/shadergen.cpp b/src/core/shadergen.cpp index 7b5a0505b..7ea826e06 100644 --- a/src/core/shadergen.cpp +++ b/src/core/shadergen.cpp @@ -662,16 +662,74 @@ std::string ShaderGen::GenerateCopyFragmentShader() return ss.str(); } -std::string ShaderGen::GenerateSampleFragmentShader() +std::string ShaderGen::GenerateDisplayVertexShader() { std::stringstream ss; WriteHeader(ss); - DeclareTexture(ss, "samp0", 0); - DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1); - + DeclareUniformBuffer(ss, {"float4 u_src_rect"}, true); + DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true); ss << R"( { - o_col0 = SAMPLE_TEXTURE(samp0, v_tex0); + float2 pos = float2(float((v_id << 1) & 2u), float(v_id & 2u)); + v_tex0 = u_src_rect.xy + pos * u_src_rect.zw; + v_pos = float4(pos * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); + #if API_OPENGL || API_OPENGL_ES || API_VULKAN + v_pos.y = -v_pos.y; + #endif +} +)"; + + return ss.str(); +} + +std::string ShaderGen::GenerateDisplayFragmentShader(bool set_alpha_to_one /* = false */) +{ + std::stringstream ss; + WriteHeader(ss); + DeclareTexture(ss, "samp0", 0); + DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1); + ss << "{\n"; + + if (set_alpha_to_one) + ss << "o_col0 = float4(SAMPLE_TEXTURE(samp0, v_tex0).rgb, 1.0f);"; + else + ss << "o_col0 = SAMPLE_TEXTURE(samp0, v_tex0);"; + + ss << "\n}\n"; + + return ss.str(); +} + +std::string ShaderGen::GenerateImGuiVertexShader() +{ + std::stringstream ss; + WriteHeader(ss); + DeclareUniformBuffer(ss, {"float4x4 ProjectionMatrix"}, true); + 
DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0", "float4 a_col0"}, 1, 1, {}, false); + ss << R"( +{ + v_pos = mul(ProjectionMatrix, float4(a_pos, 0.f, 1.f)); + v_col0 = a_col0; + v_tex0 = a_tex0; + #if API_OPENGL || API_OPENGL_ES || API_VULKAN + v_pos.y = -v_pos.y; + #endif +} +)"; + + return ss.str(); +} + +std::string ShaderGen::GenerateImGuiFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + DeclareTexture(ss, "samp0", 0); + DeclareFragmentEntryPoint(ss, 1, 1, {}, false, 1); + + ss << R"( +{ + o_col0 = v_col0 * SAMPLE_TEXTURE(samp0, v_tex0); } )"; diff --git a/src/core/shadergen.h b/src/core/shadergen.h index 83fac9a94..a23bd869e 100644 --- a/src/core/shadergen.h +++ b/src/core/shadergen.h @@ -19,7 +19,11 @@ public: std::string GenerateUVQuadVertexShader(); std::string GenerateFillFragmentShader(); std::string GenerateCopyFragmentShader(); - std::string GenerateSampleFragmentShader(); + std::string GenerateDisplayVertexShader(); + std::string GenerateDisplayFragmentShader(bool set_alpha_to_one = false); + + std::string GenerateImGuiVertexShader(); + std::string GenerateImGuiFragmentShader(); protected: ALWAYS_INLINE bool IsVulkan() const { return (m_render_api == RenderAPI::Vulkan); }