More conversion

This commit is contained in:
Stenzek 2023-08-01 23:49:15 +10:00
parent de08feeffb
commit ac73e7b050
30 changed files with 2519 additions and 1557 deletions

View File

@ -10,7 +10,7 @@ AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortFunctionsOnASingleLine: InlineOnly
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None

View File

@ -76,6 +76,7 @@
<ClCompile Include="gpu\postprocessing_chain.cpp" />
<ClCompile Include="gpu\postprocessing_shader.cpp" />
<ClCompile Include="gpu\postprocessing_shadergen.cpp" />
<ClCompile Include="gpu\gpu_shader_cache.cpp" />
<ClCompile Include="gpu\vulkan\builders.cpp" />
<ClCompile Include="gpu\vulkan\context.cpp" />
<ClCompile Include="gpu\vulkan\loader.cpp" />
@ -205,6 +206,7 @@
<ClInclude Include="gpu\postprocessing_chain.h" />
<ClInclude Include="gpu\postprocessing_shader.h" />
<ClInclude Include="gpu\postprocessing_shadergen.h" />
<ClInclude Include="gpu\gpu_shader_cache.h" />
<ClInclude Include="gpu\vulkan\builders.h" />
<ClInclude Include="gpu\vulkan\context.h" />
<ClInclude Include="gpu\vulkan\entry_points.h" />

View File

@ -188,6 +188,9 @@
<ClCompile Include="gpu\d3d11_texture.cpp">
<Filter>gpu</Filter>
</ClCompile>
<ClCompile Include="gpu\gpu_shader_cache.cpp">
<Filter>gpu</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="types.h" />
@ -395,6 +398,9 @@
<ClInclude Include="gpu\d3d_shaders.h">
<Filter>gpu</Filter>
</ClInclude>
<ClInclude Include="gpu\gpu_shader_cache.h">
<Filter>gpu</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="gpu">

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -9,7 +10,7 @@
#include "d3d11_texture.h"
#include "gpu_device.h"
#include "postprocessing_chain.h"
#include <d3d11.h>
#include <d3d11_1.h>
#include <dxgi.h>
#include <memory>
#include <string>
@ -30,13 +31,17 @@ class D3D11Framebuffer final : public GPUFramebuffer
public:
~D3D11Framebuffer() override;
ALWAYS_INLINE u32 GetNumRTVs() const { return m_rtv ? 1 : 0; }
ALWAYS_INLINE ID3D11RenderTargetView* GetRTV() const { return m_rtv.Get(); }
ALWAYS_INLINE ID3D11RenderTargetView* const* GetRTVArray() const { return m_rtv.GetAddressOf(); }
ALWAYS_INLINE ID3D11DepthStencilView* GetDSV() const { return m_dsv.Get(); }
void SetDebugName(const std::string_view& name) override;
void CommitClear(ID3D11DeviceContext* context);
private:
D3D11Framebuffer(ComPtr<ID3D11RenderTargetView> rtv, ComPtr<ID3D11DepthStencilView> dsv);
D3D11Framebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height, ComPtr<ID3D11RenderTargetView> rtv,
ComPtr<ID3D11DepthStencilView> dsv);
ComPtr<ID3D11RenderTargetView> m_rtv;
ComPtr<ID3D11DepthStencilView> m_dsv;
@ -53,6 +58,7 @@ public:
~D3D11Sampler() override;
ALWAYS_INLINE ID3D11SamplerState* GetSamplerState() const { return m_ss.Get(); }
ALWAYS_INLINE ID3D11SamplerState* const* GetSamplerStateArray() const { return m_ss.GetAddressOf(); }
void SetDebugName(const std::string_view& name) override;
@ -78,7 +84,7 @@ public:
void SetDebugName(const std::string_view& name) override;
private:
D3D11Shader(Stage stage, Microsoft::WRL::ComPtr<ID3D11DeviceChild> shader, std::vector<u8> bytecode);
D3D11Shader(GPUShaderStage stage, Microsoft::WRL::ComPtr<ID3D11DeviceChild> shader, std::vector<u8> bytecode);
Microsoft::WRL::ComPtr<ID3D11DeviceChild> m_shader;
std::vector<u8> m_bytecode; // only for VS
@ -154,7 +160,7 @@ public:
AdapterAndModeList GetAdapterAndModeList() override;
void DestroySurface() override;
bool SetPostProcessingChain(const std::string_view& config) override;
std::string GetShaderCacheBaseName(const std::string_view& type, bool debug) const override;
std::unique_ptr<GPUTexture> CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
GPUTexture::Type type, GPUTexture::Format format,
@ -170,14 +176,33 @@ public:
void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override;
std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt, u32 rt_layer, u32 rt_level, GPUTexture* ds,
u32 ds_layer, u32 ds_level) override;
std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt = nullptr, u32 rt_layer = 0, u32 rt_level = 0,
GPUTexture* ds = nullptr, u32 ds_layer = 0,
u32 ds_level = 0) override;
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShader::Stage stage, gsl::span<const u8> data) override;
std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShader::Stage stage, const std::string_view& source,
std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, gsl::span<const u8> data) override;
std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source,
std::vector<u8>* out_binary = nullptr) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config) override;
void PushDebugGroup(const char* fmt, ...) override;
void PopDebugGroup() override;
void InsertDebugMessage(const char* fmt, ...) override;
void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space,
u32* map_base_vertex) override;
void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count) override;
void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index) override;
void UnmapIndexBuffer(u32 used_index_count) override;
void PushUniformBuffer(const void* data, u32 data_size) override;
void SetFramebuffer(GPUFramebuffer* fb) override;
void SetPipeline(GPUPipeline* pipeline) override;
void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler) override;
void SetViewport(s32 x, s32 y, s32 width, s32 height) override;
void SetScissor(s32 x, s32 y, s32 width, s32 height) override;
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
bool GetHostRefreshRate(float* refresh_rate) override;
bool SetGPUTimingEnabled(bool enabled) override;
@ -186,63 +211,44 @@ public:
void SetVSync(bool enabled) override;
bool Render(bool skip_present) override;
bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, std::vector<u32>* out_pixels,
u32* out_stride, GPUTexture::Format* out_format) override;
void UnbindFramebuffer(D3D11Framebuffer* fb);
void UnbindPipeline(D3D11Pipeline* pl);
void UnbindTexture(D3D11Texture* tex);
static AdapterAndModeList StaticGetAdapterAndModeList();
private:
using RasterizationStateMap = std::unordered_map<u8, ComPtr<ID3D11RasterizerState>>;
using DepthStateMap = std::unordered_map<u8, ComPtr<ID3D11DepthStencilState>>;
using BlendStateMap = std::unordered_map<u32, ComPtr<ID3D11BlendState>>;
using BlendStateMap = std::unordered_map<u64, ComPtr<ID3D11BlendState>>;
using InputLayoutMap =
std::unordered_map<GPUPipeline::InputLayout, ComPtr<ID3D11InputLayout>, GPUPipeline::InputLayoutHash>;
static constexpr u32 DISPLAY_UNIFORM_BUFFER_SIZE = 64;
static constexpr u32 IMGUI_VERTEX_BUFFER_SIZE = 4 * 1024 * 1024;
static constexpr u32 IMGUI_INDEX_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr u32 PUSH_UNIFORM_BUFFER_SIZE = 64;
static constexpr u32 VERTEX_BUFFER_SIZE = 8 * 1024 * 1024;
static constexpr u32 INDEX_BUFFER_SIZE = 4 * 1024 * 1024;
static constexpr u8 NUM_TIMESTAMP_QUERIES = 3;
static AdapterAndModeList GetAdapterAndModeList(IDXGIFactory* dxgi_factory);
void CommitClear(GPUTexture* t);
void PreDrawCheck();
bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format);
void DestroyStagingBuffer();
bool CreateResources() override;
void DestroyResources() override;
bool CreateImGuiResources();
void DestroyImGuiResources();
bool CreateSwapChain(const DXGI_MODE_DESC* fullscreen_mode);
bool CreateSwapChainRTV();
bool CreateBuffers();
void DestroyBuffers();
ComPtr<ID3D11RasterizerState> GetRasterizationState(const GPUPipeline::RasterizationState& rs);
ComPtr<ID3D11DepthStencilState> GetDepthState(const GPUPipeline::DepthState& ds);
ComPtr<ID3D11BlendState> GetBlendState(const GPUPipeline::BlendState& bs);
ComPtr<ID3D11InputLayout> GetInputLayout(const GPUPipeline::InputLayout& il, const D3D11Shader* vs);
void RenderDisplay();
void RenderSoftwareCursor();
void RenderImGui();
void RenderDisplay(s32 left, s32 top, s32 width, s32 height, D3D11Texture* texture, s32 texture_view_x,
s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter);
void RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture_handle);
struct PostProcessingStage
{
ComPtr<ID3D11VertexShader> vertex_shader;
ComPtr<ID3D11PixelShader> pixel_shader;
D3D11Texture output_texture;
u32 uniforms_size;
};
bool CheckPostProcessingRenderTargets(u32 target_width, u32 target_height);
void ApplyPostProcessingChain(ID3D11RenderTargetView* final_target, s32 final_left, s32 final_top, s32 final_width,
s32 final_height, D3D11Texture* texture, s32 texture_view_x, s32 texture_view_y,
s32 texture_view_width, s32 texture_view_height, u32 target_width, u32 target_height);
bool CreateTimestampQueries();
void DestroyTimestampQueries();
void PopTimestampQuery();
@ -250,28 +256,17 @@ private:
ComPtr<ID3D11Device> m_device;
ComPtr<ID3D11DeviceContext> m_context;
ComPtr<ID3DUserDefinedAnnotation> m_annotation;
ComPtr<IDXGIFactory> m_dxgi_factory;
ComPtr<IDXGISwapChain> m_swap_chain;
ComPtr<ID3D11RenderTargetView> m_swap_chain_rtv;
ComPtr<ID3D11RasterizerState> m_display_rasterizer_state;
ComPtr<ID3D11DepthStencilState> m_display_depth_stencil_state;
ComPtr<ID3D11BlendState> m_display_blend_state;
ComPtr<ID3D11BlendState> m_software_cursor_blend_state;
ComPtr<ID3D11VertexShader> m_display_vertex_shader;
ComPtr<ID3D11PixelShader> m_display_pixel_shader;
ComPtr<ID3D11PixelShader> m_display_alpha_pixel_shader;
ComPtr<ID3D11SamplerState> m_point_sampler;
ComPtr<ID3D11SamplerState> m_linear_sampler;
ComPtr<ID3D11SamplerState> m_border_sampler;
RasterizationStateMap m_rasterization_states;
DepthStateMap m_depth_states;
BlendStateMap m_blend_states;
InputLayoutMap m_input_layouts;
D3D11::StreamBuffer m_display_uniform_buffer;
ComPtr<ID3D11Texture2D> m_readback_staging_texture;
DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN;
u32 m_readback_staging_texture_width = 0;
@ -281,18 +276,12 @@ private:
bool m_using_flip_model_swap_chain = true;
bool m_using_allow_tearing = false;
D3D11Texture m_imgui_texture;
D3D11::StreamBuffer m_imgui_vertex_buffer;
D3D11::StreamBuffer m_imgui_index_buffer;
ComPtr<ID3D11InputLayout> m_imgui_input_layout;
ComPtr<ID3D11VertexShader> m_imgui_vertex_shader;
ComPtr<ID3D11PixelShader> m_imgui_pixel_shader;
ComPtr<ID3D11BlendState> m_imgui_blend_state;
D3D11::StreamBuffer m_vertex_buffer;
D3D11::StreamBuffer m_index_buffer;
D3D11::StreamBuffer m_push_uniform_buffer;
FrontendCommon::PostProcessingChain m_post_processing_chain;
D3D11Texture m_post_processing_input_texture;
std::vector<PostProcessingStage> m_post_processing_stages;
Common::Timer m_post_processing_timer;
D3D11Framebuffer* m_current_framebuffer = nullptr;
D3D11Pipeline* m_current_pipeline = nullptr;
std::array<std::array<ComPtr<ID3D11Query>, 3>, NUM_TIMESTAMP_QUERIES> m_timestamp_queries = {};
u8 m_read_timestamp_query = 0;

View File

@ -94,7 +94,7 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32
const u32 srnum = D3D11CalcSubresource(level, layer, m_levels);
D3D11_MAPPED_SUBRESOURCE sr;
HRESULT hr = D3D11Device::GetD3DContext()->Map(m_texture.Get(), srnum,
discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE, 0, &sr);
discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_READ_WRITE, 0, &sr);
if (FAILED(hr))
{
Log_ErrorPrintf("Map pixels texture failed: %08X", hr);

View File

@ -7,8 +7,12 @@
#include <d3d11.h>
#include <wrl/client.h>
class D3D11Device;
class D3D11Texture final : public GPUTexture
{
friend D3D11Device;
public:
template<typename T>
using ComPtr = Microsoft::WRL::ComPtr<T>;

View File

@ -617,56 +617,6 @@ bool D3D12GPUDevice::Render(bool skip_present)
return true;
}
// Renders the current display image into an offscreen RGBA8 render target of width x height,
// copies it into the readback staging texture and reads the pixels back into out_pixels.
// On success *out_stride receives the row pitch in bytes and *out_format the pixel format.
// Returns false if the render target or staging texture cannot be created, or if the readback fails.
bool D3D12GPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect,
std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format)
{
static constexpr DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM;
static constexpr GPUTexture::Format hdformat = GPUTexture::Format::RGBA8;
D3D12::Texture render_texture;
if (!render_texture.Create(width, height, 1, 1, 1, format, DXGI_FORMAT_UNKNOWN, format, DXGI_FORMAT_UNKNOWN,
D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ||
!m_readback_staging_texture.EnsureSize(width, height, format, false))
{
return false;
}
ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList();
// With a display texture and a non-empty post-processing chain, the chain renders into the target.
if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty())
{
ApplyPostProcessingChain(cmdlist, &render_texture, draw_rect.left, draw_rect.top, draw_rect.GetWidth(),
draw_rect.GetHeight(), static_cast<D3D12::Texture*>(m_display_texture),
m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width,
m_display_texture_view_height, width, height);
}
else
{
// Otherwise clear the target and, if there is a display texture, draw it directly.
render_texture.TransitionToState(D3D12_RESOURCE_STATE_RENDER_TARGET);
cmdlist->ClearRenderTargetView(render_texture.GetRTVOrDSVDescriptor(), s_clear_color.data(), 0, nullptr);
cmdlist->OMSetRenderTargets(1, &render_texture.GetRTVOrDSVDescriptor().cpu_handle, FALSE, nullptr);
if (HasDisplayTexture())
{
RenderDisplay(cmdlist, draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(),
static_cast<D3D12::Texture*>(m_display_texture), m_display_texture_view_x, m_display_texture_view_y,
m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering());
}
}
// Unbind the render target before using the texture as a copy source.
cmdlist->OMSetRenderTargets(0, nullptr, FALSE, nullptr);
render_texture.TransitionToState(D3D12_RESOURCE_STATE_COPY_SOURCE);
m_readback_staging_texture.CopyFromTexture(render_texture, 0, 0, 0, 0, 0, width, height);
// One u32 per pixel, tightly packed rows.
const u32 stride = sizeof(u32) * width;
out_pixels->resize(width * height);
*out_stride = stride;
*out_format = hdformat;
return m_readback_staging_texture.ReadPixels(0, 0, width, height, out_pixels->data(), stride);
}
bool D3D12GPUDevice::SetGPUTimingEnabled(bool enabled)
{
g_d3d12_context->SetEnableGPUTiming(enabled);

View File

@ -63,8 +63,6 @@ public:
void SetVSync(bool enabled) override;
bool Render(bool skip_present) override;
bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, std::vector<u32>* out_pixels,
u32* out_stride, GPUTexture::Format* out_format) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;

View File

@ -3,11 +3,13 @@
#include "gpu_device.h"
#include "../settings.h"
#include "../shadergen.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/file_system.h"
#include "common/hash_combine.h"
#include "common/log.h"
#include "common/path.h"
#include "common/string_util.h"
#include "common/timer.h"
#include "imgui.h"
@ -23,10 +25,50 @@ Log_SetChannel(GPUDevice);
// FIXME
#include "common/windows_headers.h"
#include "d3d_shaders.h"
// TODO: default sampler mode, create a persistent descriptor set in Vulkan for textures
// TODO: input layout => VAO in GL, buffer might change
// TODO: one big lookup table for render passes, or dynamic rendering
std::unique_ptr<GPUDevice> g_host_display;
// Stores the render target texture, depth texture and dimensions passed in; no other work is done here.
GPUFramebuffer::GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height)
: m_rt(rt), m_ds(ds), m_width(width), m_height(height)
{
}
GPUFramebuffer::~GPUFramebuffer() = default;
GPUSampler::GPUSampler() = default;
GPUSampler::~GPUSampler() = default;
// Records which pipeline stage this shader belongs to.
GPUShader::GPUShader(GPUShaderStage stage) : m_stage(stage)
{
}
GPUShader::~GPUShader() = default;
// Returns a human-readable name for a shader stage.
// Any value other than Vertex/Fragment/Compute is treated as a programming error.
const char* GPUShader::GetStageName(GPUShaderStage stage)
{
  if (stage == GPUShaderStage::Vertex)
    return "Vertex";

  if (stage == GPUShaderStage::Fragment)
    return "Fragment";

  if (stage == GPUShaderStage::Compute)
    return "Compute";

  UnreachableCode();
  return "";
}
GPUPipeline::GPUPipeline() = default;
GPUPipeline::~GPUPipeline() = default;
size_t GPUPipeline::InputLayoutHash::operator()(const InputLayout& il) const
{
std::size_t h = 0;
@ -96,7 +138,11 @@ GPUPipeline::BlendState GPUPipeline::BlendState::GetAlphaBlendingState()
return ret;
}
GPUDevice::~GPUDevice() = default;
// Closes the shader cache when the device object is destroyed.
GPUDevice::~GPUDevice()
{
// TODO: move to Destroy() method
m_shader_cache.Close();
}
RenderAPI GPUDevice::GetPreferredAPI()
{
@ -107,84 +153,191 @@ RenderAPI GPUDevice::GetPreferredAPI()
#endif
}
bool GPUDevice::SetupDevice()
{
// TODO: option to disable shader cache
if (true)
{
const std::string basename = GetShaderCacheBaseName("shaders", g_settings.gpu_use_debug_device);
const std::string filename = Path::Combine(EmuFolders::Cache, basename);
if (!m_shader_cache.Open(filename.c_str()))
Log_WarningPrintf("Failed to open shader cache.");
}
else
{
Log_WarningPrintf("Shader cache is disabled.");
}
return true;
}
bool GPUDevice::CreateResources()
{
GPUSampler::Config config = {};
config.address_u = GPUSampler::AddressMode::ClampToEdge;
config.address_v = GPUSampler::AddressMode::ClampToEdge;
config.address_w = GPUSampler::AddressMode::ClampToEdge;
config.min_filter = GPUSampler::Filter::Nearest;
config.mag_filter = GPUSampler::Filter::Nearest;
if (!(m_point_sampler = CreateSampler(config)))
GPUSampler::Config spconfig = {};
spconfig.address_u = GPUSampler::AddressMode::ClampToEdge;
spconfig.address_v = GPUSampler::AddressMode::ClampToEdge;
spconfig.address_w = GPUSampler::AddressMode::ClampToEdge;
spconfig.min_filter = GPUSampler::Filter::Nearest;
spconfig.mag_filter = GPUSampler::Filter::Nearest;
if (!(m_point_sampler = CreateSampler(spconfig)))
return false;
config.min_filter = GPUSampler::Filter::Linear;
config.mag_filter = GPUSampler::Filter::Linear;
if (!(m_linear_sampler = CreateSampler(config)))
spconfig.min_filter = GPUSampler::Filter::Linear;
spconfig.mag_filter = GPUSampler::Filter::Linear;
if (!(m_linear_sampler = CreateSampler(spconfig)))
return false;
if (!CreateImGuiResources())
spconfig.mag_filter = GPUSampler::Filter::Nearest;
spconfig.mag_filter = GPUSampler::Filter::Nearest;
spconfig.address_u = GPUSampler::AddressMode::ClampToBorder;
spconfig.address_v = GPUSampler::AddressMode::ClampToBorder;
spconfig.border_color = 0xFF000000u;
if (!(m_border_sampler = CreateSampler(spconfig)))
return false;
ShaderGen shadergen(GetRenderAPI(), /*FIXME DSB*/ true);
GPUPipeline::GraphicsConfig plconfig;
plconfig.layout = GPUPipeline::Layout::SingleTexture;
plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.color_format = GPUTexture::Format::RGBA8; // FIXME m_window_info.surface_format;
plconfig.depth_format = GPUTexture::Format::Unknown;
plconfig.samples = 1;
plconfig.per_sample_shading = false;
std::unique_ptr<GPUShader> display_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GenerateDisplayVertexShader());
std::unique_ptr<GPUShader> display_fs =
CreateShader(GPUShaderStage::Fragment, shadergen.GenerateDisplayFragmentShader(true));
std::unique_ptr<GPUShader> cursor_fs =
CreateShader(GPUShaderStage::Fragment, shadergen.GenerateDisplayFragmentShader(false));
if (!display_vs || !display_fs || !cursor_fs)
return false;
GL_OBJECT_NAME(display_vs, "Display Vertex Shader");
GL_OBJECT_NAME(display_fs, "Display Fragment Shader");
GL_OBJECT_NAME(cursor_fs, "Cursor Fragment Shader");
plconfig.vertex_shader = display_vs.get();
plconfig.pixel_shader = display_fs.get();
if (!(m_display_pipeline = CreatePipeline(plconfig)))
return false;
GL_OBJECT_NAME(m_display_pipeline, "Display Pipeline");
plconfig.blend = GPUPipeline::BlendState::GetAlphaBlendingState();
plconfig.pixel_shader = cursor_fs.get();
if (!(m_cursor_pipeline = CreatePipeline(plconfig)))
return false;
GL_OBJECT_NAME(m_cursor_pipeline, "Cursor Pipeline");
std::unique_ptr<GPUShader> imgui_vs = CreateShader(GPUShaderStage::Vertex, shadergen.GenerateImGuiVertexShader());
std::unique_ptr<GPUShader> imgui_fs = CreateShader(GPUShaderStage::Fragment, shadergen.GenerateImGuiFragmentShader());
if (!imgui_vs || !imgui_fs)
return false;
GL_OBJECT_NAME(imgui_vs, "ImGui Vertex Shader");
GL_OBJECT_NAME(imgui_fs, "ImGui Fragment Shader");
static constexpr GPUPipeline::VertexAttribute imgui_attributes[] = {
GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(ImDrawVert, pos)),
GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(ImDrawVert, uv)),
GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(ImDrawVert, col)),
};
plconfig.input_layout.vertex_attributes = imgui_attributes;
plconfig.input_layout.vertex_stride = sizeof(ImDrawVert);
plconfig.vertex_shader = imgui_vs.get();
plconfig.pixel_shader = imgui_fs.get();
m_imgui_pipeline = CreatePipeline(plconfig);
if (!m_imgui_pipeline)
{
Log_ErrorPrintf("Failed to compile ImGui pipeline.");
return false;
}
GL_OBJECT_NAME(m_imgui_pipeline, "ImGui Pipeline");
return true;
}
void GPUDevice::DestroyResources()
{
DestroyImGuiResources();
m_cursor_texture.reset();
m_imgui_font_texture.reset();
m_imgui_pipeline.reset();
m_cursor_pipeline.reset();
m_display_pipeline.reset();
m_imgui_pipeline.reset();
m_linear_sampler.reset();
m_point_sampler.reset();
m_shader_cache.Close();
}
bool GPUDevice::CreateImGuiResources()
bool GPUDevice::SetPostProcessingChain(const std::string_view& config)
{
std::unique_ptr<GPUShader> imgui_vs = CreateShaderFromBinary(GPUShader::Stage::Vertex, s_imgui_vs_bytecode);
std::unique_ptr<GPUShader> imgui_ps = CreateShaderFromBinary(GPUShader::Stage::Pixel, s_imgui_ps_bytecode);
if (!imgui_vs || !imgui_ps)
{
Log_ErrorPrintf("Failed to create ImGui shaders.");
return false;
}
return false;
}
static constexpr GPUPipeline::VertexAttribute attributes[] = {
GPUPipeline::VertexAttribute::Make(GPUPipeline::VertexAttribute::Semantic::Position, 0,
GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(ImDrawVert, pos)),
GPUPipeline::VertexAttribute::Make(GPUPipeline::VertexAttribute::Semantic::Texcoord, 0,
GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(ImDrawVert, uv)),
GPUPipeline::VertexAttribute::Make(GPUPipeline::VertexAttribute::Semantic::Color, 0,
GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(ImDrawVert, col)),
// Base-class placeholder: panics with "Not implemented" when called.
// D3D11Device declares an override of this function in its header.
std::string GPUDevice::GetShaderCacheBaseName(const std::string_view& type, bool debug) const
{
Panic("Not implemented");
return {};
}
void GPUDevice::RenderImGui()
{
GL_SCOPE("RenderImGui");
ImGui::Render();
const ImDrawData* draw_data = ImGui::GetDrawData();
if (draw_data->CmdListsCount == 0)
return;
SetPipeline(m_imgui_pipeline.get());
SetViewportAndScissor(0, 0, m_window_info.surface_width, m_window_info.surface_height);
const float L = 0.0f;
const float R = static_cast<float>(m_window_info.surface_width);
const float T = 0.0f;
const float B = static_cast<float>(m_window_info.surface_height);
const float ortho_projection[4][4] = {
{2.0f / (R - L), 0.0f, 0.0f, 0.0f},
{0.0f, 2.0f / (T - B), 0.0f, 0.0f},
{0.0f, 0.0f, 0.5f, 0.0f},
{(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f},
};
PushUniformBuffer(ortho_projection, sizeof(ortho_projection));
GPUPipeline::GraphicsConfig config;
config.layout = GPUPipeline::Layout::SingleTexture;
config.primitive = GPUPipeline::Primitive::Triangles;
config.input_layout.vertex_attributes = attributes;
config.input_layout.vertex_stride = sizeof(ImDrawVert);
config.vertex_shader = imgui_vs.get();
config.pixel_shader = imgui_ps.get();
config.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
config.depth = GPUPipeline::DepthState::GetNoTestsState();
config.blend = GPUPipeline::BlendState::GetAlphaBlendingState();
config.color_format = GPUTexture::Format::RGBA8; // FIXME m_window_info.surface_format;
config.depth_format = GPUTexture::Format::Unknown;
config.samples = 1;
config.per_sample_shading = false;
m_imgui_pipeline = CreatePipeline(config);
if (!m_imgui_pipeline)
// Render command lists
for (int n = 0; n < draw_data->CmdListsCount; n++)
{
Log_ErrorPrintf("Failed to compile ImGui pipeline.");
return false;
const ImDrawList* cmd_list = draw_data->CmdLists[n];
static_assert(sizeof(ImDrawIdx) == sizeof(DrawIndex));
u32 base_vertex, base_index;
UploadVertexBuffer(cmd_list->VtxBuffer.Data, sizeof(ImDrawVert), cmd_list->VtxBuffer.Size, &base_vertex);
UploadIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size, &base_index);
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
{
const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i];
DebugAssert(!pcmd->UserCallback);
if (pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.x)
continue;
SetScissor(static_cast<s32>(pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.y),
static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x),
static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y));
SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get());
DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset);
}
}
return true;
}
// Releases the ImGui font texture.
void GPUDevice::DestroyImGuiResources()
{
m_imgui_font_texture.reset();
}
void GPUDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex)
@ -193,13 +346,13 @@ void GPUDevice::MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_pt
UnreachableCode();
}
void GPUDevice::UnmapVertexBuffer(u32 used_vertex_count)
void GPUDevice::UnmapVertexBuffer(u32 vertex_size, u32 vertex_count)
{
// TODO: REMOVE ME
UnreachableCode();
}
void GPUDevice::MapIndexBuffer(u32 index_count, u16** map_ptr, u32* map_space, u32* map_base_index)
void GPUDevice::MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index)
{
// TODO: REMOVE ME
UnreachableCode();
@ -217,7 +370,7 @@ void GPUDevice::UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 ve
u32 space;
MapVertexBuffer(vertex_size, vertex_count, &map, &space, base_vertex);
std::memcpy(map, vertices, vertex_size * vertex_count);
UnmapVertexBuffer(vertex_count);
UnmapVertexBuffer(vertex_size, vertex_count);
}
void GPUDevice::UploadIndexBuffer(const u16* indices, u32 index_count, u32* base_index)
@ -253,22 +406,23 @@ void GPUDevice::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sam
UnreachableCode();
}
void GPUDevice::SetViewport(u32 x, u32 y, u32 width, u32 height)
void GPUDevice::SetViewport(s32 x, s32 y, s32 width, s32 height)
{
// TODO: REMOVE ME
// GL needs to invert if writing to the window framebuffer
UnreachableCode();
}
// Base-class placeholder: reaching this implementation is treated as a programming error.
// D3D11Device declares an override of SetScissor in its header.
void GPUDevice::SetScissor(s32 x, s32 y, s32 width, s32 height)
{
// TODO: REMOVE ME
UnreachableCode();
}
void GPUDevice::SetScissor(u32 x, u32 y, u32 width, u32 height)
void GPUDevice::SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height)
{
// TODO: REMOVE ME
UnreachableCode();
}
void GPUDevice::SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height)
{
// TODO: REMOVE ME
UnreachableCode();
SetViewport(x, y, width, height);
SetScissor(x, y, width, height);
}
void GPUDevice::Draw(u32 base_vertex, u32 vertex_count)
@ -298,14 +452,29 @@ void GPUDevice::ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32
UnreachableCode();
}
std::unique_ptr<GPUShader> GPUDevice::CreateShaderFromBinary(GPUShader::Stage stage, gsl::span<const u8> data)
// Records the clear colour c on texture t via SetClearColor(); no draw or clear command is issued here.
// NOTE(review): presumably the backend applies the clear later (D3D11 declares CommitClear) - confirm.
void GPUDevice::ClearRenderTarget(GPUTexture* t, u32 c)
{
t->SetClearColor(c);
}
// Records the clear depth d on texture t via SetClearDepth(); no clear command is issued here.
void GPUDevice::ClearDepth(GPUTexture* t, float d)
{
t->SetClearDepth(d);
}
// Marks texture t as being in the Invalidated state.
void GPUDevice::InvalidateRenderTarget(GPUTexture* t)
{
t->SetState(GPUTexture::State::Invalidated);
}
// Base-class placeholder: reaching this implementation is treated as a programming error.
// D3D11Device declares an override of CreateShaderFromBinary in its header.
std::unique_ptr<GPUShader> GPUDevice::CreateShaderFromBinary(GPUShaderStage stage, gsl::span<const u8> data)
{
// TODO: REMOVE ME
UnreachableCode();
return {};
}
std::unique_ptr<GPUShader> GPUDevice::CreateShaderFromSource(GPUShader::Stage stage, const std::string_view& source,
std::unique_ptr<GPUShader> GPUDevice::CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source,
std::vector<u8>* out_binary /* = nullptr */)
{
// TODO: REMOVE ME
@ -320,6 +489,18 @@ std::unique_ptr<GPUPipeline> GPUDevice::CreatePipeline(const GPUPipeline::Graphi
return {};
}
// Default implementation does nothing; the format string and arguments are ignored.
void GPUDevice::PushDebugGroup(const char* fmt, ...)
{
}
// Default implementation does nothing.
void GPUDevice::PopDebugGroup()
{
}
// Default implementation does nothing; the format string and arguments are ignored.
void GPUDevice::InsertDebugMessage(const char* fmt, ...)
{
}
std::unique_ptr<GPUSampler> GPUDevice::CreateSampler(const GPUSampler::Config& config)
{
// TODO: REMOVE ME
@ -335,6 +516,42 @@ std::unique_ptr<GPUFramebuffer> GPUDevice::CreateFramebuffer(GPUTexture* rt, u32
return {};
}
// Creates a shader for the given stage from source, going through the shader cache when it is open.
// On a cache hit the stored binary is used; if that binary is rejected the whole cache is cleared and
// the shader is recompiled from source. Freshly compiled, non-empty binaries are inserted into the
// cache, and the cache is closed if the insert fails. Returns null if compilation fails.
std::unique_ptr<GPUShader> GPUDevice::CreateShader(GPUShaderStage stage, const std::string_view& source)
{
  // No cache available: compile directly.
  if (!m_shader_cache.IsOpen())
    return CreateShaderFromSource(stage, source);

  const GPUShaderCache::CacheIndexKey key = m_shader_cache.GetCacheKey(stage, source, "main");
  std::vector<u8> bytecode;

  if (m_shader_cache.Lookup(key, &bytecode))
  {
    if (std::unique_ptr<GPUShader> cached = CreateShaderFromBinary(stage, bytecode))
      return cached;

    Log_ErrorPrintf("Failed to create shader from binary (driver changed?). Clearing cache.");
    m_shader_cache.Clear();
  }

  // Miss (or rejected binary): compile from source and capture the resulting binary.
  bytecode.clear();
  std::unique_ptr<GPUShader> compiled = CreateShaderFromSource(stage, source, &bytecode);

  // Don't insert empty shaders into the cache...
  const bool cacheable = compiled && !bytecode.empty();
  if (cacheable && !m_shader_cache.Insert(key, bytecode.data(), static_cast<u32>(bytecode.size())))
    m_shader_cache.Close();

  return compiled;
}
bool GPUDevice::ParseFullscreenMode(const std::string_view& mode, u32* width, u32* height, float* refresh_rate)
{
if (!mode.empty())
@ -457,6 +674,50 @@ void GPUDevice::ThrottlePresentation()
Common::Timer::SleepUntil(m_last_frame_displayed_time, false);
}
// Drops the current display texture, zeroes its view rectangle and flags the display as changed.
void GPUDevice::ClearDisplayTexture()
{
  m_display_texture = nullptr;

  m_display_texture_view_x = 0;
  m_display_texture_view_y = 0;

  m_display_texture_view_width = 0;
  m_display_texture_view_height = 0;

  m_display_changed = true;
}
// Makes texture the current display texture with the given view rectangle.
// The texture must be non-null; it is prepared for sampling before being stored.
void GPUDevice::SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height)
{
  DebugAssert(texture);
  texture->MakeReadyForSampling();
  m_display_texture = texture;

  // Stores the view rectangle and sets m_display_changed.
  SetDisplayTextureRect(view_x, view_y, view_width, view_height);
}
// Updates the view rectangle used with the current display texture and flags the display as changed.
// The display texture itself is left untouched.
void GPUDevice::SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height)
{
m_display_texture_view_x = view_x;
m_display_texture_view_y = view_y;
m_display_texture_view_width = view_width;
m_display_texture_view_height = view_height;
m_display_changed = true;
}
// Stores the display size, the active area within it and the display aspect ratio,
// then flags the display as changed.
void GPUDevice::SetDisplayParameters(s32 display_width, s32 display_height, s32 active_left, s32 active_top,
s32 active_width, s32 active_height, float display_aspect_ratio)
{
m_display_width = display_width;
m_display_height = display_height;
m_display_active_left = active_left;
m_display_active_top = active_top;
m_display_active_width = active_width;
m_display_active_height = active_height;
m_display_aspect_ratio = display_aspect_ratio;
m_display_changed = true;
}
bool GPUDevice::GetHostRefreshRate(float* refresh_rate)
{
if (m_window_info.surface_refresh_rate > 0.0f)
@ -480,6 +741,9 @@ float GPUDevice::GetAndResetAccumulatedGPUTime()
// Takes ownership of texture as the software cursor image and stores its scale.
// A null texture is accepted; in that case only the stored pointer and scale are updated.
void GPUDevice::SetSoftwareCursor(std::unique_ptr<GPUTexture> texture, float scale /*= 1.0f*/)
{
// Prepare the texture for sampling before it is stored.
if (texture)
texture->MakeReadyForSampling();
m_cursor_texture = std::move(texture);
m_cursor_texture_scale = scale;
}
@ -535,6 +799,119 @@ bool GPUDevice::IsUsingLinearFiltering() const
return g_settings.display_linear_filtering;
}
/// Renders the current display into an off-screen width x height render target and reads it back.
/// @param width,height Size of the temporary render target, and of the returned image, in pixels.
/// @param draw_rect Rectangle inside the target that the display texture is drawn into.
/// @param out_pixels Receives the pixel data; resized to width * height elements.
/// @param out_stride Receives the row pitch in bytes. Only written on success.
/// @param out_format Receives the pixel format of the data. Only written on success.
/// @return false if the texture/framebuffer could not be created or the download failed.
bool GPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect,
std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format)
{
static constexpr GPUTexture::Format hdformat = GPUTexture::Format::RGBA8; // TODO FIXME m_window_info.surface_format
std::unique_ptr<GPUTexture> render_texture =
CreateTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, hdformat);
if (!render_texture)
return false;
std::unique_ptr<GPUFramebuffer> render_fb = CreateFramebuffer(render_texture.get());
if (!render_fb)
return false;
// Start from a zero-cleared target so areas outside draw_rect have a defined value.
ClearRenderTarget(render_texture.get(), 0);
SetFramebuffer(render_fb.get());
if (HasDisplayTexture())
{
// The post-processing path is compiled out; only the plain display blit below is active.
#if 0
if (!m_post_processing_chain.IsEmpty())
{
ApplyPostProcessingChain(render_texture.GetD3DRTV(), draw_rect.left, draw_rect.top, draw_rect.GetWidth(),
draw_rect.GetHeight(), static_cast<D3D11Texture*>(m_display_texture),
m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width,
m_display_texture_view_height, width, height);
}
else
#endif
{
RenderDisplay(draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(), m_display_texture,
m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width,
m_display_texture_view_height, IsUsingLinearFiltering());
}
}
// Unbind the temporary framebuffer before it goes out of scope at the end of this function.
SetFramebuffer(nullptr);
const u32 stride = GPUTexture::GetPixelSize(hdformat) * width;
// NOTE(review): one u32 per pixel only matches `stride` while hdformat is 4 bytes per pixel - revisit
// together with the TODO above if the format ever follows the window surface.
out_pixels->resize(width * height);
if (!DownloadTexture(render_texture.get(), 0, 0, width, height, out_pixels->data(), stride))
return false;
*out_stride = stride;
*out_format = hdformat;
return true;
}
/// Draws the current display texture into the window, using the draw rectangle computed for the
/// current window size. Does nothing when no display texture is set.
void GPUDevice::RenderDisplay()
{
  const auto [left, top, width, height] = CalculateDrawRect(GetWindowWidth(), GetWindowHeight());

  // The format string is "<width>x<height> at <left>,<top>", so the arguments go in that order.
  GL_SCOPE("RenderDisplay: %dx%d at %d,%d", width, height, left, top);

#if 0
  if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty())
  {
    ApplyPostProcessingChain(m_swap_chain_rtv.Get(), left, top, width, height,
                             static_cast<D3D11Texture*>(m_display_texture), m_display_texture_view_x,
                             m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height,
                             GetWindowWidth(), GetWindowHeight());
    return;
  }
#endif

  if (!HasDisplayTexture())
    return;

  RenderDisplay(left, top, width, height, m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
                m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering());
}
/// Draws a sub-rectangle of `texture` into the given viewport using the display pipeline.
/// @param left,top,width,height Destination viewport/scissor rectangle.
/// @param texture Source texture; must not be null (its dimensions are used as divisors).
/// @param texture_view_x,texture_view_y,texture_view_width,texture_view_height Source rectangle, in texels.
/// @param linear_filter Selects the linear sampler (and the matching half-texel inset) instead of the point sampler.
void GPUDevice::RenderDisplay(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture, s32 texture_view_x,
                              s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter)
{
  SetPipeline(m_display_pipeline.get());
  SetTextureSampler(0, texture, linear_filter ? m_linear_sampler.get() : m_point_sampler.get());

  // The inset must follow the sampler actually bound above, i.e. the `linear_filter` argument, not the
  // global setting: with linear sampling the source rect is pulled in by half a texel on each side.
  const float position_adjust = linear_filter ? 0.5f : 0.0f;
  const float size_adjust = linear_filter ? 1.0f : 0.0f;

  // Source rectangle (x, y, width, height) normalised by the texture dimensions.
  const float tex_width = static_cast<float>(texture->GetWidth());
  const float tex_height = static_cast<float>(texture->GetHeight());
  const float uniforms[4] = {(static_cast<float>(texture_view_x) + position_adjust) / tex_width,
                             (static_cast<float>(texture_view_y) + position_adjust) / tex_height,
                             (static_cast<float>(texture_view_width) - size_adjust) / tex_width,
                             (static_cast<float>(texture_view_height) - size_adjust) / tex_height};
  PushUniformBuffer(uniforms, sizeof(uniforms));

  SetViewportAndScissor(left, top, width, height);
  Draw(3, 0);
}
/// Draws the software cursor at its computed rectangle, if a cursor texture is installed.
void GPUDevice::RenderSoftwareCursor()
{
  if (HasSoftwareCursor())
  {
    const auto [cursor_x, cursor_y, cursor_w, cursor_h] = CalculateSoftwareCursorDrawRect();
    RenderSoftwareCursor(cursor_x, cursor_y, cursor_w, cursor_h, m_cursor_texture.get());
  }
}
/// Draws `texture` into the given viewport with the linear sampler.
/// @param left,top,width,height Destination viewport/scissor rectangle.
/// @param texture Cursor image to draw.
void GPUDevice::RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture)
{
  // Same uniform slot RenderDisplay() fills with its normalised source rect; presumably
  // (0, 0, 1, 1) selects the entire texture.
  static constexpr float whole_texture[4] = {0.0f, 0.0f, 1.0f, 1.0f};

  // NOTE(review): this binds m_display_pipeline although the class also declares m_cursor_pipeline -
  // confirm which one is intended here.
  SetPipeline(m_display_pipeline.get());
  SetTextureSampler(0, texture, m_linear_sampler.get());
  PushUniformBuffer(whole_texture, sizeof(whole_texture));

  SetViewportAndScissor(left, top, width, height);
  Draw(3, 0);
}
void GPUDevice::CalculateDrawRect(s32 window_width, s32 window_height, float* out_left, float* out_top,
float* out_width, float* out_height, float* out_left_padding, float* out_top_padding,
float* out_scale, float* out_x_scale, bool apply_aspect_ratio /* = true */) const

View File

@ -3,6 +3,7 @@
#pragma once
#include "gpu_shader_cache.h"
#include "gpu_texture.h"
#include "common/bitfield.h"
@ -31,10 +32,22 @@ enum class RenderAPI : u32
// Abstract description of a render destination: an optional colour target, an optional
// depth/stencil target, and the dimensions of the framebuffer.
// NOTE(review): both the defaulted and the user-declared constructor/destructor appear below;
// this looks like leftover from a merge - confirm which pair is intended.
class GPUFramebuffer
{
public:
GPUFramebuffer() = default;
virtual ~GPUFramebuffer() = default;
GPUFramebuffer(GPUTexture* rt, GPUTexture* ds, u32 width, u32 height);
virtual ~GPUFramebuffer();
// Simple accessors for the attachments and size recorded in the members below.
ALWAYS_INLINE GPUTexture* GetRT() const { return m_rt; }
ALWAYS_INLINE GPUTexture* GetDS() const { return m_ds; }
ALWAYS_INLINE u32 GetWidth() const { return m_width; }
ALWAYS_INLINE u32 GetHeight() const { return m_height; }
// Backend-specific: attaches a human-readable name for debugging tools.
virtual void SetDebugName(const std::string_view& name) = 0;
protected:
// Raw pointers: presumably non-owning, with the textures outliving the framebuffer - TODO confirm.
GPUTexture* m_rt;
GPUTexture* m_ds;
u32 m_width;
u32 m_height;
};
class GPUSampler
@ -57,7 +70,7 @@ public:
MaxCount
};
struct Config
union Config
{
BitField<u64, Filter, 0, 1> min_filter;
BitField<u64, Filter, 1, 1> mag_filter;
@ -72,31 +85,33 @@ public:
u64 key;
};
GPUSampler() = default;
virtual ~GPUSampler() = default;
GPUSampler();
virtual ~GPUSampler();
virtual void SetDebugName(const std::string_view& name) = 0;
};
// Pipeline stage a shader is compiled for. Declared at namespace scope with a fixed u8 underlying
// type, which lets gpu_shader_cache.h forward-declare it.
enum class GPUShaderStage : u8
{
Vertex,
Fragment,
Compute
};
// Abstract base for a compiled shader object; remembers which stage it was built for.
// NOTE(review): declarations using the nested `Stage` enum and the namespace-level `GPUShaderStage`
// are both present (two constructors, two destructors, two GetStage(), two m_stage); this looks like
// leftover from a merge - confirm that only the GPUShaderStage variants should remain.
class GPUShader
{
public:
enum class Stage
{
Vertex,
Pixel,
Compute
};
GPUShader(GPUShaderStage stage);
virtual ~GPUShader();
GPUShader(Stage stage) : m_stage(stage) {}
virtual ~GPUShader() = default;
// Returns a printable name for the stage; used in the shader cache's log messages.
static const char* GetStageName(GPUShaderStage stage);
ALWAYS_INLINE Stage GetStage() const { return m_stage; }
ALWAYS_INLINE GPUShaderStage GetStage() const { return m_stage; }
// Backend-specific: attaches a human-readable name for debugging tools.
virtual void SetDebugName(const std::string_view& name) = 0;
protected:
Stage m_stage;
GPUShaderStage m_stage;
};
class GPUPipeline
@ -107,6 +122,9 @@ public:
// 128 byte UBO via push constants, 1 texture.
SingleTexture,
// 1 streamed UBO, 1 texture in PS.
HWBatch,
MaxCount
};
@ -122,14 +140,7 @@ public:
union VertexAttribute
{
enum class Semantic : u8
{
Position,
Texcoord,
Color,
MaxCount
};
static constexpr u32 MaxAttributes = 16;
enum class Type : u8
{
@ -146,11 +157,10 @@ public:
MaxCount
};
BitField<u32, Semantic, 0, 3> semantic;
BitField<u32, u8, 4, 8> semantic_index;
BitField<u32, Type, 12, 4> type;
BitField<u32, u8, 16, 3> components;
BitField<u32, u8, 19, 8> offset;
BitField<u32, u8, 0, 4> index;
BitField<u32, Type, 4, 4> type;
BitField<u32, u8, 8, 3> components;
BitField<u32, u16, 16, 16> offset;
u32 key;
// clang-format off
@ -160,20 +170,18 @@ public:
ALWAYS_INLINE bool operator<(const VertexAttribute& rhs) const { return key < rhs.key; }
// clang-format on
static constexpr VertexAttribute Make(Semantic semantic, u8 semantic_index, Type type, u8 components, u8 offset)
static constexpr VertexAttribute Make(u8 index, Type type, u8 components, u8 offset)
{
VertexAttribute ret = {};
#if 0
ret.semantic = semantic;
ret.semantic_index = semantic_index;
ret.index = index;
ret.type = type;
ret.components = components;
ret.offset = offset;
#else
// Nasty :/ can't access an inactive element of a union here..
ret.key = (static_cast<u32>(semantic) & 0x7) | ((static_cast<u32>(semantic_index) & 0xff) << 4) |
((static_cast<u32>(type) & 0xf) << 12) | ((static_cast<u32>(components) & 0x7) << 16) |
((static_cast<u32>(offset) & 0xff) << 19);
ret.key = (static_cast<u32>(index) & 0xf) | ((static_cast<u32>(type) & 0xf) << 4) |
((static_cast<u32>(components) & 0x7) << 8) | ((static_cast<u32>(offset) & 0xffff) << 16);
#endif
return ret;
}
@ -229,6 +237,8 @@ public:
InvSrcAlpha1,
DstAlpha,
InvDstAlpha,
ConstantColor,
InvConstantColor,
MaxCount
};
@ -259,7 +269,7 @@ public:
static RasterizationState GetNoCullState();
};
struct DepthState
union DepthState
{
BitField<u8, DepthFunc, 0, 3> depth_test;
BitField<u8, bool, 4, 1> depth_write;
@ -276,21 +286,22 @@ public:
static DepthState GetAlwaysWriteState();
};
struct BlendState
union BlendState
{
BitField<u32, bool, 0, 1> enable;
BitField<u32, BlendFunc, 1, 4> src_blend;
BitField<u32, BlendFunc, 5, 4> src_alpha_blend;
BitField<u32, BlendFunc, 9, 4> dst_blend;
BitField<u32, BlendFunc, 13, 4> dst_alpha_blend;
BitField<u32, BlendOp, 17, 3> blend_op;
BitField<u32, BlendOp, 20, 3> alpha_blend_op;
BitField<u32, bool, 24, 1> write_r;
BitField<u32, bool, 25, 1> write_g;
BitField<u32, bool, 26, 1> write_b;
BitField<u32, bool, 27, 1> write_a;
BitField<u32, u8, 24, 4> write_mask;
u32 key;
BitField<u64, bool, 0, 1> enable;
BitField<u64, BlendFunc, 1, 4> src_blend;
BitField<u64, BlendFunc, 5, 4> src_alpha_blend;
BitField<u64, BlendFunc, 9, 4> dst_blend;
BitField<u64, BlendFunc, 13, 4> dst_alpha_blend;
BitField<u64, BlendOp, 17, 3> blend_op;
BitField<u64, BlendOp, 20, 3> alpha_blend_op;
BitField<u64, bool, 24, 1> write_r;
BitField<u64, bool, 25, 1> write_g;
BitField<u64, bool, 26, 1> write_b;
BitField<u64, bool, 27, 1> write_a;
BitField<u64, u8, 24, 4> write_mask;
BitField<u64, u32, 32, 32> constant;
u64 key;
// clang-format off
ALWAYS_INLINE BlendState& operator=(const BlendState& rhs) { key = rhs.key; return *this; }
@ -323,8 +334,8 @@ public:
bool per_sample_shading;
};
GPUPipeline() = default;
virtual ~GPUPipeline() = default;
GPUPipeline();
virtual ~GPUPipeline();
virtual void SetDebugName(const std::string_view& name) = 0;
};
@ -332,6 +343,9 @@ public:
class GPUDevice
{
public:
// TODO: drop virtuals
using DrawIndex = u16;
struct AdapterAndModeList
{
std::vector<std::string> adapter_names;
@ -354,6 +368,9 @@ public:
ALWAYS_INLINE s32 GetWindowHeight() const { return static_cast<s32>(m_window_info.surface_height); }
ALWAYS_INLINE float GetWindowScale() const { return m_window_info.surface_scale; }
ALWAYS_INLINE GPUSampler* GetLinearSampler() const { return m_linear_sampler.get(); }
ALWAYS_INLINE GPUSampler* GetPointSampler() const { return m_point_sampler.get(); }
// Position is relative to the top-left corner of the window.
ALWAYS_INLINE s32 GetMousePositionX() const { return m_mouse_position_x; }
ALWAYS_INLINE s32 GetMousePositionY() const { return m_mouse_position_y; }
@ -377,7 +394,7 @@ public:
virtual bool HasSurface() const = 0;
virtual bool CreateDevice(const WindowInfo& wi, bool vsync) = 0;
virtual bool SetupDevice() = 0;
virtual bool SetupDevice();
virtual bool MakeCurrent() = 0;
virtual bool DoneCurrent() = 0;
virtual void DestroySurface() = 0;
@ -389,35 +406,13 @@ public:
virtual bool CreateResources();
virtual void DestroyResources();
virtual bool SetPostProcessingChain(const std::string_view& config) = 0;
virtual bool SetPostProcessingChain(const std::string_view& config);
virtual std::string GetShaderCacheBaseName(const std::string_view& type, bool debug) const;
/// Call when the window size changes externally to recreate any resources.
virtual void ResizeWindow(s32 new_window_width, s32 new_window_height) = 0;
/// Vertex/index buffer abstraction.
virtual void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex);
virtual void UnmapVertexBuffer(u32 used_vertex_count);
virtual void MapIndexBuffer(u32 index_count, u16** map_ptr, u32* map_space, u32* map_base_index);
virtual void UnmapIndexBuffer(u32 used_index_count);
void UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex);
void UploadIndexBuffer(const u16* indices, u32 index_count, u32* base_index);
/// Uniform buffer abstraction.
virtual void PushUniformBuffer(const void* data, u32 data_size);
/// Drawing setup abstraction.
virtual void SetFramebuffer(GPUFramebuffer* fb);
virtual void SetPipeline(GPUPipeline* pipeline);
virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler);
virtual void SetViewport(u32 x, u32 y, u32 width, u32 height);
virtual void SetScissor(u32 x, u32 y, u32 width, u32 height);
void SetViewportAndScissor(u32 x, u32 y, u32 width, u32 height);
// Drawing abstraction.
virtual void Draw(u32 base_vertex, u32 vertex_count);
virtual void DrawIndexed(u32 base_index, u32 index_count, u32 base_vertex);
/// Creates an abstracted RGBA8 texture. If dynamic, the texture can be updated with UpdateTexture() below.
virtual std::unique_ptr<GPUTexture> CreateTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples,
GPUTexture::Type type, GPUTexture::Format format,
@ -432,23 +427,55 @@ public:
virtual void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,
GPUTexture* src, u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width,
u32 height);
void ClearRenderTarget(GPUTexture* t, u32 c);
void ClearDepth(GPUTexture* t, float d);
void InvalidateRenderTarget(GPUTexture* t);
/// Framebuffer abstraction.
virtual std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt, u32 rt_layer, u32 rt_level, GPUTexture* ds,
u32 ds_layer, u32 ds_level);
virtual std::unique_ptr<GPUFramebuffer> CreateFramebuffer(GPUTexture* rt = nullptr, u32 rt_layer = 0,
u32 rt_level = 0, GPUTexture* ds = nullptr,
u32 ds_layer = 0, u32 ds_level = 0);
/// Shader abstraction.
virtual std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShader::Stage stage, gsl::span<const u8> data);
virtual std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShader::Stage stage, const std::string_view& source,
std::vector<u8>* out_binary = nullptr);
// TODO: entry point?
std::unique_ptr<GPUShader> CreateShader(GPUShaderStage stage, const std::string_view& source);
virtual std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config);
/// Debug messaging.
virtual void PushDebugGroup(const char* fmt, ...);
virtual void PopDebugGroup();
virtual void InsertDebugMessage(const char* fmt, ...);
/// Vertex/index buffer abstraction.
virtual void MapVertexBuffer(u32 vertex_size, u32 vertex_count, void** map_ptr, u32* map_space, u32* map_base_vertex);
virtual void UnmapVertexBuffer(u32 vertex_size, u32 vertex_count);
virtual void MapIndexBuffer(u32 index_count, DrawIndex** map_ptr, u32* map_space, u32* map_base_index);
virtual void UnmapIndexBuffer(u32 used_size);
void UploadVertexBuffer(const void* vertices, u32 vertex_size, u32 vertex_count, u32* base_vertex);
void UploadIndexBuffer(const DrawIndex* indices, u32 index_count, u32* base_index);
/// Uniform buffer abstraction.
virtual void PushUniformBuffer(const void* data, u32 data_size);
/// Drawing setup abstraction.
virtual void SetFramebuffer(GPUFramebuffer* fb);
virtual void SetPipeline(GPUPipeline* pipeline);
virtual void SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* sampler);
virtual void SetViewport(s32 x, s32 y, s32 width, s32 height);
virtual void SetScissor(s32 x, s32 y, s32 width, s32 height);
void SetViewportAndScissor(s32 x, s32 y, s32 width, s32 height);
// Drawing abstraction.
virtual void Draw(u32 vertex_count, u32 base_vertex);
virtual void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex);
/// Returns false if the window was completely occluded.
virtual bool Render(bool skip_present) = 0;
/// Renders the display with postprocessing to the specified image.
virtual bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect,
std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format) = 0;
bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, std::vector<u32>* out_pixels,
u32* out_stride, GPUTexture::Format* out_format);
ALWAYS_INLINE bool IsVsyncEnabled() const { return m_vsync_enabled; }
virtual void SetVSync(bool enabled) = 0;
@ -459,47 +486,11 @@ public:
bool ShouldSkipDisplayingFrame();
void ThrottlePresentation();
void ClearDisplayTexture()
{
m_display_texture = nullptr;
m_display_texture_view_x = 0;
m_display_texture_view_y = 0;
m_display_texture_view_width = 0;
m_display_texture_view_height = 0;
m_display_changed = true;
}
void SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height)
{
m_display_texture = texture;
m_display_texture_view_x = view_x;
m_display_texture_view_y = view_y;
m_display_texture_view_width = view_width;
m_display_texture_view_height = view_height;
m_display_changed = true;
}
void SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height)
{
m_display_texture_view_x = view_x;
m_display_texture_view_y = view_y;
m_display_texture_view_width = view_width;
m_display_texture_view_height = view_height;
m_display_changed = true;
}
void ClearDisplayTexture();
void SetDisplayTexture(GPUTexture* texture, s32 view_x, s32 view_y, s32 view_width, s32 view_height);
void SetDisplayTextureRect(s32 view_x, s32 view_y, s32 view_width, s32 view_height);
void SetDisplayParameters(s32 display_width, s32 display_height, s32 active_left, s32 active_top, s32 active_width,
s32 active_height, float display_aspect_ratio)
{
m_display_width = display_width;
m_display_height = display_height;
m_display_active_left = active_left;
m_display_active_top = active_top;
m_display_active_width = active_width;
m_display_active_height = active_height;
m_display_aspect_ratio = display_aspect_ratio;
m_display_changed = true;
}
s32 active_height, float display_aspect_ratio);
virtual bool SupportsTextureFormat(GPUTexture::Format format) const = 0;
@ -548,6 +539,10 @@ public:
bool WriteScreenshotToFile(std::string filename, bool internal_resolution = false, bool compress_on_thread = false);
protected:
virtual std::unique_ptr<GPUShader> CreateShaderFromBinary(GPUShaderStage stage, gsl::span<const u8> data);
virtual std::unique_ptr<GPUShader> CreateShaderFromSource(GPUShaderStage stage, const std::string_view& source,
std::vector<u8>* out_binary = nullptr);
ALWAYS_INLINE bool HasSoftwareCursor() const { return static_cast<bool>(m_cursor_texture); }
ALWAYS_INLINE bool HasDisplayTexture() const { return (m_display_texture != nullptr); }
@ -560,13 +555,22 @@ protected:
std::tuple<s32, s32, s32, s32> CalculateSoftwareCursorDrawRect() const;
std::tuple<s32, s32, s32, s32> CalculateSoftwareCursorDrawRect(s32 cursor_x, s32 cursor_y) const;
bool CreateImGuiResources();
void DestroyImGuiResources();
void RenderImGui();
void RenderDisplay();
void RenderSoftwareCursor();
void RenderDisplay(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture, s32 texture_view_x,
s32 texture_view_y, s32 texture_view_width, s32 texture_view_height, bool linear_filter);
void RenderSoftwareCursor(s32 left, s32 top, s32 width, s32 height, GPUTexture* texture);
WindowInfo m_window_info;
GPUShaderCache m_shader_cache;
std::unique_ptr<GPUSampler> m_point_sampler;
std::unique_ptr<GPUSampler> m_linear_sampler;
std::unique_ptr<GPUSampler> m_border_sampler;
u64 m_last_frame_displayed_time = 0;
@ -582,6 +586,7 @@ protected:
float m_display_aspect_ratio = 1.0f;
float m_display_frame_interval = 0.0f;
std::unique_ptr<GPUPipeline> m_display_pipeline;
GPUTexture* m_display_texture = nullptr;
s32 m_display_texture_view_x = 0;
s32 m_display_texture_view_y = 0;
@ -591,6 +596,7 @@ protected:
std::unique_ptr<GPUPipeline> m_imgui_pipeline;
std::unique_ptr<GPUTexture> m_imgui_font_texture;
std::unique_ptr<GPUPipeline> m_cursor_pipeline;
std::unique_ptr<GPUTexture> m_cursor_texture;
float m_cursor_texture_scale = 1.0f;
@ -622,3 +628,24 @@ void ReleaseHostDisplay();
void RenderDisplay(bool skip_present);
void InvalidateDisplay();
} // namespace Host
// Macros for debug messages.
#ifdef _DEBUG
// Scope guard used by GL_SCOPE(): the macro pushes a debug group while constructing the guard and
// the destructor pops it again when the enclosing scope ends.
struct GLAutoPop
{
  // The argument only exists so GL_SCOPE() can evaluate PushDebugGroup() inside the initialiser.
  explicit GLAutoPop(int /*dummy*/) {}
  ~GLAutoPop() { g_host_display->PopDebugGroup(); }

  // A copy would pop the same debug group twice, so the guard is neither copyable nor assignable.
  GLAutoPop(const GLAutoPop&) = delete;
  GLAutoPop& operator=(const GLAutoPop&) = delete;
};
#define GL_SCOPE(...) GLAutoPop gl_auto_pop((g_host_display->PushDebugGroup(__VA_ARGS__), 0))
#define GL_PUSH(...) g_host_display->PushDebugGroup(__VA_ARGS__)
#define GL_POP() g_host_display->PopDebugGroup()
#define GL_INS(...) g_host_display->InsertDebugMessage(__VA_ARGS__)
#define GL_OBJECT_NAME(obj, ...) (obj)->SetDebugName(StringUtil::StdStringFromFormat(__VA_ARGS__))
#else
#define GL_SCOPE(...) (void)0
#define GL_PUSH(...) (void)0
#define GL_POP() (void)0
#define GL_INS(...) (void)0
#define GL_OBJECT_NAME(...) (void)0
#endif

View File

@ -0,0 +1,290 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "gpu_shader_cache.h"
#include "../shader_cache_version.h"
#include "gpu_device.h"
#include "common/file_system.h"
#include "common/log.h"
#include "common/md5_digest.h"
#include "fmt/format.h"
Log_SetChannel(GPUShaderCache);
#pragma pack(push, 1)
// One fixed-size record of the on-disk index file (the records follow the u32 version header).
// Written verbatim with fwrite() in Insert() and read back with fread() in ReadExisting(), so the
// field order and sizes are part of the file format; the surrounding #pragma pack(1) removes padding.
struct CacheIndexEntry
{
// GPUShaderStage, widened to u32 for storage.
u32 shader_type;
// Length of the shader source that was hashed.
u32 source_length;
// 128-bit MD5 of the shader source, split into two halves.
u64 source_hash_low;
u64 source_hash_high;
// 128-bit MD5 of the entry point name, split into two halves.
u64 entry_point_low;
u64 entry_point_high;
// Location and size of the compiled binary inside the blob file.
u32 file_offset;
u32 blob_size;
};
#pragma pack(pop)
GPUShaderCache::GPUShaderCache() = default;
// Closes the index and blob files if they are still open.
GPUShaderCache::~GPUShaderCache()
{
Close();
}
// Two keys are equal only when every field matches.
bool GPUShaderCache::CacheIndexKey::operator==(const CacheIndexKey& key) const
{
  // Small scalar fields first...
  if (shader_type != key.shader_type || source_length != key.source_length)
    return false;

  // ...then the source hash...
  if (source_hash_low != key.source_hash_low || source_hash_high != key.source_hash_high)
    return false;

  // ...and finally the entry point hash.
  return (entry_point_low == key.entry_point_low && entry_point_high == key.entry_point_high);
}
// Keys differ as soon as any single field differs.
bool GPUShaderCache::CacheIndexKey::operator!=(const CacheIndexKey& key) const
{
  const bool same_source = (source_length == key.source_length && source_hash_low == key.source_hash_low &&
                            source_hash_high == key.source_hash_high);
  const bool same_entry_point = (entry_point_low == key.entry_point_low && entry_point_high == key.entry_point_high);
  const bool same_stage = (shader_type == key.shader_type);
  return !(same_source && same_entry_point && same_stage);
}
// Opens the cache stored at "<base_filename>.idx" / "<base_filename>.bin".
// An existing cache is loaded when possible; otherwise a fresh, empty one is created.
// Returns false for an empty base name, or when neither loading nor creating succeeded.
bool GPUShaderCache::Open(const std::string_view& base_filename)
{
  if (base_filename.empty())
    return false;

  m_base_filename = base_filename;

  const std::string idx_path = fmt::format("{}.idx", m_base_filename);
  const std::string bin_path = fmt::format("{}.bin", m_base_filename);

  // Short-circuit: CreateNew() only runs when the existing files could not be used.
  return ReadExisting(idx_path, bin_path) || CreateNew(idx_path, bin_path);
}
void GPUShaderCache::Close()
{
if (m_index_file)
{
std::fclose(m_index_file);
m_index_file = nullptr;
}
if (m_blob_file)
{
std::fclose(m_blob_file);
m_blob_file = nullptr;
}
}
void GPUShaderCache::Clear()
{
if (!IsOpen())
return;
Close();
Log_WarningPrintf("Clearing shader cache at %s.", m_base_filename.c_str());
const std::string index_filename = fmt::format("{}.idx", m_base_filename);
const std::string blob_filename = fmt::format("{}.bin", m_base_filename);
CreateNew(index_filename, blob_filename);
}
bool GPUShaderCache::CreateNew(const std::string& index_filename, const std::string& blob_filename)
{
if (FileSystem::FileExists(index_filename.c_str()))
{
Log_WarningPrintf("Removing existing index file '%s'", index_filename.c_str());
FileSystem::DeleteFile(index_filename.c_str());
}
if (FileSystem::FileExists(blob_filename.c_str()))
{
Log_WarningPrintf("Removing existing blob file '%s'", blob_filename.c_str());
FileSystem::DeleteFile(blob_filename.c_str());
}
m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "wb");
if (!m_index_file)
{
Log_ErrorPrintf("Failed to open index file '%s' for writing", index_filename.c_str());
return false;
}
const u32 file_version = SHADER_CACHE_VERSION;
if (std::fwrite(&file_version, sizeof(file_version), 1, m_index_file) != 1)
{
Log_ErrorPrintf("Failed to write version to index file '%s'", index_filename.c_str());
std::fclose(m_index_file);
m_index_file = nullptr;
FileSystem::DeleteFile(index_filename.c_str());
return false;
}
m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "w+b");
if (!m_blob_file)
{
Log_ErrorPrintf("Failed to open blob file '%s' for writing", blob_filename.c_str());
std::fclose(m_index_file);
m_index_file = nullptr;
FileSystem::DeleteFile(index_filename.c_str());
return false;
}
return true;
}
bool GPUShaderCache::ReadExisting(const std::string& index_filename, const std::string& blob_filename)
{
m_index_file = FileSystem::OpenCFile(index_filename.c_str(), "r+b");
if (!m_index_file)
{
// special case here: when there's a sharing violation (i.e. two instances running),
// we don't want to blow away the cache. so just continue without a cache.
if (errno == EACCES)
{
Log_WarningPrintf("Failed to open shader cache index with EACCES, are you running two instances?");
return true;
}
return false;
}
u32 file_version = 0;
if (std::fread(&file_version, sizeof(file_version), 1, m_index_file) != 1 || file_version != SHADER_CACHE_VERSION)
{
Log_ErrorPrintf("Bad file/data version in '%s'", index_filename.c_str());
std::fclose(m_index_file);
m_index_file = nullptr;
return false;
}
m_blob_file = FileSystem::OpenCFile(blob_filename.c_str(), "a+b");
if (!m_blob_file)
{
Log_ErrorPrintf("Blob file '%s' is missing", blob_filename.c_str());
std::fclose(m_index_file);
m_index_file = nullptr;
return false;
}
std::fseek(m_blob_file, 0, SEEK_END);
const u32 blob_file_size = static_cast<u32>(std::ftell(m_blob_file));
for (;;)
{
CacheIndexEntry entry;
if (std::fread(&entry, sizeof(entry), 1, m_index_file) != 1 ||
(entry.file_offset + entry.blob_size) > blob_file_size)
{
if (std::feof(m_index_file))
break;
Log_ErrorPrintf("Failed to read entry from '%s', corrupt file?", index_filename.c_str());
m_index.clear();
std::fclose(m_blob_file);
m_blob_file = nullptr;
std::fclose(m_index_file);
m_index_file = nullptr;
return false;
}
const CacheIndexKey key{static_cast<GPUShaderStage>(entry.shader_type),
entry.source_length,
entry.source_hash_low,
entry.source_hash_high,
entry.entry_point_low,
entry.entry_point_high};
const CacheIndexData data{entry.file_offset, entry.blob_size};
m_index.emplace(key, data);
}
// ensure we don't write before seeking
std::fseek(m_index_file, 0, SEEK_END);
Log_DevPrintf("Read %zu entries from '%s'", m_index.size(), index_filename.c_str());
return true;
}
// Builds the lookup key for a shader: its stage, the source length, and the MD5 digests of the
// source text and of the entry point name (each stored as two 64-bit halves).
GPUShaderCache::CacheIndexKey GPUShaderCache::GetCacheKey(GPUShaderStage stage, const std::string_view& shader_code,
                                                          const std::string_view& entry_point)
{
  // The digest is produced as 16 raw bytes and consumed as two 64-bit words.
  union
  {
    struct
    {
      u64 hash_low;
      u64 hash_high;
    };
    u8 hash[16];
  };

  const u32 code_length = static_cast<u32>(shader_code.length());
  const u32 entry_point_length = static_cast<u32>(entry_point.length());

  CacheIndexKey key = {};
  key.shader_type = stage;
  key.source_length = code_length;

  MD5Digest digest;

  // Digest of the shader source.
  digest.Update(shader_code.data(), code_length);
  digest.Final(hash);
  key.source_hash_low = hash_low;
  key.source_hash_high = hash_high;

  // Digest of the entry point name, reusing the same digest object.
  digest.Reset();
  digest.Update(entry_point.data(), entry_point_length);
  digest.Final(hash);
  key.entry_point_low = hash_low;
  key.entry_point_high = hash_high;

  return key;
}
bool GPUShaderCache::Lookup(const CacheIndexKey& key, ShaderBinary* binary)
{
auto iter = m_index.find(key);
if (iter == m_index.end())
return false;
// TODO: extra memset :/
binary->resize(iter->second.blob_size);
if (std::fseek(m_blob_file, iter->second.file_offset, SEEK_SET) != 0 ||
std::fread(binary->data(), 1, iter->second.blob_size, m_blob_file) != iter->second.blob_size)
{
Log_ErrorPrintf("Read %u byte %s shader from file failed", iter->second.blob_size,
GPUShader::GetStageName(key.shader_type));
return false;
}
return true;
}
bool GPUShaderCache::Insert(const CacheIndexKey& key, const void* data, u32 data_size)
{
if (!m_blob_file || std::fseek(m_blob_file, 0, SEEK_END) != 0)
return false;
CacheIndexData idata;
idata.file_offset = static_cast<u32>(std::ftell(m_blob_file));
idata.blob_size = data_size;
CacheIndexEntry entry = {};
entry.shader_type = static_cast<u32>(key.shader_type);
entry.source_length = key.source_length;
entry.source_hash_low = key.source_hash_low;
entry.source_hash_high = key.source_hash_high;
entry.entry_point_low = key.entry_point_low;
entry.entry_point_high = key.entry_point_high;
entry.blob_size = idata.blob_size;
entry.file_offset = idata.file_offset;
if (std::fwrite(data, 1, data_size, m_blob_file) != data_size || std::fflush(m_blob_file) != 0 ||
std::fwrite(&entry, sizeof(entry), 1, m_index_file) != 1 || std::fflush(m_index_file) != 0)
{
Log_ErrorPrintf("Failed to write %u byte %s shader blob to file", data_size,
GPUShader::GetStageName(key.shader_type));
return false;
}
Log_DevPrintf("Cached %u byte %s shader", data_size, GPUShader::GetStageName(key.shader_type));
m_index.emplace(key, idata);
return true;
}

View File

@ -0,0 +1,76 @@
// SPDX-FileCopyrightText: 2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "common/hash_combine.h"
#include "common/types.h"
#include <string_view>
#include <unordered_map>
#include <vector>
enum class GPUShaderStage : u8;
// Disk-backed cache of compiled shader binaries.
// Storage is a pair of files sharing a base name: "<base>.idx" (version header plus fixed-size
// records) and "<base>.bin" (the concatenated binaries). The index is mirrored in memory.
class GPUShaderCache
{
public:
using ShaderBinary = std::vector<u8>;
// Identifies a shader: stage, source length, and the hashes of the source and entry point.
// Field order matters: the .cpp aggregate-initialises this struct positionally.
struct CacheIndexKey
{
GPUShaderStage shader_type;
u32 source_length;
u64 source_hash_low;
u64 source_hash_high;
u64 entry_point_low;
u64 entry_point_high;
bool operator==(const CacheIndexKey& key) const;
bool operator!=(const CacheIndexKey& key) const;
};
GPUShaderCache();
~GPUShaderCache();
// True while the index file is open.
bool IsOpen() const { return (m_index_file != nullptr); }
// Loads the cache at "<base_filename>.idx/.bin", creating it if it cannot be loaded.
bool Open(const std::string_view& base_filename);
// Closes both files.
void Close();
// Computes the key for the given stage, source text and entry point name.
static CacheIndexKey GetCacheKey(GPUShaderStage stage, const std::string_view& shader_code,
const std::string_view& entry_point);
// Reads the binary stored for `key` into `binary`; returns false on a miss or read error.
bool Lookup(const CacheIndexKey& key, ShaderBinary* binary);
// Appends a binary to the cache files and the in-memory index; returns false on failure.
bool Insert(const CacheIndexKey& key, const void* data, u32 data_size);
// Recreates the cache files empty. No-op when the cache is not open.
void Clear();
private:
// Hash functor for the in-memory index; combines every field of the key.
struct CacheIndexEntryHasher
{
std::size_t operator()(const CacheIndexKey& e) const noexcept
{
std::size_t h = 0;
hash_combine(h, e.entry_point_low, e.entry_point_high, e.source_hash_low, e.source_hash_high, e.source_length,
e.shader_type);
return h;
}
};
// Where a binary lives inside the blob file.
struct CacheIndexData
{
u32 file_offset;
u32 blob_size;
};
using CacheIndex = std::unordered_map<CacheIndexKey, CacheIndexData, CacheIndexEntryHasher>;
// Deletes any existing files and starts an empty cache.
bool CreateNew(const std::string& index_filename, const std::string& blob_filename);
// Opens and validates existing files, populating m_index.
bool ReadExisting(const std::string& index_filename, const std::string& blob_filename);
// Base path the ".idx"/".bin" suffixes are appended to.
std::string m_base_filename;
std::FILE* m_index_file = nullptr;
std::FILE* m_blob_file = nullptr;
CacheIndex m_index;
};

View File

@ -145,4 +145,8 @@ void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u32>& t
std::memcpy(top_ptr, bottom_ptr, texture_data_stride);
std::memcpy(bottom_ptr, temp.data(), texture_data_stride);
}
}
}
// Base implementation does nothing; the method is virtual so a backend texture can override it.
void GPUTexture::MakeReadyForSampling()
{
}

View File

@ -4,6 +4,7 @@
#pragma once
#include "common/types.h"
#include <algorithm>
#include <array>
#include <vector>
class GPUTexture
@ -39,6 +40,19 @@ public:
Count
};
// Tracked content state of the texture. New textures start as Dirty; SetClearColor() and
// SetClearDepth() switch the state to Cleared.
// NOTE(review): presumably Cleared/Invalidated let a backend defer or skip work until the texture
// is next used - confirm against the backends.
enum class State : u8
{
Dirty,
Cleared,
Invalidated
};
// Value recorded by SetClearColor() / SetClearDepth(). Only one member is meaningful at a time:
// a packed 32-bit colour or a depth value.
union ClearValue
{
u32 color;
float depth;
};
public:
virtual ~GPUTexture();
@ -57,6 +71,38 @@ public:
ALWAYS_INLINE u32 GetMipWidth(u32 level) const { return std::max<u32>(m_width >> level, 1u); }
ALWAYS_INLINE u32 GetMipHeight(u32 level) const { return std::max<u32>(m_height >> level, 1u); }
ALWAYS_INLINE State GetState() const { return m_state; }
ALWAYS_INLINE void SetState(State state) { m_state = state; }
// True when the texture type lies in the range [RenderTarget, DepthStencil].
// NOTE(review): this is a range check, so it depends on the declaration order of Type - verify that
// no unrelated enumerator sits between the two.
ALWAYS_INLINE bool IsRenderTargetOrDepthStencil() const
{
return (m_type >= Type::RenderTarget && m_type <= Type::DepthStencil);
}
ALWAYS_INLINE bool IsRenderTarget() const { return (m_type == Type::RenderTarget); }
ALWAYS_INLINE bool IsDepthStencil() const { return (m_type == Type::DepthStencil); }
ALWAYS_INLINE bool IsTexture() const { return (m_type == Type::Texture); }
ALWAYS_INLINE u32 GetClearColor() const { return m_clear_value.color; }
ALWAYS_INLINE float GetClearDepth() const { return m_clear_value.depth; }
// Unpacks the stored 32-bit clear colour into four floats in [0, 1], taking one byte per channel
// starting from the least significant byte.
ALWAYS_INLINE std::array<float, 4> GetUNormClearColor() const
{
  const u32 packed = m_clear_value.color;
  const auto channel = [packed](u32 shift) { return static_cast<float>((packed >> shift) & 0xFF) / 255.0f; };
  return std::array<float, 4>{channel(0), channel(8), channel(16), channel(24)};
}
// Records a pending colour clear: stores the packed colour and flags the texture as Cleared.
ALWAYS_INLINE void SetClearColor(u32 color)
{
  m_clear_value.color = color;
  m_state = State::Cleared;
}
// Records a pending depth clear: stores the depth value and flags the texture as Cleared.
ALWAYS_INLINE void SetClearDepth(float depth)
{
  m_clear_value.depth = depth;
  m_state = State::Cleared;
}
static u32 GetPixelSize(GPUTexture::Format format);
static bool IsDepthFormat(GPUTexture::Format format);
@ -71,6 +117,9 @@ public:
virtual bool Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer = 0, u32 level = 0) = 0;
virtual void Unmap() = 0;
// Instructs the backend that we're finished rendering to this texture. It may transition it to a new layout.
virtual void MakeReadyForSampling();
protected:
GPUTexture();
GPUTexture(u16 width, u16 height, u8 layers, u8 levels, u8 samples, Format format);
@ -84,11 +133,7 @@ protected:
u8 m_samples = 0;
Type m_type = Type::Unknown;
Format m_format = Format::Unknown;
State m_state = State::Dirty;
// u16 m_map_x = 0;
// u16 m_map_y = 0;
// u16 m_map_width = 0;
// u16 m_map_height = 0;
// u8 m_map_layer = 0;
// u8 m_map_level = 0;
ClearValue m_clear_value = {};
};

View File

@ -684,49 +684,6 @@ bool OpenGLGPUDevice::Render(bool skip_present)
return true;
}
// Renders the current display into a temporary width x height RGBA8 texture and reads the pixels back.
//
// draw_rect is the destination rectangle for the display image inside that texture. On success, out_pixels
// holds width * height 32-bit pixels, *out_stride is the row pitch in bytes and *out_format is RGBA8.
// Returns false only when the temporary texture or its framebuffer cannot be created.
bool OpenGLGPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect,
std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format)
{
GL::Texture texture;
if (!texture.Create(width, height, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0) || !texture.CreateFramebuffer())
{
return false;
}
// The whole target must be written, so scissoring is turned off; the clear colour is opaque black.
glDisable(GL_SCISSOR_TEST);
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
if (HasDisplayTexture() && !m_post_processing_chain.IsEmpty())
{
// The Y coordinate is flipped relative to draw_rect (height - top - rect height).
// NOTE(review): presumably because GL window coordinates start at the bottom-left - confirm.
ApplyPostProcessingChain(texture.GetGLFramebufferID(), draw_rect.left,
height - draw_rect.top - draw_rect.GetHeight(), draw_rect.GetWidth(),
draw_rect.GetHeight(), static_cast<GL::Texture*>(m_display_texture),
m_display_texture_view_x, m_display_texture_view_y, m_display_texture_view_width,
m_display_texture_view_height, width, height);
}
else
{
// No post-processing: clear the target, then draw the display directly if there is one.
texture.BindFramebuffer(GL_FRAMEBUFFER);
glClear(GL_COLOR_BUFFER_BIT);
if (HasDisplayTexture())
{
RenderDisplay(draw_rect.left, height - draw_rect.top - draw_rect.GetHeight(), draw_rect.GetWidth(),
draw_rect.GetHeight(), static_cast<GL::Texture*>(m_display_texture), m_display_texture_view_x,
m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height,
IsUsingLinearFiltering());
}
}
// One u32 per pixel, tightly packed rows.
out_pixels->resize(width * height);
*out_stride = sizeof(u32) * width;
*out_format = GPUTexture::Format::RGBA8;
// NOTE(review): this reads from whichever framebuffer is bound at this point; on the post-processing path that
// depends on what ApplyPostProcessingChain() leaves bound - confirm.
glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, out_pixels->data());
glBindFramebuffer(GL_FRAMEBUFFER, 0);
return true;
}
void OpenGLGPUDevice::RenderImGui()
{
ImGui::Render();

View File

@ -52,8 +52,6 @@ public:
void SetVSync(bool enabled) override;
bool Render(bool skip_present) override;
bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, std::vector<u32>* out_pixels,
u32* out_stride, GPUTexture::Format* out_format) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;

View File

@ -7,14 +7,10 @@
extern "C" {
#endif
// We abuse the preprocessor here to only need to specify function names once.
// Function names are prefixed so as not to conflict with system symbols at runtime.
#define VULKAN_MODULE_ENTRY_POINT(name, required) extern PFN_##name ds_##name;
#define VULKAN_INSTANCE_ENTRY_POINT(name, required) extern PFN_##name ds_##name;
#define VULKAN_DEVICE_ENTRY_POINT(name, required) extern PFN_##name ds_##name;
#define VULKAN_DEFINE_NAME_PREFIX ds_
#define VULKAN_MODULE_ENTRY_POINT(name, required) extern PFN_##name name;
#define VULKAN_INSTANCE_ENTRY_POINT(name, required) extern PFN_##name name;
#define VULKAN_DEVICE_ENTRY_POINT(name, required) extern PFN_##name name;
#include "entry_points.inl"
#undef VULKAN_DEFINE_NAME_PREFIX
#undef VULKAN_DEVICE_ENTRY_POINT
#undef VULKAN_INSTANCE_ENTRY_POINT
#undef VULKAN_MODULE_ENTRY_POINT
@ -22,200 +18,3 @@ extern "C" {
#ifdef __cplusplus
}
#endif
#define vkCreateInstance ds_vkCreateInstance
#define vkGetInstanceProcAddr ds_vkGetInstanceProcAddr
#define vkEnumerateInstanceExtensionProperties ds_vkEnumerateInstanceExtensionProperties
#define vkEnumerateInstanceLayerProperties ds_vkEnumerateInstanceLayerProperties
#define vkEnumerateInstanceVersion ds_vkEnumerateInstanceVersion
#define vkGetDeviceProcAddr ds_vkGetDeviceProcAddr
#define vkDestroyInstance ds_vkDestroyInstance
#define vkEnumeratePhysicalDevices ds_vkEnumeratePhysicalDevices
#define vkGetPhysicalDeviceFeatures ds_vkGetPhysicalDeviceFeatures
#define vkGetPhysicalDeviceFormatProperties ds_vkGetPhysicalDeviceFormatProperties
#define vkGetPhysicalDeviceImageFormatProperties ds_vkGetPhysicalDeviceImageFormatProperties
#define vkGetPhysicalDeviceProperties ds_vkGetPhysicalDeviceProperties
#define vkGetPhysicalDeviceQueueFamilyProperties ds_vkGetPhysicalDeviceQueueFamilyProperties
#define vkGetPhysicalDeviceMemoryProperties ds_vkGetPhysicalDeviceMemoryProperties
#define vkCreateDevice ds_vkCreateDevice
#define vkEnumerateDeviceExtensionProperties ds_vkEnumerateDeviceExtensionProperties
#define vkEnumerateDeviceLayerProperties ds_vkEnumerateDeviceLayerProperties
#define vkGetPhysicalDeviceSparseImageFormatProperties ds_vkGetPhysicalDeviceSparseImageFormatProperties
#define vkDestroySurfaceKHR ds_vkDestroySurfaceKHR
#define vkGetPhysicalDeviceSurfaceSupportKHR ds_vkGetPhysicalDeviceSurfaceSupportKHR
#define vkGetPhysicalDeviceSurfaceCapabilitiesKHR ds_vkGetPhysicalDeviceSurfaceCapabilitiesKHR
#define vkGetPhysicalDeviceSurfaceFormatsKHR ds_vkGetPhysicalDeviceSurfaceFormatsKHR
#define vkGetPhysicalDeviceSurfacePresentModesKHR ds_vkGetPhysicalDeviceSurfacePresentModesKHR
#define vkCreateWin32SurfaceKHR ds_vkCreateWin32SurfaceKHR
#define vkGetPhysicalDeviceWin32PresentationSupportKHR ds_vkGetPhysicalDeviceWin32PresentationSupportKHR
#define vkCreateXlibSurfaceKHR ds_vkCreateXlibSurfaceKHR
#define vkGetPhysicalDeviceXlibPresentationSupportKHR ds_vkGetPhysicalDeviceXlibPresentationSupportKHR
#define vkCreateWaylandSurfaceKHR ds_vkCreateWaylandSurfaceKHR
#define vkCreateAndroidSurfaceKHR ds_vkCreateAndroidSurfaceKHR
#define vkCreateMacOSSurfaceMVK ds_vkCreateMacOSSurfaceMVK
#define vkCreateMetalSurfaceEXT ds_vkCreateMetalSurfaceEXT
// VK_EXT_debug_utils
#define vkCmdBeginDebugUtilsLabelEXT ds_vkCmdBeginDebugUtilsLabelEXT
#define vkCmdEndDebugUtilsLabelEXT ds_vkCmdEndDebugUtilsLabelEXT
#define vkCmdInsertDebugUtilsLabelEXT ds_vkCmdInsertDebugUtilsLabelEXT
#define vkCreateDebugUtilsMessengerEXT ds_vkCreateDebugUtilsMessengerEXT
#define vkDestroyDebugUtilsMessengerEXT ds_vkDestroyDebugUtilsMessengerEXT
#define vkQueueBeginDebugUtilsLabelEXT ds_vkQueueBeginDebugUtilsLabelEXT
#define vkQueueEndDebugUtilsLabelEXT ds_vkQueueEndDebugUtilsLabelEXT
#define vkQueueInsertDebugUtilsLabelEXT ds_vkQueueInsertDebugUtilsLabelEXT
#define vkSetDebugUtilsObjectNameEXT ds_vkSetDebugUtilsObjectNameEXT
#define vkSetDebugUtilsObjectTagEXT ds_vkSetDebugUtilsObjectTagEXT
#define vkSubmitDebugUtilsMessageEXT ds_vkSubmitDebugUtilsMessageEXT
#define vkGetPhysicalDeviceSurfaceCapabilities2KHR ds_vkGetPhysicalDeviceSurfaceCapabilities2KHR
#define vkGetPhysicalDeviceDisplayPropertiesKHR ds_vkGetPhysicalDeviceDisplayPropertiesKHR
#define vkGetPhysicalDeviceDisplayPlanePropertiesKHR ds_vkGetPhysicalDeviceDisplayPlanePropertiesKHR
#define vkGetDisplayPlaneSupportedDisplaysKHR ds_vkGetDisplayPlaneSupportedDisplaysKHR
#define vkGetDisplayModePropertiesKHR ds_vkGetDisplayModePropertiesKHR
#define vkCreateDisplayModeKHR ds_vkCreateDisplayModeKHR
#define vkGetDisplayPlaneCapabilitiesKHR ds_vkGetDisplayPlaneCapabilitiesKHR
#define vkCreateDisplayPlaneSurfaceKHR ds_vkCreateDisplayPlaneSurfaceKHR
// Vulkan 1.1 functions.
#define vkGetPhysicalDeviceFeatures2 ds_vkGetPhysicalDeviceFeatures2
#define vkGetPhysicalDeviceProperties2 ds_vkGetPhysicalDeviceProperties2
#define vkGetPhysicalDeviceMemoryProperties2 ds_vkGetPhysicalDeviceMemoryProperties2
#define vkDestroyDevice ds_vkDestroyDevice
#define vkGetDeviceQueue ds_vkGetDeviceQueue
#define vkQueueSubmit ds_vkQueueSubmit
#define vkQueueWaitIdle ds_vkQueueWaitIdle
#define vkDeviceWaitIdle ds_vkDeviceWaitIdle
#define vkAllocateMemory ds_vkAllocateMemory
#define vkFreeMemory ds_vkFreeMemory
#define vkMapMemory ds_vkMapMemory
#define vkUnmapMemory ds_vkUnmapMemory
#define vkFlushMappedMemoryRanges ds_vkFlushMappedMemoryRanges
#define vkInvalidateMappedMemoryRanges ds_vkInvalidateMappedMemoryRanges
#define vkGetDeviceMemoryCommitment ds_vkGetDeviceMemoryCommitment
#define vkBindBufferMemory ds_vkBindBufferMemory
#define vkBindImageMemory ds_vkBindImageMemory
#define vkGetBufferMemoryRequirements ds_vkGetBufferMemoryRequirements
#define vkGetImageMemoryRequirements ds_vkGetImageMemoryRequirements
#define vkGetImageSparseMemoryRequirements ds_vkGetImageSparseMemoryRequirements
#define vkQueueBindSparse ds_vkQueueBindSparse
#define vkCreateFence ds_vkCreateFence
#define vkDestroyFence ds_vkDestroyFence
#define vkResetFences ds_vkResetFences
#define vkGetFenceStatus ds_vkGetFenceStatus
#define vkWaitForFences ds_vkWaitForFences
#define vkCreateSemaphore ds_vkCreateSemaphore
#define vkDestroySemaphore ds_vkDestroySemaphore
#define vkCreateEvent ds_vkCreateEvent
#define vkDestroyEvent ds_vkDestroyEvent
#define vkGetEventStatus ds_vkGetEventStatus
#define vkSetEvent ds_vkSetEvent
#define vkResetEvent ds_vkResetEvent
#define vkCreateQueryPool ds_vkCreateQueryPool
#define vkDestroyQueryPool ds_vkDestroyQueryPool
#define vkGetQueryPoolResults ds_vkGetQueryPoolResults
#define vkCreateBuffer ds_vkCreateBuffer
#define vkDestroyBuffer ds_vkDestroyBuffer
#define vkCreateBufferView ds_vkCreateBufferView
#define vkDestroyBufferView ds_vkDestroyBufferView
#define vkCreateImage ds_vkCreateImage
#define vkDestroyImage ds_vkDestroyImage
#define vkGetImageSubresourceLayout ds_vkGetImageSubresourceLayout
#define vkCreateImageView ds_vkCreateImageView
#define vkDestroyImageView ds_vkDestroyImageView
#define vkCreateShaderModule ds_vkCreateShaderModule
#define vkDestroyShaderModule ds_vkDestroyShaderModule
#define vkCreatePipelineCache ds_vkCreatePipelineCache
#define vkDestroyPipelineCache ds_vkDestroyPipelineCache
#define vkGetPipelineCacheData ds_vkGetPipelineCacheData
#define vkMergePipelineCaches ds_vkMergePipelineCaches
#define vkCreateGraphicsPipelines ds_vkCreateGraphicsPipelines
#define vkCreateComputePipelines ds_vkCreateComputePipelines
#define vkDestroyPipeline ds_vkDestroyPipeline
#define vkCreatePipelineLayout ds_vkCreatePipelineLayout
#define vkDestroyPipelineLayout ds_vkDestroyPipelineLayout
#define vkCreateSampler ds_vkCreateSampler
#define vkDestroySampler ds_vkDestroySampler
#define vkCreateDescriptorSetLayout ds_vkCreateDescriptorSetLayout
#define vkDestroyDescriptorSetLayout ds_vkDestroyDescriptorSetLayout
#define vkCreateDescriptorPool ds_vkCreateDescriptorPool
#define vkDestroyDescriptorPool ds_vkDestroyDescriptorPool
#define vkResetDescriptorPool ds_vkResetDescriptorPool
#define vkAllocateDescriptorSets ds_vkAllocateDescriptorSets
#define vkFreeDescriptorSets ds_vkFreeDescriptorSets
#define vkUpdateDescriptorSets ds_vkUpdateDescriptorSets
#define vkCreateFramebuffer ds_vkCreateFramebuffer
#define vkDestroyFramebuffer ds_vkDestroyFramebuffer
#define vkCreateRenderPass ds_vkCreateRenderPass
#define vkDestroyRenderPass ds_vkDestroyRenderPass
#define vkGetRenderAreaGranularity ds_vkGetRenderAreaGranularity
#define vkCreateCommandPool ds_vkCreateCommandPool
#define vkDestroyCommandPool ds_vkDestroyCommandPool
#define vkResetCommandPool ds_vkResetCommandPool
#define vkAllocateCommandBuffers ds_vkAllocateCommandBuffers
#define vkFreeCommandBuffers ds_vkFreeCommandBuffers
#define vkBeginCommandBuffer ds_vkBeginCommandBuffer
#define vkEndCommandBuffer ds_vkEndCommandBuffer
#define vkResetCommandBuffer ds_vkResetCommandBuffer
#define vkCmdBindPipeline ds_vkCmdBindPipeline
#define vkCmdSetViewport ds_vkCmdSetViewport
#define vkCmdSetScissor ds_vkCmdSetScissor
#define vkCmdSetLineWidth ds_vkCmdSetLineWidth
#define vkCmdSetDepthBias ds_vkCmdSetDepthBias
#define vkCmdSetBlendConstants ds_vkCmdSetBlendConstants
#define vkCmdSetDepthBounds ds_vkCmdSetDepthBounds
#define vkCmdSetStencilCompareMask ds_vkCmdSetStencilCompareMask
#define vkCmdSetStencilWriteMask ds_vkCmdSetStencilWriteMask
#define vkCmdSetStencilReference ds_vkCmdSetStencilReference
#define vkCmdBindDescriptorSets ds_vkCmdBindDescriptorSets
#define vkCmdBindIndexBuffer ds_vkCmdBindIndexBuffer
#define vkCmdBindVertexBuffers ds_vkCmdBindVertexBuffers
#define vkCmdDraw ds_vkCmdDraw
#define vkCmdDrawIndexed ds_vkCmdDrawIndexed
#define vkCmdDrawIndirect ds_vkCmdDrawIndirect
#define vkCmdDrawIndexedIndirect ds_vkCmdDrawIndexedIndirect
#define vkCmdDispatch ds_vkCmdDispatch
#define vkCmdDispatchIndirect ds_vkCmdDispatchIndirect
#define vkCmdCopyBuffer ds_vkCmdCopyBuffer
#define vkCmdCopyImage ds_vkCmdCopyImage
#define vkCmdBlitImage ds_vkCmdBlitImage
#define vkCmdCopyBufferToImage ds_vkCmdCopyBufferToImage
#define vkCmdCopyImageToBuffer ds_vkCmdCopyImageToBuffer
#define vkCmdUpdateBuffer ds_vkCmdUpdateBuffer
#define vkCmdFillBuffer ds_vkCmdFillBuffer
#define vkCmdClearColorImage ds_vkCmdClearColorImage
#define vkCmdClearDepthStencilImage ds_vkCmdClearDepthStencilImage
#define vkCmdClearAttachments ds_vkCmdClearAttachments
#define vkCmdResolveImage ds_vkCmdResolveImage
#define vkCmdSetEvent ds_vkCmdSetEvent
#define vkCmdResetEvent ds_vkCmdResetEvent
#define vkCmdWaitEvents ds_vkCmdWaitEvents
#define vkCmdPipelineBarrier ds_vkCmdPipelineBarrier
#define vkCmdBeginQuery ds_vkCmdBeginQuery
#define vkCmdEndQuery ds_vkCmdEndQuery
#define vkCmdResetQueryPool ds_vkCmdResetQueryPool
#define vkCmdWriteTimestamp ds_vkCmdWriteTimestamp
#define vkCmdCopyQueryPoolResults ds_vkCmdCopyQueryPoolResults
#define vkCmdPushConstants ds_vkCmdPushConstants
#define vkCmdBeginRenderPass ds_vkCmdBeginRenderPass
#define vkCmdNextSubpass ds_vkCmdNextSubpass
#define vkCmdEndRenderPass ds_vkCmdEndRenderPass
#define vkCmdExecuteCommands ds_vkCmdExecuteCommands
#define vkCreateSwapchainKHR ds_vkCreateSwapchainKHR
#define vkDestroySwapchainKHR ds_vkDestroySwapchainKHR
#define vkGetSwapchainImagesKHR ds_vkGetSwapchainImagesKHR
#define vkAcquireNextImageKHR ds_vkAcquireNextImageKHR
#define vkQueuePresentKHR ds_vkQueuePresentKHR
// Vulkan 1.1 functions.
#define vkGetBufferMemoryRequirements2 ds_vkGetBufferMemoryRequirements2
#define vkGetImageMemoryRequirements2 ds_vkGetImageMemoryRequirements2
#define vkBindBufferMemory2 ds_vkBindBufferMemory2
#define vkBindImageMemory2 ds_vkBindImageMemory2
#ifdef SUPPORTS_VULKAN_EXCLUSIVE_FULLSCREEN
#define vkAcquireFullScreenExclusiveModeEXT ds_vkAcquireFullScreenExclusiveModeEXT
#define vkReleaseFullScreenExclusiveModeEXT ds_vkReleaseFullScreenExclusiveModeEXT
#endif

View File

@ -22,9 +22,9 @@
extern "C" {
#define VULKAN_MODULE_ENTRY_POINT(name, required) PFN_##name ds_##name;
#define VULKAN_INSTANCE_ENTRY_POINT(name, required) PFN_##name ds_##name;
#define VULKAN_DEVICE_ENTRY_POINT(name, required) PFN_##name ds_##name;
#define VULKAN_MODULE_ENTRY_POINT(name, required) PFN_##name name;
#define VULKAN_INSTANCE_ENTRY_POINT(name, required) PFN_##name name;
#define VULKAN_DEVICE_ENTRY_POINT(name, required) PFN_##name name;
#include "entry_points.inl"
#undef VULKAN_DEVICE_ENTRY_POINT
#undef VULKAN_INSTANCE_ENTRY_POINT
@ -34,9 +34,9 @@ extern "C" {
namespace Vulkan {
void ResetVulkanLibraryFunctionPointers()
{
#define VULKAN_MODULE_ENTRY_POINT(name, required) ds_##name = nullptr;
#define VULKAN_INSTANCE_ENTRY_POINT(name, required) ds_##name = nullptr;
#define VULKAN_DEVICE_ENTRY_POINT(name, required) ds_##name = nullptr;
#define VULKAN_MODULE_ENTRY_POINT(name, required) name = nullptr;
#define VULKAN_INSTANCE_ENTRY_POINT(name, required) name = nullptr;
#define VULKAN_DEVICE_ENTRY_POINT(name, required) name = nullptr;
#include "entry_points.inl"
#undef VULKAN_DEVICE_ENTRY_POINT
#undef VULKAN_INSTANCE_ENTRY_POINT

View File

@ -680,102 +680,6 @@ bool VulkanGPUDevice::Render(bool skip_present)
return true;
}
// Renders the current display into a temporary width x height texture that uses the swap chain's format, then
// downloads the result into out_pixels.
//
// draw_rect is the destination rectangle for the display image inside that texture. On success, *out_format and
// *out_stride describe the pixel layout and row pitch (in bytes) of out_pixels; 16-bit formats pack two pixels
// per u32 element. When there is no display texture the output is filled with zeros and true is returned.
//
// Returns false when there is no swap chain, when the swap chain format has no matching GPUTexture format, or
// when the temporary texture, render pass or framebuffer cannot be obtained.
bool VulkanGPUDevice::RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect,
                                       std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format)
{
  // in theory we could do this without a swap chain, but postprocessing assumes it for now...
  if (!m_swap_chain)
    return false;

  const VkFormat format = m_swap_chain ? m_swap_chain->GetTextureFormat() : VK_FORMAT_R8G8B8A8_UNORM;
  switch (format)
  {
    case VK_FORMAT_R8G8B8A8_UNORM:
    case VK_FORMAT_R8G8B8A8_SRGB:
      *out_format = GPUTexture::Format::RGBA8;
      *out_stride = sizeof(u32) * width;
      out_pixels->resize(width * height);
      break;

    case VK_FORMAT_B8G8R8A8_UNORM:
    case VK_FORMAT_B8G8R8A8_SRGB:
      *out_format = GPUTexture::Format::BGRA8;
      *out_stride = sizeof(u32) * width;
      out_pixels->resize(width * height);
      break;

    case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
      *out_format = GPUTexture::Format::RGBA5551;
      *out_stride = sizeof(u16) * width;
      // Two 16-bit pixels per u32 element, rounded up.
      out_pixels->resize(((width * height) + 1) / 2);
      break;

    case VK_FORMAT_R5G6B5_UNORM_PACK16:
      *out_format = GPUTexture::Format::RGB565;
      *out_stride = sizeof(u16) * width;
      out_pixels->resize(((width * height) + 1) / 2);
      break;

    default:
      // The outputs were never written for this format, so continuing would download through an unset stride
      // into an unsized buffer. Report failure instead.
      Log_ErrorPrintf("Unhandled swap chain pixel format %u", static_cast<unsigned>(format));
      return false;
  }

  // if we don't have a texture (display off), then just write out nothing.
  if (!HasDisplayTexture())
  {
    std::fill(out_pixels->begin(), out_pixels->end(), static_cast<u32>(0));
    return true;
  }

  Vulkan::Texture tex;
  if (!tex.Create(width, height, 1, 1, format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
                  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT))
  {
    return false;
  }

  const VkRenderPass rp =
    m_swap_chain ?
      m_swap_chain->GetClearRenderPass() :
      g_vulkan_context->GetRenderPass(format, VK_FORMAT_UNDEFINED, VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR);
  if (!rp)
    return false;

  const VkFramebuffer fb = tex.CreateFramebuffer(rp);
  if (!fb)
    return false;

  const Vulkan::Util::DebugScope debugScope(g_vulkan_context->GetCurrentCommandBuffer(),
                                            "VulkanHostDisplay::RenderScreenshot: %ux%u", width, height);
  tex.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);

  if (!m_post_processing_chain.IsEmpty())
  {
    ApplyPostProcessingChain(fb, draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(),
                             static_cast<Vulkan::Texture*>(m_display_texture), m_display_texture_view_x,
                             m_display_texture_view_y, m_display_texture_view_width, m_display_texture_view_height,
                             width, height);
  }
  else
  {
    BeginSwapChainRenderPass(fb, width, height);
    RenderDisplay(draw_rect.left, draw_rect.top, draw_rect.GetWidth(), draw_rect.GetHeight(),
                  static_cast<Vulkan::Texture*>(m_display_texture), m_display_texture_view_x, m_display_texture_view_y,
                  m_display_texture_view_width, m_display_texture_view_height, IsUsingLinearFiltering());
  }

  vkCmdEndRenderPass(g_vulkan_context->GetCurrentCommandBuffer());
  Vulkan::Util::EndDebugScope(g_vulkan_context->GetCurrentCommandBuffer());

  // The image has to be in a transfer-source layout before it can be copied out.
  tex.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
  DownloadTexture(&tex, 0, 0, width, height, out_pixels->data(), *out_stride);

  // destroying these immediately should be safe since nothing's going to access them, and it's not part of the command
  // stream
  vkDestroyFramebuffer(g_vulkan_context->GetDevice(), fb, nullptr);
  tex.Destroy(false);
  return true;
}
void VulkanGPUDevice::BeginSwapChainRenderPass(VkFramebuffer framebuffer, u32 width, u32 height)
{
const VkClearValue clear_value = {{{0.0f, 0.0f, 0.0f, 1.0f}}};

View File

@ -56,8 +56,6 @@ public:
void SetVSync(bool enabled) override;
bool Render(bool skip_present) override;
bool RenderScreenshot(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, std::vector<u32>* out_pixels,
u32* out_stride, GPUTexture::Format* out_format) override;
bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;

View File

@ -5,7 +5,10 @@
#include "common/align.h"
#include "common/assert.h"
#include "common/log.h"
#include "common/scoped_guard.h"
#include "common/string_util.h"
#include "cpu_core.h"
#include "gpu_hw_shadergen.h"
#include "gpu_sw_backend.h"
#include "host.h"
#include "imgui.h"
@ -180,6 +183,38 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
return true;
}
// Applies changed hardware-renderer settings. UpdateHWSettings() reports whether the framebuffer and/or the
// shaders must be rebuilt. A framebuffer rebuild is bracketed by a VRAM read-back before and a VRAM re-upload
// after, with any pipeline recompile in between. Failure to rebuild either resource is fatal.
void GPU_HW::UpdateSettings()
{
  // TODO: Merge UpdateHWSettings() into here.
  // Initialised defensively so the flags are well-defined even if UpdateHWSettings() leaves one unwritten.
  bool framebuffer_changed = false;
  bool shaders_changed = false;
  UpdateHWSettings(&framebuffer_changed, &shaders_changed);

  if (framebuffer_changed)
  {
    // NOTE(review): presumably this captures the current VRAM contents so the UpdateVRAM() call below can
    // restore them into the new framebuffer - confirm against ReadVRAM().
    RestoreGraphicsAPIState();
    ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
    ResetGraphicsAPIState();

    g_host_display->ClearDisplayTexture();

    // CreateFramebuffer() reports failure via its return value. Continuing without the textures would only
    // fail later in the re-upload below, so stop here with a clear message.
    if (!CreateFramebuffer())
      Panic("Failed to recreate framebuffer.");
  }

  if (shaders_changed)
  {
    DestroyPipelines();
    if (!CompilePipelines())
      Panic("Failed to recompile pipelines.");
  }

  if (framebuffer_changed)
  {
    // Re-upload VRAM and rebuild the dependent state only after any pipeline recompile above has finished.
    RestoreGraphicsAPIState();
    UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
    UpdateDepthBufferFromMaskBit();
    UpdateDisplay();
    ResetGraphicsAPIState();
  }
}
void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
{
const u32 resolution_scale = CalculateResolutionScale();
@ -360,26 +395,495 @@ bool GPU_HW::CreateFramebuffer()
((m_downsample_mode == GPUDownsampleMode::Adaptive) ? VRAM_WIDTH : GPU_MAX_DISPLAY_WIDTH) *
m_resolution_scale,
GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale, 1, 1, 1, GPUTexture::Type::RenderTarget, texture_format)) ||
!(m_vram_encoding_texture = g_host_display->CreateTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1,
!(m_vram_readback_texture = g_host_display->CreateTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1,
GPUTexture::Type::RenderTarget, texture_format)))
{
return false;
}
// vram framebuffer has both colour and depth
if (!(m_vram_framebuffer =
g_host_display->CreateFramebuffer(m_vram_texture.get(), 0, 0, m_vram_depth_texture.get(), 0, 0)) ||
!(m_vram_update_depth_framebuffer =
g_host_display->CreateFramebuffer(nullptr, 0, 0, m_vram_depth_texture.get(), 0, 0)) ||
!(m_vram_readback_framebuffer =
g_host_display->CreateFramebuffer(m_vram_readback_texture.get(), 0, 0, nullptr, 0, 0)) ||
!(m_display_framebuffer = g_host_display->CreateFramebuffer(m_display_texture.get(), 0, 0, nullptr, 0, 0)))
{
return false;
}
GL_OBJECT_NAME(m_vram_framebuffer, "VRAM Framebuffer");
GL_OBJECT_NAME(m_vram_update_depth_framebuffer, "VRAM Update Depth Framebuffer");
GL_OBJECT_NAME(m_vram_readback_framebuffer, "VRAM Readback Framebuffer");
GL_OBJECT_NAME(m_display_framebuffer, "Display Framebuffer");
Log_InfoPrintf("Created HW framebuffer of %ux%u", texture_width, texture_height);
return true;
}
// Releases every framebuffer and texture owned by the hardware renderer by resetting the owning pointers.
// Framebuffer objects are released before the textures; calling reset() on an already-empty pointer is harmless.
void GPU_HW::DestroyFramebuffer()
{
// Framebuffers first.
m_display_framebuffer.reset();
m_vram_readback_framebuffer.reset();
m_vram_update_depth_framebuffer.reset();
m_vram_framebuffer.reset();
// Then the textures and views.
m_vram_read_texture.reset();
m_vram_depth_view.reset();
m_vram_depth_texture.reset();
m_vram_texture.reset();
m_vram_encoding_texture.reset();
m_vram_readback_texture.reset();
m_display_texture.reset();
}
bool GPU_HW::CompilePipelines()
{
GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend);
ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + 2 +
(2 * 2) + 2 + 1 + 1 + (2 * 3) + 1);
// vertex shaders - [textured]
// fragment shaders - [render_mode][texture_mode][dithering][interlacing]
static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); };
DimensionalArray<std::unique_ptr<GPUShader>, 2> batch_vertex_shaders{};
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 9, 4> batch_fragment_shaders{};
ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
batch_vertex_shaders.enumerate(destroy_shader);
batch_fragment_shaders.enumerate(destroy_shader);
});
for (u8 textured = 0; textured < 2; textured++)
{
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured));
if (!(batch_vertex_shaders[textured] = g_host_display->CreateShader(GPUShaderStage::Vertex, vs)))
return false;
progress.Increment();
}
for (u8 render_mode = 0; render_mode < 4; render_mode++)
{
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
{
for (u8 dithering = 0; dithering < 2; dithering++)
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
if (!(batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing] =
g_host_display->CreateShader(GPUShaderStage::Fragment, fs)))
{
return false;
}
progress.Increment();
}
}
}
}
static constexpr GPUPipeline::VertexAttribute batch_attributes[] = {
GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(BatchVertex, x)),
GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(BatchVertex, color)),
};
static constexpr GPUPipeline::VertexAttribute textured_batch_attributes[] = {
GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(BatchVertex, x)),
GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(BatchVertex, color)),
GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Type::UInt32, 1, offsetof(BatchVertex, u)),
GPUPipeline::VertexAttribute::Make(3, GPUPipeline::VertexAttribute::Type::UInt32, 1,
offsetof(BatchVertex, texpage)),
};
static constexpr GPUPipeline::VertexAttribute textured_limits_batch_attributes[] = {
GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Type::Float, 2, offsetof(BatchVertex, x)),
GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Type::UNorm8, 4, offsetof(BatchVertex, color)),
GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Type::UInt32, 1, offsetof(BatchVertex, u)),
GPUPipeline::VertexAttribute::Make(3, GPUPipeline::VertexAttribute::Type::UInt32, 1,
offsetof(BatchVertex, texpage)),
GPUPipeline::VertexAttribute::Make(4, GPUPipeline::VertexAttribute::Type::UNorm8, 4,
offsetof(BatchVertex, uv_limits)),
};
GPUPipeline::GraphicsConfig plconfig = {};
plconfig.layout = GPUPipeline::Layout::HWBatch;
plconfig.input_layout.vertex_stride = sizeof(BatchVertex);
plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.color_format = GPUTexture::Format::RGBA8;
plconfig.depth_format = GPUTexture::Format::D16;
plconfig.samples = m_multisamples;
plconfig.per_sample_shading = m_per_sample_shading;
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
for (u8 depth_test = 0; depth_test < 3; depth_test++)
{
for (u8 render_mode = 0; render_mode < 4; render_mode++)
{
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
{
for (u8 dithering = 0; dithering < 2; dithering++)
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
static constexpr std::array<GPUPipeline::DepthFunc, 3> depth_test_values = {
GPUPipeline::DepthFunc::Always, GPUPipeline::DepthFunc::GreaterEqual,
GPUPipeline::DepthFunc::LessEqual};
const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
plconfig.input_layout.vertex_attributes =
textured ?
(m_using_uv_limits ? gsl::span<const GPUPipeline::VertexAttribute>(textured_limits_batch_attributes) :
gsl::span<const GPUPipeline::VertexAttribute>(textured_batch_attributes)) :
gsl::span<const GPUPipeline::VertexAttribute>(batch_attributes);
plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get();
plconfig.pixel_shader = batch_fragment_shaders[render_mode][texture_mode][dithering][interlacing].get();
// TODO: Depth write always on???
plconfig.depth.depth_test = depth_test_values[depth_test];
plconfig.depth.depth_write = true;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
if ((static_cast<GPUTransparencyMode>(transparency_mode) != GPUTransparencyMode::Disabled &&
(static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) ||
m_texture_filtering != GPUTextureFilter::Nearest)
{
plconfig.blend.enable = true;
plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One;
plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::Zero;
plconfig.blend.alpha_blend_op = GPUPipeline::BlendOp::Add;
if (m_supports_dual_source_blend)
{
plconfig.blend.src_blend = GPUPipeline::BlendFunc::One;
plconfig.blend.dst_blend = GPUPipeline::BlendFunc::SrcAlpha1;
plconfig.blend.blend_op =
(static_cast<GPUTransparencyMode>(transparency_mode) ==
GPUTransparencyMode::BackgroundMinusForeground &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque) ?
GPUPipeline::BlendOp::ReverseSubtract :
GPUPipeline::BlendOp::Add;
}
else
{
const u32 factor = (static_cast<GPUTransparencyMode>(transparency_mode) ==
GPUTransparencyMode::HalfBackgroundPlusHalfForeground) ?
0xFF808080u :
0xFFFFFFFFu;
plconfig.blend.src_blend = GPUPipeline::BlendFunc::One;
plconfig.blend.dst_blend = GPUPipeline::BlendFunc::ConstantColor;
plconfig.blend.blend_op =
(static_cast<GPUTransparencyMode>(transparency_mode) ==
GPUTransparencyMode::BackgroundMinusForeground &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque) ?
GPUPipeline::BlendOp::ReverseSubtract :
GPUPipeline::BlendOp::Add;
plconfig.blend.constant = factor;
}
}
if (!(m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering]
[interlacing] = g_host_display->CreatePipeline(plconfig)))
{
return false;
}
progress.Increment();
}
}
}
}
}
}
batch_shader_guard.Run();
std::unique_ptr<GPUShader> fullscreen_quad_vertex_shader =
g_host_display->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader());
std::unique_ptr<GPUShader> uv_quad_vertex_shader =
g_host_display->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateUVQuadVertexShader());
if (!fullscreen_quad_vertex_shader || !uv_quad_vertex_shader)
return false;
progress.Increment();
// common state
plconfig.layout = GPUPipeline::Layout::SingleTexture;
plconfig.per_sample_shading = false;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.vertex_shader = fullscreen_quad_vertex_shader.get();
// VRAM fill
for (u8 wrapped = 0; wrapped < 2; wrapped++)
{
for (u8 interlaced = 0; interlaced < 2; interlaced++)
{
std::unique_ptr<GPUShader> fs = g_host_display->CreateShader(
GPUShaderStage::Fragment,
shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)));
if (!fs)
return false;
plconfig.pixel_shader = fs.get();
plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState();
if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_host_display->CreatePipeline(plconfig)))
return false;
progress.Increment();
}
}
// VRAM copy
{
std::unique_ptr<GPUShader> fs =
g_host_display->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMCopyFragmentShader());
if (!fs)
return false;
plconfig.pixel_shader = fs.get();
for (u8 depth_test = 0; depth_test < 2; depth_test++)
{
plconfig.depth.depth_write = true;
plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
if (!(m_vram_copy_pipelines[depth_test] = g_host_display->CreatePipeline(plconfig)))
return false;
progress.Increment();
}
}
// VRAM write
// TODO: SSBO path here...
{
std::unique_ptr<GPUShader> fs = g_host_display->CreateShader(
GPUShaderStage::Fragment, shadergen.GenerateVRAMWriteFragmentShader(false /*m_use_ssbos_for_vram_writes*/));
if (!fs)
return false;
plconfig.pixel_shader = fs.get();
for (u8 depth_test = 0; depth_test < 2; depth_test++)
{
plconfig.depth.depth_write = true;
plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
if (!(m_vram_write_pipelines[depth_test] = g_host_display->CreatePipeline(plconfig)))
return false;
progress.Increment();
}
}
#if 0
// VRAM update depth
// TODO
{
std::unique_ptr<GPUShader> fs = g_host_display->CreateShader(
GPUShader::Stage::Pixel, shadergen.GenerateVRAMUpdateDepthFragmentShader());
if (!fs)
return false;
gpbuilder.SetRenderPass(m_vram_update_depth_render_pass, 0);
gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout);
gpbuilder.SetFragmentShader(fs);
gpbuilder.SetDepthState(true, true, VK_COMPARE_OP_ALWAYS);
gpbuilder.SetBlendAttachment(0, false, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, 0);
// COLOR MASK ZERO
m_vram_update_depth_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(device, fs, nullptr);
if (m_vram_update_depth_pipeline == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_vram_update_depth_pipeline,
"VRAM Update Depth Pipeline");
progress.Increment();
}
#endif
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
// VRAM read
{
std::unique_ptr<GPUShader> fs =
g_host_display->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMReadFragmentShader());
if (!fs)
return false;
plconfig.pixel_shader = fs.get();
if (!(m_vram_readback_pipeline = g_host_display->CreatePipeline(plconfig)))
return false;
GL_OBJECT_NAME(m_vram_readback_pipeline, "VRAM Read Pipeline");
progress.Increment();
}
// Display
{
for (u8 depth_24 = 0; depth_24 < 2; depth_24++)
{
for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++)
{
std::unique_ptr<GPUShader> fs = g_host_display->CreateShader(
GPUShaderStage::Fragment,
shadergen.GenerateDisplayFragmentShader(
ConvertToBoolUnchecked(depth_24), static_cast<InterlacedRenderMode>(interlace_mode), m_chroma_smoothing));
if (!fs)
return false;
plconfig.pixel_shader = fs.get();
if (!(m_display_pipelines[depth_24][interlace_mode] = g_host_display->CreatePipeline(plconfig)))
return false;
progress.Increment();
}
}
}
#if 0
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
{
gpbuilder.Clear();
gpbuilder.SetRenderPass(m_downsample_render_pass, 0);
gpbuilder.SetPipelineLayout(m_downsample_pipeline_layout);
gpbuilder.SetVertexShader(uv_quad_vertex_shader);
gpbuilder.SetNoCullRasterizationState();
gpbuilder.SetNoDepthTestState();
gpbuilder.SetNoBlendingState();
gpbuilder.SetDynamicViewportAndScissorState();
std::unique_ptr<GPUShader> fs = g_host_display->CreateShaderFromSource(
GPUShader::Stage::Pixel, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true));
if (fs == VK_NULL_HANDLE)
return false;
gpbuilder.SetFragmentShader(fs);
m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline,
"Downsample First Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel,
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false));
if (fs == VK_NULL_HANDLE)
return false;
gpbuilder.SetFragmentShader(fs);
m_downsample_mid_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_mid_pass_pipeline == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_mid_pass_pipeline,
"Downsample Mid Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel,
shadergen.GenerateAdaptiveDownsampleBlurFragmentShader());
if (fs == VK_NULL_HANDLE)
return false;
gpbuilder.SetFragmentShader(fs);
gpbuilder.SetRenderPass(m_downsample_weight_render_pass, 0);
m_downsample_blur_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_blur_pass_pipeline == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_blur_pass_pipeline,
"Downsample Blur Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel,
shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader());
if (fs == VK_NULL_HANDLE)
return false;
gpbuilder.SetFragmentShader(fs);
gpbuilder.SetPipelineLayout(m_downsample_composite_pipeline_layout);
gpbuilder.SetRenderPass(m_display_load_render_pass, 0);
m_downsample_composite_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_composite_pass_pipeline == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_composite_pass_pipeline,
"Downsample Composite Pass Pipeline");
}
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
gpbuilder.Clear();
gpbuilder.SetRenderPass(m_downsample_render_pass, 0);
gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout);
gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader);
gpbuilder.SetNoCullRasterizationState();
gpbuilder.SetNoDepthTestState();
gpbuilder.SetNoBlendingState();
gpbuilder.SetDynamicViewportAndScissorState();
std::unique_ptr<GPUShader> fs = g_host_display->CreateShaderFromSource(
GPUShader::Stage::Pixel, shadergen.GenerateBoxSampleDownsampleFragmentShader());
if (fs == VK_NULL_HANDLE)
return false;
gpbuilder.SetFragmentShader(fs);
m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline,
"Downsample First Pass Pipeline");
}
#endif
progress.Increment();
#undef UPDATE_PROGRESS
return true;
}
void GPU_HW::DestroyPipelines()
{
static constexpr auto destroy = [](std::unique_ptr<GPUPipeline>& p) { p.reset(); };
m_batch_pipelines.enumerate(destroy);
m_vram_fill_pipelines.enumerate(destroy);
for (std::unique_ptr<GPUPipeline>& p : m_vram_write_pipelines)
destroy(p);
for (std::unique_ptr<GPUPipeline>& p : m_vram_copy_pipelines)
destroy(p);
destroy(m_vram_readback_pipeline);
destroy(m_vram_update_depth_pipeline);
destroy(m_downsample_first_pass_pipeline);
destroy(m_downsample_mid_pass_pipeline);
destroy(m_downsample_blur_pass_pipeline);
destroy(m_downsample_composite_pass_pipeline);
m_display_pipelines.enumerate(destroy);
}
void GPU_HW::UpdateVRAMReadTexture()
{
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
@ -401,6 +905,148 @@ void GPU_HW::UpdateVRAMReadTexture()
ClearVRAMDirtyRectangle();
}
// Maps room for at least the requested number of batch vertices through the
// host display and points the batch start/current/end cursors at the mapping.
// Must not be called while a mapping is already active.
void GPU_HW::MapBatchVertexPointer(u32 required_vertices)
{
  DebugAssert(!m_batch_start_vertex_ptr);

  void* mapped_memory;
  u32 available_vertices;
  g_host_display->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &mapped_memory, &available_vertices,
                                  &m_batch_base_vertex);

  // The returned space is used as a vertex count when computing the end cursor.
  BatchVertex* const first_vertex = static_cast<BatchVertex*>(mapped_memory);
  m_batch_start_vertex_ptr = first_vertex;
  m_batch_current_vertex_ptr = first_vertex;
  m_batch_end_vertex_ptr = first_vertex + available_vertices;
}
// Commits the vertices written since the last map call and clears the batch
// cursors so that a new mapping can be started. Requires an active mapping.
void GPU_HW::UnmapBatchVertexPointer(u32 used_vertices)
{
  DebugAssert(m_batch_start_vertex_ptr);

  g_host_display->UnmapVertexBuffer(sizeof(BatchVertex), used_vertices);

  // A null start pointer is what marks "nothing mapped" for the next map call.
  m_batch_start_vertex_ptr = m_batch_end_vertex_ptr = m_batch_current_vertex_ptr = nullptr;
}
// Selects the batch pipeline matching the current batch state and the given
// render mode, then issues a draw for the given vertex range.
void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices)
{
  // Table index order:
  // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]

  // Depth slot 2 is used with the depth buffer; otherwise 0/1 follows the mask check flag.
  u8 depth_index;
  if (m_batch.use_depth_buffer)
    depth_index = static_cast<u8>(2);
  else
    depth_index = BoolToUInt8(m_batch.check_mask_before_draw);

  const u8 render_index = static_cast<u8>(render_mode);
  const u8 texture_index = static_cast<u8>(m_batch.texture_mode);
  const u8 transparency_index = static_cast<u8>(m_batch.transparency_mode);
  const u8 dithering_index = BoolToUInt8(m_batch.dithering);
  const u8 interlacing_index = BoolToUInt8(m_batch.interlacing);

  GPUPipeline* const pipeline =
    m_batch_pipelines[depth_index][render_index][texture_index][transparency_index][dithering_index]
                     [interlacing_index]
                       .get();

  g_host_display->SetPipeline(pipeline);
  g_host_display->Draw(num_vertices, base_vertex);
}
// Clears the display output. The shared hardware-renderer implementation is
// not written yet, so this unconditionally calls Panic().
void GPU_HW::ClearDisplay()
{
Panic("Not implemented");
}
// Flushes pending rendering and hands the host display the texture/region to present.
//
// Three cases are handled:
//  - debug "show VRAM" mode presents the whole VRAM texture;
//  - a fast path presents a sub-rectangle of the VRAM texture directly;
//  - otherwise the display area is drawn into m_display_texture with one of the
//    display pipelines, and that texture is presented.
// Downsampling is not ported yet: both downsampling branches call Panic("Fixme").
void GPU_HW::UpdateDisplay()
{
FlushRender();
if (g_settings.debugging.show_vram)
{
// Debug view: present the entire VRAM contents.
if (IsUsingMultisampling())
{
// With multisampling, the read texture is refreshed and presented instead of the VRAM texture.
UpdateVRAMReadTexture();
g_host_display->SetDisplayTexture(m_vram_read_texture.get(), 0, 0, m_vram_read_texture->GetWidth(),
m_vram_read_texture->GetHeight());
}
else
{
g_host_display->SetDisplayTexture(m_vram_texture.get(), 0, 0, m_vram_texture->GetWidth(),
m_vram_texture->GetHeight());
}
g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
}
else
{
g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
GetDisplayAspectRatio());
// 24-bit colour output is always produced at 1x; otherwise the renderer's resolution scale applies.
const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale;
const u32 vram_offset_x = m_crtc_state.display_vram_left;
const u32 vram_offset_y = m_crtc_state.display_vram_top;
const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale;
const u32 display_width = m_crtc_state.display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height;
const u32 scaled_display_width = display_width * resolution_scale;
const u32 scaled_display_height = display_height * resolution_scale;
const InterlacedRenderMode interlaced = GetInterlacedRenderMode();
if (IsDisplayDisabled())
{
g_host_display->ClearDisplayTexture();
}
// Fast path: 15-bit, non-interlaced, non-multisampled, and the scaled display rectangle lies
// entirely inside the VRAM texture, so it can be presented without an intermediate draw.
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None &&
!IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight())
{
if (IsUsingDownsampling())
{
#if 0
DownsampleFramebuffer(GetVRAMTexture(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width,
scaled_display_height);
#else
Panic("Fixme");
#endif
}
else
{
g_host_display->SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y,
scaled_display_width, scaled_display_height);
}
}
else
{
// Slow path: draw the display area into m_display_texture using the display pipeline
// selected by colour depth and interlace mode.
// TODO: discard vs load for interlaced
if (interlaced == InterlacedRenderMode::None)
g_host_display->InvalidateRenderTarget(m_display_texture.get());
g_host_display->SetPipeline(
m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].get());
g_host_display->SetFramebuffer(m_display_framebuffer.get());
// NOTE(review): no source texture is bound in this function before the draw below;
// confirm that the VRAM texture binding is established elsewhere.
const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0;
const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale;
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale;
// Shader parameters: start x, start y (including field offset), left crop, field offset.
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset,
reinterpret_crop_left, reinterpret_field_offset};
g_host_display->PushUniformBuffer(uniforms, sizeof(uniforms));
Assert(scaled_display_width <= m_display_texture->GetWidth() &&
scaled_display_height <= m_display_texture->GetHeight());
g_host_display->SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height);
// Three vertices, no vertex data: a single triangle generated by the screen-quad vertex shader.
g_host_display->Draw(3, 0);
if (IsUsingDownsampling())
{
#if 0
DownsampleFramebuffer(GetDisplayTexture(), 0, 0, scaled_display_width, scaled_display_height);
#else
Panic("Fixme");
#endif
}
else
{
g_host_display->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height);
}
// The draw above changed pipeline/framebuffer/viewport state; put the batch-rendering state back.
RestoreGraphicsAPIState();
}
}
}
void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
{
// Taken from beetle-psx gpu_polygon.cpp

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "common/dimensional_array.h"
#include "common/heap_array.h"
#include "gpu.h"
#include "gpu/gpu_device.h"
@ -42,6 +43,7 @@ public:
virtual void Reset(bool clear_vram) override;
virtual bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) override;
virtual void UpdateSettings() override;
void UpdateResolutionScale() override final;
std::tuple<u32, u32> GetEffectiveDisplayResolution(bool scaled = true) override final;
std::tuple<u32, u32> GetFullDisplayResolution(bool scaled = true) override final;
@ -202,14 +204,20 @@ protected:
virtual bool CreateFramebuffer();
virtual void DestroyFramebuffer();
bool CompilePipelines();
void DestroyPipelines();
void UpdateVRAMReadTexture();
virtual void UpdateDepthBufferFromMaskBit() = 0;
virtual void ClearDepthBuffer() = 0;
virtual void SetScissorFromDrawingArea() = 0;
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0;
virtual void MapBatchVertexPointer(u32 required_vertices);
virtual void UnmapBatchVertexPointer(u32 used_vertices);
virtual void UploadUniformBuffer(const void* uniforms, u32 uniforms_size) = 0;
virtual void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) = 0;
virtual void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices);
virtual void ClearDisplay();
virtual void UpdateDisplay();
u32 CalculateResolutionScale() const;
GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const;
@ -361,8 +369,14 @@ protected:
std::unique_ptr<GPUTexture> m_vram_depth_texture;
std::unique_ptr<GPUTexture> m_vram_depth_view;
std::unique_ptr<GPUTexture> m_vram_read_texture;
std::unique_ptr<GPUTexture> m_vram_encoding_texture;
std::unique_ptr<GPUTexture> m_vram_readback_texture;
std::unique_ptr<GPUTexture> m_display_texture;
std::unique_ptr<GPUFramebuffer> m_vram_framebuffer;
std::unique_ptr<GPUFramebuffer> m_vram_update_depth_framebuffer;
std::unique_ptr<GPUFramebuffer> m_vram_readback_framebuffer;
std::unique_ptr<GPUFramebuffer> m_display_framebuffer;
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
std::unique_ptr<GPU_SW_Backend> m_sw_renderer;
@ -406,13 +420,34 @@ protected:
// Bounding box of VRAM area that the GPU has drawn into.
Common::Rectangle<u32> m_vram_dirty_rect;
// Changed state
bool m_batch_ubo_dirty = true;
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
DimensionalArray<std::unique_ptr<GPUPipeline>, 2, 2, 5, 9, 4, 3> m_batch_pipelines{};
// [wrapped][interlaced]
DimensionalArray<std::unique_ptr<GPUPipeline>, 2, 2> m_vram_fill_pipelines{};
// [depth_test]
std::array<std::unique_ptr<GPUPipeline>, 2> m_vram_write_pipelines{};
std::array<std::unique_ptr<GPUPipeline>, 2> m_vram_copy_pipelines{};
std::unique_ptr<GPUPipeline> m_vram_readback_pipeline;
std::unique_ptr<GPUPipeline> m_vram_update_depth_pipeline;
// [depth_24][interlace_mode]
DimensionalArray<std::unique_ptr<GPUPipeline>, 3, 2> m_display_pipelines{};
std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_composite_pass_pipeline;
// Statistics
RendererStats m_renderer_stats = {};
RendererStats m_last_renderer_stats = {};
// Changed state
bool m_batch_ubo_dirty = true;
private:
enum : u32
{

View File

@ -42,12 +42,6 @@ bool GPU_HW_D3D11::Initialize()
return false;
}
if (!CreateVertexBuffer())
{
Log_ErrorPrintf("Failed to create vertex buffer");
return false;
}
if (!CreateUniformBuffer())
{
Log_ErrorPrintf("Failed to create uniform buffer");
@ -94,10 +88,6 @@ void GPU_HW_D3D11::ResetGraphicsAPIState()
void GPU_HW_D3D11::RestoreGraphicsAPIState()
{
const UINT stride = sizeof(BatchVertex);
const UINT offset = 0;
m_context->IASetVertexBuffers(0, 1, m_vertex_stream_buffer.GetD3DBufferArray(), &stride, &offset);
m_context->IASetInputLayout(m_batch_input_layout.Get());
m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
m_context->PSSetShaderResources(0, 1, GetVRAMReadTexture()->GetD3DSRVArray());
m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf());
@ -108,62 +98,6 @@ void GPU_HW_D3D11::RestoreGraphicsAPIState()
m_batch_ubo_dirty = true;
}
// Applies changed settings to the D3D11 hardware renderer.
//
// When the framebuffer configuration changed, VRAM is read back before the
// framebuffer is recreated and re-uploaded afterwards. When shader-affecting
// settings changed, shaders and state objects are destroyed and rebuilt.
// NOTE(review): the results of CreateFramebuffer() and CompileShaders() are not checked here.
void GPU_HW_D3D11::UpdateSettings()
{
GPU_HW::UpdateSettings();
bool framebuffer_changed, shaders_changed;
UpdateHWSettings(&framebuffer_changed, &shaders_changed);
if (framebuffer_changed)
{
// Read the full VRAM back before the framebuffer is recreated.
RestoreGraphicsAPIState();
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
ResetGraphicsAPIState();
g_host_display->ClearDisplayTexture();
CreateFramebuffer();
}
if (shaders_changed)
{
DestroyShaders();
DestroyStateObjects();
CreateStateObjects();
CompileShaders();
}
if (framebuffer_changed)
{
// Re-upload VRAM from m_vram_ptr into the new framebuffer and refresh dependent state.
RestoreGraphicsAPIState();
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
UpdateDepthBufferFromMaskBit();
UpdateDisplay();
ResetGraphicsAPIState();
}
}
// Maps space for the requested number of batch vertices in the D3D11 vertex
// stream buffer and sets up the batch cursors and base vertex from the result.
// Must not be called while a mapping is already active.
void GPU_HW_D3D11::MapBatchVertexPointer(u32 required_vertices)
{
  DebugAssert(!m_batch_start_vertex_ptr);

  // Alignment is one vertex; the requested size is expressed in bytes.
  const auto mapping =
    m_vertex_stream_buffer.Map(m_context.Get(), sizeof(BatchVertex), required_vertices * sizeof(BatchVertex));

  BatchVertex* const first_vertex = static_cast<BatchVertex*>(mapping.pointer);
  m_batch_base_vertex = mapping.index_aligned;
  m_batch_start_vertex_ptr = first_vertex;
  m_batch_current_vertex_ptr = first_vertex;
  m_batch_end_vertex_ptr = first_vertex + mapping.space_aligned;
}
// Unmaps the D3D11 vertex stream buffer, committing the bytes occupied by the
// vertices that were written, and clears the batch cursors.
void GPU_HW_D3D11::UnmapBatchVertexPointer(u32 used_vertices)
{
  DebugAssert(m_batch_start_vertex_ptr);

  m_vertex_stream_buffer.Unmap(m_context.Get(), used_vertices * sizeof(BatchVertex));

  // Null cursors mark "nothing mapped" for the next map call.
  m_batch_start_vertex_ptr = m_batch_end_vertex_ptr = m_batch_current_vertex_ptr = nullptr;
}
void GPU_HW_D3D11::SetCapabilities()
{
const u32 max_texture_size = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
@ -263,11 +197,6 @@ void GPU_HW_D3D11::DestroyFramebuffer()
GPU_HW::DestroyFramebuffer();
}
// Creates the vertex stream buffer with VERTEX_BUFFER_SIZE capacity and the
// vertex-buffer bind flag. Returns the result of StreamBuffer::Create().
bool GPU_HW_D3D11::CreateVertexBuffer()
{
return m_vertex_stream_buffer.Create(m_device.Get(), D3D11_BIND_VERTEX_BUFFER, VERTEX_BUFFER_SIZE);
}
bool GPU_HW_D3D11::CreateUniformBuffer()
{
return m_uniform_stream_buffer.Create(m_device.Get(), D3D11_BIND_CONSTANT_BUFFER, MAX_UNIFORM_BUFFER_SIZE);
@ -367,35 +296,11 @@ bool GPU_HW_D3D11::CreateStateObjects()
if (FAILED(hr))
return false;
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{
bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
if (transparency_mode != static_cast<u8>(GPUTransparencyMode::Disabled) ||
m_texture_filtering != GPUTextureFilter::Nearest)
{
bl_desc.RenderTarget[0].BlendEnable = TRUE;
bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC1_ALPHA;
bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
bl_desc.RenderTarget[0].BlendOp =
(transparency_mode == static_cast<u8>(GPUTransparencyMode::BackgroundMinusForeground)) ?
D3D11_BLEND_OP_REV_SUBTRACT :
D3D11_BLEND_OP_ADD;
bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
}
hr = m_device->CreateBlendState(&bl_desc, m_batch_blend_states[transparency_mode].ReleaseAndGetAddressOf());
if (FAILED(hr))
return false;
}
return true;
}
void GPU_HW_D3D11::DestroyStateObjects()
{
m_batch_blend_states = {};
m_linear_sampler_state.Reset();
m_point_sampler_state.Reset();
m_trilinear_sampler_state.Reset();
@ -411,9 +316,14 @@ void GPU_HW_D3D11::DestroyStateObjects()
bool GPU_HW_D3D11::CompileShaders()
{
if (!GPU_HW::CompilePipelines())
return false;
D3D11::ShaderCache shader_cache;
#if 0
shader_cache.Open(EmuFolders::Cache, m_device->GetFeatureLevel(), SHADER_CACHE_VERSION,
g_settings.gpu_use_debug_device);
#endif
GPU_HW_ShaderGen shadergen(g_host_display->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading,
m_true_color, m_scaled_dithering, m_texture_filtering, m_using_uv_limits,
@ -422,34 +332,6 @@ bool GPU_HW_D3D11::CompileShaders()
ShaderCompileProgressTracker progress("Compiling Shaders",
1 + 1 + 2 + (4 * 9 * 2 * 2) + 1 + (2 * 2) + 4 + (2 * 3) + 1);
// input layout
{
static constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 5> attributes = {
{{"ATTR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, offsetof(BatchVertex, x), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 2, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 3, DXGI_FORMAT_R32_UINT, 0, offsetof(BatchVertex, texpage), D3D11_INPUT_PER_VERTEX_DATA, 0},
{"ATTR", 4, DXGI_FORMAT_R8G8B8A8_UNORM, 0, offsetof(BatchVertex, uv_limits), D3D11_INPUT_PER_VERTEX_DATA, 0}}};
// we need a vertex shader...
ComPtr<ID3DBlob> vs_bytecode =
shader_cache.GetShaderBlob(D3D11::ShaderCompiler::Type::Vertex, shadergen.GenerateBatchVertexShader(true));
if (!vs_bytecode)
return false;
const UINT num_attributes = static_cast<UINT>(attributes.size()) - (m_using_uv_limits ? 0 : 1);
const HRESULT hr =
m_device->CreateInputLayout(attributes.data(), num_attributes, vs_bytecode->GetBufferPointer(),
vs_bytecode->GetBufferSize(), m_batch_input_layout.ReleaseAndGetAddressOf());
if (FAILED(hr))
{
Log_ErrorPrintf("CreateInputLayout failed: 0x%08X", hr);
return false;
}
}
progress.Increment();
m_screen_quad_vertex_shader =
shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateScreenQuadVertexShader());
m_uv_quad_vertex_shader = shader_cache.GetVertexShader(m_device.Get(), shadergen.GenerateUVQuadVertexShader());
@ -458,39 +340,6 @@ bool GPU_HW_D3D11::CompileShaders()
progress.Increment();
for (u8 textured = 0; textured < 2; textured++)
{
const std::string vs = shadergen.GenerateBatchVertexShader(ConvertToBoolUnchecked(textured));
m_batch_vertex_shaders[textured] = shader_cache.GetVertexShader(m_device.Get(), vs);
if (!m_batch_vertex_shaders[textured])
return false;
progress.Increment();
}
for (u8 render_mode = 0; render_mode < 4; render_mode++)
{
for (u8 texture_mode = 0; texture_mode < 9; texture_mode++)
{
for (u8 dithering = 0; dithering < 2; dithering++)
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const std::string ps = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing] =
shader_cache.GetPixelShader(m_device.Get(), ps);
if (!m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing])
return false;
progress.Increment();
}
}
}
}
m_copy_pixel_shader = shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateCopyFragmentShader());
if (!m_copy_pixel_shader)
return false;
@ -599,9 +448,8 @@ void GPU_HW_D3D11::DestroyShaders()
m_copy_pixel_shader.Reset();
m_uv_quad_vertex_shader.Reset();
m_screen_quad_vertex_shader.Reset();
m_batch_pixel_shaders = {};
m_batch_vertex_shaders = {};
m_batch_input_layout.Reset();
GPU_HW::DestroyPipelines();
}
void GPU_HW_D3D11::UploadUniformBuffer(const void* data, u32 data_size)
@ -699,30 +547,6 @@ bool GPU_HW_D3D11::BlitVRAMReplacementTexture(const TextureReplacementTexture* t
return true;
}
// Binds the shaders, blend state and depth-stencil state that match the
// current batch and render mode, then draws the given vertex range.
void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices)
{
  // Vertex shader: indexed by whether texturing is enabled.
  const u8 textured_index = BoolToUInt8(m_batch.texture_mode != GPUTextureMode::Disabled);
  m_context->VSSetShader(m_batch_vertex_shaders[textured_index].Get(), nullptr, 0);

  // Pixel shader: [render_mode][texture_mode][dithering][interlacing]
  const auto& pixel_shader =
    m_batch_pixel_shaders[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
                         [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)];
  m_context->PSSetShader(pixel_shader.Get(), nullptr, 0);

  // Opaque-only passes always use the "disabled" transparency blend state.
  GPUTransparencyMode blend_mode = m_batch.transparency_mode;
  if (render_mode == BatchRenderMode::OnlyOpaque)
    blend_mode = GPUTransparencyMode::Disabled;
  m_context->OMSetBlendState(m_batch_blend_states[static_cast<u8>(blend_mode)].Get(), nullptr, 0xFFFFFFFFu);

  // Depth state: depth buffer in use -> "less"; else mask check -> "greater"; else "always".
  auto* const depth_state =
    m_batch.use_depth_buffer ?
      m_depth_test_less_state.Get() :
      (m_batch.check_mask_before_draw ? m_depth_test_greater_state.Get() : m_depth_test_always_state.Get());
  m_context->OMSetDepthStencilState(depth_state, 0);

  m_context->Draw(num_vertices, base_vertex);
}
void GPU_HW_D3D11::SetScissorFromDrawingArea()
{
int left, top, right, bottom;
@ -742,96 +566,6 @@ void GPU_HW_D3D11::ClearDisplay()
m_context->ClearRenderTargetView(GetDisplayTexture()->GetD3DRTV(), clear_color.data());
}
// D3D11 implementation of the display update.
//
// After running the base-class update, selects what the host display presents:
// the whole VRAM in debug "show VRAM" mode, a sub-rectangle of the VRAM texture
// when it can be shown directly, or otherwise a copy drawn into the display
// texture with the matching display pixel shader. Downsampling, when enabled,
// is applied to whichever source is chosen.
void GPU_HW_D3D11::UpdateDisplay()
{
GPU_HW::UpdateDisplay();
if (g_settings.debugging.show_vram)
{
// Debug view: present the entire VRAM contents.
if (IsUsingMultisampling())
{
// With multisampling, the read texture is refreshed and presented instead of the VRAM texture.
UpdateVRAMReadTexture();
g_host_display->SetDisplayTexture(m_vram_read_texture.get(), 0, 0, GetVRAMReadTexture()->GetWidth(),
GetVRAMReadTexture()->GetHeight());
}
else
{
g_host_display->SetDisplayTexture(m_vram_texture.get(), 0, 0, GetVRAMTexture()->GetWidth(),
GetVRAMTexture()->GetHeight());
}
g_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
}
else
{
g_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
GetDisplayAspectRatio());
// 24-bit colour output is always produced at 1x; otherwise the renderer's resolution scale applies.
const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale;
const u32 vram_offset_x = m_crtc_state.display_vram_left;
const u32 vram_offset_y = m_crtc_state.display_vram_top;
const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale;
const u32 display_width = m_crtc_state.display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height;
const u32 scaled_display_width = display_width * resolution_scale;
const u32 scaled_display_height = display_height * resolution_scale;
const InterlacedRenderMode interlaced = GetInterlacedRenderMode();
if (IsDisplayDisabled())
{
g_host_display->ClearDisplayTexture();
}
// Fast path: 15-bit, non-interlaced, non-multisampled, and the scaled display rectangle lies
// entirely inside the VRAM texture, so no intermediate draw is needed.
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None &&
!IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= GetVRAMTexture()->GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= GetVRAMTexture()->GetHeight())
{
if (IsUsingDownsampling())
{
DownsampleFramebuffer(GetVRAMTexture(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width,
scaled_display_height);
}
else
{
g_host_display->SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y,
scaled_display_width, scaled_display_height);
}
}
else
{
// Slow path: render the display area into the display texture, sampling the VRAM texture,
// with depth testing disabled and no depth target bound.
m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get());
m_context->OMSetRenderTargets(1, GetDisplayTexture()->GetD3DRTVArray(), nullptr);
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, GetVRAMTexture()->GetD3DSRVArray());
const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0;
const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale;
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale;
// Shader parameters: start x, start y (including field offset), left crop, field offset.
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset,
reinterpret_crop_left, reinterpret_field_offset};
// Pixel shader table is indexed [24-bit colour depth][interlace mode].
ID3D11PixelShader* display_pixel_shader =
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].Get();
Assert(scaled_display_width <= m_display_texture->GetWidth() &&
scaled_display_height <= m_display_texture->GetHeight());
SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height);
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
if (IsUsingDownsampling())
DownsampleFramebuffer(GetDisplayTexture(), 0, 0, scaled_display_width, scaled_display_height);
else
g_host_display->SetDisplayTexture(m_display_texture.get(), 0, 0, scaled_display_width, scaled_display_height);
// The utility draw changed device state; put the batch-rendering state back.
RestoreGraphicsAPIState();
}
}
}
void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
if (IsUsingSoftwareRendererForReadbacks())
@ -855,7 +589,7 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms));
// Stage the readback and copy it into our shadow buffer.
g_host_display->DownloadTexture(m_vram_encoding_texture.get(), 0, 0, encoded_width, encoded_height,
g_host_display->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height,
reinterpret_cast<u32*>(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]),
VRAM_WIDTH * sizeof(u16));

View File

@ -29,11 +29,9 @@ public:
void ResetGraphicsAPIState() override;
void RestoreGraphicsAPIState() override;
void UpdateSettings() override;
protected:
void ClearDisplay() override;
void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
@ -41,10 +39,7 @@ protected:
void UpdateDepthBufferFromMaskBit() override;
void ClearDepthBuffer() override;
void SetScissorFromDrawingArea() override;
void MapBatchVertexPointer(u32 required_vertices) override;
void UnmapBatchVertexPointer(u32 used_vertices) override;
void UploadUniformBuffer(const void* data, u32 data_size) override;
void DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) override;
private:
enum : u32
@ -64,7 +59,7 @@ private:
}
ALWAYS_INLINE D3D11Texture* GetVRAMEncodingTexture() const
{
return static_cast<D3D11Texture*>(m_vram_encoding_texture.get());
return static_cast<D3D11Texture*>(m_vram_readback_texture.get());
}
ALWAYS_INLINE D3D11Texture* GetDisplayTexture() const
{
@ -76,7 +71,6 @@ private:
void ClearFramebuffer();
void DestroyFramebuffer() override;
bool CreateVertexBuffer();
bool CreateUniformBuffer();
bool CreateTextureBuffer();
bool CreateStateObjects();
@ -99,8 +93,6 @@ private:
ComPtr<ID3D11Device> m_device;
ComPtr<ID3D11DeviceContext> m_context;
D3D11::StreamBuffer m_vertex_stream_buffer;
D3D11::StreamBuffer m_uniform_stream_buffer;
D3D11::StreamBuffer m_texture_stream_buffer;
@ -122,12 +114,6 @@ private:
ComPtr<ID3D11SamplerState> m_linear_sampler_state;
ComPtr<ID3D11SamplerState> m_trilinear_sampler_state;
std::array<ComPtr<ID3D11BlendState>, 5> m_batch_blend_states; // [transparency_mode]
ComPtr<ID3D11InputLayout> m_batch_input_layout;
std::array<ComPtr<ID3D11VertexShader>, 2> m_batch_vertex_shaders; // [textured]
std::array<std::array<std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2>, 9>, 4>
m_batch_pixel_shaders; // [render_mode][texture_mode][dithering][interlacing]
ComPtr<ID3D11VertexShader> m_screen_quad_vertex_shader;
ComPtr<ID3D11VertexShader> m_uv_quad_vertex_shader;
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;

View File

@ -341,9 +341,9 @@ bool GPU_HW_OpenGL::CreateFramebuffer()
!m_vram_read_texture.Create(texture_width, texture_height, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, false,
true) ||
!m_vram_read_texture.CreateFramebuffer() ||
!m_vram_encoding_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, false,
!m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, false,
true) ||
!m_vram_encoding_texture.CreateFramebuffer() ||
!m_vram_readback_texture.CreateFramebuffer() ||
!m_display_texture.Create(GPU_MAX_DISPLAY_WIDTH * m_resolution_scale, GPU_MAX_DISPLAY_HEIGHT * m_resolution_scale,
1, 1, 1, GPUTexture::Format::RGBA8, nullptr, 0, true, true) ||
!m_display_texture.CreateFramebuffer())
@ -918,7 +918,7 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_readback_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture.Bind();
m_vram_read_program.Bind();
UploadUniformBuffer(uniforms, sizeof(uniforms));
@ -929,7 +929,7 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
glDrawArrays(GL_TRIANGLES, 0, 3);
// Readback encoded texture.
m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
m_vram_readback_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glPixelStorei(GL_PACK_ALIGNMENT, 2);
glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2);
glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE,
@ -1064,7 +1064,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
// have to write to the 1x texture first
if (m_resolution_scale > 1)
m_vram_encoding_texture.Bind();
m_vram_readback_texture.Bind();
else
m_vram_texture.Bind();
@ -1081,7 +1081,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
const u32 scaled_x = x * m_resolution_scale;
const u32 scaled_y = y * m_resolution_scale;
glDisable(GL_SCISSOR_TEST);
m_vram_encoding_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
m_vram_readback_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
glBlitFramebuffer(x, y, x + width, y + height, scaled_x, scaled_y, scaled_x + scaled_width,
scaled_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST);
glEnable(GL_SCISSOR_TEST);

View File

@ -78,7 +78,7 @@ private:
GL::Texture m_vram_texture;
GL::Texture m_vram_depth_texture;
GL::Texture m_vram_read_texture;
GL::Texture m_vram_encoding_texture;
GL::Texture m_vram_readback_texture;
GL::Texture m_display_texture;
GL::Texture m_vram_write_replacement_texture;

View File

@ -4,4 +4,4 @@
#pragma once
#include "types.h"
static constexpr u32 SHADER_CACHE_VERSION = 7;
static constexpr u32 SHADER_CACHE_VERSION = 8;

View File

@ -662,16 +662,74 @@ std::string ShaderGen::GenerateCopyFragmentShader()
return ss.str();
}
std::string ShaderGen::GenerateSampleFragmentShader()
// Generates the vertex stage used when presenting the display texture.
// Declares a uniform buffer holding "float4 u_src_rect" and a vertex entry point, then computes
// `pos` from v_id, maps it into the source rectangle (xy + pos * zw) for v_tex0, and builds v_pos
// from it. The Y component of v_pos is negated when API_OPENGL, API_OPENGL_ES or API_VULKAN is set.
// NOTE(review): the samp0 texture / fragment entry point declarations and the `o_col0 = ...` line
// below look like leftovers of the removed GenerateSampleFragmentShader() interleaved by the diff
// view, not part of this vertex shader - confirm against the real file.
std::string ShaderGen::GenerateDisplayVertexShader()
{
std::stringstream ss;
WriteHeader(ss);
DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1);
DeclareUniformBuffer(ss, {"float4 u_src_rect"}, true);
DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true);
// Shader body is emitted verbatim; everything between R"( and )" is runtime text.
ss << R"(
{
o_col0 = SAMPLE_TEXTURE(samp0, v_tex0);
float2 pos = float2(float((v_id << 1) & 2u), float(v_id & 2u));
v_tex0 = u_src_rect.xy + pos * u_src_rect.zw;
v_pos = float4(pos * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
v_pos.y = -v_pos.y;
#endif
}
)";
return ss.str();
}
// Generates the fragment stage used when presenting the display texture.
// The shader samples samp0 at v_tex0 and writes the result to o_col0. When set_alpha_to_one is
// true only the sampled RGB is kept and the output alpha is written as 1.0.
std::string ShaderGen::GenerateDisplayFragmentShader(bool set_alpha_to_one /* = false */)
{
  std::stringstream shader;
  WriteHeader(shader);
  DeclareTexture(shader, "samp0", 0);
  DeclareFragmentEntryPoint(shader, 0, 1, {}, false, 1);

  // The two variants differ only in this one assignment; the surrounding braces are shared.
  const char* const assignment = set_alpha_to_one ?
                                   "o_col0 = float4(SAMPLE_TEXTURE(samp0, v_tex0).rgb, 1.0f);" :
                                   "o_col0 = SAMPLE_TEXTURE(samp0, v_tex0);";
  shader << "{\n" << assignment << "\n}\n";
  return shader.str();
}
// Generates the vertex stage used to draw ImGui geometry.
// Declares a uniform buffer holding "float4x4 ProjectionMatrix" and a vertex entry point taking
// a_pos (float2), a_tex0 (float2) and a_col0 (float4). The shader transforms a_pos by
// ProjectionMatrix into v_pos and passes colour and texture coordinate through unchanged.
// The Y component of v_pos is negated when API_OPENGL, API_OPENGL_ES or API_VULKAN is set.
std::string ShaderGen::GenerateImGuiVertexShader()
{
std::stringstream ss;
WriteHeader(ss);
DeclareUniformBuffer(ss, {"float4x4 ProjectionMatrix"}, true);
DeclareVertexEntryPoint(ss, {"float2 a_pos", "float2 a_tex0", "float4 a_col0"}, 1, 1, {}, false);
// Shader body is emitted verbatim; everything between R"( and )" is runtime text.
ss << R"(
{
v_pos = mul(ProjectionMatrix, float4(a_pos, 0.f, 1.f));
v_col0 = a_col0;
v_tex0 = a_tex0;
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
v_pos.y = -v_pos.y;
#endif
}
)";
return ss.str();
}
std::string ShaderGen::GenerateImGuiFragmentShader()
{
std::stringstream ss;
WriteHeader(ss);
DeclareTexture(ss, "samp0", 0);
DeclareFragmentEntryPoint(ss, 1, 1, {}, false, 1);
ss << R"(
{
o_col0 = v_col0 * SAMPLE_TEXTURE(samp0, v_tex0);
}
)";

View File

@ -19,7 +19,11 @@ public:
std::string GenerateUVQuadVertexShader();
std::string GenerateFillFragmentShader();
std::string GenerateCopyFragmentShader();
std::string GenerateSampleFragmentShader();
// Vertex stage for presenting the display texture; texture coordinates come from u_src_rect.
std::string GenerateDisplayVertexShader();
// Fragment stage that samples samp0; set_alpha_to_one writes 1.0 as output alpha instead of the sampled alpha.
std::string GenerateDisplayFragmentShader(bool set_alpha_to_one = false);
// Vertex stage for ImGui geometry: a_pos transformed by ProjectionMatrix, colour and UV passed through.
std::string GenerateImGuiVertexShader();
// Fragment stage for ImGui: vertex colour multiplied by the samp0 sample.
std::string GenerateImGuiFragmentShader();
protected:
// True when m_render_api is RenderAPI::Vulkan.
ALWAYS_INLINE bool IsVulkan() const { return m_render_api == RenderAPI::Vulkan; }