GPUDevice: Add GPUDownloadTexture

Download textures can also be backed by host/client memory.
Use it for screenshots and VRAM downloads.
This commit is contained in:
Stenzek 2024-02-28 16:13:50 +10:00
parent 4309d8ebf3
commit f936a36c85
No known key found for this signature in database
27 changed files with 1501 additions and 527 deletions

View File

@ -5,6 +5,7 @@
#pragma once
#include "align.h"
#include "types.h"
#include <type_traits>
@ -27,6 +28,16 @@
#include <malloc.h> // alloca
#endif
/// Alignment, in bytes, used for vector-friendly allocations and pitches.
/// Only currently using 128-bit vectors at max, hence 16.
static constexpr u32 VECTOR_ALIGNMENT = 16;

/// Aligns an allocation/pitch size to the preferred host size (VECTOR_ALIGNMENT)
/// by handing it to Common::AlignUpPow2().
template<typename T>
ALWAYS_INLINE static T VectorAlign(T value)
{
  const T aligned_value = Common::AlignUpPow2(value, VECTOR_ALIGNMENT);
  return aligned_value;
}
template<typename T>
ALWAYS_INLINE_RELEASE static void MemsetPtrs(T* ptr, T value, u32 count)
{

View File

@ -1907,7 +1907,7 @@ Common::Rectangle<s32> GPU::CalculateDrawRect(s32 window_width, s32 window_heigh
static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string filename, FileSystem::ManagedCFilePtr fp,
bool clear_alpha, bool flip_y, u32 resize_width, u32 resize_height,
std::vector<u32> texture_data, u32 texture_data_stride,
std::vector<u8> texture_data, u32 texture_data_stride,
GPUTexture::Format texture_format)
{
@ -1923,8 +1923,18 @@ static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string fil
if (clear_alpha)
{
for (u32& pixel : texture_data)
pixel |= 0xFF000000;
for (u32 y = 0; y < height; y++)
{
u8* pixels = &texture_data[y * texture_data_stride];
for (u32 x = 0; x < width; x++)
{
u32 pixel;
std::memcpy(&pixel, pixels, sizeof(pixel));
pixel |= 0xFF000000u;
std::memcpy(pixels, &pixel, sizeof(pixel));
pixels += sizeof(pixel);
}
}
}
if (flip_y)
@ -1932,11 +1942,10 @@ static bool CompressAndWriteTextureToFile(u32 width, u32 height, std::string fil
if (resize_width > 0 && resize_height > 0 && (resize_width != width || resize_height != height))
{
std::vector<u32> resized_texture_data(resize_width * resize_height);
std::vector<u8> resized_texture_data(resize_width * resize_height * sizeof(u32));
u32 resized_texture_stride = sizeof(u32) * resize_width;
if (!stbir_resize_uint8(reinterpret_cast<u8*>(texture_data.data()), width, height, texture_data_stride,
reinterpret_cast<u8*>(resized_texture_data.data()), resize_width, resize_height,
resized_texture_stride, 4))
if (!stbir_resize_uint8(texture_data.data(), width, height, texture_data_stride, resized_texture_data.data(),
resize_width, resize_height, resized_texture_stride, 4))
{
Log_ErrorPrintf("Failed to resize texture data from %ux%u to %ux%u", width, height, resize_width, resize_height);
return false;
@ -2022,13 +2031,29 @@ bool GPU::WriteDisplayTextureToFile(std::string filename, bool full_resolution /
const u32 read_width = static_cast<u32>(m_display_texture_view_width);
const u32 read_height = static_cast<u32>(m_display_texture_view_height);
std::vector<u32> texture_data(read_width * read_height);
const u32 texture_data_stride =
Common::AlignUpPow2(GPUTexture::GetPixelSize(m_display_texture->GetFormat()) * read_width, 4);
if (!g_gpu_device->DownloadTexture(m_display_texture, read_x, read_y, read_width, read_height, texture_data.data(),
texture_data_stride))
std::vector<u8> texture_data(texture_data_stride * read_height);
std::unique_ptr<GPUDownloadTexture> dltex;
if (g_gpu_device->GetFeatures().memory_import)
{
dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat(),
texture_data.data(), texture_data.size(), texture_data_stride);
}
if (!dltex)
{
if (!(dltex = g_gpu_device->CreateDownloadTexture(read_width, read_height, m_display_texture->GetFormat())))
{
Log_ErrorFmt("Failed to create {}x{} {} download texture", read_width, read_height,
GPUTexture::GetFormatName(m_display_texture->GetFormat()));
return false;
}
}
dltex->CopyFromTexture(0, 0, m_display_texture, read_x, read_y, read_width, read_height, 0, 0, !dltex->IsImported());
if (!dltex->ReadTexels(0, 0, read_width, read_height, texture_data.data(), texture_data_stride))
{
Log_ErrorPrintf("Texture download failed");
RestoreDeviceContext();
return false;
}
@ -2060,7 +2085,7 @@ bool GPU::WriteDisplayTextureToFile(std::string filename, bool full_resolution /
}
bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, bool postfx,
std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format)
std::vector<u8>* out_pixels, u32* out_stride, GPUTexture::Format* out_format)
{
const GPUTexture::Format hdformat =
g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8;
@ -2076,8 +2101,25 @@ bool GPU::RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangl
RenderDisplay(render_texture.get(), draw_rect, postfx);
const u32 stride = GPUTexture::GetPixelSize(hdformat) * width;
out_pixels->resize(width * height);
if (!g_gpu_device->DownloadTexture(render_texture.get(), 0, 0, width, height, out_pixels->data(), stride))
out_pixels->resize(height * stride);
std::unique_ptr<GPUDownloadTexture> dltex;
if (g_gpu_device->GetFeatures().memory_import)
{
dltex =
g_gpu_device->CreateDownloadTexture(width, height, hdformat, out_pixels->data(), out_pixels->size(), stride);
}
if (!dltex)
{
if (!(dltex = g_gpu_device->CreateDownloadTexture(width, height, hdformat)))
{
Log_ErrorFmt("Failed to create {}x{} download texture", width, height);
return false;
}
}
dltex->CopyFromTexture(0, 0, render_texture.get(), 0, 0, width, height, 0, 0, false);
if (!dltex->ReadTexels(0, 0, width, height, out_pixels->data(), stride))
{
RestoreDeviceContext();
return false;
@ -2142,7 +2184,7 @@ bool GPU::RenderScreenshotToFile(std::string filename, bool internal_resolution
if (width == 0 || height == 0)
return false;
std::vector<u32> pixels;
std::vector<u8> pixels;
u32 pixels_stride;
GPUTexture::Format pixels_format;
if (!RenderScreenshotToBuffer(width, height, draw_rect, !internal_resolution, &pixels, &pixels_stride,

View File

@ -206,7 +206,7 @@ public:
/// Renders the display, optionally with postprocessing to the specified image.
bool RenderScreenshotToBuffer(u32 width, u32 height, const Common::Rectangle<s32>& draw_rect, bool postfx,
std::vector<u32>* out_pixels, u32* out_stride, GPUTexture::Format* out_format);
std::vector<u8>* out_pixels, u32* out_stride, GPUTexture::Format* out_format);
/// Helper function to save screenshot to PNG.
bool RenderScreenshotToFile(std::string filename, bool internal_resolution = false, bool compress_on_thread = false);

View File

@ -663,6 +663,26 @@ bool GPU_HW::CreateBuffers()
GL_OBJECT_NAME(m_vram_read_texture, "VRAM Read Texture");
GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture");
if (g_gpu_device->GetFeatures().memory_import)
{
Log_DevPrint("Trying to import guest VRAM buffer for downloads...");
m_vram_readback_download_texture = g_gpu_device->CreateDownloadTexture(
m_vram_readback_texture->GetWidth(), m_vram_readback_texture->GetHeight(), m_vram_readback_texture->GetFormat(),
g_vram, sizeof(g_vram), VRAM_WIDTH * sizeof(u16));
if (!m_vram_readback_download_texture)
Log_ErrorPrint("Failed to create imported readback buffer");
}
if (!m_vram_readback_download_texture)
{
m_vram_readback_download_texture = g_gpu_device->CreateDownloadTexture(
m_vram_readback_texture->GetWidth(), m_vram_readback_texture->GetHeight(), m_vram_readback_texture->GetFormat());
if (!m_vram_readback_download_texture)
{
Log_ErrorPrint("Failed to create readback download texture");
return false;
}
}
if (g_gpu_device->GetFeatures().supports_texture_buffers)
{
if (!(m_vram_upload_buffer =
@ -703,6 +723,7 @@ void GPU_HW::DestroyBuffers()
ClearDisplayTexture();
m_vram_upload_buffer.reset();
m_vram_readback_download_texture.reset();
g_gpu_device->RecycleTexture(std::move(m_downsample_texture));
g_gpu_device->RecycleTexture(std::move(m_vram_read_texture));
g_gpu_device->RecycleTexture(std::move(m_vram_depth_texture));
@ -2405,8 +2426,18 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
}
// Get bounds with wrap-around handled.
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
// Has to be aligned to an even pixel for the download, due to 32-bit packing.
if (copy_rect.left & 1)
copy_rect.left--;
if (copy_rect.right & 1)
copy_rect.right++;
DebugAssert((copy_rect.left % 2) == 0 && (copy_rect.GetWidth() % 2) == 0);
const u32 encoded_left = copy_rect.left / 2;
const u32 encoded_top = copy_rect.top;
const u32 encoded_width = copy_rect.GetWidth() / 2;
const u32 encoded_height = copy_rect.GetHeight();
// Encode the 24-bit texture as 16-bit.
@ -2421,9 +2452,22 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
GL_POP();
// Stage the readback and copy it into our shadow buffer.
g_gpu_device->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height,
reinterpret_cast<u32*>(&g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left]),
VRAM_WIDTH * sizeof(u16));
if (m_vram_readback_download_texture->IsImported())
{
// Fast path, read directly.
m_vram_readback_download_texture->CopyFromTexture(encoded_left, encoded_top, m_vram_readback_texture.get(), 0, 0,
encoded_width, encoded_height, 0, 0, false);
m_vram_readback_download_texture->Flush();
}
else
{
// Copy to staging buffer, then to VRAM.
m_vram_readback_download_texture->CopyFromTexture(0, 0, m_vram_readback_texture.get(), 0, 0, encoded_width,
encoded_height, 0, 0, true);
m_vram_readback_download_texture->ReadTexels(0, 0, encoded_width, encoded_height,
&g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left],
VRAM_WIDTH * sizeof(u16));
}
RestoreDeviceContext();
}

View File

@ -216,6 +216,7 @@ private:
std::unique_ptr<GPUTexture> m_vram_depth_texture;
std::unique_ptr<GPUTexture> m_vram_read_texture;
std::unique_ptr<GPUTexture> m_vram_readback_texture;
std::unique_ptr<GPUDownloadTexture> m_vram_readback_download_texture;
std::unique_ptr<GPUTexture> m_vram_replacement_texture;
std::unique_ptr<GPUTexture> m_display_private_texture; // TODO: Move to base.

View File

@ -265,7 +265,7 @@ bool System::Internal::ProcessStartup()
InitializeDiscordPresence();
#endif
return true;
return true;
}
void System::Internal::ProcessShutdown()
@ -2430,7 +2430,7 @@ bool System::SaveStateToStream(ByteStream* state, u32 screenshot_size /* = 256 *
((display_aspect_ratio > 0.0f) ? display_aspect_ratio : 1.0f)));
Log_VerbosePrintf("Saving %ux%u screenshot for state", screenshot_width, screenshot_height);
std::vector<u32> screenshot_buffer;
std::vector<u8> screenshot_buffer;
u32 screenshot_stride;
GPUTexture::Format screenshot_format;
if (g_gpu->RenderScreenshotToBuffer(screenshot_width, screenshot_height,
@ -2454,7 +2454,7 @@ bool System::SaveStateToStream(ByteStream* state, u32 screenshot_size /* = 256 *
header.offset_to_screenshot = static_cast<u32>(state->GetPosition());
header.screenshot_width = screenshot_width;
header.screenshot_height = screenshot_height;
header.screenshot_size = static_cast<u32>(screenshot_buffer.size() * sizeof(u32));
header.screenshot_size = static_cast<u32>(screenshot_buffer.size());
if (!state->Write2(screenshot_buffer.data(), header.screenshot_size))
return false;
}

View File

@ -155,7 +155,6 @@ void D3D11Device::DestroyDevice()
{
std::unique_lock lock(s_instance_mutex);
DestroyStagingBuffer();
DestroyBuffers();
m_context.Reset();
m_device.Reset();
@ -187,6 +186,7 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.partial_msaa_resolve = false;
m_features.memory_import = false;
m_features.gpu_timing = true;
m_features.shader_cache = true;
m_features.pipeline_cache = false;

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -53,8 +53,11 @@ public:
std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override;
std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override;
bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride) override;
bool SupportsTextureFormat(GPUTexture::Format format) const override;
void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override;
@ -128,9 +131,6 @@ private:
void SetFeatures(FeatureMask disabled_features);
bool CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format);
void DestroyStagingBuffer();
bool CreateSwapChain();
bool CreateSwapChainRTV();
void DestroySwapChain();
@ -163,11 +163,6 @@ private:
BlendStateMap m_blend_states;
InputLayoutMap m_input_layouts;
ComPtr<ID3D11Texture2D> m_readback_staging_texture;
DXGI_FORMAT m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN;
u32 m_readback_staging_texture_width = 0;
u32 m_readback_staging_texture_height = 0;
bool m_allow_tearing_supported = false;
bool m_using_flip_model_swap_chain = true;
bool m_using_allow_tearing = false;

View File

@ -1,16 +1,11 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "d3d11_texture.h"
#include "d3d11_device.h"
#include "d3d_common.h"
// #include "common/align.h"
// #include "common/assert.h"
// #include "common/file_system.h"
#include "common/log.h"
// #include "common/path.h"
// #include "common/rectangle.h"
#include "common/string_util.h"
#include "fmt/format.h"
@ -26,60 +21,6 @@ std::unique_ptr<GPUTexture> D3D11Device::CreateTexture(u32 width, u32 height, u3
return D3D11Texture::Create(m_device.Get(), width, height, layers, levels, samples, type, format, data, data_stride);
}
bool D3D11Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride)
{
const D3D11Texture* tex = static_cast<const D3D11Texture*>(texture);
if (!CheckStagingBufferSize(width, height, tex->GetDXGIFormat()))
return false;
const CD3D11_BOX box(static_cast<LONG>(x), static_cast<LONG>(y), 0, static_cast<LONG>(x + width),
static_cast<LONG>(y + height), 1);
m_context->CopySubresourceRegion(m_readback_staging_texture.Get(), 0, 0, 0, 0, tex->GetD3DTexture(), 0, &box);
D3D11_MAPPED_SUBRESOURCE sr;
HRESULT hr = m_context->Map(m_readback_staging_texture.Get(), 0, D3D11_MAP_READ, 0, &sr);
if (FAILED(hr))
{
Log_ErrorPrintf("Map() failed with HRESULT %08X", hr);
return false;
}
s_stats.num_downloads++;
const u32 copy_size = tex->GetPixelSize() * width;
StringUtil::StrideMemCpy(out_data, out_data_stride, sr.pData, sr.RowPitch, copy_size, height);
m_context->Unmap(m_readback_staging_texture.Get(), 0);
return true;
}
bool D3D11Device::CheckStagingBufferSize(u32 width, u32 height, DXGI_FORMAT format)
{
if (m_readback_staging_texture_width >= width && m_readback_staging_texture_width >= height &&
m_readback_staging_texture_format == format)
return true;
DestroyStagingBuffer();
CD3D11_TEXTURE2D_DESC desc(format, width, height, 1, 1, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ);
HRESULT hr = m_device->CreateTexture2D(&desc, nullptr, m_readback_staging_texture.ReleaseAndGetAddressOf());
if (FAILED(hr))
{
Log_ErrorPrintf("CreateTexture2D() failed with HRESULT %08X", hr);
return false;
}
return true;
}
void D3D11Device::DestroyStagingBuffer()
{
m_readback_staging_texture.Reset();
m_readback_staging_texture_width = 0;
m_readback_staging_texture_height = 0;
m_readback_staging_texture_format = DXGI_FORMAT_UNKNOWN;
}
bool D3D11Device::SupportsTextureFormat(GPUTexture::Format format) const
{
const DXGI_FORMAT dfmt = D3DCommon::GetFormatMapping(format).resource_format;
@ -447,3 +388,132 @@ std::unique_ptr<GPUTextureBuffer> D3D11Device::CreateTextureBuffer(GPUTextureBuf
return tb;
}
// Takes ownership of an already-created texture and records the dimensions/format in the base class.
// NOTE(review): the trailing `false` passed to GPUDownloadTexture is presumably the "imported memory"
// flag (this backend never imports host memory) -- confirm against the base class constructor.
D3D11DownloadTexture::D3D11DownloadTexture(Microsoft::WRL::ComPtr<ID3D11Texture2D> tex, u32 width, u32 height,
GPUTexture::Format format)
: GPUDownloadTexture(width, height, format, false), m_texture(std::move(tex))
{
}
D3D11DownloadTexture::~D3D11DownloadTexture()
{
  // Nothing to release on the CPU side unless a mapping is still outstanding.
  if (!IsMapped())
    return;

  // Explicitly qualified so the call is resolved statically while destructing.
  D3D11DownloadTexture::Unmap();
}
/// Creates a CPU-readable staging texture of the given size/format and wraps it in a download texture.
/// Returns an empty pointer (after logging the HRESULT) when the texture cannot be created.
std::unique_ptr<D3D11DownloadTexture> D3D11DownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format)
{
  // Single-layer, single-mip, non-multisampled staging texture with CPU read access.
  D3D11_TEXTURE2D_DESC staging_desc = {};
  staging_desc.Usage = D3D11_USAGE_STAGING;
  staging_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
  staging_desc.Format = D3DCommon::GetFormatMapping(format).srv_format;
  staging_desc.Width = width;
  staging_desc.Height = height;
  staging_desc.ArraySize = 1;
  staging_desc.MipLevels = 1;
  staging_desc.SampleDesc.Count = 1;
  staging_desc.SampleDesc.Quality = 0;

  Microsoft::WRL::ComPtr<ID3D11Texture2D> staging_texture;
  const HRESULT hr =
    D3D11Device::GetD3DDevice()->CreateTexture2D(&staging_desc, nullptr, staging_texture.GetAddressOf());
  if (SUCCEEDED(hr))
  {
    // The constructor is private, so the object has to be created with a plain new-expression here.
    return std::unique_ptr<D3D11DownloadTexture>(
      new D3D11DownloadTexture(std::move(staging_texture), width, height, format));
  }

  Log_ErrorFmt("CreateTexture2D() failed: {:08X}", hr);
  return {};
}
/// Queues a copy of a region of `src` (at the given layer/mip level) into this staging texture.
/// Any outstanding CPU mapping is released first, and the texture is flagged as needing a flush.
void D3D11DownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width,
                                           u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch)
{
  D3D11Texture* const src11 = static_cast<D3D11Texture*>(src);

  // Sanity checks: formats must match and both rectangles must fit within their textures.
  DebugAssert(src11->GetFormat() == m_format);
  DebugAssert(src_level < src11->GetLevels());
  DebugAssert((src_x + width) <= src11->GetMipWidth(src_level) && (src_y + height) <= src11->GetMipHeight(src_level));
  DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height);
  // A non-zero destination offset is only allowed when the transfer pitch is not used.
  DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch);

  ID3D11DeviceContext1* const context = D3D11Device::GetD3DContext();
  src11->CommitClear(context);

  D3D11Device::GetStatistics().num_downloads++;

  // The staging texture cannot stay mapped while it is the destination of a copy.
  if (IsMapped())
    Unmap();

  const u32 src_subresource = D3D11CalcSubresource(src_level, src_layer, src11->GetLevels());
  const bool is_depth = GPUTexture::IsDepthFormat(src11->GetFormat());
  if (!is_depth)
  {
    const CD3D11_BOX src_box(src_x, src_y, 0, src_x + width, src_y + height, 1);
    context->CopySubresourceRegion(m_texture.Get(), 0, dst_x, dst_y, 0, src11->GetD3DTexture(), src_subresource,
                                   &src_box);
  }
  else
  {
    // depth textures need to copy the whole thing..
    context->CopySubresourceRegion(m_texture.Get(), 0, 0, 0, 0, src11->GetD3DTexture(), src_subresource, nullptr);
  }

  m_needs_flush = true;
}
/// Maps the staging texture for CPU reads, recording the mapped pointer and row pitch.
/// The whole subresource is mapped; the requested rectangle is not consulted by this backend.
/// Returns true if the texture is (or already was) mapped, false if the Map() call failed.
bool D3D11DownloadTexture::Map(u32 x, u32 y, u32 width, u32 height)
{
  if (!IsMapped())
  {
    D3D11_MAPPED_SUBRESOURCE mapped;
    const HRESULT hr = D3D11Device::GetD3DContext()->Map(m_texture.Get(), 0, D3D11_MAP_READ, 0, &mapped);
    if (FAILED(hr))
    {
      Log_ErrorFmt("Map() failed: {:08X}", hr);
      return false;
    }

    m_map_pointer = static_cast<u8*>(mapped.pData);
    m_current_pitch = mapped.RowPitch;
  }

  return true;
}
/// Releases the CPU mapping, if there is one, and clears the cached map pointer.
void D3D11DownloadTexture::Unmap()
{
  if (IsMapped())
  {
    D3D11Device::GetD3DContext()->Unmap(m_texture.Get(), 0);
    m_map_pointer = nullptr;
  }
}
/// Makes the result of the most recent CopyFromTexture() ready to be mapped.
/// No explicit GPU wait is issued here; synchronisation is handled when the texture is mapped.
void D3D11DownloadTexture::Flush()
{
  if (!m_needs_flush)
    return;

  // CopyFromTexture() sets this flag; clear it once handled so that later Flush() calls are no-ops and
  // do not throw away a mapping that is still valid. This matches D3D12DownloadTexture::Flush().
  m_needs_flush = false;

  if (IsMapped())
    Unmap();

  // Handled when mapped.
}
/// Attaches a debug name to the underlying D3D11 texture; empty names are ignored.
void D3D11DownloadTexture::SetDebugName(std::string_view name)
{
  if (!name.empty())
    SetD3DDebugObjectName(m_texture.Get(), name);
}
// Creates a download texture backed by a D3D11 staging texture.
// Simply forwards to D3D11DownloadTexture::Create(), which yields an empty pointer on failure.
std::unique_ptr<GPUDownloadTexture> D3D11Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format)
{
return D3D11DownloadTexture::Create(width, height, format);
}
// Memory-import overload: not supported by this backend.
// All arguments are ignored; an error is logged and an empty pointer is always returned, so callers
// have to fall back to the non-importing overload.
std::unique_ptr<GPUDownloadTexture> D3D11Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride)
{
Log_ErrorPrint("D3D11 cannot import memory for download textures");
return {};
}

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -119,3 +119,26 @@ private:
D3D11StreamBuffer m_buffer;
Microsoft::WRL::ComPtr<ID3D11ShaderResourceView> m_srv;
};
// D3D11 implementation of GPUDownloadTexture, wrapping a single ID3D11Texture2D.
// Instances are obtained through Create(); the constructor is private.
class D3D11DownloadTexture final : public GPUDownloadTexture
{
public:
~D3D11DownloadTexture() override;
// Factory; returns a null pointer on failure.
static std::unique_ptr<D3D11DownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format);
// Copies a region of `src` (given layer/mip level) into this texture at (dst_x, dst_y).
void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height,
u32 src_layer, u32 src_level, bool use_transfer_pitch) override;
// Maps/unmaps the texture for CPU access.
bool Map(u32 x, u32 y, u32 width, u32 height) override;
void Unmap() override;
// Called before the contents of a previous copy are read back.
void Flush() override;
// Assigns a debug name to the wrapped texture.
void SetDebugName(std::string_view name) override;
private:
D3D11DownloadTexture(Microsoft::WRL::ComPtr<ID3D11Texture2D> tex, u32 width, u32 height, GPUTexture::Format format);
// The wrapped texture that copies are written to and that gets mapped for reading.
Microsoft::WRL::ComPtr<ID3D11Texture2D> m_texture;
};

View File

@ -261,7 +261,6 @@ void D3D12Device::DestroyDevice()
WaitForGPUIdle();
DestroyDeferredObjects(m_current_fence_value);
DestroyDownloadBuffer();
DestroySamplers();
DestroyTimestampQuery();
DestroyBuffers();
@ -1195,6 +1194,7 @@ void D3D12Device::SetFeatures(FeatureMask disabled_features)
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.partial_msaa_resolve = true;
m_features.memory_import = false;
m_features.gpu_timing = true;
m_features.shader_cache = true;
m_features.pipeline_cache = true;

View File

@ -30,6 +30,7 @@ class D3D12Pipeline;
class D3D12SwapChain;
class D3D12Texture;
class D3D12TextureBuffer;
class D3D12DownloadTexture;
namespace D3D12MA {
class Allocator;
@ -39,6 +40,7 @@ class D3D12Device final : public GPUDevice
{
public:
friend D3D12Texture;
friend D3D12DownloadTexture;
template<typename T>
using ComPtr = Microsoft::WRL::ComPtr<T>;
@ -74,8 +76,11 @@ public:
std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override;
std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override;
bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride) override;
bool SupportsTextureFormat(GPUTexture::Format format) const override;
void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override;
@ -244,9 +249,6 @@ private:
bool IsRenderTargetBound(const GPUTexture* tex) const;
bool CheckDownloadBufferSize(u32 required_size);
void DestroyDownloadBuffer();
/// Set dirty flags on everything to force re-bind at next draw time.
void InvalidateCachedState();
void SetVertexBuffer(ID3D12GraphicsCommandList4* cmdlist);
@ -321,10 +323,6 @@ private:
SamplerMap m_sampler_map;
ComPtr<ID3D12PipelineLibrary> m_pipeline_library;
ComPtr<D3D12MA::Allocation> m_download_buffer_allocation;
ComPtr<ID3D12Resource> m_download_buffer;
u32 m_download_buffer_size = 0;
// Which bindings/state has to be updated before the next draw.
u32 m_dirty_flags = ALL_DIRTY_STATE;

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "d3d12_texture.h"
@ -664,112 +664,6 @@ void D3D12Texture::MakeReadyForSampling()
TransitionToState(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
bool D3D12Device::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride)
{
D3D12Texture* T = static_cast<D3D12Texture*>(texture);
T->CommitClear();
const u32 pitch = Common::AlignUp(width * T->GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 size = pitch * height;
const u32 subresource = 0;
if (!CheckDownloadBufferSize(size))
{
Log_ErrorPrintf("Can't read back %ux%u", width, height);
return false;
}
if (InRenderPass())
EndRenderPass();
ID3D12GraphicsCommandList4* cmdlist = GetCommandList();
D3D12_TEXTURE_COPY_LOCATION srcloc;
srcloc.pResource = T->GetResource();
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
srcloc.SubresourceIndex = subresource;
D3D12_TEXTURE_COPY_LOCATION dstloc;
dstloc.pResource = m_download_buffer.Get();
dstloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dstloc.PlacedFootprint.Offset = 0;
dstloc.PlacedFootprint.Footprint.Format = T->GetDXGIFormat();
dstloc.PlacedFootprint.Footprint.Width = width;
dstloc.PlacedFootprint.Footprint.Height = height;
dstloc.PlacedFootprint.Footprint.Depth = 1;
dstloc.PlacedFootprint.Footprint.RowPitch = pitch;
const D3D12_RESOURCE_STATES old_layout = T->GetResourceState();
if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
T->TransitionSubresourceToState(cmdlist, subresource, old_layout, D3D12_RESOURCE_STATE_COPY_SOURCE);
// TODO: Rules for depth buffers here?
const D3D12_BOX srcbox{static_cast<UINT>(x), static_cast<UINT>(y), 0u,
static_cast<UINT>(x + width), static_cast<UINT>(y + height), 1u};
cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox);
if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
T->TransitionSubresourceToState(cmdlist, subresource, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout);
SubmitCommandList(true);
u8* map_pointer;
const D3D12_RANGE read_range{0u, size};
const HRESULT hr = m_download_buffer->Map(0, &read_range, reinterpret_cast<void**>(const_cast<u8**>(&map_pointer)));
if (FAILED(hr))
{
Log_ErrorPrintf("Map() failed with HRESULT %08X", hr);
return false;
}
StringUtil::StrideMemCpy(out_data, out_data_stride, map_pointer, pitch, width * T->GetPixelSize(), height);
m_download_buffer->Unmap(0, nullptr);
return true;
}
bool D3D12Device::CheckDownloadBufferSize(u32 required_size)
{
if (m_download_buffer_size >= required_size)
return true;
DestroyDownloadBuffer();
D3D12MA::ALLOCATION_DESC allocation_desc = {};
allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK;
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER,
0,
required_size,
1,
1,
1,
DXGI_FORMAT_UNKNOWN,
{1, 0},
D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
D3D12_RESOURCE_FLAG_NONE};
HRESULT hr = m_allocator->CreateResource(&allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
m_download_buffer_allocation.ReleaseAndGetAddressOf(),
IID_PPV_ARGS(m_download_buffer.ReleaseAndGetAddressOf()));
if (FAILED(hr))
{
Log_ErrorPrintf("CreateResource() failed with HRESULT %08X", hr);
return false;
}
return true;
}
void D3D12Device::DestroyDownloadBuffer()
{
if (!m_download_buffer)
return;
m_download_buffer.Reset();
m_download_buffer_allocation.Reset();
m_download_buffer_size = 0;
}
D3D12Sampler::D3D12Sampler(D3D12DescriptorHandle descriptor) : m_descriptor(descriptor)
{
}
@ -934,3 +828,184 @@ std::unique_ptr<GPUTextureBuffer> D3D12Device::CreateTextureBuffer(GPUTextureBuf
return tb;
}
// Takes ownership of an already-created buffer and its allocation, and remembers the buffer size.
// NOTE(review): the trailing `false` passed to GPUDownloadTexture is presumably the "imported memory"
// flag (this backend never imports host memory) -- confirm against the base class constructor.
D3D12DownloadTexture::D3D12DownloadTexture(u32 width, u32 height, GPUTexture::Format format,
ComPtr<D3D12MA::Allocation> allocation, ComPtr<ID3D12Resource> buffer,
size_t buffer_size)
: GPUDownloadTexture(width, height, format, false), m_allocation(std::move(allocation)), m_buffer(std::move(buffer)),
m_buffer_size(buffer_size)
{
}
D3D12DownloadTexture::~D3D12DownloadTexture()
{
  // Drop any outstanding CPU mapping first. Explicitly qualified so the call is resolved statically.
  if (IsMapped())
    D3D12DownloadTexture::Unmap();

  // Without a buffer there is nothing to hand back to the device.
  if (!m_buffer)
    return;

  // Destruction is deferred via the device instead of releasing the resource immediately.
  D3D12Device::GetInstance().DeferResourceDestruction(m_allocation.Get(), m_buffer.Get());
}
/// Allocates a readback-heap buffer large enough for a width x height image of `format` and wraps it
/// in a download texture. Returns an empty pointer (after logging the HRESULT) on failure.
std::unique_ptr<D3D12DownloadTexture> D3D12DownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format)
{
  const u32 buffer_size = GetBufferSize(width, height, format, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);

  D3D12MA::ALLOCATION_DESC allocation_desc = {};
  allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK;

  // Plain linear buffer of buffer_size bytes.
  D3D12_RESOURCE_DESC buffer_desc = {};
  buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
  buffer_desc.Alignment = 0;
  buffer_desc.Width = buffer_size;
  buffer_desc.Height = 1;
  buffer_desc.DepthOrArraySize = 1;
  buffer_desc.MipLevels = 1;
  buffer_desc.Format = DXGI_FORMAT_UNKNOWN;
  buffer_desc.SampleDesc.Count = 1;
  buffer_desc.SampleDesc.Quality = 0;
  buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
  buffer_desc.Flags = D3D12_RESOURCE_FLAG_NONE;

  ComPtr<D3D12MA::Allocation> allocation;
  ComPtr<ID3D12Resource> buffer;
  const HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource(
    &allocation_desc, &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, allocation.GetAddressOf(),
    IID_PPV_ARGS(buffer.GetAddressOf()));
  if (SUCCEEDED(hr))
  {
    // The constructor is private, so the object has to be created with a plain new-expression here.
    return std::unique_ptr<D3D12DownloadTexture>(
      new D3D12DownloadTexture(width, height, format, std::move(allocation), std::move(buffer), buffer_size));
  }

  Log_ErrorFmt("CreateResource() failed with HRESULT {:08X}", hr);
  return {};
}
/// Records a copy of a region of `src` (at the given layer/mip level) into the readback buffer.
///
/// The copy is only recorded into the device's current command list; the fence value of that command
/// list is remembered and the texture is flagged as needing a flush, so Flush() can later wait for it.
/// When use_transfer_pitch is set, rows are laid out using a pitch derived from `width` instead of the
/// full texture width, which requires the destination offset to be (0, 0).
void D3D12DownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width,
                                           u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch)
{
  D3D12Texture* const src12 = static_cast<D3D12Texture*>(src);
  D3D12Device& dev = D3D12Device::GetInstance();

  // Sanity checks: formats must match and both rectangles must fit within their textures.
  DebugAssert(src12->GetFormat() == m_format);
  DebugAssert(src_level < src12->GetLevels());
  DebugAssert((src_x + width) <= src12->GetMipWidth(src_level) && (src_y + height) <= src12->GetMipHeight(src_level));
  DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height);
  DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch);

  u32 copy_offset, copy_size, copy_rows;
  m_current_pitch = GetTransferPitch(use_transfer_pitch ? width : m_width, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
  GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows);

  dev.GetStatistics().num_downloads++;

  // Copies cannot be recorded inside a render pass.
  if (dev.InRenderPass())
    dev.EndRenderPass();

  src12->CommitClear();

  // The buffer must not stay mapped while it is the destination of a copy.
  if (IsMapped())
    Unmap();

  ID3D12GraphicsCommandList* cmdlist = dev.GetCommandList();
  GL_INS_FMT("ReadbackTexture: {{{},{}}} {}x{} => {{{},{}}}", src_x, src_y, width, height, dst_x, dst_y);

  D3D12_TEXTURE_COPY_LOCATION srcloc;
  srcloc.pResource = src12->GetResource();
  srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
  srcloc.SubresourceIndex = src12->CalculateSubresource(src_layer, src_level);

  D3D12_TEXTURE_COPY_LOCATION dstloc;
  dstloc.pResource = m_buffer.Get();
  dstloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
  dstloc.PlacedFootprint.Offset = copy_offset;
  dstloc.PlacedFootprint.Footprint.Format = src12->GetDXGIFormat();
  dstloc.PlacedFootprint.Footprint.Width = width;
  dstloc.PlacedFootprint.Footprint.Height = height;
  dstloc.PlacedFootprint.Footprint.Depth = 1;
  dstloc.PlacedFootprint.Footprint.RowPitch = m_current_pitch;

  // Transition exactly the subresource that is being copied. Using the bare mip level here would pick
  // the wrong subresource as soon as src_layer is non-zero, leaving the real copy source untransitioned.
  const u32 src_subresource = srcloc.SubresourceIndex;
  const D3D12_RESOURCE_STATES old_layout = src12->GetResourceState();
  if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
    src12->TransitionSubresourceToState(cmdlist, src_subresource, old_layout, D3D12_RESOURCE_STATE_COPY_SOURCE);

  // TODO: Rules for depth buffers here?
  const D3D12_BOX srcbox{static_cast<UINT>(src_x),         static_cast<UINT>(src_y),          0u,
                         static_cast<UINT>(src_x + width), static_cast<UINT>(src_y + height), 1u};
  cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox);

  // Put the source subresource back into the state it was in before the copy.
  if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
    src12->TransitionSubresourceToState(cmdlist, src_subresource, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout);

  m_copy_fence_value = dev.GetCurrentFenceValue();
  m_needs_flush = true;
}
/// Maps the readback buffer for CPU reads covering the requested rectangle.
/// Returns true if the buffer is (or already was) mapped; false if no copy has set a pitch yet or
/// the Map() call failed.
bool D3D12DownloadTexture::Map(u32 x, u32 y, u32 width, u32 height)
{
  if (IsMapped())
    return true;

  // Never populated? The pitch is only assigned by CopyFromTexture().
  if (m_current_pitch == 0)
    return false;

  u32 map_offset, map_size, map_rows;
  GetTransferSize(x, y, width, height, m_current_pitch, &map_offset, &map_size, &map_rows);

  // Describe the byte range the CPU is going to read.
  const D3D12_RANGE read_range{map_offset, map_offset + m_current_pitch * map_rows};
  void** const out_pointer = reinterpret_cast<void**>(const_cast<u8**>(&m_map_pointer));
  const HRESULT hr = m_buffer->Map(0, &read_range, out_pointer);
  if (SUCCEEDED(hr))
    return true;

  Log_ErrorFmt("Map() failed with HRESULT {:08X}", hr);
  return false;
}
void D3D12DownloadTexture::Unmap()
{
if (!IsMapped())
return;
const D3D12_RANGE write_range = {};
m_buffer->Unmap(0, &write_range);
m_map_pointer = nullptr;
}
void D3D12DownloadTexture::Flush()
{
if (!m_needs_flush)
return;
m_needs_flush = false;
D3D12Device& dev = D3D12Device::GetInstance();
if (dev.GetCompletedFenceValue() >= m_copy_fence_value)
return;
// Need to execute command buffer.
if (dev.GetCurrentFenceValue() == m_copy_fence_value)
dev.SubmitCommandList(true);
else
dev.WaitForFence(m_copy_fence_value);
}
/// Assigns a debug name to the readback buffer; empty names are ignored.
void D3D12DownloadTexture::SetDebugName(std::string_view name)
{
  if (!name.empty())
    D3D12::SetObjectName(m_buffer.Get(), name);
}
/// Creates a download texture of the given dimensions and format by forwarding to D3D12DownloadTexture::Create().
std::unique_ptr<GPUDownloadTexture> D3D12Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format)
{
return D3D12DownloadTexture::Create(width, height, format);
}
/// Memory-import overload of CreateDownloadTexture().
/// This backend does not implement importing: it logs an error and always returns an empty pointer,
/// ignoring all arguments.
std::unique_ptr<GPUDownloadTexture> D3D12Device::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride)
{
Log_ErrorPrint("D3D12 cannot import memory for download textures");
return {};
}

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -146,3 +146,34 @@ private:
D3D12StreamBuffer m_buffer;
D3D12DescriptorHandle m_descriptor;
};
/// D3D12 implementation of GPUDownloadTexture, backed by an ID3D12Resource buffer that texture
/// contents are copied into and then mapped for CPU reads.
class D3D12DownloadTexture final : public GPUDownloadTexture
{
public:
template<typename T>
using ComPtr = Microsoft::WRL::ComPtr<T>;
~D3D12DownloadTexture() override;
/// Factory; instances can only be created through this function (the constructor is private).
static std::unique_ptr<D3D12DownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format);
/// Records a copy from src into the buffer and remembers the fence value Flush() has to wait for.
void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height,
u32 src_layer, u32 src_level, bool use_transfer_pitch) override;
/// Maps the buffer for reading. Fails if nothing has been copied into it yet.
bool Map(u32 x, u32 y, u32 width, u32 height) override;
/// Unmaps the buffer if it is currently mapped.
void Unmap() override;
/// Submits and/or waits until the last recorded copy has completed on the GPU.
void Flush() override;
/// Names the underlying buffer resource; empty names are ignored.
void SetDebugName(std::string_view name) override;
private:
D3D12DownloadTexture(u32 width, u32 height, GPUTexture::Format format, ComPtr<D3D12MA::Allocation> allocation,
ComPtr<ID3D12Resource> buffer, size_t buffer_size);
// D3D12MA allocation handle. NOTE(review): presumably the memory backing m_buffer - confirm in Create().
ComPtr<D3D12MA::Allocation> m_allocation;
// Buffer resource used as the copy destination and mapped for CPU reads.
ComPtr<ID3D12Resource> m_buffer;
// Fence value recorded by the most recent CopyFromTexture().
u64 m_copy_fence_value = 0;
// Size of m_buffer in bytes, as passed to the constructor.
size_t m_buffer_size = 0;
};

View File

@ -439,6 +439,7 @@ public:
FEATURE_MASK_TEXTURE_BUFFERS = (1 << 2),
FEATURE_MASK_GEOMETRY_SHADERS = (1 << 3),
FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 4),
FEATURE_MASK_MEMORY_IMPORT = (1 << 5),
};
struct Features
@ -452,6 +453,7 @@ public:
bool texture_buffers_emulated_with_ssbo : 1;
bool geometry_shaders : 1;
bool partial_msaa_resolve : 1;
bool memory_import : 1;
bool gpu_timing : 1;
bool shader_cache : 1;
bool pipeline_cache : 1;
@ -583,8 +585,12 @@ public:
void RecycleTexture(std::unique_ptr<GPUTexture> texture);
void PurgeTexturePool();
virtual bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) = 0;
virtual std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height,
GPUTexture::Format format) = 0;
virtual std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride) = 0;
virtual void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) = 0;
virtual void ResolveTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level,

View File

@ -1,9 +1,10 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "gpu_texture.h"
#include "gpu_device.h"
#include "common/align.h"
#include "common/bitutils.h"
#include "common/log.h"
#include "common/string_util.h"
@ -52,6 +53,68 @@ const char* GPUTexture::GetFormatName(Format format)
return format_names[static_cast<u8>(format)];
}
/// Instance convenience wrapper: bytes per block for this texture's own format.
u32 GPUTexture::GetCompressedBytesPerBlock() const
{
return GetCompressedBytesPerBlock(m_format);
}
/// Returns the number of bytes per block for the given format.
/// Compressed formats are not handled yet, so this currently just returns the per-pixel size.
u32 GPUTexture::GetCompressedBytesPerBlock(Format format)
{
// TODO: Implement me
return GetPixelSize(format);
}
/// Instance convenience wrapper: block size for this texture's own format.
u32 GPUTexture::GetCompressedBlockSize() const
{
return GetCompressedBlockSize(m_format);
}
/// Returns the block size for the given format.
/// Currently always 1; the disabled code below sketches the intended value of 4 for BC1..BC7.
u32 GPUTexture::GetCompressedBlockSize(Format format)
{
// TODO: Implement me
/*if (format >= Format::BC1 && format <= Format::BC7)
return 4;
else*/
return 1;
}
/// Computes the tightly-packed number of bytes in one row of `width` texels for the given format.
u32 GPUTexture::CalcUploadPitch(Format format, u32 width)
{
  /*
  if (format >= Format::BC1 && format <= Format::BC7)
    width = Common::AlignUpPow2(width, 4) / 4;
  */
  const u32 bytes_per_block = GetCompressedBytesPerBlock(format);
  return width * bytes_per_block;
}
/// Instance convenience wrapper: upload pitch for `width` texels of this texture's own format.
u32 GPUTexture::CalcUploadPitch(u32 width) const
{
return CalcUploadPitch(m_format, width);
}
/// Instance convenience wrapper: row length for `pitch` bytes of this texture's own format.
u32 GPUTexture::CalcUploadRowLengthFromPitch(u32 pitch) const
{
return CalcUploadRowLengthFromPitch(m_format, pitch);
}
/// Converts a pitch in bytes to a row length: the number of blocks needed to cover `pitch` bytes
/// (rounded up), multiplied by the format's block size.
u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch)
{
  const u32 texels_per_block = GetCompressedBlockSize(format);
  const u32 block_bytes = GetCompressedBytesPerBlock(format);

  // Round up so a partially-filled trailing block still counts.
  const u32 blocks_in_row = (pitch + (block_bytes - 1)) / block_bytes;
  return blocks_in_row * texels_per_block;
}
/// Instance convenience wrapper: upload size for this texture's own format.
u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const
{
return CalcUploadSize(m_format, height, pitch);
}
/// Computes the total byte size of an upload: `pitch` bytes per block row, with `height` rounded up
/// to a whole number of block rows.
u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch)
{
  const u32 rows_per_block = GetCompressedBlockSize(format);
  const u32 block_rows = (height + (rows_per_block - 1)) / rows_per_block;
  return pitch * block_rows;
}
std::array<float, 4> GPUTexture::GetUNormClearColor() const
{
return GPUDevice::RGBA8ToFloat(m_clear_value.color);
@ -117,6 +180,12 @@ bool GPUTexture::IsDepthStencilFormat(Format format)
return false;
}
/// Reports whether the format is block-compressed.
/// Not implemented yet: currently returns false for every format.
bool GPUTexture::IsCompressedFormat(Format format)
{
// TODO: Implement me
return false;
}
bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format)
{
if (width > MAX_WIDTH || height > MAX_HEIGHT || layers > MAX_LAYERS || levels > MAX_LEVELS || samples > MAX_SAMPLES)
@ -161,7 +230,7 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u
return true;
}
bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u32>& texture_data,
bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u8>& texture_data,
u32& texture_data_stride, GPUTexture::Format format)
{
switch (format)
@ -170,9 +239,15 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
{
for (u32 y = 0; y < height; y++)
{
u32* pixels = reinterpret_cast<u32*>(reinterpret_cast<u8*>(texture_data.data()) + (y * texture_data_stride));
u8* pixels = texture_data.data() + (y * texture_data_stride);
for (u32 x = 0; x < width; x++)
pixels[x] = (pixels[x] & 0xFF00FF00) | ((pixels[x] & 0xFF) << 16) | ((pixels[x] >> 16) & 0xFF);
{
u32 pixel;
std::memcpy(&pixel, pixels, sizeof(pixel));
pixel = (pixel & 0xFF00FF00) | ((pixel & 0xFF) << 16) | ((pixel >> 16) & 0xFF);
std::memcpy(pixels, &pixel, sizeof(pixel));
pixels += sizeof(pixel);
}
}
return true;
@ -183,12 +258,12 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
case Format::RGB565:
{
std::vector<u32> temp(width * height);
std::vector<u8> temp(width * height * sizeof(u32));
for (u32 y = 0; y < height; y++)
{
const u8* pixels_in = reinterpret_cast<u8*>(texture_data.data()) + (y * texture_data_stride);
u32* pixels_out = &temp[y * width];
const u8* pixels_in = texture_data.data() + (y * texture_data_stride);
u8* pixels_out = &temp[y * width * sizeof(u32)];
for (u32 x = 0; x < width; x++)
{
@ -199,8 +274,10 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
const u8 r5 = Truncate8(pixel_in >> 11);
const u8 g6 = Truncate8((pixel_in >> 5) & 0x3F);
const u8 b5 = Truncate8(pixel_in & 0x1F);
*(pixels_out++) = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 2) | (g6 & 3)) << 8) |
const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 2) | (g6 & 3)) << 8) |
(ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (0xFF000000u);
std::memcpy(pixels_out, &rgba8, sizeof(u32));
pixels_out += sizeof(u32);
}
}
@ -211,12 +288,12 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
case Format::RGBA5551:
{
std::vector<u32> temp(width * height);
std::vector<u8> temp(width * height * sizeof(u32));
for (u32 y = 0; y < height; y++)
{
const u8* pixels_in = reinterpret_cast<u8*>(texture_data.data()) + (y * texture_data_stride);
u32* pixels_out = &temp[y * width];
const u8* pixels_in = texture_data.data() + (y * texture_data_stride);
// temp is a byte buffer with width * sizeof(u32) bytes per output row, so the row offset must be in bytes.
u8* pixels_out = &temp[y * width * sizeof(u32)];
for (u32 x = 0; x < width; x++)
{
@ -228,8 +305,10 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
const u8 r5 = Truncate8((pixel_in >> 10) & 0x1F);
const u8 g6 = Truncate8((pixel_in >> 5) & 0x1F);
const u8 b5 = Truncate8(pixel_in & 0x1F);
*(pixels_out++) = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 3) | (g6 & 7)) << 8) |
const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 3) | (g6 & 7)) << 8) |
(ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (a1 ? 0xFF000000u : 0u);
std::memcpy(pixels_out, &rgba8, sizeof(u32));
pixels_out += sizeof(u32);
}
}
@ -244,13 +323,13 @@ bool GPUTexture::ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u3
}
}
void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u32>& texture_data, u32 texture_data_stride)
void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u8>& texture_data, u32 texture_data_stride)
{
std::vector<u32> temp(width);
// Whole rows of texture_data_stride bytes (including any padding) are swapped through this scratch buffer,
// so it must hold a full stride rather than just width * sizeof(u32) bytes.
std::vector<u8> temp(texture_data_stride);
for (u32 flip_row = 0; flip_row < (height / 2); flip_row++)
{
u32* top_ptr = &texture_data[flip_row * width];
u32* bottom_ptr = &texture_data[((height - 1) - flip_row) * width];
u8* top_ptr = &texture_data[flip_row * texture_data_stride];
u8* bottom_ptr = &texture_data[((height - 1) - flip_row) * texture_data_stride];
std::memcpy(temp.data(), top_ptr, texture_data_stride);
std::memcpy(top_ptr, bottom_ptr, texture_data_stride);
std::memcpy(bottom_ptr, temp.data(), texture_data_stride);
@ -260,3 +339,56 @@ void GPUTexture::FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u32>& t
/// Base implementation; intentionally does nothing.
/// NOTE(review): presumably overridden by backends that need an explicit transition before sampling - confirm.
void GPUTexture::MakeReadyForSampling()
{
}
/// Stores the dimensions, format and imported flag. The map pointer, pitch and flush state keep their
/// in-class defaults; backends fill them in as needed.
GPUDownloadTexture::GPUDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool is_imported)
: m_width(width), m_height(height), m_format(format), m_is_imported(is_imported)
{
}
/// Defaulted out-of-line; the base class releases nothing itself.
GPUDownloadTexture::~GPUDownloadTexture() = default;
/// Returns the number of bytes needed to hold a width x height image of the given format when each row
/// is padded up to a multiple of pitch_align (which must be a power of two).
u32 GPUDownloadTexture::GetBufferSize(u32 width, u32 height, GPUTexture::Format format, u32 pitch_align /* = 1 */)
{
  DebugAssert(std::has_single_bit(pitch_align));

  const u32 row_bytes = width * GPUTexture::GetPixelSize(format);
  const u32 aligned_pitch = Common::AlignUpPow2(row_bytes, pitch_align);
  return aligned_pitch * height;
}
/// Returns the row pitch in bytes for a transfer of `width` texels in this texture's format, rounded up
/// to a multiple of pitch_align (which must be a power of two).
u32 GPUDownloadTexture::GetTransferPitch(u32 width, u32 pitch_align) const
{
  DebugAssert(std::has_single_bit(pitch_align));

  const u32 row_bytes = width * GPUTexture::GetPixelSize(m_format);
  return Common::AlignUpPow2(row_bytes, pitch_align);
}
/// Describes a rectangle within a buffer laid out with the given pitch:
///  - copy_offset receives the byte offset of the rectangle's first texel,
///  - copy_size receives the number of bytes in one row of the rectangle,
///  - copy_rows receives the number of rows.
void GPUDownloadTexture::GetTransferSize(u32 x, u32 y, u32 width, u32 height, u32 pitch, u32* copy_offset,
                                         u32* copy_size, u32* copy_rows) const
{
  const u32 texel_size = GPUTexture::GetPixelSize(m_format);
  const u32 row_start = y * pitch;
  const u32 column_start = x * texel_size;

  *copy_offset = row_start + column_start;
  *copy_size = width * texel_size;
  *copy_rows = height;
}
/// Copies the given rectangle out of the download buffer into out_ptr, whose rows are out_stride bytes apart.
/// Flushes first if a copy is pending. Returns false only if the buffer cannot be mapped.
bool GPUDownloadTexture::ReadTexels(u32 x, u32 y, u32 width, u32 height, void* out_ptr, u32 out_stride)
{
  if (m_needs_flush)
    Flush();

  // If the destination is the mapped buffer itself (imported memory), the data is already in place.
  const bool destination_is_own_buffer = (m_map_pointer == out_ptr);
  if (destination_is_own_buffer)
  {
    // ...but the caller's layout has to agree with ours.
    DebugAssert(x == 0 && y == 0 && width <= m_width && height <= m_height && out_stride == m_current_pitch);
    return true;
  }

  if (!Map(x, y, width, height))
    return false;

  u32 src_offset, row_bytes, row_count;
  GetTransferSize(x, y, width, height, m_current_pitch, &src_offset, &row_bytes, &row_count);

  const u8* const src_ptr = m_map_pointer + src_offset;
  StringUtil::StrideMemCpy(out_ptr, out_stride, src_ptr, m_current_pitch, row_bytes, row_count);
  return true;
}

View File

@ -80,11 +80,18 @@ public:
static u32 GetPixelSize(GPUTexture::Format format);
static bool IsDepthFormat(GPUTexture::Format format);
static bool IsDepthStencilFormat(GPUTexture::Format format);
static bool IsCompressedFormat(Format format);
static u32 GetCompressedBytesPerBlock(Format format);
static u32 GetCompressedBlockSize(Format format);
static u32 CalcUploadPitch(Format format, u32 width);
static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch);
static u32 CalcUploadSize(Format format, u32 height, u32 pitch);
static bool ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format);
static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u32>& texture_data, u32& texture_data_stride,
static bool ConvertTextureDataToRGBA8(u32 width, u32 height, std::vector<u8>& texture_data, u32& texture_data_stride,
GPUTexture::Format format);
static void FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u32>& texture_data, u32 texture_data_stride);
static void FlipTextureDataRGBA8(u32 width, u32 height, std::vector<u8>& texture_data, u32 texture_data_stride);
ALWAYS_INLINE u32 GetWidth() const { return m_width; }
ALWAYS_INLINE u32 GetHeight() const { return m_height; }
@ -133,6 +140,12 @@ public:
size_t GetVRAMUsage() const;
u32 GetCompressedBytesPerBlock() const;
u32 GetCompressedBlockSize() const;
u32 CalcUploadPitch(u32 width) const;
u32 CalcUploadRowLengthFromPitch(u32 pitch) const;
u32 CalcUploadSize(u32 height, u32 pitch) const;
GPUTexture& operator=(const GPUTexture&) = delete;
virtual bool Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer = 0,
@ -160,3 +173,71 @@ protected:
ClearValue m_clear_value = {};
};
/// CPU-readable destination for texture readbacks.
/// Typical use: CopyFromTexture() to queue a copy, Flush() to make sure it has completed, then either
/// Map() and read through GetMapPointer()/GetMapPitch(), or ReadTexels() to copy into caller memory.
class GPUDownloadTexture
{
public:
GPUDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool is_imported);
virtual ~GPUDownloadTexture();
/// Basically, this has dimensions only because of DX11.
ALWAYS_INLINE u32 GetWidth() const { return m_width; }
ALWAYS_INLINE u32 GetHeight() const { return m_height; }
ALWAYS_INLINE GPUTexture::Format GetFormat() const { return m_format; }
/// True when a copy has been queued and Flush() has not been called since.
ALWAYS_INLINE bool NeedsFlush() const { return m_needs_flush; }
/// True when the map pointer is non-null.
ALWAYS_INLINE bool IsMapped() const { return (m_map_pointer != nullptr); }
/// True when the texture was constructed with the imported flag set.
ALWAYS_INLINE bool IsImported() const { return m_is_imported; }
/// Pointer to the mapped contents, or null when not mapped.
ALWAYS_INLINE const u8* GetMapPointer() const { return m_map_pointer; }
/// Row pitch in bytes of the current contents; zero until a backend has set it.
ALWAYS_INLINE u32 GetMapPitch() const { return m_current_pitch; }
/// Calculates the pitch of a transfer.
/// pitch_align must be a power of two.
u32 GetTransferPitch(u32 width, u32 pitch_align) const;
/// Calculates the size of the data you should transfer.
/// Outputs the byte offset of the rectangle's first texel, the byte length of one row, and the row count.
void GetTransferSize(u32 x, u32 y, u32 width, u32 height, u32 pitch, u32* copy_offset, u32* copy_size,
u32* copy_rows) const;
/// Queues a copy from the specified texture to this buffer.
/// Does not complete immediately, you should flush before accessing the buffer.
/// use_transfer_pitch should be true if there's only a single texture being copied to this buffer before
/// it will be used. This allows the image to be packed tighter together, and buffer reuse.
virtual void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height,
u32 src_layer, u32 src_level, bool use_transfer_pitch = true) = 0;
/// Maps the texture into the CPU address space, enabling it to read the contents.
/// The Map call may not perform synchronization. If the contents of the staging texture
/// has been updated by a CopyFromTexture() call, you must call Flush() first.
/// If persistent mapping is supported in the backend, this may be a no-op.
virtual bool Map(u32 x, u32 y, u32 width, u32 height) = 0;
/// Unmaps the CPU-readable copy of the texture. May be a no-op on backends which
/// support persistent-mapped buffers.
virtual void Unmap() = 0;
/// Flushes pending writes from the CPU to the GPU, and reads from the GPU to the CPU.
/// This may cause a command buffer submit depending on if one has occurred between the last
/// call to CopyFromTexture() and the Flush() call.
virtual void Flush() = 0;
/// Sets object name that will be displayed in graphics debuggers.
virtual void SetDebugName(std::string_view name) = 0;
/// Reads the specified rectangle from the staging texture to out_ptr, with the specified stride
/// (length in bytes of each row). CopyFromTexture() must be called first. The contents of any
/// texels outside of the rectangle used for CopyFromTexture is undefined.
/// Flushes automatically if a copy is pending. Returns false if the buffer could not be mapped.
bool ReadTexels(u32 x, u32 y, u32 width, u32 height, void* out_ptr, u32 out_stride);
/// Returns what the size of the specified texture would be, in bytes.
/// Each row is padded to a multiple of pitch_align, which must be a power of two.
static u32 GetBufferSize(u32 width, u32 height, GPUTexture::Format format, u32 pitch_align = 1);
protected:
// Dimensions and format given at construction.
u32 m_width;
u32 m_height;
GPUTexture::Format m_format;
// CPU-visible pointer to the contents; null means "not mapped". Maintained by the backend.
const u8* m_map_pointer = nullptr;
// Row pitch in bytes of the current contents; zero until a backend sets it.
u32 m_current_pitch = 0;
// Set from the constructor's is_imported argument.
bool m_is_imported = false;
// Set by backends when a copy is queued, cleared by Flush().
bool m_needs_flush = false;
};

View File

@ -137,6 +137,34 @@ private:
u8 m_map_level = 0;
};
/// Metal implementation of GPUDownloadTexture, backed by an MTLBuffer that is either allocated
/// by Create() or wraps caller-provided memory.
class MetalDownloadTexture final : public GPUDownloadTexture
{
public:
~MetalDownloadTexture() override;
/// Factory. Pass memory == nullptr to allocate a new buffer; otherwise the given memory is imported
/// with memory_stride as the row pitch.
static std::unique_ptr<MetalDownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format, void* memory,
size_t memory_size, u32 memory_stride);
/// Encodes a blit from src into the buffer and records the fence counter Flush() has to wait for.
void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height,
u32 src_layer, u32 src_level, bool use_transfer_pitch) override;
/// No-op that returns true; the map pointer is set once at construction.
bool Map(u32 x, u32 y, u32 width, u32 height) override;
/// No-op.
void Unmap() override;
/// Submits and/or waits until the last encoded copy has completed.
void Flush() override;
/// Sets the label of the underlying MTLBuffer.
void SetDebugName(std::string_view name) override;
private:
MetalDownloadTexture(u32 width, u32 height, GPUTexture::Format format, u8* import_buffer, size_t buffer_offset,
id<MTLBuffer> buffer, const u8* map_ptr, u32 map_pitch);
// Byte offset added to every copy destination within m_buffer.
size_t m_buffer_offset = 0;
// Destination buffer for blits; released in the destructor.
id<MTLBuffer> m_buffer = nil;
// Device fence counter captured by the most recent CopyFromTexture().
u64 m_copy_fence_counter = 0;
};
class MetalTextureBuffer final : public GPUTextureBuffer
{
public:
@ -160,6 +188,7 @@ private:
class MetalDevice final : public GPUDevice
{
friend MetalTexture;
friend MetalDownloadTexture;
public:
ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
@ -188,8 +217,11 @@ public:
std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override;
std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override;
bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride) override;
bool SupportsTextureFormat(GPUTexture::Format format) const override;
void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override;
@ -302,8 +334,6 @@ private:
void SetViewportInRenderEncoder();
void SetScissorInRenderEncoder();
bool CheckDownloadBufferSize(u32 required_size);
bool CreateLayer();
void DestroyLayer();
void RenderBlankFrame();
@ -327,9 +357,6 @@ private:
DepthStateMap m_depth_states;
id<MTLBuffer> m_download_buffer = nil;
u32 m_download_buffer_size = 0;
MetalStreamBuffer m_vertex_buffer;
MetalStreamBuffer m_index_buffer;
MetalStreamBuffer m_uniform_buffer;

View File

@ -234,6 +234,7 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features)
m_features.texture_buffers_emulated_with_ssbo = true;
m_features.geometry_shaders = false;
m_features.partial_msaa_resolve = false;
m_features.memory_import = true;
m_features.shader_cache = true;
m_features.pipeline_cache = false;
m_features.prefer_unused_textures = true;
@ -499,13 +500,6 @@ bool MetalDevice::CreateBuffers()
void MetalDevice::DestroyBuffers()
{
if (m_download_buffer != nil)
{
[m_download_buffer release];
m_download_buffer = nil;
m_download_buffer_size = 0;
}
m_texture_upload_buffer.Destroy();
m_uniform_buffer.Destroy();
m_vertex_buffer.Destroy();
@ -759,17 +753,17 @@ std::unique_ptr<GPUPipeline> MetalDevice::CreatePipeline(const GPUPipeline::Grap
static constexpr u32 MAX_COMPONENTS = 4;
static constexpr const MTLVertexFormat
format_mapping[static_cast<u8>(GPUPipeline::VertexAttribute::Type::MaxCount)][MAX_COMPONENTS] = {
{MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float
{MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8
{MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8
{MTLVertexFormatFloat, MTLVertexFormatFloat2, MTLVertexFormatFloat3, MTLVertexFormatFloat4}, // Float
{MTLVertexFormatUChar, MTLVertexFormatUChar2, MTLVertexFormatUChar3, MTLVertexFormatUChar4}, // UInt8
{MTLVertexFormatChar, MTLVertexFormatChar2, MTLVertexFormatChar3, MTLVertexFormatChar4}, // SInt8
{MTLVertexFormatUCharNormalized, MTLVertexFormatUChar2Normalized, MTLVertexFormatUChar3Normalized,
MTLVertexFormatUChar4Normalized}, // UNorm8
{MTLVertexFormatUShort, MTLVertexFormatUShort2, MTLVertexFormatUShort3, MTLVertexFormatUShort4}, // UInt16
{MTLVertexFormatShort, MTLVertexFormatShort2, MTLVertexFormatShort3, MTLVertexFormatShort4}, // SInt16
{MTLVertexFormatUShortNormalized, MTLVertexFormatUShort2Normalized, MTLVertexFormatUShort3Normalized,
MTLVertexFormatUShort4Normalized}, // UNorm16
{MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32
{MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32
MTLVertexFormatUShort4Normalized}, // UNorm16
{MTLVertexFormatUInt, MTLVertexFormatUInt2, MTLVertexFormatUInt3, MTLVertexFormatUInt4}, // UInt32
{MTLVertexFormatInt, MTLVertexFormatInt2, MTLVertexFormatInt3, MTLVertexFormatInt4}, // SInt32
};
static constexpr std::array<MTLCullMode, static_cast<u32>(GPUPipeline::CullMode::MaxCount)> cull_mapping = {{
@ -1132,6 +1126,166 @@ std::unique_ptr<GPUTexture> MetalDevice::CreateTexture(u32 width, u32 height, u3
}
}
/// Stores the buffer and its offset, and initialises the base-class map pointer and pitch.
/// The texture is flagged as imported whenever import_buffer is non-null.
MetalDownloadTexture::MetalDownloadTexture(u32 width, u32 height, GPUTexture::Format format, u8* import_buffer,
size_t buffer_offset, id<MTLBuffer> buffer, const u8* map_ptr, u32 map_pitch)
: GPUDownloadTexture(width, height, format, (import_buffer != nullptr)), m_buffer_offset(buffer_offset),
m_buffer(buffer)
{
// Base-class members cannot go in the initializer list, so assign them here.
m_map_pointer = map_ptr;
m_current_pitch = map_pitch;
}
/// Drops this object's reference to the MTLBuffer.
MetalDownloadTexture::~MetalDownloadTexture()
{
[m_buffer release];
}
/// Creates a download texture backed by a shared-storage MTLBuffer.
///
/// When memory is null, a new buffer of height * pitch bytes is allocated, with the pitch aligned to
/// TEXTURE_UPLOAD_PITCH_ALIGNMENT. Otherwise the caller's memory is wrapped without copying: the pointer is
/// rounded down to a page boundary (and the length rounded up), memory_stride is used as the pitch, and the
/// offset of `memory` within the wrapped range is kept so GPU copies land at the caller's pointer.
///
/// Returns an empty pointer if the buffer cannot be created.
std::unique_ptr<MetalDownloadTexture> MetalDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format,
                                                                   void* memory, size_t memory_size, u32 memory_stride)
{
  @autoreleasepool
  {
    MetalDevice& dev = MetalDevice::GetInstance();
    id<MTLBuffer> buffer = nil;
    size_t memory_offset = 0;
    const u8* map_ptr = nullptr;
    u32 map_pitch = 0;
    u32 buffer_size = 0;

    constexpr MTLResourceOptions options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;

    // not importing memory?
    if (!memory)
    {
      map_pitch = Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
      buffer_size = height * map_pitch;
      buffer = [[dev.m_device newBufferWithLength:buffer_size options:options] retain];
      if (buffer == nil)
      {
        Log_ErrorFmt("Failed to create {} byte buffer", buffer_size);
        return {};
      }

      map_ptr = static_cast<u8*>([buffer contents]);
    }
    else
    {
      map_pitch = memory_stride;
      buffer_size = height * map_pitch;
      Assert(buffer_size <= memory_size);

      // Importing memory, we need to page align the buffer.
      void* page_aligned_memory =
        reinterpret_cast<void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(memory), HOST_PAGE_SIZE));
      const size_t page_offset = static_cast<size_t>(static_cast<u8*>(memory) - static_cast<u8*>(page_aligned_memory));
      const size_t page_aligned_size = Common::AlignUpPow2(page_offset + memory_size, HOST_PAGE_SIZE);

      // Argument order must match the placeholders: byte count first, then the address.
      Log_DevFmt("Trying to import {} bytes of memory at {} for download texture", page_aligned_size,
                 page_aligned_memory);

      buffer = [[dev.m_device newBufferWithBytesNoCopy:page_aligned_memory
                                                length:page_aligned_size
                                               options:options
                                           deallocator:nil] retain];
      if (buffer == nil)
      {
        Log_ErrorFmt("Failed to import {} byte buffer", page_aligned_size);
        return {};
      }

      // The buffer begins at the page boundary, not at `memory`. Copies add this offset to their destination,
      // so without it the GPU would write page_offset bytes before the location the CPU reads from.
      memory_offset = page_offset;
      map_ptr = static_cast<u8*>(memory);
    }

    return std::unique_ptr<MetalDownloadTexture>(new MetalDownloadTexture(
      width, height, format, static_cast<u8*>(memory), memory_offset, buffer, map_ptr, map_pitch));
  }
}
/// Encodes a blit of the given source rectangle into this buffer at (dst_x, dst_y).
/// For non-imported buffers the pitch is recomputed per copy: from the copy width when use_transfer_pitch
/// is set, otherwise from the full texture width. Imported buffers keep the pitch given at creation.
/// The copy is only queued; Flush() must run before the data is read.
void MetalDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width,
                                           u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch)
{
  MetalTexture* const mtlTex = static_cast<MetalTexture*>(src);
  MetalDevice& device = MetalDevice::GetInstance();

  DebugAssert(mtlTex->GetFormat() == m_format);
  DebugAssert(src_level < mtlTex->GetLevels());
  DebugAssert((src_x + width) <= mtlTex->GetMipWidth(src_level) && (src_y + height) <= mtlTex->GetMipHeight(src_level));
  DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height);
  DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch);
  DebugAssert(!m_is_imported || !use_transfer_pitch);

  if (!m_is_imported)
  {
    const u32 pitch_width = use_transfer_pitch ? width : m_width;
    m_current_pitch = GetTransferPitch(pitch_width, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
  }

  u32 dst_offset, row_bytes, row_count;
  GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &dst_offset, &row_bytes, &row_count);

  device.GetStatistics().num_downloads++;

  // Apply any pending clear on the source before it is read.
  device.CommitClear(mtlTex);

  id<MTLBlitCommandEncoder> blit = device.GetBlitEncoder(true);
  [blit copyFromTexture:mtlTex->GetMTLTexture()
                 sourceSlice:src_layer
                 sourceLevel:src_level
                sourceOrigin:MTLOriginMake(src_x, src_y, 0)
                  sourceSize:MTLSizeMake(width, height, 1)
                    toBuffer:m_buffer
           destinationOffset:m_buffer_offset + dst_offset
      destinationBytesPerRow:m_current_pitch
    destinationBytesPerImage:0];

  // Remember which fence this copy belongs to, so Flush() knows what to wait for.
  m_copy_fence_counter = device.m_current_fence_counter;
  m_needs_flush = true;
}
/// No-op: the map pointer is assigned once in the constructor, so there is nothing to do here.
/// All arguments are ignored and the call always succeeds.
bool MetalDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height)
{
// Always mapped.
return true;
}
/// No-op counterpart to Map(); the map pointer is left untouched.
void MetalDownloadTexture::Unmap()
{
// Always mapped.
}
/// Ensures the most recent CopyFromTexture() has finished on the GPU before the buffer is read.
void MetalDownloadTexture::Flush()
{
  if (!m_needs_flush)
    return;

  m_needs_flush = false;

  MetalDevice& device = MetalDevice::GetInstance();

  // Nothing to do if the fence recorded at copy time has already completed.
  const bool copy_completed = (device.m_completed_fence_counter >= m_copy_fence_counter);
  if (copy_completed)
    return;

  if (device.GetCurrentFenceCounter() != m_copy_fence_counter)
  {
    // The copy belongs to an earlier fence counter, so just wait on it.
    device.WaitForFenceCounter(m_copy_fence_counter);
  }
  else
  {
    // The copy is still in the current command buffer; need to execute command buffer.
    device.SubmitCommandBuffer(true);
  }
}
/// Sets the MTLBuffer's label to the given name.
void MetalDownloadTexture::SetDebugName(std::string_view name)
{
// The pool scopes the temporary NSString created for the label.
@autoreleasepool
{
[m_buffer setLabel:StringViewToNSString(name)];
}
}
/// Creates a download texture with its own newly-allocated buffer (no imported memory).
std::unique_ptr<GPUDownloadTexture> MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format)
{
return MetalDownloadTexture::Create(width, height, format, nullptr, 0, 0);
}
/// Creates a download texture that wraps caller-provided memory, using memory_stride as the row pitch.
/// All arguments are forwarded unchanged to MetalDownloadTexture::Create().
std::unique_ptr<GPUDownloadTexture> MetalDevice::CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride)
{
return MetalDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride);
}
/// Wraps an existing sampler state object; the handle is stored as-is without an additional retain here.
MetalSampler::MetalSampler(id<MTLSamplerState> ss) : m_ss(ss)
{
}
@ -1218,71 +1372,6 @@ std::unique_ptr<GPUSampler> MetalDevice::CreateSampler(const GPUSampler::Config&
}
}
/// Synchronously reads back a rectangle of layer 0 / level 0 of the texture into out_data.
/// The rectangle is blitted into the shared download buffer, the command buffer is submitted with
/// SubmitCommandBuffer(true), and the rows are then copied to out_data with out_data_stride bytes between
/// rows. Returns false only if the download buffer cannot be grown to the required size.
bool MetalDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride)
{
constexpr u32 src_layer = 0;
constexpr u32 src_level = 0;
// Bytes actually needed per row, and the aligned pitch used inside the download buffer.
const u32 copy_size = width * texture->GetPixelSize();
const u32 pitch = Common::AlignUpPow2(copy_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
const u32 required_size = pitch * height;
if (!CheckDownloadBufferSize(required_size))
return false;
MetalTexture* T = static_cast<MetalTexture*>(texture);
// Apply any pending clear on the source before it is read.
CommitClear(T);
s_stats.num_downloads++;
@autoreleasepool
{
id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true);
[encoder copyFromTexture:T->GetMTLTexture()
sourceSlice:src_layer
sourceLevel:src_level
sourceOrigin:MTLOriginMake(x, y, 0)
sourceSize:MTLSizeMake(width, height, 1)
toBuffer:m_download_buffer
destinationOffset:0
destinationBytesPerRow:pitch
destinationBytesPerImage:0];
// NOTE(review): the `true` argument presumably blocks until the GPU has finished - confirm.
SubmitCommandBuffer(true);
// Strip the pitch padding while copying out to the caller's layout.
StringUtil::StrideMemCpy(out_data, out_data_stride, [m_download_buffer contents], pitch, copy_size, height);
}
return true;
}
/// Makes sure the shared download buffer holds at least required_size bytes, replacing it with a larger
/// one when necessary. On allocation failure the recorded size is reset to zero and false is returned.
bool MetalDevice::CheckDownloadBufferSize(u32 required_size)
{
  // Already large enough?
  if (required_size <= m_download_buffer_size)
    return true;

  @autoreleasepool
  {
    // We don't need to defer releasing this one, it's not going to be used.
    if (m_download_buffer != nil)
      [m_download_buffer release];

    constexpr MTLResourceOptions buffer_options = MTLResourceStorageModeShared | MTLResourceCPUCacheModeDefaultCache;
    m_download_buffer = [[m_device newBufferWithLength:required_size options:buffer_options] retain];

    const bool allocation_failed = (m_download_buffer == nil);
    if (allocation_failed)
    {
      Log_ErrorPrintf("Failed to create %u byte download buffer", required_size);
      m_download_buffer_size = 0;
      return false;
    }

    m_download_buffer_size = required_size;
  }

  return true;
}
bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const
{
if (format == GPUTexture::Format::RGB565 || format == GPUTexture::Format::RGBA5551)

View File

@ -43,6 +43,11 @@ void OpenGLDevice::BindUpdateTextureUnit()
GetInstance().SetActiveTexture(UPDATE_TEXTURE_UNIT - GL_TEXTURE0);
}
/// PBO-based downloads are used only when neither PBOs nor async downloads have been disabled on the device.
bool OpenGLDevice::ShouldUsePBOsForDownloads()
{
  const auto& device = GetInstance();
  const bool any_disabled = (device.m_disable_pbo || device.m_disable_async_download);
  return !any_disabled;
}
RenderAPI OpenGLDevice::GetRenderAPI() const
{
return m_gl_context->IsGLES() ? RenderAPI::OpenGLES : RenderAPI::OpenGL;
@ -55,53 +60,6 @@ std::unique_ptr<GPUTexture> OpenGLDevice::CreateTexture(u32 width, u32 height, u
return OpenGLTexture::Create(width, height, layers, levels, samples, type, format, data, data_stride);
}
/// Synchronously reads back a rectangle of layer 0 / level 0 of the texture into out_data, with
/// out_data_stride bytes between rows. Uses glGetTextureSubImage when available, otherwise attaches the
/// texture to the read framebuffer and uses glReadPixels. Always returns true.
bool OpenGLDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride)
{
OpenGLTexture* T = static_cast<OpenGLTexture*>(texture);
// Pick the pack alignment from the low bits of the stride: 1 for odd strides, 2 when bit 1 is set, else 4.
GLint alignment;
if (out_data_stride & 1)
alignment = 1;
else if (out_data_stride & 2)
alignment = 2;
else
alignment = 4;
glPixelStorei(GL_PACK_ALIGNMENT, alignment);
// Row length is specified in pixels, so convert the byte stride.
glPixelStorei(GL_PACK_ROW_LENGTH, out_data_stride / T->GetPixelSize());
const auto [gl_internal_format, gl_format, gl_type] =
OpenGLTexture::GetPixelFormatMapping(T->GetFormat(), m_gl_context->IsGLES());
const u32 layer = 0;
const u32 level = 0;
s_stats.num_downloads++;
if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_get_texture_sub_image)
{
// Direct sub-image readback; the buffer size argument is the total bytes available at out_data.
glGetTextureSubImage(T->GetGLId(), level, x, y, layer, width, height, 1, gl_format, gl_type,
height * out_data_stride, out_data);
}
else
{
// Fallback: bind the texture to the read framebuffer and read the pixels from there.
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_read_fbo);
if (T->GetLayers() > 1)
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, T->GetGLId(), level, layer);
else
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, T->GetGLTarget(), T->GetGLId(), level);
DebugAssert(glCheckFramebufferStatus(GL_READ_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
glReadPixels(x, y, width, height, gl_format, gl_type, out_data);
// Detach the texture and unbind the read framebuffer again.
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
}
return true;
}
bool OpenGLDevice::SupportsTextureFormat(GPUTexture::Format format) const
{
const auto [gl_internal_format, gl_format, gl_type] =
@ -362,11 +320,10 @@ bool OpenGLDevice::CreateDevice(const std::string_view& adapter, bool threaded_p
glObjectLabel = nullptr;
}
bool buggy_pbo;
if (!CheckFeatures(&buggy_pbo, disabled_features))
if (!CheckFeatures(disabled_features))
return false;
if (!CreateBuffers(buggy_pbo))
if (!CreateBuffers())
return false;
// Scissor test should always be enabled.
@ -375,7 +332,7 @@ bool OpenGLDevice::CreateDevice(const std::string_view& adapter, bool threaded_p
return true;
}
bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features)
bool OpenGLDevice::CheckFeatures(FeatureMask disabled_features)
{
const bool is_gles = m_gl_context->IsGLES();
@ -424,10 +381,9 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features)
// using the normal texture update routines and letting the driver take care of it. PBOs are also completely
// broken on mobile drivers.
const bool is_shitty_mobile_driver = (vendor_id_powervr || vendor_id_qualcomm || vendor_id_arm);
const bool is_buggy_pbo =
m_disable_pbo =
(!GLAD_GL_VERSION_4_4 && !GLAD_GL_ARB_buffer_storage && !GLAD_GL_EXT_buffer_storage) || is_shitty_mobile_driver;
*buggy_pbo = is_buggy_pbo;
if (is_buggy_pbo && !is_shitty_mobile_driver)
if (m_disable_pbo && !is_shitty_mobile_driver)
Log_WarningPrint("Not using PBOs for texture uploads because buffer_storage is unavailable.");
GLint max_texture_size = 1024;
@ -517,6 +473,7 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features)
m_features.gpu_timing = !(m_gl_context->IsGLES() &&
(!GLAD_GL_EXT_disjoint_timer_query || !glGetQueryObjectivEXT || !glGetQueryObjectui64vEXT));
m_features.partial_msaa_resolve = true;
m_features.memory_import = true;
m_features.shader_cache = false;
@ -539,6 +496,13 @@ bool OpenGLDevice::CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features)
// Mobile drivers prefer textures to not be updated mid-frame.
m_features.prefer_unused_textures = is_gles || vendor_id_arm || vendor_id_powervr || vendor_id_qualcomm;
if (vendor_id_intel)
{
// Intel drivers corrupt image on readback when syncs are used for downloads.
Log_WarningPrint("Disabling async downloads with PBOs due to it being broken on Intel drivers.");
m_disable_async_download = true;
}
return true;
}
@ -711,7 +675,7 @@ void OpenGLDevice::DestroySurface()
Log_ErrorPrintf("Failed to switch to surfaceless");
}
bool OpenGLDevice::CreateBuffers(bool buggy_pbo)
bool OpenGLDevice::CreateBuffers()
{
if (!(m_vertex_buffer = OpenGLStreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE)) ||
!(m_index_buffer = OpenGLStreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE)) ||
@ -727,7 +691,7 @@ bool OpenGLDevice::CreateBuffers(bool buggy_pbo)
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, reinterpret_cast<GLint*>(&m_uniform_buffer_alignment));
if (!buggy_pbo)
if (!m_disable_pbo)
{
if (!(m_texture_stream_buffer = OpenGLStreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, TEXTURE_STREAM_BUFFER_SIZE)))
{

View File

@ -20,9 +20,13 @@
class OpenGLPipeline;
class OpenGLStreamBuffer;
class OpenGLTexture;
class OpenGLDownloadTexture;
class OpenGLDevice final : public GPUDevice
{
friend OpenGLTexture;
friend OpenGLDownloadTexture;
public:
OpenGLDevice();
~OpenGLDevice();
@ -34,6 +38,7 @@ public:
}
ALWAYS_INLINE static bool IsGLES() { return GetInstance().m_gl_context->IsGLES(); }
static void BindUpdateTextureUnit();
static bool ShouldUsePBOsForDownloads();
RenderAPI GetRenderAPI() const override;
@ -53,8 +58,11 @@ public:
std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override;
std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override;
bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride) override;
bool SupportsTextureFormat(GPUTexture::Format format) const override;
void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override;
@ -137,8 +145,8 @@ private:
static constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr u32 TEXTURE_STREAM_BUFFER_SIZE = 16 * 1024 * 1024;
bool CheckFeatures(bool* buggy_pbo, FeatureMask disabled_features);
bool CreateBuffers(bool buggy_pbo);
bool CheckFeatures(FeatureMask disabled_features);
bool CreateBuffers();
void DestroyBuffers();
void SetSwapInterval();
@ -215,4 +223,7 @@ private:
std::string m_pipeline_disk_cache_filename;
u32 m_pipeline_disk_cache_data_end = 0;
bool m_pipeline_disk_cache_changed = false;
bool m_disable_pbo = false;
bool m_disable_async_download = false;
};

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "opengl_texture.h"
@ -7,6 +7,7 @@
#include "common/align.h"
#include "common/assert.h"
#include "common/intrin.h"
#include "common/log.h"
#include "common/string_util.h"
@ -696,3 +697,207 @@ std::unique_ptr<GPUTextureBuffer> OpenGLDevice::CreateTextureBuffer(GPUTextureBu
return std::unique_ptr<GPUTextureBuffer>(
new OpenGLTextureBuffer(format, size_in_elements, std::move(buffer), texture_id));
}
/// Stores the backing storage description for a download texture.
/// `buffer_id` is the GL buffer name (0 when a CPU buffer is used), `cpu_buffer` the CPU-side storage
/// (nullptr when a GL buffer is used), `map_ptr`/`map_pitch` the initial readable pointer and row pitch.
OpenGLDownloadTexture::OpenGLDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool imported,
                                             GLuint buffer_id, u8* cpu_buffer, u32 buffer_size, const u8* map_ptr,
                                             u32 map_pitch)
  : GPUDownloadTexture(width, height, format, imported),
    m_buffer_id(buffer_id),
    m_buffer_size(buffer_size),
    m_cpu_buffer(cpu_buffer)
{
  // These are assigned here rather than in the initializer list because they are not members of this class.
  m_current_pitch = map_pitch;
  m_map_pointer = map_ptr;
}
/// Releases whichever backing store this texture owns: the GL buffer (plus pending sync and mapping),
/// or the CPU allocation when it was not imported from the caller.
OpenGLDownloadTexture::~OpenGLDownloadTexture()
{
  if (m_buffer_id == 0)
  {
    // CPU-backed: imported memory is not freed here, only memory allocated by Create().
    if (m_cpu_buffer && !m_is_imported)
      Common::AlignedFree(m_cpu_buffer);

    return;
  }

  // GL buffer-backed: drop any outstanding fence first.
  if (m_sync)
    glDeleteSync(m_sync);

  // The buffer is mapped for its whole lifetime, so unmap it before deletion.
  if (m_map_pointer)
  {
    glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_id);
    glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
  }

  glDeleteBuffers(1, &m_buffer_id);
}
/// Creates a download texture of `width` x `height` pixels in `format`.
///
/// When `memory` is non-null it is used directly as the destination (rows `memory_pitch` bytes apart,
/// `memory_size` bytes in total) and is never freed by the returned object. Otherwise a persistently
/// mapped pixel pack buffer is created when buffer storage is available and
/// OpenGLDevice::ShouldUsePBOsForDownloads() allows it, falling back to an aligned CPU allocation.
///
/// Returns an empty pointer when mapping the buffer or allocating CPU memory fails.
std::unique_ptr<OpenGLDownloadTexture> OpenGLDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format,
                                                                     void* memory, size_t memory_size, u32 memory_pitch)
{
  const u32 buffer_pitch =
    memory ? memory_pitch :
             Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), TEXTURE_UPLOAD_PITCH_ALIGNMENT);
  const u32 buffer_size = memory ? static_cast<u32>(memory_size) : (height * buffer_pitch);

  const bool use_buffer_storage = (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) &&
                                  !memory && OpenGLDevice::ShouldUsePBOsForDownloads();
  if (use_buffer_storage)
  {
    GLuint buffer_id;
    glGenBuffers(1, &buffer_id);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_id);

    // The mapping itself must also request GL_MAP_COHERENT_BIT. Declaring it only on the storage leaves the
    // mapping non-coherent, in which case GL writes are only guaranteed visible to the client after a
    // glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT) in addition to the fence. CopyFromTexture()/Flush()
    // rely on the fence alone, so the mapping has to be coherent.
    const u32 flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
    const u32 map_flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;

    if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage)
      glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, flags);
    else if (GLAD_GL_EXT_buffer_storage)
      glBufferStorageEXT(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, flags);

    u8* buffer_map = static_cast<u8*>(glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, buffer_size, map_flags));
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    if (!buffer_map)
    {
      Log_ErrorPrint("Failed to map persistent download buffer");
      glDeleteBuffers(1, &buffer_id);
      return {};
    }

    return std::unique_ptr<OpenGLDownloadTexture>(new OpenGLDownloadTexture(
      width, height, format, false, buffer_id, nullptr, buffer_size, buffer_map, buffer_pitch));
  }

  // Fallback to glReadPixels() + CPU buffer.
  const bool imported = (memory != nullptr);
  u8* cpu_buffer =
    imported ? static_cast<u8*>(memory) : static_cast<u8*>(Common::AlignedMalloc(buffer_size, VECTOR_ALIGNMENT));
  if (!cpu_buffer)
    return {};

  return std::unique_ptr<OpenGLDownloadTexture>(
    new OpenGLDownloadTexture(width, height, format, imported, 0, cpu_buffer, buffer_size, cpu_buffer, buffer_pitch));
}
/// Copies a `width` x `height` rectangle at (`src_x`, `src_y`) of layer `src_layer` / mip `src_level` of `src`
/// into this download texture at (`dst_x`, `dst_y`).
///
/// With `use_transfer_pitch` the destination pitch is derived from `width` instead of the full texture width;
/// this is only valid for non-imported textures and a destination origin of (0, 0).
///
/// For CPU-backed textures the readback goes straight into the CPU buffer and no flush is required afterwards.
/// For buffer-backed textures a fence is inserted and Flush() must be called before reading the data.
void OpenGLDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width,
                                            u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch)
{
  OpenGLTexture* const srcgl = static_cast<OpenGLTexture*>(src);
  OpenGLDevice& dev = OpenGLDevice::GetInstance();

  DebugAssert(srcgl->GetFormat() == m_format);
  DebugAssert(src_level < srcgl->GetLevels());
  DebugAssert(src_layer < srcgl->GetLayers());
  DebugAssert((src_x + width) <= srcgl->GetMipWidth(src_level) && (src_y + height) <= srcgl->GetMipHeight(src_level));
  DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height);
  DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch);
  DebugAssert(!m_is_imported || !use_transfer_pitch);

  dev.CommitClear(srcgl);

  u32 copy_offset, copy_size, copy_rows;
  if (!m_is_imported)
    m_current_pitch = GetTransferPitch(use_transfer_pitch ? width : m_width, TEXTURE_UPLOAD_PITCH_ALIGNMENT);
  GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows);
  dev.GetStatistics().num_downloads++;

  // Pick the largest pack alignment (1, 2 or 4) that the destination pitch is a multiple of.
  GLint alignment;
  if (m_current_pitch & 1)
    alignment = 1;
  else if (m_current_pitch & 2)
    alignment = 2;
  else
    alignment = 4;

  glPixelStorei(GL_PACK_ALIGNMENT, alignment);
  glPixelStorei(GL_PACK_ROW_LENGTH, GPUTexture::CalcUploadRowLengthFromPitch(m_format, m_current_pitch));

  if (!m_cpu_buffer)
  {
    // Read to PBO. m_cpu_buffer is null here, so the "pointer" passed below is a byte offset into the buffer.
    glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_id);
  }

  const auto [gl_internal_format, gl_format, gl_type] =
    OpenGLTexture::GetPixelFormatMapping(srcgl->GetFormat(), dev.IsGLES());
  if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_get_texture_sub_image)
  {
    // zoffset selects the array layer, so pass the requested layer instead of always reading layer 0.
    glGetTextureSubImage(srcgl->GetGLId(), src_level, src_x, src_y, src_layer, width, height, 1, gl_format, gl_type,
                         m_current_pitch * height, m_cpu_buffer + copy_offset);
  }
  else
  {
    glBindFramebuffer(GL_READ_FRAMEBUFFER, dev.m_read_fbo);

    // Attach the requested layer/level rather than unconditionally attaching layer 0 / level 0.
    if (srcgl->GetLayers() > 1)
    {
      glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcgl->GetGLId(), src_level, src_layer);
    }
    else
    {
      glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, srcgl->GetGLId(),
                             src_level);
    }

    glReadPixels(src_x, src_y, width, height, gl_format, gl_type, m_cpu_buffer + copy_offset);

    glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
  }

  if (m_cpu_buffer)
  {
    // If using CPU buffers, we never need to flush.
    m_needs_flush = false;
  }
  else
  {
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    // Create a sync object so we know when the GPU is done copying.
    if (m_sync)
      glDeleteSync(m_sync);

    m_sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
    m_needs_flush = true;
  }

  // Restore default pack state so later readbacks are not affected by this row length.
  glPixelStorei(GL_PACK_ROW_LENGTH, 0);
}
/// Makes the given region readable. Nothing has to be done for this backend, so the region
/// arguments are unused and the call always succeeds.
bool OpenGLDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height)
{
// Either always mapped, or CPU buffer.
return true;
}
/// Counterpart to Map(); intentionally a no-op for this backend.
void OpenGLDownloadTexture::Unmap()
{
// Either always mapped, or CPU buffer.
}
/// Blocks until the most recent CopyFromTexture() into the GL buffer has completed.
/// Does nothing when no copy is pending or when there is no sync object to wait on.
void OpenGLDownloadTexture::Flush()
{
  // CPU-buffer readbacks complete synchronously, so they never leave a flush pending.
  const bool copy_pending = (m_needs_flush && m_sync);
  if (copy_pending)
  {
    m_needs_flush = false;

    // Flush queued commands and wait without a timeout, then retire the fence.
    glClientWaitSync(m_sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
    glDeleteSync(m_sync);
    m_sync = {};
  }
}
/// Attaches a debug label to the backing GL buffer.
/// Ignored when `name` is empty, when the texture is CPU-backed (no GL buffer exists to label),
/// or when glObjectLabel is unavailable.
void OpenGLDownloadTexture::SetDebugName(std::string_view name)
{
  if (name.empty())
    return;

  // CPU-backed textures have m_buffer_id == 0; labelling object name 0 would raise a GL error.
  if (m_buffer_id == 0)
    return;

  if (glObjectLabel)
    glObjectLabel(GL_BUFFER, m_buffer_id, static_cast<GLsizei>(name.length()), name.data());
}
/// Creates a download texture whose storage is allocated internally
/// (no caller memory is passed through to OpenGLDownloadTexture::Create()).
std::unique_ptr<GPUDownloadTexture> OpenGLDevice::CreateDownloadTexture(u32 width, u32 height,
GPUTexture::Format format)
{
return OpenGLDownloadTexture::Create(width, height, format, nullptr, 0, 0);
}
/// Creates a download texture that reads back directly into caller-provided `memory`
/// (`memory_size` bytes, rows `memory_stride` bytes apart).
std::unique_ptr<GPUDownloadTexture> OpenGLDevice::CreateDownloadTexture(u32 width, u32 height,
GPUTexture::Format format, void* memory,
size_t memory_size, u32 memory_stride)
{
// not _really_ memory importing, but PBOs are broken on Intel....
return OpenGLDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride);
}

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -99,3 +99,34 @@ private:
GLuint m_id;
};
/// OpenGL implementation of GPUDownloadTexture.
/// Backed either by a persistently mapped pixel pack buffer, or by a CPU buffer
/// (allocated internally or supplied by the caller) that is filled synchronously.
class OpenGLDownloadTexture final : public GPUDownloadTexture
{
public:
~OpenGLDownloadTexture() override;
/// Factory; pass a null `memory` to have storage allocated internally. Returns an empty pointer on failure.
static std::unique_ptr<OpenGLDownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format, void* memory,
size_t memory_size, u32 memory_pitch);
/// Reads a rectangle of `src` into this texture at (`dst_x`, `dst_y`).
void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height,
u32 src_layer, u32 src_level, bool use_transfer_pitch) override;
/// No-ops for this backend; Map() always returns true.
bool Map(u32 x, u32 y, u32 width, u32 height) override;
void Unmap() override;
/// Waits for a pending buffer readback to complete.
void Flush() override;
/// Labels the GL buffer for debugging tools.
void SetDebugName(std::string_view name) override;
private:
OpenGLDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool imported, GLuint buffer_id,
u8* cpu_buffer, u32 buffer_size, const u8* map_ptr, u32 map_pitch);
// GL buffer object name; 0 when the CPU buffer path is used.
GLuint m_buffer_id = 0;
// Size in bytes of the backing storage.
u32 m_buffer_size = 0;
// Fence inserted after the last readback into the GL buffer; empty when nothing is pending.
GLsync m_sync = {};
// used when buffer storage is not available
u8* m_cpu_buffer = nullptr;
};

View File

@ -1515,6 +1515,14 @@ void VulkanDevice::DeferBufferDestruction(VkBuffer object, VmaAllocation allocat
[this, object, allocation]() { vmaDestroyBuffer(m_allocator, object, allocation); });
}
/// Queues destruction of a buffer together with the raw device memory bound to it.
/// The entry is tagged with the current fence counter; the buffer is destroyed before its memory is freed.
void VulkanDevice::DeferBufferDestruction(VkBuffer object, VkDeviceMemory memory)
{
  const auto destroy_buffer_and_memory = [this, object, memory]() {
    vkDestroyBuffer(m_device, object, nullptr);
    vkFreeMemory(m_device, memory, nullptr);
  };

  m_cleanup_objects.emplace_back(GetCurrentFenceCounter(), destroy_buffer_and_memory);
}
void VulkanDevice::DeferFramebufferDestruction(VkFramebuffer object)
{
m_cleanup_objects.emplace_back(GetCurrentFenceCounter(),
@ -2067,7 +2075,6 @@ void VulkanDevice::DestroyDevice()
for (auto& it : m_cleanup_objects)
it.second();
m_cleanup_objects.clear();
DestroyDownloadBuffer();
DestroyPersistentDescriptorSets();
DestroyBuffers();
DestroySamplers();
@ -2528,6 +2535,7 @@ bool VulkanDevice::CheckFeatures(FeatureMask disabled_features)
!(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS) && m_device_features.geometryShader;
m_features.partial_msaa_resolve = true;
m_features.memory_import = m_optional_extensions.vk_ext_external_memory_host;
m_features.shader_cache = true;
m_features.pipeline_cache = true;
m_features.prefer_unused_textures = true;
@ -2981,21 +2989,21 @@ void VulkanDevice::RenderBlankFrame()
InvalidateCachedState();
}
bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBufferUsageFlags buffer_usage,
VkDeviceMemory* out_memory, VkBuffer* out_buffer, u32* out_offset)
bool VulkanDevice::TryImportHostMemory(void* data, size_t data_size, VkBufferUsageFlags buffer_usage,
VkDeviceMemory* out_memory, VkBuffer* out_buffer, VkDeviceSize* out_offset)
{
if (!m_optional_extensions.vk_ext_external_memory_host)
return false;
// Align to the nearest page
const void* data_aligned =
reinterpret_cast<const void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(data), HOST_PAGE_SIZE));
void* data_aligned =
reinterpret_cast<void*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(data), HOST_PAGE_SIZE));
// Offset to the start of the data within the page
const u32 data_offset = reinterpret_cast<uintptr_t>(data) & (HOST_PAGE_SIZE - 1);
const size_t data_offset = reinterpret_cast<uintptr_t>(data) & static_cast<uintptr_t>(HOST_PAGE_MASK);
// Full amount of data that must be imported, including the pages
const u32 data_size_aligned = Common::AlignUpPow2(data_offset + data_size, HOST_PAGE_SIZE);
const size_t data_size_aligned = Common::AlignUpPow2(data_offset + data_size, HOST_PAGE_SIZE);
VkMemoryHostPointerPropertiesEXT pointer_properties = {VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, nullptr,
0};
@ -3003,6 +3011,7 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer
data_aligned, &pointer_properties);
if (res != VK_SUCCESS || pointer_properties.memoryTypeBits == 0)
{
LOG_VULKAN_ERROR(res, "vkGetMemoryHostPointerPropertiesEXT() failed: ");
return false;
}
@ -3015,6 +3024,7 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer
res = vmaFindMemoryTypeIndex(m_allocator, pointer_properties.memoryTypeBits, &vma_alloc_info, &memory_index);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vmaFindMemoryTypeIndex() failed: ");
return false;
}
@ -3030,6 +3040,7 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer
res = vkAllocateMemory(m_device, &alloc_info, nullptr, &imported_memory);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkAllocateMemory() failed: ");
return false;
}
@ -3049,10 +3060,10 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer
res = vkCreateBuffer(m_device, &buffer_info, nullptr, &imported_buffer);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateBuffer() failed: ");
if (imported_memory != VK_NULL_HANDLE)
{
vkFreeMemory(m_device, imported_memory, nullptr);
}
return false;
}
@ -3061,7 +3072,7 @@ bool VulkanDevice::TryImportHostMemory(const void* data, u32 data_size, VkBuffer
*out_memory = imported_memory;
*out_buffer = imported_buffer;
*out_offset = data_offset;
Log_DevFmt("Imported {} byte buffer covering {} bytes at {}", data_size, data_size_aligned, data);
return true;
}

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -25,6 +25,7 @@ class VulkanPipeline;
class VulkanSwapChain;
class VulkanTexture;
class VulkanTextureBuffer;
class VulkanDownloadTexture;
struct VK_PIPELINE_CACHE_HEADER;
@ -32,6 +33,7 @@ class VulkanDevice final : public GPUDevice
{
public:
friend VulkanTexture;
friend VulkanDownloadTexture;
enum : u32
{
@ -81,8 +83,11 @@ public:
std::unique_ptr<GPUSampler> CreateSampler(const GPUSampler::Config& config) override;
std::unique_ptr<GPUTextureBuffer> CreateTextureBuffer(GPUTextureBuffer::Format format, u32 size_in_elements) override;
bool DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
u32 out_data_stride) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format) override;
std::unique_ptr<GPUDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GPUTexture::Format format,
void* memory, size_t memory_size,
u32 memory_stride) override;
bool SupportsTextureFormat(GPUTexture::Format format) const override;
void CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, GPUTexture* src,
u32 src_x, u32 src_y, u32 src_layer, u32 src_level, u32 width, u32 height) override;
@ -196,6 +201,7 @@ public:
// Schedule a vulkan resource for destruction later on. This will occur when the command buffer
// is next re-used, and the GPU has finished working with the specified resource.
void DeferBufferDestruction(VkBuffer object, VmaAllocation allocation);
void DeferBufferDestruction(VkBuffer object, VkDeviceMemory memory);
void DeferFramebufferDestruction(VkFramebuffer object);
void DeferImageDestruction(VkImage object, VmaAllocation allocation);
void DeferImageViewDestruction(VkImageView object);
@ -341,11 +347,8 @@ private:
void RenderBlankFrame();
bool TryImportHostMemory(const void* data, u32 data_size, VkBufferUsageFlags buffer_usage, VkDeviceMemory* out_memory,
VkBuffer* out_buffer, u32* out_offset);
bool CheckDownloadBufferSize(u32 required_size);
void DestroyDownloadBuffer();
bool TryImportHostMemory(void* data, size_t data_size, VkBufferUsageFlags buffer_usage, VkDeviceMemory* out_memory,
VkBuffer* out_buffer, VkDeviceSize* out_offset);
/// Set dirty flags on everything to force re-bind at next draw time.
void InvalidateCachedState();
@ -454,11 +457,6 @@ private:
SamplerMap m_sampler_map;
VmaAllocation m_download_buffer_allocation = VK_NULL_HANDLE;
VkBuffer m_download_buffer = VK_NULL_HANDLE;
u8* m_download_buffer_map = nullptr;
u32 m_download_buffer_size = 0;
// Which bindings/state has to be updated before the next draw.
u32 m_dirty_flags = ALL_DIRTY_STATE;

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "vulkan_texture.h"
@ -736,126 +736,6 @@ std::unique_ptr<GPUTexture> VulkanDevice::CreateTexture(u32 width, u32 height, u
return tex;
}
/// Synchronously reads back a rectangle of layer 0 / mip level 0 of `texture` into `out_data`, whose rows are
/// `out_data_stride` bytes apart. The image is copied into the shared download buffer, the command buffer is
/// submitted and waited on, and the rows are then copied out to the caller.
///
/// Returns false when the download buffer cannot be (re)allocated; true otherwise.
bool VulkanDevice::DownloadTexture(GPUTexture* texture, u32 x, u32 y, u32 width, u32 height, void* out_data,
                                   u32 out_data_stride)
{
  VulkanTexture* T = static_cast<VulkanTexture*>(texture);
  T->CommitClear();

  // Row pitch in the staging buffer must satisfy the device's buffer copy alignment.
  const u32 pitch = Common::AlignUp(width * T->GetPixelSize(), GetBufferCopyRowPitchAlignment());
  const u32 size = pitch * height;
  const u32 level = 0;
  if (!CheckDownloadBufferSize(size))
  {
    Log_ErrorPrintf("Can't read back %ux%u", width, height);
    return false;
  }

  s_stats.num_downloads++;

  // Transfer commands cannot be recorded inside a render pass.
  if (InRenderPass())
    EndRenderPass();

  const VkCommandBuffer cmdbuf = GetCurrentCommandBuffer();

  VulkanTexture::Layout old_layout = T->GetLayout();
  if (old_layout != VulkanTexture::Layout::TransferSrc)
    T->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, old_layout, VulkanTexture::Layout::TransferSrc);

  VkBufferImageCopy image_copy = {};
  const VkImageAspectFlags aspect = T->IsDepthStencil() ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
  image_copy.bufferOffset = 0;
  image_copy.bufferRowLength = pitch / T->GetPixelSize();
  image_copy.bufferImageHeight = 0;
  image_copy.imageSubresource = {aspect, level, 0u, 1u};
  image_copy.imageOffset = {static_cast<s32>(x), static_cast<s32>(y), 0};
  image_copy.imageExtent = {width, height, 1u};

  // do the copy
  vkCmdCopyImageToBuffer(cmdbuf, T->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_download_buffer, 1,
                         &image_copy);

  // flush gpu cache
  const VkBufferMemoryBarrier buffer_info = {
    VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType    sType
    nullptr,                                 // const void*        pNext
    VK_ACCESS_TRANSFER_WRITE_BIT,            // VkAccessFlags      srcAccessMask
    VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags      dstAccessMask
    VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           srcQueueFamilyIndex
    VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           dstQueueFamilyIndex
    m_download_buffer,                       // VkBuffer           buffer
    0,                                       // VkDeviceSize       offset
    size                                     // VkDeviceSize       size
  };

  // srcStageMask takes a pipeline stage, not an access flag: the producer here is the transfer stage.
  vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1,
                       &buffer_info, 0, nullptr);

  if (old_layout != VulkanTexture::Layout::TransferSrc)
    T->TransitionSubresourcesToLayout(cmdbuf, 0, 1, 0, 1, VulkanTexture::Layout::TransferSrc, old_layout);

  SubmitCommandBuffer(true);

  // invalidate cpu cache before reading
  VkResult res = vmaInvalidateAllocation(m_allocator, m_download_buffer_allocation, 0, size);
  if (res != VK_SUCCESS)
    LOG_VULKAN_ERROR(res, "vmaInvalidateAllocation() failed, readback may be incorrect: ");

  StringUtil::StrideMemCpy(out_data, out_data_stride, m_download_buffer_map, pitch, width * T->GetPixelSize(), height);
  return true;
}
/// Ensures the shared download buffer holds at least `required_size` bytes, recreating it when it is too small.
/// Returns false when the buffer could not be created.
bool VulkanDevice::CheckDownloadBufferSize(u32 required_size)
{
  if (m_download_buffer_size >= required_size)
    return true;

  DestroyDownloadBuffer();

  // Adreno has slow coherent cached reads.
  const bool is_adreno = (m_device_properties.vendorID == 0x5143 ||
                          m_device_driver_properties.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY);

  const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                  nullptr,
                                  0u,
                                  required_size,
                                  VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                                  VK_SHARING_MODE_EXCLUSIVE,
                                  0u,
                                  nullptr};

  VmaAllocationCreateInfo aci = {};
  aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
  aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
  aci.preferredFlags = is_adreno ? (VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) :
                                   VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

  VmaAllocationInfo ai = {};
  VkResult res = vmaCreateBuffer(m_allocator, &bci, &aci, &m_download_buffer, &m_download_buffer_allocation, &ai);
  if (res != VK_SUCCESS)
  {
    LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: ");
    return false;
  }

  m_download_buffer_map = static_cast<u8*>(ai.pMappedData);

  // Record the new capacity. Without this the size check above never succeeds and the buffer is
  // destroyed and recreated on every download.
  m_download_buffer_size = required_size;
  return true;
}
/// Destroys the shared download buffer (if one exists) and resets all bookkeeping for it.
void VulkanDevice::DestroyDownloadBuffer()
{
  if (m_download_buffer != VK_NULL_HANDLE)
  {
    // Destroying the buffer also unmaps it, so no explicit unmap is needed.
    vmaDestroyBuffer(m_allocator, m_download_buffer, m_download_buffer_allocation);

    m_download_buffer_size = 0;
    m_download_buffer_map = nullptr;
    m_download_buffer_allocation = VK_NULL_HANDLE;
    m_download_buffer = VK_NULL_HANDLE;
  }
}
/// Wraps an existing VkSampler handle; the constructor only stores it.
VulkanSampler::VulkanSampler(VkSampler sampler) : m_sampler(sampler)
{
}
@ -1081,3 +961,218 @@ std::unique_ptr<GPUTextureBuffer> VulkanDevice::CreateTextureBuffer(GPUTextureBu
return tb;
}
/// Stores the backing buffer description for a download texture.
/// The texture is flagged as imported exactly when a raw `memory` handle is supplied;
/// `map_ptr`/`map_pitch` are the initial readable pointer and row pitch.
VulkanDownloadTexture::VulkanDownloadTexture(u32 width, u32 height, GPUTexture::Format format, VmaAllocation allocation,
                                             VkDeviceMemory memory, VkBuffer buffer, VkDeviceSize memory_offset,
                                             VkDeviceSize buffer_size, const u8* map_ptr, u32 map_pitch)
  : GPUDownloadTexture(width, height, format, (memory != VK_NULL_HANDLE)),
    m_allocation(allocation),
    m_memory(memory),
    m_buffer(buffer),
    m_memory_offset(memory_offset),
    m_buffer_size(buffer_size)
{
  // These are assigned here rather than in the initializer list because they are not members of this class.
  m_current_pitch = map_pitch;
  m_map_pointer = map_ptr;
}
/// Hands the buffer and its memory to the device's deferred destruction queue, using the overload
/// that matches how the memory was obtained (VMA allocation vs. imported device memory).
VulkanDownloadTexture::~VulkanDownloadTexture()
{
  const bool has_vma_allocation = (m_allocation != VK_NULL_HANDLE);
  if (!has_vma_allocation)
  {
    // imported
    DebugAssert(m_is_imported && m_memory != VK_NULL_HANDLE);
    VulkanDevice::GetInstance().DeferBufferDestruction(m_buffer, m_memory);
    return;
  }

  // Buffer was created mapped, no need to manually unmap.
  VulkanDevice::GetInstance().DeferBufferDestruction(m_buffer, m_allocation);
}
/// Creates a download texture of `width` x `height` pixels in `format`.
///
/// With a non-null `memory`, the caller's memory (`memory_size` bytes, rows `memory_stride` bytes apart) is
/// imported as the destination buffer. Otherwise a mapped transfer-destination buffer is allocated through VMA.
/// Returns an empty pointer when the import or the allocation fails.
std::unique_ptr<VulkanDownloadTexture> VulkanDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format,
                                                                     void* memory, size_t memory_size,
                                                                     u32 memory_stride)
{
  VulkanDevice& dev = VulkanDevice::GetInstance();

  VmaAllocation vma_allocation = VK_NULL_HANDLE;
  VkDeviceMemory imported_memory = VK_NULL_HANDLE;
  VkBuffer vk_buffer = VK_NULL_HANDLE;
  VkDeviceSize offset_in_memory = 0;

  if (memory)
  {
    // Importing: the caller dictates the pitch, and must have provided enough room for every row.
    const u32 pitch = memory_stride;
    const u32 size = height * pitch;
    Assert(size <= memory_size);

    const bool imported = dev.TryImportHostMemory(memory, memory_size, VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                                                  &imported_memory, &vk_buffer, &offset_in_memory);
    if (!imported)
      return {};

    const u8* const host_ptr = static_cast<u8*>(memory);
    return std::unique_ptr<VulkanDownloadTexture>(new VulkanDownloadTexture(
      width, height, format, vma_allocation, imported_memory, vk_buffer, offset_in_memory, size, host_ptr, pitch));
  }

  // Not importing: allocate our own mapped readback buffer.
  const u32 pitch =
    Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), dev.GetBufferCopyRowPitchAlignment());
  const u32 size = height * pitch;

  const VkBufferCreateInfo buffer_ci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                                        nullptr,
                                        0u,
                                        size,
                                        VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                                        VK_SHARING_MODE_EXCLUSIVE,
                                        0u,
                                        nullptr};

  VmaAllocationCreateInfo alloc_ci = {};
  alloc_ci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
  alloc_ci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
  alloc_ci.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

  VmaAllocationInfo alloc_info = {};
  const VkResult res =
    vmaCreateBuffer(dev.GetAllocator(), &buffer_ci, &alloc_ci, &vk_buffer, &vma_allocation, &alloc_info);
  if (res != VK_SUCCESS)
  {
    LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: ");
    return {};
  }

  // The allocation was requested mapped, so a host pointer is expected here.
  DebugAssert(alloc_info.pMappedData);
  const u8* const mapped_ptr = static_cast<u8*>(alloc_info.pMappedData);

  return std::unique_ptr<VulkanDownloadTexture>(new VulkanDownloadTexture(
    width, height, format, vma_allocation, imported_memory, vk_buffer, offset_in_memory, size, mapped_ptr, pitch));
}
/// Records a copy of a `width` x `height` rectangle at (`src_x`, `src_y`) of layer `src_layer` / mip `src_level`
/// of `src` into this download texture at (`dst_x`, `dst_y`).
///
/// With `use_transfer_pitch` the destination pitch is derived from `width` instead of the full texture width;
/// this is only valid for non-imported textures and a destination origin of (0, 0).
///
/// The copy is only recorded into the current command buffer; call Flush() before reading the data.
void VulkanDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width,
                                            u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch)
{
  VulkanTexture* const vkTex = static_cast<VulkanTexture*>(src);
  VulkanDevice& dev = VulkanDevice::GetInstance();

  DebugAssert(vkTex->GetFormat() == m_format);
  DebugAssert(src_level < vkTex->GetLevels());
  DebugAssert((src_x + width) <= src->GetMipWidth(src_level) && (src_y + height) <= src->GetMipHeight(src_level));
  DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height);
  DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch);
  DebugAssert(!m_is_imported || !use_transfer_pitch);

  u32 copy_offset, copy_size, copy_rows;
  if (!m_is_imported)
    m_current_pitch = GetTransferPitch(use_transfer_pitch ? width : m_width, dev.GetBufferCopyRowPitchAlignment());
  GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows);

  dev.GetStatistics().num_downloads++;

  // Transfer commands cannot be recorded inside a render pass.
  if (dev.InRenderPass())
    dev.EndRenderPass();
  vkTex->CommitClear();

  const VkCommandBuffer cmdbuf = dev.GetCurrentCommandBuffer();
  GL_INS_FMT("VulkanDownloadTexture::CopyFromTexture: {{{},{}}} {}x{} => {{{},{}}}", src_x, src_y, width, height, dst_x,
             dst_y);

  // NOTE(review): the subresource transitions below pass 0 where src_layer might be expected; confirm against
  // TransitionSubresourcesToLayout()'s parameter order before relying on this for layered textures.
  VulkanTexture::Layout old_layout = vkTex->GetLayout();
  if (old_layout == VulkanTexture::Layout::Undefined)
    vkTex->TransitionToLayout(cmdbuf, VulkanTexture::Layout::TransferSrc);
  else if (old_layout != VulkanTexture::Layout::TransferSrc)
    vkTex->TransitionSubresourcesToLayout(cmdbuf, 0, 1, src_level, 1, old_layout, VulkanTexture::Layout::TransferSrc);

  VkBufferImageCopy image_copy = {};
  const VkImageAspectFlags aspect = vkTex->IsDepthStencil() ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
  image_copy.bufferOffset = m_memory_offset + copy_offset;
  image_copy.bufferRowLength = GPUTexture::CalcUploadRowLengthFromPitch(m_format, m_current_pitch);
  image_copy.bufferImageHeight = 0;
  image_copy.imageSubresource = {aspect, src_level, src_layer, 1u};
  image_copy.imageOffset = {static_cast<s32>(src_x), static_cast<s32>(src_y), 0};
  image_copy.imageExtent = {width, height, 1u};

  // do the copy
  vkCmdCopyImageToBuffer(cmdbuf, vkTex->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_buffer, 1, &image_copy);

  // flush gpu cache
  // The copy wrote at (m_memory_offset + copy_offset), so a barrier starting at offset 0 with only copy_size
  // bytes may not cover the written range. Cover the whole buffer instead, which is always a valid range.
  const VkBufferMemoryBarrier buffer_info = {
    VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType    sType
    nullptr,                                 // const void*        pNext
    VK_ACCESS_TRANSFER_WRITE_BIT,            // VkAccessFlags      srcAccessMask
    VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags      dstAccessMask
    VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           srcQueueFamilyIndex
    VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           dstQueueFamilyIndex
    m_buffer,                                // VkBuffer           buffer
    0,                                       // VkDeviceSize       offset
    VK_WHOLE_SIZE                            // VkDeviceSize       size
  };

  // srcStageMask takes a pipeline stage, not an access flag: the producer here is the transfer stage.
  vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1,
                       &buffer_info, 0, nullptr);

  // Restore the previous layout, unless there was nothing meaningful to restore.
  if (old_layout != VulkanTexture::Layout::TransferSrc && old_layout != VulkanTexture::Layout::Undefined)
    vkTex->TransitionSubresourcesToLayout(cmdbuf, 0, 1, src_level, 1, VulkanTexture::Layout::TransferSrc, old_layout);

  // Remember which submission contains the copy so Flush() knows what to wait for.
  m_copy_fence_counter = dev.GetCurrentFenceCounter();
  m_needs_cache_invalidate = true;
  m_needs_flush = true;
}
/// Makes the given region readable by the CPU. The buffer is always mapped, so the only work is invalidating
/// the CPU cache once after a copy. Always returns true; an invalidation failure is logged.
bool VulkanDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height)
{
  // Always mapped, but we might need to invalidate the cache.
  if (!m_needs_cache_invalidate)
    return true;

  m_needs_cache_invalidate = false;

  // Imported host memory is not a VMA allocation (m_allocation is null), so it must not be handed to VMA.
  // NOTE(review): this assumes imported memory needs no explicit invalidation - confirm the memory type
  // chosen by TryImportHostMemory() is host-coherent.
  if (m_allocation == VK_NULL_HANDLE)
    return true;

  u32 copy_offset, copy_size, copy_rows;
  GetTransferSize(x, y, width, height, m_current_pitch, &copy_offset, &copy_size, &copy_rows);

  const VkResult res = vmaInvalidateAllocation(VulkanDevice::GetInstance().GetAllocator(), m_allocation, copy_offset,
                                               m_current_pitch * copy_rows);
  if (res != VK_SUCCESS)
    LOG_VULKAN_ERROR(res, "vmaInvalidateAllocation() failed, readback may be incorrect: ");

  return true;
}
/// Counterpart to Map(); intentionally a no-op for this backend.
void VulkanDownloadTexture::Unmap()
{
// Always mapped.
}
void VulkanDownloadTexture::Flush()
{
if (!m_needs_flush)
return;
m_needs_flush = false;
VulkanDevice& dev = VulkanDevice::GetInstance();
if (dev.GetCompletedFenceCounter() >= m_copy_fence_counter)
return;
// Need to execute command buffer.
if (dev.GetCurrentFenceCounter() == m_copy_fence_counter)
dev.SubmitCommandBuffer(true);
else
dev.WaitForFenceCounter(m_copy_fence_counter);
}
/// Attaches a debug name to the backing VkBuffer; empty names are ignored.
void VulkanDownloadTexture::SetDebugName(std::string_view name)
{
  if (!name.empty())
    Vulkan::SetObjectName(VulkanDevice::GetInstance().GetVulkanDevice(), m_buffer, name);
}
/// Creates a download texture whose storage is allocated internally
/// (no caller memory is passed through to VulkanDownloadTexture::Create()).
std::unique_ptr<GPUDownloadTexture> VulkanDevice::CreateDownloadTexture(u32 width, u32 height,
GPUTexture::Format format)
{
return VulkanDownloadTexture::Create(width, height, format, nullptr, 0, 0);
}
/// Creates a download texture that reads back into caller-provided `memory`
/// (`memory_size` bytes, rows `memory_stride` bytes apart).
std::unique_ptr<GPUDownloadTexture> VulkanDevice::CreateDownloadTexture(u32 width, u32 height,
GPUTexture::Format format, void* memory,
size_t memory_size, u32 memory_stride)
{
return VulkanDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride);
}

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
@ -150,3 +150,37 @@ private:
VkBufferView m_buffer_view = VK_NULL_HANDLE;
VkDescriptorSet m_descriptor_set = VK_NULL_HANDLE;
};
/// Vulkan implementation of GPUDownloadTexture.
/// Backed by a transfer-destination buffer that is either allocated through VMA (created mapped)
/// or created over host memory imported from the caller.
class VulkanDownloadTexture final : public GPUDownloadTexture
{
public:
~VulkanDownloadTexture() override;
/// Factory; pass a null `memory` to have storage allocated internally. Returns an empty pointer on failure.
static std::unique_ptr<VulkanDownloadTexture> Create(u32 width, u32 height, GPUTexture::Format format, void* memory,
size_t memory_size, u32 memory_stride);
/// Records a copy of a rectangle of `src` into this texture at (`dst_x`, `dst_y`).
void CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, u32 height,
u32 src_layer, u32 src_level, bool use_transfer_pitch) override;
/// Map() invalidates the CPU cache when a copy happened since the last call; Unmap() is a no-op.
bool Map(u32 x, u32 y, u32 width, u32 height) override;
void Unmap() override;
/// Waits until the last recorded copy has executed.
void Flush() override;
/// Names the backing VkBuffer for debugging tools.
void SetDebugName(std::string_view name) override;
private:
VulkanDownloadTexture(u32 width, u32 height, GPUTexture::Format format, VmaAllocation allocation,
VkDeviceMemory memory, VkBuffer buffer, VkDeviceSize memory_offset, VkDeviceSize buffer_size,
const u8* map_ptr, u32 map_pitch);
// VMA allocation backing m_buffer; null when the memory was imported.
VmaAllocation m_allocation = VK_NULL_HANDLE;
// Imported device memory backing m_buffer; null when VMA allocated the buffer.
VkDeviceMemory m_memory = VK_NULL_HANDLE;
VkBuffer m_buffer = VK_NULL_HANDLE;
// Fence counter of the command buffer containing the most recent copy.
u64 m_copy_fence_counter = 0;
// Byte offset added to every buffer copy destination (as reported by the host memory import).
VkDeviceSize m_memory_offset = 0;
VkDeviceSize m_buffer_size = 0;
// Set by CopyFromTexture(), cleared by the next Map().
bool m_needs_cache_invalidate = false;
};