GS: Add GSDownloadTexture (so we can download more than one at once)

This commit is contained in:
Stenzek 2023-01-17 20:56:24 +10:00 committed by refractionpcsx2
parent aa1e9cc9fa
commit fcfb9865df
30 changed files with 1254 additions and 572 deletions

View File

@ -14,9 +14,10 @@
*/
#include "PrecompiledHeader.h"
#include "GSLocalMemory.h"
#include "GS.h"
#include "GSExtra.h"
#include "GS/GS.h"
#include "GS/GSLocalMemory.h"
#include "GS/GSExtra.h"
#include "GS/GSPng.h"
#include <unordered_set>
template <typename Fn>
@ -556,12 +557,11 @@ void GSLocalMemory::SaveBMP(const std::string& fn, u32 bp, u32 bw, u32 psm, int
}
}
GSTextureSW t(GSTexture::Type::Offscreen, w, h);
if (t.Update(GSVector4i(0, 0, w, h), bits, pitch))
{
t.Save(fn);
}
#ifdef PCSX2_DEVBUILD
GSPng::Save(GSPng::RGB_A_PNG, fn, static_cast<u8*>(bits), w, h, pitch, GSConfig.PNGCompressionLevel, false);
#else
GSPng::Save(GSPng::RGB_PNG, fn, static_cast<u8*>(bits), w, h, pitch, GSConfig.PNGCompressionLevel, false);
#endif
_aligned_free(bits);
}

View File

@ -107,7 +107,7 @@ namespace GSPng
return success;
}
bool Save(GSPng::Format fmt, const std::string& file, u8* image, int w, int h, int pitch, int compression, bool rb_swapped)
bool Save(GSPng::Format fmt, const std::string& file, const u8* image, int w, int h, int pitch, int compression, bool rb_swapped)
{
std::string root = file;
root.replace(file.length() - 4, 4, "");

View File

@ -47,7 +47,7 @@ namespace GSPng
~Transaction();
};
bool Save(GSPng::Format fmt, const std::string& file, u8* image, int w, int h, int pitch, int compression, bool rb_swapped = false);
bool Save(GSPng::Format fmt, const std::string& file, const u8* image, int w, int h, int pitch, int compression, bool rb_swapped = false);
void Process(std::shared_ptr<Transaction>& item);

View File

@ -180,6 +180,11 @@ GSTexture* GSDevice::FetchSurface(GSTexture::Type type, int width, int height, i
return t;
}
/// Base-class implementation: creates nothing and hands back an empty pointer.
/// Backends (GSDevice11, GSDevice12, ...) override this to return a real download texture.
std::unique_ptr<GSDownloadTexture> GSDevice::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
	return nullptr;
}
void GSDevice::PrintMemoryUsage()
{
#ifdef ENABLE_OGL_DEBUG
@ -259,11 +264,6 @@ GSTexture* GSDevice::CreateTexture(int w, int h, int mipmap_levels, GSTexture::F
return FetchSurface(GSTexture::Type::Texture, w, h, levels, format, false, prefer_reuse);
}
GSTexture* GSDevice::CreateOffscreen(int w, int h, GSTexture::Format format)
{
return FetchSurface(GSTexture::Type::Offscreen, w, h, 1, format, false, true);
}
GSTexture::Format GSDevice::GetDefaultTextureFormat(GSTexture::Type type)
{
if (type == GSTexture::Type::DepthStencil)
@ -272,23 +272,6 @@ GSTexture::Format GSDevice::GetDefaultTextureFormat(GSTexture::Type type)
return GSTexture::Format::Color;
}
bool GSDevice::DownloadTextureConvert(GSTexture* src, const GSVector4& sRect, const GSVector2i& dSize, GSTexture::Format format, ShaderConvert ps_shader, GSTexture::GSMap& out_map, const bool linear)
{
ASSERT(src);
ASSERT(format == GSTexture::Format::Color || format == GSTexture::Format::UInt16 || format == GSTexture::Format::UInt32);
GSTexture* dst = CreateRenderTarget(dSize.x, dSize.y, format);
if (!dst)
return false;
GSVector4i dRect(0, 0, dSize.x, dSize.y);
StretchRect(src, sRect, dst, GSVector4(dRect), ps_shader, linear);
bool ret = DownloadTexture(dst, dRect, out_map);
Recycle(dst);
return ret;
}
void GSDevice::StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear)
{
StretchRect(sTex, GSVector4(0, 0, 1, 1), dTex, dRect, shader, linear);

View File

@ -813,19 +813,9 @@ public:
GSTexture* CreateRenderTarget(int w, int h, GSTexture::Format format, bool clear = true);
GSTexture* CreateDepthStencil(int w, int h, GSTexture::Format format, bool clear = true);
GSTexture* CreateTexture(int w, int h, int mipmap_levels, GSTexture::Format format, bool prefer_reuse = false);
GSTexture* CreateOffscreen(int w, int h, GSTexture::Format format);
GSTexture::Format GetDefaultTextureFormat(GSTexture::Type type);
/// Download the region `rect` of `src` into `out_map`
/// `out_map` will be valid until a call to `DownloadTextureComplete`
virtual bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) { return false; }
/// Scale the region `sRect` of `src` to the size `dSize` using `ps_shader` and store the result in `out_map`
/// `out_map` will be valid until a call to `DownloadTextureComplete`
virtual bool DownloadTextureConvert(GSTexture* src, const GSVector4& sRect, const GSVector2i& dSize, GSTexture::Format format, ShaderConvert ps_shader, GSTexture::GSMap& out_map, bool linear);
/// Must be called to free resources after calling `DownloadTexture` or `DownloadTextureConvert`
virtual void DownloadTextureComplete() {}
virtual std::unique_ptr<GSDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format);
virtual void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) {}
virtual void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true) {}

View File

@ -799,19 +799,35 @@ void GSRenderer::VSync(u32 field, bool registers_written)
{
if (GSTexture* current = g_gs_device->GetCurrent())
{
const GSVector2i size = GSCapture::GetSize();
const GSVector2i size(GSCapture::GetSize());
bool res;
GSTexture::GSMap m;
if (size == current->GetSize())
res = g_gs_device->DownloadTexture(current, GSVector4i(0, 0, size.x, size.y), m);
else
res = g_gs_device->DownloadTextureConvert(current, GSVector4(0, 0, 1, 1), size, GSTexture::Format::Color, ShaderConvert::COPY, m, true);
if (res)
std::unique_ptr<GSDownloadTexture> dl(g_gs_device->CreateDownloadTexture(size.x, size.y, GSTexture::Format::Color));
if (dl)
{
GSCapture::DeliverFrame(m.bits, m.pitch, !g_gs_device->IsRBSwapped());
g_gs_device->DownloadTextureComplete();
const GSVector4i rc(0, 0, size.x, size.y);
bool okay = false;
if (size == current->GetSize())
{
dl->CopyFromTexture(rc, current, rc, 0);
okay = true;
}
else
{
GSTexture* rt = g_gs_device->CreateRenderTarget(size.x, size.y, GSTexture::Format::Color);
if (rt)
{
g_gs_device->StretchRect(current, rt, GSVector4(rc), ShaderConvert::COPY);
dl->CopyFromTexture(rc, rt, rc, 0);
g_gs_device->Recycle(rt);
okay = true;
}
}
if (okay)
{
dl->Flush();
if (dl->Map(rc))
GSCapture::DeliverFrame(dl->GetMapPointer(), dl->GetMapPitch(), !g_gs_device->IsRBSwapped());
}
}
}
}
@ -1019,12 +1035,19 @@ bool GSRenderer::SaveSnapshotToMemory(u32 window_width, u32 window_height, bool
const u32 image_width = crop_borders ? draw_width : std::max(draw_width, window_width);
const u32 image_height = crop_borders ? draw_height : std::max(draw_height, window_height);
GSTexture::GSMap map;
const bool result = g_gs_device->DownloadTextureConvert(
current, src_uv,
GSVector2i(draw_width, draw_height), GSTexture::Format::Color,
ShaderConvert::TRANSPARENCY_FILTER, map, true);
if (result)
// We're not expecting screenshots to be fast, so just allocate a download texture on demand.
GSTexture* rt = g_gs_device->CreateRenderTarget(draw_width, draw_height, GSTexture::Format::Color, false);
if (rt)
{
std::unique_ptr<GSDownloadTexture> dl(g_gs_device->CreateDownloadTexture(draw_width, draw_height, GSTexture::Format::Color));
if (dl)
{
const GSVector4i rc(0, 0, draw_width, draw_height);
g_gs_device->StretchRect(current, src_uv, rt, GSVector4(rc), ShaderConvert::TRANSPARENCY_FILTER);
dl->CopyFromTexture(rc, rt, rc, 0);
dl->Flush();
if (dl->Map(rc))
{
const u32 pad_x = (image_width - draw_width) / 2;
const u32 pad_y = (image_height - draw_height) / 2;
@ -1032,11 +1055,19 @@ bool GSRenderer::SaveSnapshotToMemory(u32 window_width, u32 window_height, bool
pixels->resize(image_width * image_height, 0);
*width = image_width;
*height = image_height;
StringUtil::StrideMemCpy(pixels->data() + pad_y * image_width + pad_x, image_width * sizeof(u32),
map.bits, map.pitch, draw_width * sizeof(u32), draw_height);
StringUtil::StrideMemCpy(pixels->data() + pad_y * image_width + pad_x, image_width * sizeof(u32), dl->GetMapPointer(),
dl->GetMapPitch(), draw_width * sizeof(u32), draw_height);
g_gs_device->DownloadTextureComplete();
g_gs_device->Recycle(rt);
return true;
}
}
return result;
g_gs_device->Recycle(rt);
}
*width = 0;
*height = 0;
pixels->clear();
return false;
}

View File

@ -14,9 +14,11 @@
*/
#include "PrecompiledHeader.h"
#include "GSTexture.h"
#include "GSDevice.h"
#include "GS/Renderers/Common/GSTexture.h"
#include "GS/Renderers/Common/GSDevice.h"
#include "GS/GSPng.h"
#include "common/Align.h"
#include "common/StringUtil.h"
#include <bitset>
GSTexture::GSTexture()
@ -51,19 +53,16 @@ bool GSTexture::Save(const std::string& fn)
return false;
}
GSMap map;
if (!g_gs_device->DownloadTexture(this, GSVector4i(0, 0, m_size.x, m_size.y), map))
const GSVector4i rc(0, 0, m_size.x, m_size.y);
std::unique_ptr<GSDownloadTexture> dl(g_gs_device->CreateDownloadTexture(m_size.x, m_size.y, m_format));
if (!dl || (dl->CopyFromTexture(rc, this, rc, 0), dl->Flush(), !dl->Map(rc)))
{
Console.Error("(GSTexture) DownloadTexture() failed.");
return false;
}
const int compression = GSConfig.PNGCompressionLevel;
bool success = GSPng::Save(format, fn, map.bits, m_size.x, m_size.y, map.pitch, compression);
g_gs_device->DownloadTextureComplete();
return success;
return GSPng::Save(format, fn, dl->GetMapPointer(), m_size.x, m_size.y, dl->GetMapPitch(), compression, g_gs_device->IsRBSwapped());
}
void GSTexture::Swap(GSTexture* tex)
@ -80,6 +79,11 @@ void GSTexture::Swap(GSTexture* tex)
}
u32 GSTexture::GetCompressedBytesPerBlock() const
{
return GetCompressedBytesPerBlock(m_format);
}
u32 GSTexture::GetCompressedBytesPerBlock(Format format)
{
static constexpr u32 bytes_per_block[] = {
1, // Invalid
@ -96,12 +100,17 @@ u32 GSTexture::GetCompressedBytesPerBlock() const
16, // BC4 - 16 pixels in 128 bits
};
return bytes_per_block[static_cast<u32>(m_format)];
return bytes_per_block[static_cast<u32>(format)];
}
u32 GSTexture::GetCompressedBlockSize() const
{
if (m_format >= Format::BC1 && m_format <= Format::BC7)
return GetCompressedBlockSize(m_format);
}
u32 GSTexture::GetCompressedBlockSize(Format format)
{
if (format >= Format::BC1 && format <= Format::BC7)
return 4;
else
return 1;
@ -109,14 +118,24 @@ u32 GSTexture::GetCompressedBlockSize() const
u32 GSTexture::CalcUploadRowLengthFromPitch(u32 pitch) const
{
const u32 block_size = GetCompressedBlockSize();
const u32 bytes_per_block = GetCompressedBytesPerBlock();
return CalcUploadRowLengthFromPitch(m_format, pitch);
}
/// Converts a row pitch in bytes into a row length in texels for the given format.
/// The pitch is rounded up to a whole number of blocks, and each block contributes
/// its block size (in texels) to the row length.
u32 GSTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch)
{
	const u32 texels_per_block = GetCompressedBlockSize(format);
	const u32 block_bytes = GetCompressedBytesPerBlock(format);

	// Ceiling division: a partially filled block still occupies a full block.
	const u32 blocks_per_row = (pitch + block_bytes - 1) / block_bytes;
	return blocks_per_row * texels_per_block;
}
u32 GSTexture::CalcUploadSize(u32 height, u32 pitch) const
{
const u32 block_size = GetCompressedBlockSize();
return CalcUploadSize(m_format, height, pitch);
}
/// Returns the number of bytes needed to upload `height` texel rows at `pitch` bytes per block-row.
/// The height is rounded up to a whole number of block-rows for the given format.
u32 GSTexture::CalcUploadSize(Format format, u32 height, u32 pitch)
{
	const u32 rows_per_block = GetCompressedBlockSize(format);
	const u32 block_rows = (height + rows_per_block - 1) / rows_per_block;
	return pitch * block_rows;
}
@ -128,3 +147,69 @@ void GSTexture::GenerateMipmapsIfNeeded()
m_needs_mipmaps_generated = false;
GenerateMipmap();
}
GSDownloadTexture::GSDownloadTexture(u32 width, u32 height, GSTexture::Format format)
: m_width(width)
, m_height(height)
, m_format(format)
{
}
// Defaulted out of line. The base class performs no cleanup of its own here;
// backends such as GSDownloadTexture11 unmap in their own destructors.
GSDownloadTexture::~GSDownloadTexture() = default;
/// Computes the size in bytes of a buffer holding a width x height image of `format`,
/// with each row of blocks aligned up to `pitch_align` (which must be a power of two).
u32 GSDownloadTexture::GetBufferSize(u32 width, u32 height, GSTexture::Format format, u32 pitch_align /* = 1 */)
{
	const u32 texels_per_block = GSTexture::GetCompressedBlockSize(format);
	const u32 block_bytes = GSTexture::GetCompressedBytesPerBlock(format);

	// Dimensions in blocks, rounding partial blocks up.
	const u32 blocks_wide = (width + texels_per_block - 1) / texels_per_block;
	const u32 blocks_high = (height + texels_per_block - 1) / texels_per_block;

	pxAssert(Common::IsPow2(pitch_align));
	const u32 row_bytes = blocks_wide * block_bytes;
	const u32 aligned_pitch = Common::AlignUpPow2(row_bytes, pitch_align);
	return aligned_pitch * blocks_high;
}
/// Returns the byte pitch of a transfer `width` texels wide in this texture's format,
/// aligned up to `pitch_align` (which must be a power of two).
u32 GSDownloadTexture::GetTransferPitch(u32 width, u32 pitch_align) const
{
	const u32 texels_per_block = GSTexture::GetCompressedBlockSize(m_format);
	const u32 block_bytes = GSTexture::GetCompressedBytesPerBlock(m_format);

	// Width in blocks, rounding a partial block up.
	const u32 blocks_wide = (width + texels_per_block - 1) / texels_per_block;

	pxAssert(Common::IsPow2(pitch_align));
	const u32 row_bytes = blocks_wide * block_bytes;
	return Common::AlignUpPow2(row_bytes, pitch_align);
}
void GSDownloadTexture::GetTransferSize(const GSVector4i& rc, u32* copy_offset, u32* copy_size, u32* copy_rows) const
{
const u32 block_size = GSTexture::GetCompressedBlockSize(m_format);
const u32 bytes_per_block = GSTexture::GetCompressedBytesPerBlock(m_format);
const u32 tw = static_cast<u32>(rc.width());
const u32 tb = ((tw + (block_size - 1)) / block_size);
*copy_offset = (((static_cast<u32>(rc.y) + (block_size - 1)) / block_size) * m_current_pitch) +
((static_cast<u32>(rc.x) + (block_size - 1)) / block_size) * bytes_per_block;
*copy_size = tb * bytes_per_block;
*copy_rows = ((static_cast<u32>(rc.height()) + (block_size - 1)) / block_size);
}
bool GSDownloadTexture::ReadTexels(const GSVector4i& rc, void* out_ptr, u32 out_stride)
{
if (m_needs_flush)
Flush();
if (!Map(rc))
return false;
const u32 block_size = GSTexture::GetCompressedBlockSize(m_format);
const u32 bytes_per_block = GSTexture::GetCompressedBytesPerBlock(m_format);
const u32 tw = static_cast<u32>(rc.width());
const u32 tb = ((tw + (block_size - 1)) / block_size);
const u32 copy_offset = (((static_cast<u32>(rc.y) + (block_size - 1)) / block_size) * m_current_pitch) +
((static_cast<u32>(rc.x) + (block_size - 1)) / block_size) * bytes_per_block;
const u32 copy_size = tb * bytes_per_block;
const u32 copy_rows = ((static_cast<u32>(rc.height()) + (block_size - 1)) / block_size);
StringUtil::StrideMemCpy(out_ptr, out_stride, m_map_pointer + copy_offset, m_current_pitch, copy_size, copy_rows);
return true;
}

View File

@ -32,7 +32,6 @@ public:
RenderTarget = 1,
DepthStencil,
Texture,
Offscreen,
RWTexture,
};
@ -96,6 +95,11 @@ public:
Format GetFormat() const { return m_format; }
bool IsCompressedFormat() const { return IsCompressedFormat(m_format); }
static u32 GetCompressedBytesPerBlock(Format format);
static u32 GetCompressedBlockSize(Format format);
static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch);
static u32 CalcUploadSize(Format format, u32 height, u32 pitch);
u32 GetCompressedBytesPerBlock() const;
u32 GetCompressedBlockSize() const;
u32 CalcUploadRowLengthFromPitch(u32 pitch) const;
@ -132,3 +136,65 @@ public:
// Helper routines for formats/types
static bool IsCompressedFormat(Format format) { return (format >= Format::BC1 && format <= Format::BC7); }
};
/// Abstract CPU-readable download (readback) buffer that texture contents can be copied into.
/// Instances are created through GSDevice::CreateDownloadTexture(); each backend supplies the
/// CopyFromTexture()/Map()/Unmap()/Flush() implementations.
class GSDownloadTexture
{
public:
GSDownloadTexture(u32 width, u32 height, GSTexture::Format format);
virtual ~GSDownloadTexture();
/// Basically, this has dimensions only because of DX11.
__fi u32 GetWidth() const { return m_width; }
__fi u32 GetHeight() const { return m_height; }
/// Format this download texture was created with.
__fi GSTexture::Format GetFormat() const { return m_format; }
/// True while the flush flag is set, i.e. a copy may not yet be visible to the CPU.
__fi bool NeedsFlush() const { return m_needs_flush; }
/// True while a map pointer is held.
__fi bool IsMapped() const { return (m_map_pointer != nullptr); }
/// Pointer to the mapped contents, or nullptr when not mapped.
__fi const u8* GetMapPointer() const { return m_map_pointer; }
/// Row pitch, in bytes, of the currently mapped contents.
__fi u32 GetMapPitch() const { return m_current_pitch; }
/// Calculates the pitch of a transfer.
/// pitch_align must be a power of two.
u32 GetTransferPitch(u32 width, u32 pitch_align) const;
/// Calculates the size of the data you should transfer.
/// Outputs the byte offset of rc within the buffer, the bytes per row, and the number of rows.
void GetTransferSize(const GSVector4i& rc, u32* copy_offset, u32* copy_size, u32* copy_rows) const;
/// Queues a copy from the specified texture to this buffer.
/// Does not complete immediately, you should flush before accessing the buffer.
/// use_transfer_pitch should be true if there's only a single texture being copied to this buffer before
/// it will be used. This allows the image to be packed tighter together, and buffer reuse.
virtual void CopyFromTexture(
const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch = true) = 0;
/// Maps the texture into the CPU address space, enabling it to read the contents.
/// The Map call may not perform synchronization. If the contents of the staging texture
/// have been updated by a CopyFromTexture() call, you must call Flush() first.
/// If persistent mapping is supported in the backend, this may be a no-op.
/// Returns false if mapping failed.
virtual bool Map(const GSVector4i& read_rc) = 0;
/// Unmaps the CPU-readable copy of the texture. May be a no-op on backends which
/// support persistent-mapped buffers.
virtual void Unmap() = 0;
/// Flushes pending writes from the CPU to the GPU, and reads from the GPU to the CPU.
/// This may cause a command buffer submit depending on if one has occurred between the last
/// call to CopyFromTexture() and the Flush() call.
virtual void Flush() = 0;
/// Reads the specified rectangle from the staging texture to out_ptr, with the specified stride
/// (length in bytes of each row). CopyFromTexture() must be called first. The contents of any
/// texels outside of the rectangle used for CopyFromTexture is undefined.
/// Flushes and maps as needed; returns false if the texture could not be mapped.
bool ReadTexels(const GSVector4i& rc, void* out_ptr, u32 out_stride);
/// Returns what the size of the specified texture would be, in bytes.
static u32 GetBufferSize(u32 width, u32 height, GSTexture::Format format, u32 pitch_align = 1);
protected:
// Dimensions and format supplied at construction.
u32 m_width;
u32 m_height;
GSTexture::Format m_format;
// Mapping state maintained by the backend's Map()/Unmap(); nullptr means "not mapped".
const u8* m_map_pointer = nullptr;
// Row pitch in bytes of the mapped data.
u32 m_current_pitch = 0;
// Set by the backend when a copy has been queued and a Flush() is required before reading.
bool m_needs_flush = false;
};

View File

@ -453,33 +453,12 @@ void GSDevice11::ClearStencil(GSTexture* t, u8 c)
GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
{
D3D11_TEXTURE2D_DESC desc;
memset(&desc, 0, sizeof(desc));
DXGI_FORMAT dxformat;
switch (format)
{
case GSTexture::Format::Color: dxformat = DXGI_FORMAT_R8G8B8A8_UNORM; break;
case GSTexture::Format::HDRColor: dxformat = DXGI_FORMAT_R16G16B16A16_UNORM; break;
case GSTexture::Format::DepthStencil: dxformat = DXGI_FORMAT_R32G8X24_TYPELESS; break;
case GSTexture::Format::UNorm8: dxformat = DXGI_FORMAT_A8_UNORM; break;
case GSTexture::Format::UInt16: dxformat = DXGI_FORMAT_R16_UINT; break;
case GSTexture::Format::UInt32: dxformat = DXGI_FORMAT_R32_UINT; break;
case GSTexture::Format::PrimID: dxformat = DXGI_FORMAT_R32_FLOAT; break;
case GSTexture::Format::BC1: dxformat = DXGI_FORMAT_BC1_UNORM; break;
case GSTexture::Format::BC2: dxformat = DXGI_FORMAT_BC2_UNORM; break;
case GSTexture::Format::BC3: dxformat = DXGI_FORMAT_BC3_UNORM; break;
case GSTexture::Format::BC7: dxformat = DXGI_FORMAT_BC7_UNORM; break;
case GSTexture::Format::Invalid:
ASSERT(0);
dxformat = DXGI_FORMAT_UNKNOWN;
}
D3D11_TEXTURE2D_DESC desc = {};
// Texture limit for D3D10/11 min 1, max 8192 D3D10, max 16384 D3D11.
desc.Width = std::clamp(width, 1, m_d3d_texsize);
desc.Height = std::clamp(height, 1, m_d3d_texsize);
desc.Format = dxformat;
desc.Format = GSTexture11::GetDXGIFormat(format);
desc.MipLevels = levels;
desc.ArraySize = 1;
desc.SampleDesc.Count = 1;
@ -498,10 +477,6 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height
desc.BindFlags = (levels > 1 && !GSTexture::IsCompressedFormat(format)) ? (D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE) : D3D11_BIND_SHADER_RESOURCE;
desc.MiscFlags = (levels > 1 && !GSTexture::IsCompressedFormat(format)) ? D3D11_RESOURCE_MISC_GENERATE_MIPS : 0;
break;
case GSTexture::Type::Offscreen:
desc.Usage = D3D11_USAGE_STAGING;
desc.CPUAccessFlags |= D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
break;
case GSTexture::Type::RWTexture:
desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
break;
@ -527,26 +502,9 @@ GSTexture* GSDevice11::CreateSurface(GSTexture::Type type, int width, int height
return t;
}
bool GSDevice11::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map)
std::unique_ptr<GSDownloadTexture> GSDevice11::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
ASSERT(src);
ASSERT(!m_download_tex);
g_perfmon.Put(GSPerfMon::Readbacks, 1);
m_download_tex.reset(static_cast<GSTexture11*>(CreateOffscreen(rect.width(), rect.height(), src->GetFormat())));
if (!m_download_tex)
return false;
CopyRect(src, m_download_tex.get(), rect, 0, 0);
return m_download_tex->Map(out_map);
}
void GSDevice11::DownloadTextureComplete()
{
if (m_download_tex)
{
m_download_tex->Unmap();
Recycle(m_download_tex.release());
}
return GSDownloadTexture11::Create(width, height, format);
}
void GSDevice11::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY)

View File

@ -118,6 +118,8 @@ private:
GSTexture* CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) final;
std::unique_ptr<GSDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format) final;
void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) final;
void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0, int bufIdx = 0) final;
void DoFXAA(GSTexture* sTex, GSTexture* dTex) final;
@ -255,9 +257,6 @@ public:
void ClearDepth(GSTexture* t) override;
void ClearStencil(GSTexture* t, u8 c) override;
bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) override;
void DownloadTextureComplete() override;
void CloneTexture(GSTexture* src, GSTexture** dest, const GSVector4i& rect);
void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) override;

View File

@ -33,6 +33,30 @@ GSTexture11::GSTexture11(wil::com_ptr_nothrow<ID3D11Texture2D> texture, const D3
m_mipmap_levels = static_cast<int>(desc.MipLevels);
}
/// Translates a GSTexture::Format into the DXGI format used for D3D11 resources.
/// Invalid or unrecognised formats assert and map to DXGI_FORMAT_UNKNOWN.
DXGI_FORMAT GSTexture11::GetDXGIFormat(Format format)
{
	DXGI_FORMAT native = DXGI_FORMAT_UNKNOWN;

	// clang-format off
	switch (format)
	{
		case GSTexture::Format::Color:        native = DXGI_FORMAT_R8G8B8A8_UNORM;     break;
		case GSTexture::Format::HDRColor:     native = DXGI_FORMAT_R16G16B16A16_UNORM; break;
		case GSTexture::Format::DepthStencil: native = DXGI_FORMAT_R32G8X24_TYPELESS;  break;
		case GSTexture::Format::UNorm8:       native = DXGI_FORMAT_A8_UNORM;           break;
		case GSTexture::Format::UInt16:       native = DXGI_FORMAT_R16_UINT;           break;
		case GSTexture::Format::UInt32:       native = DXGI_FORMAT_R32_UINT;           break;
		case GSTexture::Format::PrimID:       native = DXGI_FORMAT_R32_FLOAT;          break;
		case GSTexture::Format::BC1:          native = DXGI_FORMAT_BC1_UNORM;          break;
		case GSTexture::Format::BC2:          native = DXGI_FORMAT_BC2_UNORM;          break;
		case GSTexture::Format::BC3:          native = DXGI_FORMAT_BC3_UNORM;          break;
		case GSTexture::Format::BC7:          native = DXGI_FORMAT_BC7_UNORM;          break;
		case GSTexture::Format::Invalid:
		default:
			// Not a format that can be backed by a D3D11 resource.
			ASSERT(0);
			break;
	}
	// clang-format on

	return native;
}
void* GSTexture11::GetNativeHandle() const
{
return static_cast<ID3D11ShaderResourceView*>(*const_cast<GSTexture11*>(this));
@ -273,3 +297,106 @@ bool GSTexture11::Equal(GSTexture11* tex)
{
return tex && m_texture == tex->m_texture;
}
GSDownloadTexture11::GSDownloadTexture11(wil::com_ptr_nothrow<ID3D11Texture2D> tex, u32 width, u32 height, GSTexture::Format format)
: GSDownloadTexture(width, height, format)
, m_texture(std::move(tex))
{
}
/// Makes sure the staging texture is not left mapped when the object goes away.
GSDownloadTexture11::~GSDownloadTexture11()
{
	if (!IsMapped())
		return;

	// Qualified call: no virtual dispatch from within the destructor.
	GSDownloadTexture11::Unmap();
}
/// Creates a single-mip, CPU-readable staging texture of the requested size and format.
/// @return The new download texture, or an empty pointer if CreateTexture2D() fails
///         (the failure is logged to the console).
std::unique_ptr<GSDownloadTexture11> GSDownloadTexture11::Create(u32 width, u32 height, GSTexture::Format format)
{
	// Value-initialised so every field not set below is zero.
	D3D11_TEXTURE2D_DESC staging_desc = {};
	staging_desc.Usage = D3D11_USAGE_STAGING;
	staging_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
	staging_desc.Format = GSTexture11::GetDXGIFormat(format);
	staging_desc.Width = width;
	staging_desc.Height = height;
	staging_desc.MipLevels = 1;
	staging_desc.ArraySize = 1;
	staging_desc.SampleDesc.Count = 1;
	staging_desc.SampleDesc.Quality = 0;

	wil::com_ptr_nothrow<ID3D11Texture2D> staging;
	const HRESULT hr = GSDevice11::GetInstance()->GetD3DDevice()->CreateTexture2D(&staging_desc, nullptr, staging.put());
	if (FAILED(hr))
	{
		Console.Error("GSDownloadTexture11: CreateTexture2D() failed: %08X", hr);
		return {};
	}

	// The constructor is private, so std::make_unique cannot be used here.
	return std::unique_ptr<GSDownloadTexture11>(new GSDownloadTexture11(std::move(staging), width, height, format));
}
/// Queues a GPU copy of `src` (at mip `src_level` of `stex`) into `drc` of the staging texture.
/// Any existing mapping is released first, and the texture is flagged as needing a flush.
/// Depth-stencil textures are always copied in full, ignoring the rectangles.
/// `use_transfer_pitch` is not used by this backend.
void GSDownloadTexture11::CopyFromTexture(
	const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch)
{
	pxAssert(stex->GetFormat() == m_format);
	pxAssert(drc.width() == src.width() && drc.height() == src.height());
	pxAssert(src.z <= stex->GetWidth() && src.w <= stex->GetHeight());
	pxAssert(static_cast<u32>(drc.z) <= m_width && static_cast<u32>(drc.w) <= m_height);
	pxAssert(src_level < static_cast<u32>(stex->GetMipmapLevels()));

	g_perfmon.Put(GSPerfMon::Readbacks, 1);

	if (IsMapped())
		Unmap();

	// depth textures need to copy the whole thing..
	const bool copy_whole_resource = (m_format == GSTexture::Format::DepthStencil);
	const CD3D11_BOX sbox(src.left, src.top, 0, src.right, src.bottom, 1);

	// A null source box copies the entire subresource, landing at the destination origin.
	GSDevice11::GetInstance()->GetD3DContext()->CopySubresourceRegion(
		m_texture.get(), 0,
		copy_whole_resource ? 0 : drc.x,
		copy_whole_resource ? 0 : drc.y,
		0, *static_cast<GSTexture11*>(stex), src_level,
		copy_whole_resource ? nullptr : &sbox);

	m_needs_flush = true;
}
bool GSDownloadTexture11::Map(const GSVector4i& rc)
{
if (IsMapped())
return true;
D3D11_MAPPED_SUBRESOURCE sr;
HRESULT hr = GSDevice11::GetInstance()->GetD3DContext()->Map(m_texture.get(), 0, D3D11_MAP_READ, 0, &sr);
if (FAILED(hr))
{
Console.Error("GSDownloadTexture11: Map() failed: %08X", hr);
return false;
}
m_map_pointer = static_cast<u8*>(sr.pData);
m_current_pitch = sr.RowPitch;
return true;
}
/// Releases the CPU mapping, if any, and clears the cached map pointer.
void GSDownloadTexture11::Unmap()
{
	if (IsMapped())
	{
		GSDevice11::GetInstance()->GetD3DContext()->Unmap(m_texture.get(), 0);
		m_map_pointer = nullptr;
	}
}
/// Completes a pending CopyFromTexture() as far as this backend needs to.
/// No explicit GPU submit is issued here: the copy is handled when the texture is mapped.
/// Flush() only drops any stale mapping and clears the pending-flush flag.
void GSDownloadTexture11::Flush()
{
	if (!m_needs_flush)
		return;

	// Clear the flag so NeedsFlush() is accurate and later Flush()/ReadTexels() calls do not
	// keep unmapping and remapping the texture when no new copy has been queued.
	m_needs_flush = false;

	if (IsMapped())
		Unmap();

	// Handled when mapped.
}

View File

@ -20,6 +20,7 @@
#include "common/RedtapeWindows.h"
#include "common/RedtapeWilCom.h"
#include <d3d11.h>
#include <memory>
class GSTexture11 final : public GSTexture
{
@ -35,6 +36,8 @@ public:
explicit GSTexture11(wil::com_ptr_nothrow<ID3D11Texture2D> texture, const D3D11_TEXTURE2D_DESC& desc,
GSTexture::Type type, GSTexture::Format format);
static DXGI_FORMAT GetDXGIFormat(Format format);
void* GetNativeHandle() const override;
bool Update(const GSVector4i& r, const void* data, int pitch, int layer = 0) override;
@ -51,3 +54,24 @@ public:
operator ID3D11DepthStencilView*();
operator ID3D11UnorderedAccessView*();
};
/// Direct3D 11 implementation of GSDownloadTexture, backed by a staging ID3D11Texture2D.
/// Construct through Create(); the constructor is private.
class GSDownloadTexture11 final : public GSDownloadTexture
{
public:
/// Unmaps the staging texture if it is still mapped.
~GSDownloadTexture11() override;
/// Creates the staging texture; returns an empty pointer if texture creation fails.
static std::unique_ptr<GSDownloadTexture11> Create(u32 width, u32 height, GSTexture::Format format);
/// Queues a copy from stex into this staging texture and marks it as needing a flush.
void CopyFromTexture(
const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch) override;
/// Maps the staging texture for CPU reads; returns false if the D3D11 map call fails.
bool Map(const GSVector4i& rc) override;
/// Releases the mapping, if any.
void Unmap() override;
/// Unmaps if a copy is pending; the readback itself is handled when mapping.
void Flush() override;
private:
GSDownloadTexture11(wil::com_ptr_nothrow<ID3D11Texture2D> tex, u32 width, u32 height, GSTexture::Format format);
// The staging texture that copies land in and that Map() exposes to the CPU.
wil::com_ptr_nothrow<ID3D11Texture2D> m_texture;
};

View File

@ -316,8 +316,6 @@ void GSDevice12::LookupNativeFormat(GSTexture::Format format, DXGI_FORMAT* d3d_f
GSTexture* GSDevice12::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
{
pxAssert(type != GSTexture::Type::Offscreen);
const u32 clamped_width = static_cast<u32>(std::clamp<int>(width, 1, D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION));
const u32 clamped_height = static_cast<u32>(std::clamp<int>(height, 1, D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION));
@ -336,69 +334,9 @@ GSTexture* GSDevice12::CreateSurface(GSTexture::Type type, int width, int height
return tex.release();
}
bool GSDevice12::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map)
std::unique_ptr<GSDownloadTexture> GSDevice12::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
const u32 width = rect.width();
const u32 height = rect.height();
const u32 pitch = Common::AlignUpPow2(width * D3D12::GetTexelSize(static_cast<GSTexture12*>(src)->GetNativeFormat()), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 size = pitch * height;
constexpr u32 level = 0;
if (!CheckStagingBufferSize(size))
{
Console.Error("Can't read back %ux%u", width, height);
return false;
}
g_perfmon.Put(GSPerfMon::Readbacks, 1);
EndRenderPass();
UnmapStagingBuffer();
{
ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList();
GSTexture12* dsrc = static_cast<GSTexture12*>(src);
GL_INS("ReadbackTexture: {%d,%d} %ux%u", rect.left, rect.top, width, height);
D3D12_TEXTURE_COPY_LOCATION srcloc;
srcloc.pResource = dsrc->GetResource();
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
srcloc.SubresourceIndex = level;
D3D12_TEXTURE_COPY_LOCATION dstloc;
dstloc.pResource = m_readback_staging_buffer.get();
dstloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dstloc.PlacedFootprint.Offset = 0;
dstloc.PlacedFootprint.Footprint.Format = dsrc->GetNativeFormat();
dstloc.PlacedFootprint.Footprint.Width = width;
dstloc.PlacedFootprint.Footprint.Height = height;
dstloc.PlacedFootprint.Footprint.Depth = 1;
dstloc.PlacedFootprint.Footprint.RowPitch = pitch;
const D3D12_RESOURCE_STATES old_layout = dsrc->GetResourceState();
if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
dsrc->GetTexture().TransitionSubresourceToState(cmdlist, level, old_layout, D3D12_RESOURCE_STATE_COPY_SOURCE);
const D3D12_BOX srcbox{static_cast<UINT>(rect.left), static_cast<UINT>(rect.top), 0u,
static_cast<UINT>(rect.right), static_cast<UINT>(rect.bottom), 1u};
cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox);
if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
dsrc->GetTexture().TransitionSubresourceToState(cmdlist, level, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout);
}
// exec and wait
ExecuteCommandList(true);
if (!MapStagingBuffer(size))
return false;
out_map.bits = reinterpret_cast<u8*>(m_readback_staging_buffer_map);
out_map.pitch = pitch;
return true;
}
void GSDevice12::DownloadTextureComplete()
{
UnmapStagingBuffer();
return GSDownloadTexture12::Create(width, height, format);
}
void GSDevice12::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY)
@ -1489,68 +1427,6 @@ bool GSDevice12::CompilePostProcessingPipelines()
return true;
}
bool GSDevice12::CheckStagingBufferSize(u32 required_size)
{
if (m_readback_staging_buffer_size >= required_size)
return true;
DestroyStagingBuffer();
D3D12MA::ALLOCATION_DESC allocation_desc = {};
allocation_desc.HeapType = D3D12_HEAP_TYPE_READBACK;
const D3D12_RESOURCE_DESC resource_desc = {
D3D12_RESOURCE_DIMENSION_BUFFER, 0, required_size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
D3D12_RESOURCE_FLAG_NONE};
HRESULT hr = g_d3d12_context->GetAllocator()->CreateResource(&allocation_desc, &resource_desc,
D3D12_RESOURCE_STATE_COPY_DEST, nullptr, m_readback_staging_allocation.put(), IID_PPV_ARGS(m_readback_staging_buffer.put()));
if (FAILED(hr))
{
Console.Error("(GSDevice12::CheckStagingBufferSize) CreateResource() failed with HRESULT %08X", hr);
return false;
}
m_readback_staging_buffer_size = required_size;
return true;
}
bool GSDevice12::MapStagingBuffer(u32 size_to_read)
{
if (m_readback_staging_buffer_map)
return true;
const D3D12_RANGE range = {0, size_to_read};
const HRESULT hr = m_readback_staging_buffer->Map(0, &range, &m_readback_staging_buffer_map);
if (FAILED(hr))
{
Console.Error("(GSDevice12::MapStagingBuffer) Map() failed with HRESULT %08X", hr);
return false;
}
return true;
}
void GSDevice12::UnmapStagingBuffer()
{
if (!m_readback_staging_buffer_map)
return;
const D3D12_RANGE write_range = {};
m_readback_staging_buffer->Unmap(0, &write_range);
m_readback_staging_buffer_map = nullptr;
}
void GSDevice12::DestroyStagingBuffer()
{
UnmapStagingBuffer();
// safe to immediately destroy, since the GPU doesn't write to it without a copy+exec.
m_readback_staging_buffer_size = 0;
m_readback_staging_allocation.reset();
m_readback_staging_buffer.reset();
}
void GSDevice12::DestroyResources()
{
g_d3d12_context->ExecuteCommandList(D3D12::Context::WaitType::Sleep);
@ -1581,8 +1457,6 @@ void GSDevice12::DestroyResources()
g_d3d12_context->DeferDescriptorDestruction(g_d3d12_context->GetSamplerHeapManager(), &m_point_sampler_cpu);
g_d3d12_context->InvalidateSamplerGroups();
DestroyStagingBuffer();
m_pixel_constant_buffer.Destroy(false);
m_vertex_constant_buffer.Destroy(false);
m_index_stream_buffer.Destroy(false);
@ -1891,6 +1765,11 @@ void GSDevice12::ExecuteCommandListAndRestartRenderPass(bool wait_for_completion
}
}
/// Submits the current command list for a texture readback.
/// Forwards to ExecuteCommandList() with wait_for_completion = true.
void GSDevice12::ExecuteCommandListForReadback()
{
ExecuteCommandList(true);
}
void GSDevice12::InvalidateCachedState()
{
m_dirty_flags |= DIRTY_BASE_STATE | DIRTY_TFX_STATE | DIRTY_UTILITY_STATE | DIRTY_CONSTANT_BUFFER_STATE;

View File

@ -146,11 +146,6 @@ private:
D3D12::StreamBuffer m_vertex_constant_buffer;
D3D12::StreamBuffer m_pixel_constant_buffer;
ComPtr<D3D12MA::Allocation> m_readback_staging_allocation;
ComPtr<ID3D12Resource> m_readback_staging_buffer;
void* m_readback_staging_buffer_map = nullptr;
u32 m_readback_staging_buffer_size = 0;
D3D12::DescriptorHandle m_point_sampler_cpu;
D3D12::DescriptorHandle m_linear_sampler_cpu;
@ -220,11 +215,6 @@ private:
bool CompilePostProcessingPipelines();
bool CompileCASPipelines();
bool CheckStagingBufferSize(u32 required_size);
bool MapStagingBuffer(u32 size_to_read);
void UnmapStagingBuffer();
void DestroyStagingBuffer();
void DestroyResources();
public:
@ -253,8 +243,7 @@ public:
void ClearDepth(GSTexture* t) override;
void ClearStencil(GSTexture* t, u8 c) override;
bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) override;
void DownloadTextureComplete() override;
std::unique_ptr<GSDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format) override;
void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) override;
@ -296,6 +285,7 @@ public:
void ExecuteCommandList(bool wait_for_completion);
void ExecuteCommandList(bool wait_for_completion, const char* reason, ...);
void ExecuteCommandListAndRestartRenderPass(bool wait_for_completion, const char* reason);
void ExecuteCommandListForReadback();
/// Set dirty flags on everything to force re-bind at next draw time.
void InvalidateCachedState();

View File

@ -410,3 +410,152 @@ void GSTexture12::CommitClear(ID3D12GraphicsCommandList* cmdlist)
SetState(GSTexture::State::Dirty);
}
/// Forwards the dimensions and format to the GSDownloadTexture base class.
/// The readback buffer itself is attached afterwards by Create().
GSDownloadTexture12::GSDownloadTexture12(u32 width, u32 height, GSTexture::Format format)
: GSDownloadTexture(width, height, format)
{
}
/// Unmaps the buffer if needed and hands the buffer/allocation pair to the
/// context for deferred destruction.
GSDownloadTexture12::~GSDownloadTexture12()
{
	// Explicitly qualified call to this class's Unmap().
	if (IsMapped())
		GSDownloadTexture12::Unmap();

	// Nothing to release if no buffer was ever attached.
	if (!m_buffer)
		return;

	g_d3d12_context->DeferResourceDestruction(m_allocation.get(), m_buffer.get());
}
/// Creates a download texture backed by a readback-heap buffer sized via GetBufferSize()
/// for the given dimensions/format (using D3D12_TEXTURE_DATA_PITCH_ALIGNMENT).
/// Returns an empty pointer, after logging, if the buffer cannot be created.
std::unique_ptr<GSDownloadTexture12> GSDownloadTexture12::Create(u32 width, u32 height, GSTexture::Format format)
{
	const u32 size_in_bytes = GetBufferSize(width, height, format, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);

	D3D12MA::ALLOCATION_DESC alloc_info = {};
	alloc_info.HeapType = D3D12_HEAP_TYPE_READBACK;

	// Plain linear buffer of size_in_bytes bytes.
	D3D12_RESOURCE_DESC buffer_desc = {};
	buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
	buffer_desc.Alignment = 0;
	buffer_desc.Width = size_in_bytes;
	buffer_desc.Height = 1;
	buffer_desc.DepthOrArraySize = 1;
	buffer_desc.MipLevels = 1;
	buffer_desc.Format = DXGI_FORMAT_UNKNOWN;
	buffer_desc.SampleDesc = {1, 0};
	buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
	buffer_desc.Flags = D3D12_RESOURCE_FLAG_NONE;

	wil::com_ptr_nothrow<D3D12MA::Allocation> new_allocation;
	wil::com_ptr_nothrow<ID3D12Resource> new_buffer;
	const HRESULT result = g_d3d12_context->GetAllocator()->CreateResource(
		&alloc_info, &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, new_allocation.put(), IID_PPV_ARGS(new_buffer.put()));
	if (FAILED(result))
	{
		Console.Error("(GSDownloadTexture12::Create) CreateResource() failed with HRESULT %08X", result);
		return nullptr;
	}

	// The constructor is private, so the object is built here and the resources attached afterwards.
	std::unique_ptr<GSDownloadTexture12> dltex(new GSDownloadTexture12(width, height, format));
	dltex->m_allocation = std::move(new_allocation);
	dltex->m_buffer = std::move(new_buffer);
	dltex->m_buffer_size = size_in_bytes;
	return dltex;
}
/// Records a GPU copy of rectangle `src` of mip level `src_level` of `stex` into this
/// download buffer at rectangle `drc`. The copy is only recorded on the current command
/// list here; Flush() must be called before the data is read through Map().
///
/// When `use_transfer_pitch` is true the row pitch is derived from the width of `drc`
/// (which must then start at 0,0); otherwise it is derived from the full texture width.
void GSDownloadTexture12::CopyFromTexture(
const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch)
{
GSTexture12* const tex12 = static_cast<GSTexture12*>(stex);
// Preconditions: matching format, equal-sized rectangles, both rectangles in bounds,
// a valid mip level, and an origin-anchored destination when using the transfer pitch.
pxAssert(tex12->GetFormat() == m_format);
pxAssert(drc.width() == src.width() && drc.height() == src.height());
pxAssert(src.z <= tex12->GetWidth() && src.w <= tex12->GetHeight());
pxAssert(static_cast<u32>(drc.z) <= m_width && static_cast<u32>(drc.w) <= m_height);
pxAssert(src_level < static_cast<u32>(tex12->GetMipmapLevels()));
pxAssert((drc.left == 0 && drc.top == 0) || !use_transfer_pitch);
u32 copy_offset, copy_size, copy_rows;
m_current_pitch = GetTransferPitch(use_transfer_pitch ? static_cast<u32>(drc.width()) : m_width, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
// Only copy_offset is used below; copy_size and copy_rows are computed but unused here.
GetTransferSize(drc, &copy_offset, &copy_size, &copy_rows);
g_perfmon.Put(GSPerfMon::Readbacks, 1);
// The copy is recorded outside of a render pass, after any pending clear on the source is committed.
GSDevice12::GetInstance()->EndRenderPass();
tex12->CommitClear();
// Drop any existing CPU mapping before the buffer is written by the GPU again.
if (IsMapped())
Unmap();
ID3D12GraphicsCommandList* cmdlist = g_d3d12_context->GetCommandList();
GL_INS("ReadbackTexture: {%d,%d} %ux%u", src.left, src.top, src.width(), src.height());
// Source: the requested mip level of the texture.
D3D12_TEXTURE_COPY_LOCATION srcloc;
srcloc.pResource = tex12->GetResource();
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
srcloc.SubresourceIndex = src_level;
// Destination: a placed footprint inside the readback buffer.
// NOTE(review): D3D12 places alignment requirements on PlacedFootprint.Offset; copy_offset
// comes from GetTransferSize(), whose rounding is not visible here - confirm for non-origin drc.
D3D12_TEXTURE_COPY_LOCATION dstloc;
dstloc.pResource = m_buffer.get();
dstloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
dstloc.PlacedFootprint.Offset = copy_offset;
dstloc.PlacedFootprint.Footprint.Format = tex12->GetNativeFormat();
dstloc.PlacedFootprint.Footprint.Width = drc.width();
dstloc.PlacedFootprint.Footprint.Height = drc.height();
dstloc.PlacedFootprint.Footprint.Depth = 1;
dstloc.PlacedFootprint.Footprint.RowPitch = m_current_pitch;
// Temporarily move the source subresource to COPY_SOURCE if it is not already in that state.
const D3D12_RESOURCE_STATES old_layout = tex12->GetResourceState();
if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
tex12->GetTexture().TransitionSubresourceToState(cmdlist, src_level, old_layout, D3D12_RESOURCE_STATE_COPY_SOURCE);
// TODO: Rules for depth buffers here?
const D3D12_BOX srcbox{
static_cast<UINT>(src.left), static_cast<UINT>(src.top), 0u, static_cast<UINT>(src.right), static_cast<UINT>(src.bottom), 1u};
cmdlist->CopyTextureRegion(&dstloc, 0, 0, 0, &srcloc, &srcbox);
// Restore the state the texture was in before the copy.
if (old_layout != D3D12_RESOURCE_STATE_COPY_SOURCE)
tex12->GetTexture().TransitionSubresourceToState(cmdlist, src_level, D3D12_RESOURCE_STATE_COPY_SOURCE, old_layout);
// Remember which fence value covers this copy so Flush() knows what to wait for.
m_copy_fence_value = g_d3d12_context->GetCurrentFenceValue();
m_needs_flush = true;
}
bool GSDownloadTexture12::Map(const GSVector4i& read_rc)
{
if (IsMapped())
return true;
// Never populated?
if (!m_current_pitch)
return false;
u32 copy_offset, copy_size, copy_rows;
GetTransferSize(read_rc, &copy_offset, &copy_size, &copy_rows);
const D3D12_RANGE read_range{copy_offset, copy_offset + copy_size};
const HRESULT hr = m_buffer->Map(0, &read_range, reinterpret_cast<void**>(const_cast<u8**>(&m_map_pointer)));
if (FAILED(hr))
{
Console.Error("(GSDownloadTexture12::Map) Map() failed with HRESULT %08X", hr);
return false;
}
return true;
}
void GSDownloadTexture12::Unmap()
{
if (!IsMapped())
return;
const D3D12_RANGE write_range = {};
m_buffer->Unmap(0, &write_range);
m_map_pointer = nullptr;
}
/// Ensures the most recently recorded copy has finished on the GPU.
/// No-op when no copy is pending or the copy's fence value has already completed.
void GSDownloadTexture12::Flush()
{
	if (m_needs_flush)
	{
		m_needs_flush = false;

		const bool copy_already_done = (g_d3d12_context->GetCompletedFenceValue() >= m_copy_fence_value);
		if (!copy_already_done)
		{
			// Need to execute command buffer.
			const bool copy_on_current_list = (g_d3d12_context->GetCurrentFenceValue() == m_copy_fence_value);
			if (copy_on_current_list)
				GSDevice12::GetInstance()->ExecuteCommandListForReadback();
			else
				g_d3d12_context->WaitForFence(m_copy_fence_value, GSConfig.HWSpinGPUForReadbacks);
		}
	}
}

View File

@ -90,3 +90,27 @@ private:
GSVector4i m_map_area = GSVector4i::zero();
u32 m_map_level = UINT32_MAX;
};
/// D3D12 implementation of GSDownloadTexture, backed by a buffer that Create()
/// allocates on the readback heap.
class GSDownloadTexture12 final : public GSDownloadTexture
{
public:
~GSDownloadTexture12() override;
/// Factory; returns an empty pointer if the readback buffer cannot be created.
static std::unique_ptr<GSDownloadTexture12> Create(u32 width, u32 height, GSTexture::Format format);
/// Records a GPU copy from `stex` into the buffer; call Flush() before reading.
void CopyFromTexture(const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch) override;
/// Maps the buffer for CPU reads of the region described by `read_rc`.
bool Map(const GSVector4i& read_rc) override;
void Unmap() override;
/// Waits (submitting the command list if required) until the last recorded copy has completed.
void Flush() override;
private:
// Construction goes through Create().
GSDownloadTexture12(u32 width, u32 height, GSTexture::Format format);
// Allocation and resource returned together by the allocator's CreateResource().
wil::com_ptr_nothrow<D3D12MA::Allocation> m_allocation;
wil::com_ptr_nothrow<ID3D12Resource> m_buffer;
// Fence value captured when the last copy was recorded.
u64 m_copy_fence_value = 0;
// Size in bytes of m_buffer.
u32 m_buffer_size = 0;
};

View File

@ -767,6 +767,26 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb,
GetTargetHeight(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, static_cast<u32>(needed_height));
}
/// Ensures `*tex` holds a download texture of at least `width` x `height`.
///
/// An existing texture that is already large enough is reused as-is. Otherwise it is
/// replaced by a new one whose dimensions are the per-axis maximum of the old size and
/// the requested size.
///
/// @param width   Minimum required width in pixels.
/// @param height  Minimum required height in pixels.
/// @param format  Format used when a new texture has to be created.
/// @param tex     Slot holding the (possibly empty) download texture; must not be null.
/// @return true if `*tex` is usable on return, false if a new texture could not be created
///         (in which case `*tex` is left empty).
bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Format format, std::unique_ptr<GSDownloadTexture>* tex)
{
	GSDownloadTexture* ctex = tex->get();
	if (ctex && ctex->GetWidth() >= width && ctex->GetHeight() >= height)
		return true;

	// In the case of oddly sized texture reads, we'll keep the larger dimension.
	const u32 new_width = ctex ? std::max(ctex->GetWidth(), width) : width;
	const u32 new_height = ctex ? std::max(ctex->GetHeight(), height) : height;

	// Release the old texture before creating its replacement.
	tex->reset();
	*tex = g_gs_device->CreateDownloadTexture(new_width, new_height, format);

	// Check the created texture, not the slot pointer: `tex` itself is never null here,
	// so testing it would report success even when creation failed.
	if (!*tex)
	{
		Console.WriteLn("Failed to create %ux%u download texture", new_width, new_height);
		return false;
	}

	return true;
}
// Expands targets where the write from the EE overlaps the edge of a render target and uses the same base pointer.
void GSTextureCache::ExpandTarget(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{
@ -2333,30 +2353,35 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r)
GSTexture::Format fmt;
ShaderConvert ps_shader;
std::unique_ptr<GSDownloadTexture>* dltex;
switch (TEX0.PSM)
{
case PSM_PSMCT32:
case PSM_PSMCT24:
fmt = GSTexture::Format::Color;
ps_shader = ShaderConvert::COPY;
dltex = &m_color_download_texture;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
fmt = GSTexture::Format::UInt16;
ps_shader = ShaderConvert::RGBA8_TO_16_BITS;
dltex = &m_uint16_download_texture;
break;
case PSM_PSMZ32:
case PSM_PSMZ24:
fmt = GSTexture::Format::UInt32;
ps_shader = ShaderConvert::FLOAT32_TO_32_BITS;
dltex = &m_uint32_download_texture;
break;
case PSM_PSMZ16:
case PSM_PSMZ16S:
fmt = GSTexture::Format::UInt16;
ps_shader = ShaderConvert::FLOAT32_TO_16_BITS;
dltex = &m_uint16_download_texture;
break;
default:
@ -2366,58 +2391,85 @@ void GSTextureCache::Read(Target* t, const GSVector4i& r)
// Yes lots of logging, but I'm not confident with this code
GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM);
GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d",
t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height());
GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d", t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height());
const GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy();
const GSVector4 src(GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy());
const GSVector4i drc(0, 0, r.width(), r.height());
const bool direct_read = (t->m_texture->GetScale() == GSVector2(1, 1) && ps_shader == ShaderConvert::COPY);
bool res;
GSTexture::GSMap m;
if (!PrepareDownloadTexture(drc.z, drc.w, fmt, dltex))
return;
if (t->m_texture->GetScale() == GSVector2(1, 1) && ps_shader == ShaderConvert::COPY)
res = g_gs_device->DownloadTexture(t->m_texture, r, m);
else
res = g_gs_device->DownloadTextureConvert(t->m_texture, src, GSVector2i(r.width(), r.height()), fmt, ps_shader, m, false);
if (res)
if (direct_read)
{
dltex->get()->CopyFromTexture(drc, t->m_texture, r, 0, true);
}
else
{
GSTexture* tmp = g_gs_device->CreateRenderTarget(drc.z, drc.w, fmt, false);
if (tmp)
{
g_gs_device->StretchRect(t->m_texture, src, tmp, GSVector4(drc), ps_shader, false);
dltex->get()->CopyFromTexture(drc, tmp, drc, 0, true);
g_gs_device->Recycle(tmp);
}
else
{
Console.Error("Failed to allocate temporary %dx%d target for read.", drc.z, drc.w);
return;
}
}
dltex->get()->Flush();
if (!dltex->get()->Map(drc))
return;
// Why does WritePixelNN() not take a const pointer?
const GSOffset off = g_gs_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
u8* bits = const_cast<u8*>(dltex->get()->GetMapPointer());
const u32 pitch = dltex->get()->GetMapPitch();
switch (TEX0.PSM)
{
case PSM_PSMCT32:
case PSM_PSMZ32:
g_gs_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
g_gs_renderer->m_mem.WritePixel32(bits, pitch, off, r);
break;
case PSM_PSMCT24:
case PSM_PSMZ24:
g_gs_renderer->m_mem.WritePixel24(m.bits, m.pitch, off, r);
g_gs_renderer->m_mem.WritePixel24(bits, pitch, off, r);
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
case PSM_PSMZ16:
case PSM_PSMZ16S:
g_gs_renderer->m_mem.WritePixel16(m.bits, m.pitch, off, r);
g_gs_renderer->m_mem.WritePixel16(bits, pitch, off, r);
break;
default:
ASSERT(0);
Console.Error("Unknown PSM %u on Read", TEX0.PSM);
break;
}
g_gs_device->DownloadTextureComplete();
}
dltex->get()->Unmap();
}
// Reads rectangle `r` of a source's texture back and writes it into GS local memory
// as 32-bit pixels at the location described by the source's TEX0.
// NOTE(review): this span appears to show the old readback path (GSTexture::GSMap /
// DownloadTexture / DownloadTextureComplete) and the new one (PrepareDownloadTexture /
// CopyFromTexture / Flush / Map / Unmap on m_color_download_texture) interleaved -
// confirm which lines belong to the final function before relying on it.
void GSTextureCache::Read(Source* t, const GSVector4i& r)
{
const GIFRegTEX0& TEX0 = t->m_TEX0;
const GSVector4i drc(0, 0, r.width(), r.height());
GSTexture::GSMap m;
if (g_gs_device->DownloadTexture(t->m_texture, r, m))
if (!PrepareDownloadTexture(drc.z, drc.w, GSTexture::Format::Color, &m_color_download_texture))
return;
m_color_download_texture->CopyFromTexture(drc, t->m_texture, r, 0, true);
m_color_download_texture->Flush();
if (m_color_download_texture->Map(drc))
{
GSOffset off = g_gs_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
g_gs_renderer->m_mem.WritePixel32(m.bits, m.pitch, off, r);
g_gs_device->DownloadTextureComplete();
GSOffset off = g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM);
g_gs_renderer->m_mem.WritePixel32(
const_cast<u8*>(m_color_download_texture->GetMapPointer()), m_color_download_texture->GetMapPitch(), off, r);
m_color_download_texture->Unmap();
}
}

View File

@ -307,6 +307,9 @@ protected:
constexpr static size_t S_SURFACE_OFFSET_CACHE_MAX_SIZE = std::numeric_limits<u16>::max();
std::unordered_map<SurfaceOffsetKey, SurfaceOffset, SurfaceOffsetKeyHash, SurfaceOffsetKeyEqual> m_surface_offset_cache;
Source* m_temporary_source = nullptr; // invalidated after the draw
std::unique_ptr<GSDownloadTexture> m_color_download_texture;
std::unique_ptr<GSDownloadTexture> m_uint16_download_texture;
std::unique_ptr<GSDownloadTexture> m_uint32_download_texture;
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut);
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
@ -315,6 +318,9 @@ protected:
/// plus the height is larger than the current size of the target.
void ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, int real_w, int real_h);
/// Resizes the download texture if needed.
bool PrepareDownloadTexture(u32 width, u32 height, GSTexture::Format format, std::unique_ptr<GSDownloadTexture>* tex);
HashCacheEntry* LookupHashCache(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, bool& paltex, const u32* clut, const GSVector2i* lod);
static void PreloadTexture(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, GSLocalMemory& mem, bool paltex, GSTexture* tex, u32 level);

View File

@ -332,10 +332,16 @@ public:
id<MTLBlitCommandEncoder> GetVertexUploadEncoder();
/// Get the render command buffer, creating a new one if it doesn't exist
id<MTLCommandBuffer> GetRenderCmdBuf();
/// Get the render command buffer, will not create a new one if it doesn't exist.
id<MTLCommandBuffer> GetRenderCmdBufWithoutCreate();
/// Get the spin fence if spinning is enabled.
id<MTLFence> GetSpinFence();
/// Called by command buffers when they finish
void DrawCommandBufferFinished(u64 draw, id<MTLCommandBuffer> buffer);
/// Flush pending operations from all encoders to the GPU
void FlushEncoders();
/// Flush pending operations and spins the GPU for a download.
void FlushEncodersForReadback();
/// End current render pass without flushing
void EndRenderPass();
/// Begin a new render pass (may reuse existing)
@ -362,7 +368,7 @@ public:
void ClearDepth(GSTexture* t) override;
void ClearStencil(GSTexture* t, u8 c) override;
bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) override;
std::unique_ptr<GSDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format) override;
void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) override;
void DoStretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, id<MTLRenderPipelineState> pipeline, bool linear, LoadAction load_action, void* frag_uniform, size_t frag_uniform_len);
@ -412,4 +418,19 @@ public:
u32 FrameNo() const { return m_frame; }
};
/// Returns true once a command buffer has reached a terminal state
/// (completed or failed with an error), false while it is still pending.
static constexpr bool IsCommandBufferCompleted(MTLCommandBufferStatus status)
{
	// Every enumerator is listed explicitly (no default case).
	switch (status)
	{
		// Terminal states.
		case MTLCommandBufferStatusError:
		case MTLCommandBufferStatusCompleted:
			return true;

		// Still somewhere between creation and execution.
		case MTLCommandBufferStatusScheduled:
		case MTLCommandBufferStatusCommitted:
		case MTLCommandBufferStatusEnqueued:
		case MTLCommandBufferStatusNotEnqueued:
			return false;
	}
}
#endif // __APPLE__

View File

@ -31,21 +31,6 @@ static constexpr simd::float2 ToSimd(const GSVector2& vec)
return simd::make_float2(vec.x, vec.y);
}
/// Returns true once a command buffer has reached a terminal state
/// (completed or failed with an error), false while it is still pending.
static constexpr bool IsCommandBufferCompleted(MTLCommandBufferStatus status)
{
	// Every enumerator is listed explicitly (no default case).
	switch (status)
	{
		// Terminal states.
		case MTLCommandBufferStatusError:
		case MTLCommandBufferStatusCompleted:
			return true;

		// Still somewhere between creation and execution.
		case MTLCommandBufferStatusScheduled:
		case MTLCommandBufferStatusCommitted:
		case MTLCommandBufferStatusEnqueued:
		case MTLCommandBufferStatusNotEnqueued:
			return false;
	}
}
GSDevice* MakeGSDeviceMTL()
{
return new GSDeviceMTL();
@ -241,6 +226,16 @@ id<MTLCommandBuffer> GSDeviceMTL::GetRenderCmdBuf()
return m_current_render_cmdbuf;
}
/// Returns the current render command buffer member as-is, without creating one.
id<MTLCommandBuffer> GSDeviceMTL::GetRenderCmdBufWithoutCreate()
{
return m_current_render_cmdbuf;
}
/// Returns the spin fence while the spin timer is non-zero, otherwise nil.
id<MTLFence> GSDeviceMTL::GetSpinFence()
{
	if (!m_spin_timer)
		return nil;

	return m_spin_fence;
}
void GSDeviceMTL::DrawCommandBufferFinished(u64 draw, id<MTLCommandBuffer> buffer)
{
// We can do the update non-atomically because we only ever update under the lock
@ -352,6 +347,19 @@ void GSDeviceMTL::FlushEncoders()
}
}
/// Flushes all pending encoders and, when GPU spinning for readbacks is enabled
/// (and the OS is new enough), notifies the spin manager and re-arms the spin timer.
void GSDeviceMTL::FlushEncodersForReadback()
{
	FlushEncoders();

	// Nothing more to do unless the user asked for GPU spinning on readbacks.
	if (!GSConfig.HWSpinGPUForReadbacks)
		return;

	if (@available(macOS 10.15, iOS 10.3, *))
	{
		m_spin_manager.ReadbackRequested();
		m_spin_timer = 30;
	}
}
void GSDeviceMTL::EndRenderPass()
{
if (m_current_render.encoder)
@ -498,9 +506,6 @@ GSTexture* GSDeviceMTL::CreateSurface(GSTexture::Type type, int width, int heigh
case GSTexture::Type::Texture:
[desc setUsage:MTLTextureUsageShaderRead];
break;
case GSTexture::Type::Offscreen:
[desc setUsage:MTLTextureUsageRenderTarget];
break;
case GSTexture::Type::RenderTarget:
if (m_dev.features.slow_color_compression)
[desc setUsage:MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget | MTLTextureUsagePixelFormatView]; // Force color compression off by including PixelFormatView
@ -1081,57 +1086,10 @@ void GSDeviceMTL::ClearStencil(GSTexture* t, uint8 c)
static_cast<GSTextureMTL*>(t)->RequestStencilClear(c);
}
/// Synchronously copies `rect` of `src` into the device's shared download buffer and
/// returns the CPU pointer and pitch through `out_map`. Blocks until the GPU copy has finished.
bool GSDeviceMTL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map)
{ @autoreleasepool {
ASSERT(src);
// The blit is encoded outside of any render pass.
EndRenderPass();
GSTextureMTL* msrc = static_cast<GSTextureMTL*>(src);
// Tightly packed rows: bytes-per-block times the rectangle width.
out_map.pitch = msrc->GetCompressedBytesPerBlock() * rect.width();
size_t size = out_map.pitch * rect.height();
// Grow the shared download buffer if this read needs more space than it currently has.
if ([m_texture_download_buf length] < size)
m_texture_download_buf = MRCTransfer([m_dev.dev newBufferWithLength:size options:MTLResourceStorageModeShared]);
pxAssertRel(m_texture_download_buf, "Failed to allocate download buffer (out of memory?)");
// Hold a reference to the command buffer so its status can be queried after FlushEncoders().
MRCOwned<id<MTLCommandBuffer>> cmdbuf = MRCRetain(GetRenderCmdBuf());
[cmdbuf pushDebugGroup:@"DownloadTexture"];
id<MTLBlitCommandEncoder> encoder = [cmdbuf blitCommandEncoder];
[encoder copyFromTexture:msrc->GetTexture()
sourceSlice:0
sourceLevel:0
sourceOrigin:MTLOriginMake(rect.x, rect.y, 0)
sourceSize:MTLSizeMake(rect.width(), rect.height(), 1)
toBuffer:m_texture_download_buf
destinationOffset:0
destinationBytesPerRow:out_map.pitch
destinationBytesPerImage:size];
// Update the spin fence from this encoder while the spin timer is non-zero.
if (m_spin_timer)
[encoder updateFence:m_spin_fence];
[encoder endEncoding];
[cmdbuf popDebugGroup];
FlushEncoders();
if (@available(macOS 10.15, iOS 10.3, *))
{
if (GSConfig.HWSpinGPUForReadbacks)
{
m_spin_manager.ReadbackRequested();
m_spin_timer = 30;
}
}
// Wait for the copy: either busy-poll the command buffer status or block on it.
if (GSConfig.HWSpinCPUForReadbacks)
{
while (!IsCommandBufferCompleted([cmdbuf status]))
ShortSpin();
}
else
{
[cmdbuf waitUntilCompleted];
}
out_map.bits = static_cast<u8*>([m_texture_download_buf contents]);
g_perfmon.Put(GSPerfMon::Readbacks, 1);
return true;
}}
/// Creates a Metal download texture owned by this device.
/// Forwards to GSDownloadTextureMTL::Create(), which returns an empty pointer on allocation failure.
std::unique_ptr<GSDownloadTexture> GSDeviceMTL::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
return GSDownloadTextureMTL::Create(this, width, height, format);
}
void GSDeviceMTL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY)
{ @autoreleasepool {

View File

@ -78,4 +78,29 @@ public:
id<MTLTexture> GetTexture() { return m_texture; }
};
/// Metal implementation of GSDownloadTexture, backed by an MTLBuffer created with
/// shared storage mode. The map pointer is taken from the buffer's contents at construction.
class GSDownloadTextureMTL final : public GSDownloadTexture
{
public:
~GSDownloadTextureMTL() override;
/// Factory; returns an empty pointer if the buffer cannot be allocated.
static std::unique_ptr<GSDownloadTextureMTL> Create(GSDeviceMTL* dev, u32 width, u32 height, GSTexture::Format format);
/// Encodes a blit from `stex` into the buffer; call Flush() before reading the data.
void CopyFromTexture(const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch) override;
/// Always succeeds; the buffer is permanently mapped.
bool Map(const GSVector4i& read_rc) override;
/// No-op; the buffer is permanently mapped.
void Unmap() override;
/// Waits until the command buffer containing the last copy has completed.
void Flush() override;
private:
// TODO: Is there an optimal transfer pitch alignment for Metal?
static constexpr u32 PITCH_ALIGNMENT = 32;
// Construction goes through Create().
GSDownloadTextureMTL(GSDeviceMTL* dev, MRCOwned<id<MTLBuffer>> buffer, u32 width, u32 height, GSTexture::Format format);
// Device pointer as passed to Create(); stored without taking ownership.
GSDeviceMTL* m_dev;
MRCOwned<id<MTLBuffer>> m_buffer;
// Command buffer that holds the most recent copy; reset to nil once Flush() has observed completion.
MRCOwned<id<MTLCommandBuffer>> m_copy_cmdbuffer = nil;
};
#endif

View File

@ -14,9 +14,10 @@
*/
#include "PrecompiledHeader.h"
#include "GSTextureMTL.h"
#include "GSDeviceMTL.h"
#include "GS/Renderers/Metal/GSTextureMTL.h"
#include "GS/Renderers/Metal/GSDeviceMTL.h"
#include "GS/GSPerfMon.h"
#include "common/Console.h"
#ifdef __APPLE__
@ -207,4 +208,119 @@ void GSTextureMTL::Swap(GSTexture* other)
#undef SWAP
}
/// Takes ownership of `buffer` and caches its CPU-visible contents pointer as the map pointer.
GSDownloadTextureMTL::GSDownloadTextureMTL(GSDeviceMTL* dev, MRCOwned<id<MTLBuffer>> buffer,
u32 width, u32 height, GSTexture::Format format)
: GSDownloadTexture(width, height, format)
, m_dev(dev)
, m_buffer(std::move(buffer))
{
// Set once here; Map()/Unmap() never change it.
m_map_pointer = static_cast<const u8*>([m_buffer contents]);
}
// Defaulted: the MRCOwned members release the buffer and command buffer on destruction.
GSDownloadTextureMTL::~GSDownloadTextureMTL() = default;
/// Creates a download texture backed by a shared-storage MTLBuffer sized via GetBufferSize()
/// for the given dimensions/format (using PITCH_ALIGNMENT).
///
/// @param dev     Device that owns the Metal device handle and later encodes the copies.
/// @param width   Texture width in pixels.
/// @param height  Texture height in pixels.
/// @param format  Pixel format of the data to be downloaded.
/// @return The new download texture, or an empty pointer (after logging) if the buffer
///         could not be allocated.
std::unique_ptr<GSDownloadTextureMTL> GSDownloadTextureMTL::Create(GSDeviceMTL* dev, u32 width, u32 height, GSTexture::Format format)
{
	const u32 buffer_size = GetBufferSize(width, height, format, PITCH_ALIGNMENT);

	MRCOwned<id<MTLBuffer>> buffer = MRCTransfer([dev->m_dev.dev newBufferWithLength:buffer_size options:MTLResourceStorageModeShared]);
	if (!buffer)
	{
		Console.Error("Failed to allocate %u byte download texture buffer (out of memory?)", buffer_size);
		return {};
	}

	// The constructor takes the buffer by value as a sink parameter, so hand over our
	// reference instead of copying it (avoids a redundant retain/release pair).
	return std::unique_ptr<GSDownloadTextureMTL>(new GSDownloadTextureMTL(dev, std::move(buffer), width, height, format));
}
/// Encodes a blit of rectangle `src` of mip level `src_level` of `stex` into this download
/// buffer at rectangle `drc`. The blit is only encoded here; Flush() must be called before
/// the data is read.
///
/// When `use_transfer_pitch` is true the row pitch is derived from the width of `drc`
/// (which must then start at 0,0); otherwise it is derived from the full texture width.
void GSDownloadTextureMTL::CopyFromTexture(
const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch)
{ @autoreleasepool {
GSTextureMTL* const mtlTex = static_cast<GSTextureMTL*>(stex);
// Preconditions: matching format, equal-sized rectangles, both rectangles in bounds,
// a valid mip level, and an origin-anchored destination when using the transfer pitch.
pxAssert(mtlTex->GetFormat() == m_format);
pxAssert(drc.width() == src.width() && drc.height() == src.height());
pxAssert(src.z <= mtlTex->GetWidth() && src.w <= mtlTex->GetHeight());
pxAssert(static_cast<u32>(drc.z) <= m_width && static_cast<u32>(drc.w) <= m_height);
pxAssert(src_level < static_cast<u32>(mtlTex->GetMipmapLevels()));
pxAssert((drc.left == 0 && drc.top == 0) || !use_transfer_pitch);
u32 copy_offset, copy_size, copy_rows;
m_current_pitch =
GetTransferPitch(use_transfer_pitch ? static_cast<u32>(drc.width()) : m_width, PITCH_ALIGNMENT);
GetTransferSize(drc, &copy_offset, &copy_size, &copy_rows);
// The blit is encoded outside of any render pass, after pending clears on the source are flushed.
m_dev->EndRenderPass();
mtlTex->FlushClears();
g_perfmon.Put(GSPerfMon::Readbacks, 1);
// Keep a reference to the command buffer carrying the copy so Flush() can wait on it.
m_copy_cmdbuffer = MRCRetain(m_dev->GetRenderCmdBuf());
[m_copy_cmdbuffer pushDebugGroup:@"GSDownloadTextureMTL::CopyFromTexture"];
id<MTLBlitCommandEncoder> encoder = [m_copy_cmdbuffer blitCommandEncoder];
[encoder copyFromTexture:mtlTex->GetTexture()
sourceSlice:0
sourceLevel:src_level
sourceOrigin:MTLOriginMake(src.x, src.y, 0)
sourceSize:MTLSizeMake(src.width(), src.height(), 1)
toBuffer:m_buffer
destinationOffset:copy_offset
destinationBytesPerRow:m_current_pitch
destinationBytesPerImage:m_current_pitch * copy_rows];
// GetSpinFence() returns nil when spinning is inactive; only update the fence when one is returned.
if (id<MTLFence> fence = m_dev->GetSpinFence())
[encoder updateFence:fence];
[encoder endEncoding];
[m_copy_cmdbuffer popDebugGroup];
m_needs_flush = true;
}}
/// Nothing to do: the map pointer is set once in the constructor. `read_rc` is unused.
bool GSDownloadTextureMTL::Map(const GSVector4i& read_rc)
{
// Always mapped.
return true;
}
/// Intentionally empty: the buffer stays mapped for the lifetime of the object.
void GSDownloadTextureMTL::Unmap()
{
// Always mapped.
}
/// Ensures the command buffer containing the most recent copy has finished executing.
/// No-op when no copy is pending.
void GSDownloadTextureMTL::Flush()
{
	if (!m_needs_flush)
		return;

	m_needs_flush = false;

	// If the copy lives in the command buffer that is still being encoded,
	// it has to be kicked to the GPU first (and spinning requested).
	if (m_copy_cmdbuffer == m_dev->GetRenderCmdBufWithoutCreate())
		m_dev->FlushEncodersForReadback();

	// An asynchronous readback may already have completed; only wait if it has not.
	if (!IsCommandBufferCompleted([m_copy_cmdbuffer status]))
	{
		if (GSConfig.HWSpinCPUForReadbacks)
		{
			// Busy-poll the status, spinning at least once before re-checking.
			for (;;)
			{
				ShortSpin();
				if (IsCommandBufferCompleted([m_copy_cmdbuffer status]))
					break;
			}
		}
		else
		{
			[m_copy_cmdbuffer waitUntilCompleted];
		}
	}

	// The command buffer reference is no longer needed once the copy is done.
	m_copy_cmdbuffer = nil;
}
#endif

View File

@ -79,7 +79,7 @@ GSDeviceOGL::~GSDeviceOGL()
// Creates a new OpenGL texture object for the requested surface type, size, mip count and format.
// NOTE(review): two consecutive return statements are shown here, so the second is
// unreachable as written. This looks like the old line (passing m_fbo_read) and the new
// line (without it) displayed together - confirm which one is intended.
GSTexture* GSDeviceOGL::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
{
GL_PUSH("Create surface");
return new GSTextureOGL(type, width, height, levels, format, m_fbo_read);
return new GSTextureOGL(type, width, height, levels, format);
}
bool GSDeviceOGL::Create()
@ -790,6 +790,11 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, u8 c)
glClearBufferiv(GL_STENCIL, 0, &color);
}
/// Creates an OpenGL download texture.
/// Forwards to GSDownloadTextureOGL::Create(), which returns an empty pointer on failure.
std::unique_ptr<GSDownloadTexture> GSDeviceOGL::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
return GSDownloadTextureOGL::Create(width, height, format);
}
GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel)
{
GL_PUSH("Create Sampler");
@ -1082,17 +1087,6 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
return src;
}
/// Reads `rect` of `src` back into the device's download buffer and returns the
/// resulting pointer/pitch through `out_map`. Always returns true.
bool GSDeviceOGL::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map)
{
	ASSERT(src);

	// Account for the readback in the performance counters.
	g_perfmon.Put(GSPerfMon::Readbacks, 1);

	out_map = static_cast<GSTextureOGL*>(src)->Read(rect, m_download_buffer);
	return true;
}
// Copy a sub part of texture (same as below but force a conversion)
void GSDeviceOGL::BlitRect(GSTexture* sTex, const GSVector4i& r, const GSVector2i& dsize, bool at_origin, bool linear)
{

View File

@ -274,8 +274,6 @@ private:
GSHWDrawConfig::VSConstantBuffer m_vs_cb_cache;
GSHWDrawConfig::PSConstantBuffer m_ps_cb_cache;
AlignedBuffer<u8, 32> m_download_buffer;
GSTexture* CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format) final;
void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) final;
@ -296,11 +294,16 @@ public:
GSDeviceOGL();
virtual ~GSDeviceOGL();
__fi static GSDeviceOGL* GetInstance() { return static_cast<GSDeviceOGL*>(g_gs_device.get()); }
// Used by OpenGL, so the same calling convention is required.
static void APIENTRY DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar* gl_message, const void* userParam);
static GL::StreamBuffer* GetTextureUploadBuffer();
__fi u32 GetFBORead() const { return m_fbo_read; }
__fi u32 GetFBOWrite() const { return m_fbo_write; }
bool Create() override;
void ResetAPIState() override;
@ -316,9 +319,9 @@ public:
void ClearDepth(GSTexture* t) final;
void ClearStencil(GSTexture* t, u8 c) final;
GSTexture* InitPrimDateTexture(GSTexture* rt, const GSVector4i& area, bool datm);
std::unique_ptr<GSDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format) override;
bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) final;
GSTexture* InitPrimDateTexture(GSTexture* rt, const GSVector4i& area, bool datm);
void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) final;

View File

@ -21,18 +21,18 @@
#include "GS/GSPerfMon.h"
#include "GS/GSPng.h"
#include "GS/GSGL.h"
#include "common/AlignedMalloc.h"
#include "common/StringUtil.h"
static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 256;
GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format format, GLuint fbo_read)
GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format format)
{
// OpenGL didn't like dimensions of size 0
m_size.x = std::max(1, width);
m_size.y = std::max(1, height);
m_format = format;
m_type = type;
m_fbo_read = fbo_read;
m_texture_id = 0;
m_mipmap_levels = 1;
int gl_fmt = 0;
@ -201,7 +201,7 @@ void GSTextureOGL::Clear(const void* data, const GSVector4i& area)
bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int layer)
{
ASSERT(m_type != Type::DepthStencil && m_type != Type::Offscreen);
ASSERT(m_type != Type::DepthStencil);
if (layer >= m_mipmap_levels)
return true;
@ -332,30 +332,6 @@ void GSTextureOGL::GenerateMipmap()
glGenerateTextureMipmap(m_texture_id);
}
/// Synchronously reads rectangle `r` of this texture into `buffer` (grown as needed)
/// via glReadPixels and returns a map describing the pixel data and its row pitch.
GSTexture::GSMap GSTextureOGL::Read(const GSVector4i& r, AlignedBuffer<u8, 32>& buffer)
{
	const int read_width = r.width();
	const int read_height = r.height();

	GSMap result;
	result.pitch = read_width << m_int_shift;
	buffer.MakeRoomFor(result.pitch * read_height);
	result.bits = buffer.GetPtr();

	// A PBO would allow an asynchronous read, but the GS needs the data right now.

	// Attach the texture to the dedicated read framebuffer so other bindings are left alone.
	glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
	glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_id, 0);

	// In case a target is 16 bits (GT4)
	glPixelStorei(GL_PACK_ALIGNMENT, 1u << m_int_shift);
	glReadPixels(r.x, r.y, read_width, read_height, m_int_format, m_int_type, result.bits);

	glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
	return result;
}
bool GSTextureOGL::Save(const std::string& fn)
{
// Collect the texture data
@ -429,3 +405,156 @@ void GSTextureOGL::Swap(GSTexture* tex)
std::swap(m_int_type, static_cast<GSTextureOGL*>(tex)->m_int_type);
std::swap(m_int_shift, static_cast<GSTextureOGL*>(tex)->m_int_shift);
}
/// Forwards the dimensions and format to the GSDownloadTexture base class.
/// The backing storage (GL buffer or CPU buffer) is attached afterwards by Create().
GSDownloadTextureOGL::GSDownloadTextureOGL(u32 width, u32 height, GSTexture::Format format)
: GSDownloadTexture(width, height, format)
{
}
/// Releases whichever backing store this object owns: the GL buffer
/// (plus any sync object and mapping) or the CPU fallback buffer.
GSDownloadTextureOGL::~GSDownloadTextureOGL()
{
	// CPU fallback path: no GL buffer was ever created.
	if (m_buffer_id == 0)
	{
		if (m_cpu_buffer)
			_aligned_free(m_cpu_buffer);
		return;
	}

	if (m_sync)
		glDeleteSync(m_sync);

	// The buffer must be bound to be unmapped; the binding is reset afterwards.
	if (m_map_pointer)
	{
		glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_id);
		glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
		glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
	}

	glDeleteBuffers(1, &m_buffer_id);
}
/// Creates an OpenGL download texture. When buffer storage is available (GL 4.4,
/// ARB_buffer_storage or EXT_buffer_storage) a persistently mapped pixel-pack buffer is
/// used; otherwise a 32-byte aligned CPU buffer is allocated for use with glReadPixels().
/// Returns an empty pointer if the mapping or the allocation fails.
std::unique_ptr<GSDownloadTextureOGL> GSDownloadTextureOGL::Create(u32 width, u32 height, GSTexture::Format format)
{
	const u32 buffer_size = GetBufferSize(width, height, format, GSTexture::GetCompressedBytesPerBlock(format));
	const bool use_buffer_storage = (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage);

	if (!use_buffer_storage)
	{
		// Fallback to glReadPixels() + CPU buffer.
		u8* const host_memory = static_cast<u8*>(_aligned_malloc(buffer_size, 32));
		if (!host_memory)
			return {};

		std::unique_ptr<GSDownloadTextureOGL> dltex(new GSDownloadTextureOGL(width, height, format));
		dltex->m_cpu_buffer = host_memory;
		dltex->m_map_pointer = host_memory;
		return dltex;
	}

	GLuint pbo;
	glGenBuffers(1, &pbo);
	glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);

	// Storage is created readable, persistent and coherent; the map itself only
	// requests read + persistent access.
	const u32 storage_flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
	const u32 mapping_flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT;

	if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage)
		glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, storage_flags);
	else if (GLAD_GL_EXT_buffer_storage)
		glBufferStorageEXT(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, storage_flags);

	u8* const mapped = static_cast<u8*>(glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, buffer_size, mapping_flags));
	glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

	if (!mapped)
	{
		Console.Error("Failed to map persistent download buffer");
		glDeleteBuffers(1, &pbo);
		return {};
	}

	std::unique_ptr<GSDownloadTextureOGL> dltex(new GSDownloadTextureOGL(width, height, format));
	dltex->m_buffer_id = pbo;
	dltex->m_buffer_size = buffer_size;
	dltex->m_map_pointer = mapped;
	return dltex;
}
// Queues a readback of the `src` rectangle of mip level `src_level` of `stex`
// into the `drc` rectangle of this download texture.
//
//  drc                - destination rectangle inside this download texture.
//  stex               - source texture; must be a GSTextureOGL of the same format.
//  src                - source rectangle; must have the same size as drc.
//  src_level          - mip level of stex to read from.
//  use_transfer_pitch - when set, the row pitch is derived from the width of drc
//                       rather than from the full width of the download texture
//                       (only valid when drc starts at the origin).
//
// With a CPU buffer the pixels are available as soon as this returns. With a
// pixel pack buffer a fence is inserted and Flush() has to be called before
// the data is read.
void GSDownloadTextureOGL::CopyFromTexture(
	const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch)
{
	GSTextureOGL* const glTex = static_cast<GSTextureOGL*>(stex);

	pxAssert(glTex->GetFormat() == m_format);
	pxAssert(drc.width() == src.width() && drc.height() == src.height());
	pxAssert(src.z <= glTex->GetWidth() && src.w <= glTex->GetHeight());
	pxAssert(static_cast<u32>(drc.z) <= m_width && static_cast<u32>(drc.w) <= m_height);
	pxAssert(src_level < static_cast<u32>(glTex->GetMipmapLevels()));
	pxAssert((drc.left == 0 && drc.top == 0) || !use_transfer_pitch);

	u32 copy_offset, copy_size, copy_rows;
	m_current_pitch =
		GetTransferPitch(use_transfer_pitch ? static_cast<u32>(drc.width()) : m_width, GSTexture::GetCompressedBytesPerBlock(m_format));
	GetTransferSize(drc, &copy_offset, &copy_size, &copy_rows);

	g_perfmon.Put(GSPerfMon::Readbacks, 1);

	glPixelStorei(GL_PACK_ALIGNMENT, 1u << glTex->GetIntShift());
	glPixelStorei(GL_PACK_ROW_LENGTH, GSTexture::CalcUploadRowLengthFromPitch(m_format, m_current_pitch));

	const bool use_pbo = (m_cpu_buffer == nullptr);
	if (use_pbo)
	{
		// Read to PBO.
		glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_id);
	}

	// While a pack buffer is bound, the last argument of glReadPixels() is a byte
	// offset into that buffer; otherwise it is a client memory address. Either way
	// the pixels have to land at the position of drc, not at the start of the buffer.
	u8* const dst = use_pbo ? reinterpret_cast<u8*>(static_cast<size_t>(copy_offset)) : (m_cpu_buffer + copy_offset);

	glBindFramebuffer(GL_READ_FRAMEBUFFER, GSDeviceOGL::GetInstance()->GetFBORead());
	glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, glTex->GetID(), src_level);

	// The region being read is the *source* rectangle.
	glReadPixels(src.left, src.top, src.width(), src.height(), glTex->GetIntFormat(), glTex->GetIntType(), dst);

	glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);

	if (!use_pbo)
	{
		// If using CPU buffers, we never need to flush.
		m_needs_flush = false;
	}
	else
	{
		// Unbind the *pack* target we bound above, otherwise later glReadPixels()
		// calls into client memory would be redirected into our buffer.
		glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

		// Create a sync object so we know when the GPU is done copying.
		if (m_sync)
			glDeleteSync(m_sync);

		m_sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
		m_needs_flush = true;
	}

	glPixelStorei(GL_PACK_ROW_LENGTH, 0);
}
// Makes the downloaded pixels readable through the map pointer. Nothing has to
// be done here: Create() either maps the GL buffer up front or points the map
// pointer at the CPU buffer, so this always reports success. `read_rc` is unused.
bool GSDownloadTextureOGL::Map(const GSVector4i& read_rc)
{
// Either always mapped, or CPU buffer.
return true;
}
// Counterpart of Map(); intentionally a no-op because the map pointer set up
// in Create() is left in place (it is only released in the destructor).
void GSDownloadTextureOGL::Unmap()
{
// Either always mapped, or CPU buffer.
}
// Blocks until the copy queued by the last CopyFromTexture() has finished.
// Does nothing when no copy is pending or when no fence exists (the CPU buffer
// path never creates one, the readback there is synchronous).
void GSDownloadTextureOGL::Flush()
{
	const bool copy_pending = m_needs_flush && m_sync;
	if (copy_pending)
	{
		m_needs_flush = false;

		// Wait on the fence (flushing queued commands first), then retire it.
		glClientWaitSync(m_sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
		glDeleteSync(m_sync);
		m_sync = {};
	}
}

View File

@ -17,7 +17,6 @@
#include "GS/Renderers/Common/GSTexture.h"
#include "GS/Renderers/OpenGL/GLLoader.h"
#include "common/AlignedMalloc.h"
class GSTextureOGL final : public GSTexture
{
@ -41,8 +40,12 @@ private:
u32 m_int_shift = 0;
public:
explicit GSTextureOGL(Type type, int width, int height, int levels, Format format, GLuint fbo_read);
virtual ~GSTextureOGL();
explicit GSTextureOGL(Type type, int width, int height, int levels, Format format);
~GSTextureOGL() override;
__fi GLenum GetIntFormat() const { return m_int_format; }
__fi GLenum GetIntType() const { return m_int_type; }
__fi u32 GetIntShift() const { return m_int_shift; }
void* GetNativeHandle() const override;
@ -71,3 +74,29 @@ public:
void Clear(const void* data);
void Clear(const void* data, const GSVector4i& area);
};
// OpenGL implementation of GSDownloadTexture: a destination for texture
// readbacks, backed either by a GL buffer object or by a CPU allocation.
// Instances are obtained through Create(); the constructor is private.
class GSDownloadTextureOGL final : public GSDownloadTexture
{
public:
~GSDownloadTextureOGL() override;
// Creates a download texture able to hold width x height texels of `format`.
static std::unique_ptr<GSDownloadTextureOGL> Create(u32 width, u32 height, GSTexture::Format format);
// Queues a copy of `src` (mip `src_level` of `stex`) into `drc` of this object.
void CopyFromTexture(const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch) override;
// Map/Unmap bracket CPU access to the downloaded data.
bool Map(const GSVector4i& read_rc) override;
void Unmap() override;
// Waits for a previously queued copy to complete.
void Flush() override;
private:
GSDownloadTextureOGL(u32 width, u32 height, GSTexture::Format format);
// GL buffer object used as the readback destination; stays 0 when the CPU buffer is used instead.
GLuint m_buffer_id = 0;
// Size in bytes of the GL buffer's storage.
u32 m_buffer_size = 0;
// Fence inserted after the most recent copy into the GL buffer; null when none is outstanding.
GLsync m_sync = {};
// used when buffer storage is not available
u8* m_cpu_buffer = nullptr;
};

View File

@ -402,8 +402,6 @@ VkFormat GSDeviceVK::LookupNativeFormat(GSTexture::Format format) const
GSTexture* GSDeviceVK::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
{
pxAssert(type != GSTexture::Type::Offscreen);
const u32 clamped_width = static_cast<u32>(std::clamp<int>(width, 1, g_vulkan_context->GetMaxImageDimension2D()));
const u32 clamped_height = static_cast<u32>(std::clamp<int>(height, 1, g_vulkan_context->GetMaxImageDimension2D()));
@ -419,88 +417,11 @@ GSTexture* GSDeviceVK::CreateSurface(GSTexture::Type type, int width, int height
return tex.release();
}
// NOTE(review): this span carries two signatures (DownloadTexture and
// CreateDownloadTexture) and two trailing return statements back to back, i.e. an
// old and a new implementation are interleaved here. The comments below describe
// the statements as written; confirm which lines are meant to survive.
bool GSDeviceVK::DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map)
std::unique_ptr<GSDownloadTexture> GSDeviceVK::CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format)
{
// Tightly packed layout: one row is width * texel size, no extra padding.
const u32 width = rect.width();
const u32 height = rect.height();
const u32 pitch = width * Vulkan::Util::GetTexelSize(static_cast<GSTextureVK*>(src)->GetNativeFormat());
const u32 size = pitch * height;
const u32 level = 0;
// Grow the shared staging buffer if this readback does not fit.
if (!CheckStagingBufferSize(size))
{
Console.Error("Can't read back %ux%u", width, height);
return false;
}
g_perfmon.Put(GSPerfMon::Readbacks, 1);
// The copy and layout transitions are recorded after ending the current render pass.
EndRenderPass();
{
const VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
GL_INS("ReadbackTexture: {%d,%d} %ux%u", rect.left, rect.top, width, height);
GSTextureVK* vkSrc = static_cast<GSTextureVK*>(src);
// Remember the current layout so it can be restored once the copy is recorded.
VkImageLayout old_layout = vkSrc->GetTexture().GetLayout();
if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
vkSrc->GetTexture().TransitionSubresourcesToLayout(
cmdbuf, level, 1, 0, 1, old_layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
VkBufferImageCopy image_copy = {};
// NOTE(review): GetFormat() is cast directly to VkFormat here, while the pitch
// computation above uses GetNativeFormat() - verify this test sees a real VkFormat.
const VkImageAspectFlags aspect = Vulkan::Util::IsDepthFormat(static_cast<VkFormat>(vkSrc->GetFormat())) ?
VK_IMAGE_ASPECT_DEPTH_BIT :
VK_IMAGE_ASPECT_COLOR_BIT;
image_copy.bufferOffset = 0;
image_copy.bufferRowLength = width;
image_copy.bufferImageHeight = 0;
image_copy.imageSubresource = {aspect, level, 0u, 1u};
image_copy.imageOffset = {rect.left, rect.top, 0};
image_copy.imageExtent = {width, height, 1u};
// invalidate gpu cache
// TODO: Needed?
Vulkan::Util::BufferMemoryBarrier(cmdbuf, m_readback_staging_buffer, 0, VK_ACCESS_TRANSFER_WRITE_BIT, 0, size,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
// do the copy
vkCmdCopyImageToBuffer(cmdbuf, vkSrc->GetTexture().GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
m_readback_staging_buffer, 1, &image_copy);
// flush gpu cache
// NOTE(review): the seventh argument is a pipeline stage in the call above, but an
// access flag (VK_ACCESS_TRANSFER_WRITE_BIT) is passed in that position here.
Vulkan::Util::BufferMemoryBarrier(cmdbuf, m_readback_staging_buffer, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_HOST_READ_BIT, 0, size, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT);
// Put the image back into the layout it had before the copy.
if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
{
vkSrc->GetTexture().TransitionSubresourcesToLayout(
cmdbuf, level, 1, 0, 1, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, old_layout);
}
}
// Submit with wait_for_completion = true before the staging buffer is read.
ExecuteCommandBuffer(true);
if (GSConfig.HWSpinGPUForReadbacks)
{
g_vulkan_context->NotifyOfReadback();
// Show the slow-spin warning at most once.
if (!g_vulkan_context->GetOptionalExtensions().vk_ext_calibrated_timestamps && !m_warned_slow_spin)
{
m_warned_slow_spin = true;
Host::AddKeyedOSDMessage("GSDeviceVK_NoCalibratedTimestamps",
"Spin GPU During Readbacks is enabled, but calibrated timestamps are unavailable. This might be really slow.",
Host::OSD_WARNING_DURATION);
}
}
// invalidate cpu cache before reading
VkResult res = vmaInvalidateAllocation(g_vulkan_context->GetAllocator(), m_readback_staging_allocation, 0, size);
if (res != VK_SUCCESS)
LOG_VULKAN_ERROR(res, "vmaInvalidateAllocation() failed, readback may be incorrect: ");
// Hand the caller the staging buffer's mapping together with the packed pitch.
out_map.bits = reinterpret_cast<u8*>(m_readback_staging_buffer_map);
out_map.pitch = pitch;
return true;
return GSDownloadTextureVK::Create(width, height, format);
}
// Empty on purpose: this implementation has no work to do at this point.
void GSDeviceVK::DownloadTextureComplete() {}
void GSDeviceVK::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY)
{
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
@ -1919,50 +1840,6 @@ bool GSDeviceVK::DoCAS(GSTexture* sTex, GSTexture* dTex, bool sharpen_only, cons
return true;
}
// Makes sure the readback staging buffer can hold `required_size` bytes.
// A buffer that is already large enough is kept; otherwise it is destroyed and
// a new one of exactly `required_size` bytes is created.
// Returns false if the new buffer could not be created.
bool GSDeviceVK::CheckStagingBufferSize(u32 required_size)
{
	if (required_size <= m_readback_staging_buffer_size)
		return true;

	DestroyStagingBuffer();

	VkBufferCreateInfo buffer_info = {};
	buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	buffer_info.pNext = nullptr;
	buffer_info.flags = 0u;
	buffer_info.size = required_size;
	buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
	buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	buffer_info.queueFamilyIndexCount = 0u;
	buffer_info.pQueueFamilyIndices = nullptr;

	// Ask for GPU-to-CPU memory that is created mapped, cached on the host if possible.
	VmaAllocationCreateInfo alloc_create_info = {};
	alloc_create_info.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
	alloc_create_info.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
	alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

	VmaAllocationInfo alloc_info = {};
	const VkResult res = vmaCreateBuffer(g_vulkan_context->GetAllocator(), &buffer_info, &alloc_create_info,
		&m_readback_staging_buffer, &m_readback_staging_allocation, &alloc_info);
	if (res != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: ");
		return false;
	}

	m_readback_staging_buffer_size = required_size;
	m_readback_staging_buffer_map = alloc_info.pMappedData;
	return true;
}
// Releases the readback staging buffer (if there is one) and resets all of
// the bookkeeping that describes it.
void GSDeviceVK::DestroyStagingBuffer()
{
	// The mapping goes away together with the buffer, so only the pointer needs clearing.
	m_readback_staging_buffer_size = 0;
	m_readback_staging_buffer_map = nullptr;

	if (m_readback_staging_buffer == VK_NULL_HANDLE)
		return;

	vmaDestroyBuffer(g_vulkan_context->GetAllocator(), m_readback_staging_buffer, m_readback_staging_allocation);
	m_readback_staging_allocation = VK_NULL_HANDLE;
	m_readback_staging_buffer = VK_NULL_HANDLE;
}
void GSDeviceVK::DestroyResources()
{
g_vulkan_context->ExecuteCommandBuffer(Vulkan::Context::WaitType::Sleep);
@ -2025,8 +1902,6 @@ void GSDeviceVK::DestroyResources()
m_date_setup_render_pass = VK_NULL_HANDLE;
m_swap_chain_render_pass = VK_NULL_HANDLE;
DestroyStagingBuffer();
m_fragment_uniform_stream_buffer.Destroy(false);
m_vertex_uniform_stream_buffer.Destroy(false);
m_index_stream_buffer.Destroy(false);
@ -2396,6 +2271,22 @@ void GSDeviceVK::ExecuteCommandBufferAndRestartRenderPass(bool wait_for_completi
}
}
// Submits the current command buffer with wait_for_completion set, for use by
// readbacks. When "Spin GPU During Readbacks" is enabled the context is told
// about the readback, and a one-time OSD warning is raised if calibrated
// timestamps are not available.
void GSDeviceVK::ExecuteCommandBufferForReadback()
{
	ExecuteCommandBuffer(true);

	if (!GSConfig.HWSpinGPUForReadbacks)
		return;

	g_vulkan_context->NotifyOfReadback();

	// Nothing to warn about when the extension exists, or when we already warned.
	if (g_vulkan_context->GetOptionalExtensions().vk_ext_calibrated_timestamps || m_warned_slow_spin)
		return;

	m_warned_slow_spin = true;
	Host::AddKeyedOSDMessage("GSDeviceVK_NoCalibratedTimestamps",
		"Spin GPU During Readbacks is enabled, but calibrated timestamps are unavailable. This might be really slow.",
		Host::OSD_WARNING_DURATION);
}
void GSDeviceVK::InvalidateCachedState()
{
m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS_DS | DIRTY_FLAG_TFX_RT_TEXTURE_DS | DIRTY_FLAG_TFX_DYNAMIC_OFFSETS |

View File

@ -107,12 +107,6 @@ private:
Vulkan::StreamBuffer m_vertex_uniform_stream_buffer;
Vulkan::StreamBuffer m_fragment_uniform_stream_buffer;
VmaAllocation m_readback_staging_allocation = VK_NULL_HANDLE;
VkBuffer m_readback_staging_buffer = VK_NULL_HANDLE;
void* m_readback_staging_buffer_map = nullptr;
u32 m_readback_staging_buffer_size = 0;
bool m_warned_slow_spin = false;
VkSampler m_point_sampler = VK_NULL_HANDLE;
VkSampler m_linear_sampler = VK_NULL_HANDLE;
@ -192,9 +186,6 @@ private:
bool CompilePostProcessingPipelines();
bool CompileCASPipelines();
bool CheckStagingBufferSize(u32 required_size);
void DestroyStagingBuffer();
void DestroyResources();
public:
@ -231,8 +222,7 @@ public:
void ClearDepth(GSTexture* t) override;
void ClearStencil(GSTexture* t, u8 c) override;
bool DownloadTexture(GSTexture* src, const GSVector4i& rect, GSTexture::GSMap& out_map) override;
void DownloadTextureComplete() override;
std::unique_ptr<GSDownloadTexture> CreateDownloadTexture(u32 width, u32 height, GSTexture::Format format) override;
void CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, u32 destX, u32 destY) override;
@ -286,6 +276,7 @@ public:
void ExecuteCommandBuffer(bool wait_for_completion);
void ExecuteCommandBuffer(bool wait_for_completion, const char* reason, ...);
void ExecuteCommandBufferAndRestartRenderPass(bool wait_for_completion, const char* reason);
void ExecuteCommandBufferForReadback();
/// Set dirty flags on everything to force re-bind at next draw time.
void InvalidateCachedState();
@ -355,6 +346,7 @@ private:
// Which bindings/state has to be updated before the next draw.
u32 m_dirty_flags = 0;
bool m_current_framebuffer_has_feedback_loop = false;
bool m_warned_slow_spin = false;
// input assembly
VkBuffer m_vertex_buffer = VK_NULL_HANDLE;

View File

@ -463,3 +463,138 @@ VkFramebuffer GSTextureVK::GetLinkedFramebuffer(GSTextureVK* depth_texture, bool
depth_texture->m_framebuffers.emplace_back(this, fb, feedback_loop);
return fb;
}
// Constructs an empty download texture: the dimensions and format are forwarded
// to the GSDownloadTexture base. The Vulkan buffer and its allocation are
// attached afterwards by Create().
GSDownloadTextureVK::GSDownloadTextureVK(u32 width, u32 height, GSTexture::Format format)
: GSDownloadTexture(width, height, format)
{
}
// Hands the buffer and its allocation to the context for deferred destruction.
GSDownloadTextureVK::~GSDownloadTextureVK()
{
	if (m_buffer == VK_NULL_HANDLE)
		return;

	// The buffer was created already mapped, so there is no explicit unmap step here.
	g_vulkan_context->DeferBufferDestruction(m_buffer, m_allocation);
}
// Factory for download textures. Allocates a transfer-destination buffer in
// GPU-to-CPU memory, created mapped, sized via GetBufferSize() using the
// context's buffer-copy row pitch alignment.
// Returns an empty pointer if the buffer cannot be created.
std::unique_ptr<GSDownloadTextureVK> GSDownloadTextureVK::Create(u32 width, u32 height, GSTexture::Format format)
{
	const u32 buffer_size = GetBufferSize(width, height, format, g_vulkan_context->GetBufferCopyRowPitchAlignment());

	VkBufferCreateInfo buffer_info = {};
	buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	buffer_info.pNext = nullptr;
	buffer_info.flags = 0u;
	buffer_info.size = buffer_size;
	buffer_info.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
	buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	buffer_info.queueFamilyIndexCount = 0u;
	buffer_info.pQueueFamilyIndices = nullptr;

	// Host-cached memory is preferred (not required) for the CPU-side reads.
	VmaAllocationCreateInfo alloc_create_info = {};
	alloc_create_info.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
	alloc_create_info.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
	alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

	VmaAllocationInfo alloc_info = {};
	VmaAllocation new_allocation;
	VkBuffer new_buffer;
	const VkResult res = vmaCreateBuffer(
		g_vulkan_context->GetAllocator(), &buffer_info, &alloc_create_info, &new_buffer, &new_allocation, &alloc_info);
	if (res != VK_SUCCESS)
	{
		LOG_VULKAN_ERROR(res, "vmaCreateBuffer() failed: ");
		return {};
	}

	std::unique_ptr<GSDownloadTextureVK> result(new GSDownloadTextureVK(width, height, format));
	result->m_buffer = new_buffer;
	result->m_allocation = new_allocation;
	result->m_buffer_size = buffer_size;
	result->m_map_pointer = static_cast<const u8*>(alloc_info.pMappedData);
	return result;
}
void GSDownloadTextureVK::CopyFromTexture(
const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch)
{
GSTextureVK* const vkTex = static_cast<GSTextureVK*>(stex);
pxAssert(vkTex->GetFormat() == m_format);
pxAssert(drc.width() == src.width() && drc.height() == src.height());
pxAssert(src.z <= vkTex->GetWidth() && src.w <= vkTex->GetHeight());
pxAssert(static_cast<u32>(drc.z) <= m_width && static_cast<u32>(drc.w) <= m_height);
pxAssert(src_level < static_cast<u32>(vkTex->GetMipmapLevels()));
pxAssert((drc.left == 0 && drc.top == 0) || !use_transfer_pitch);
u32 copy_offset, copy_size, copy_rows;
m_current_pitch =
GetTransferPitch(use_transfer_pitch ? static_cast<u32>(drc.width()) : m_width, g_vulkan_context->GetBufferCopyRowPitchAlignment());
GetTransferSize(drc, &copy_offset, &copy_size, &copy_rows);
g_perfmon.Put(GSPerfMon::Readbacks, 1);
GSDeviceVK::GetInstance()->EndRenderPass();
vkTex->CommitClear();
const VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
GL_INS("GSDownloadTextureVK::CopyFromTexture: {%d,%d} %ux%u", src.left, src.top, src.width(), src.height());
VkImageLayout old_layout = vkTex->GetTexture().GetLayout();
if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
{
vkTex->GetTexture().TransitionSubresourcesToLayout(cmdbuf, src_level, 1, 0, 1, old_layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
}
VkBufferImageCopy image_copy = {};
const VkImageAspectFlags aspect =
Vulkan::Util::IsDepthFormat(static_cast<VkFormat>(vkTex->GetFormat())) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
image_copy.bufferOffset = copy_offset;
image_copy.bufferRowLength = GSTexture::CalcUploadRowLengthFromPitch(m_format, m_current_pitch);
image_copy.bufferImageHeight = 0;
image_copy.imageSubresource = {aspect, src_level, 0u, 1u};
image_copy.imageOffset = {src.left, src.top, 0};
image_copy.imageExtent = {static_cast<u32>(src.width()), static_cast<u32>(src.height()), 1u};
// do the copy
vkCmdCopyImageToBuffer(cmdbuf, vkTex->GetTexture().GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_buffer, 1, &image_copy);
// flush gpu cache
Vulkan::Util::BufferMemoryBarrier(cmdbuf, m_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, 0, copy_size,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_HOST_BIT);
if (old_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
{
vkTex->GetTexture().TransitionSubresourcesToLayout(cmdbuf, src_level, 1, 0, 1, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, old_layout);
}
m_copy_fence_counter = g_vulkan_context->GetCurrentFenceCounter();
m_needs_cache_invalidate = true;
m_needs_flush = true;
}
bool GSDownloadTextureVK::Map(const GSVector4i& read_rc)
{
// Always mapped, but we might need to invalidate the cache.
if (m_needs_cache_invalidate)
{
u32 copy_offset, copy_size, copy_rows;
GetTransferSize(read_rc, &copy_offset, &copy_size, &copy_rows);
vmaInvalidateAllocation(g_vulkan_context->GetAllocator(), m_allocation, copy_offset, copy_size);
m_needs_cache_invalidate = false;
}
return true;
}
// Counterpart of Map(); intentionally a no-op because the buffer is created
// mapped and the mapping is left in place.
void GSDownloadTextureVK::Unmap()
{
// Always mapped.
}
// Ensures the copy recorded by the last CopyFromTexture() has completed.
// Three cases: the fence was already reached (nothing to do), the copy is still
// sitting in the current command buffer (submit it for readback), or it was
// submitted earlier (wait for its fence counter).
void GSDownloadTextureVK::Flush()
{
	if (!m_needs_flush)
		return;

	m_needs_flush = false;

	const bool already_completed = (g_vulkan_context->GetCompletedFenceCounter() >= m_copy_fence_counter);
	if (already_completed)
		return;

	const bool in_current_cmdbuf = (g_vulkan_context->GetCurrentFenceCounter() == m_copy_fence_counter);
	if (in_current_cmdbuf)
	{
		// Need to execute command buffer.
		GSDeviceVK::GetInstance()->ExecuteCommandBufferForReadback();
		return;
	}

	g_vulkan_context->WaitForFenceCounter(m_copy_fence_counter);
}

View File

@ -97,3 +97,29 @@ private:
// list of color textures this depth texture is linked to or vice versa
std::vector<std::tuple<GSTextureVK*, VkFramebuffer, bool>> m_framebuffers;
};
// Vulkan implementation of GSDownloadTexture: a destination for texture
// readbacks backed by a buffer that is created mapped.
// Instances are obtained through Create(); the constructor is private.
class GSDownloadTextureVK final : public GSDownloadTexture
{
public:
~GSDownloadTextureVK() override;
// Creates a download texture able to hold width x height texels of `format`.
static std::unique_ptr<GSDownloadTextureVK> Create(u32 width, u32 height, GSTexture::Format format);
// Records a copy of `src` (mip `src_level` of `stex`) into `drc` of this object.
void CopyFromTexture(const GSVector4i& drc, GSTexture* stex, const GSVector4i& src, u32 src_level, bool use_transfer_pitch) override;
// Map/Unmap bracket CPU access to the downloaded data.
bool Map(const GSVector4i& read_rc) override;
void Unmap() override;
// Waits for (and if necessary submits) a previously recorded copy.
void Flush() override;
private:
GSDownloadTextureVK(u32 width, u32 height, GSTexture::Format format);
// VMA allocation backing m_buffer.
VmaAllocation m_allocation = VK_NULL_HANDLE;
// Transfer-destination buffer that receives the image data.
VkBuffer m_buffer = VK_NULL_HANDLE;
// Context fence counter captured when the last copy was recorded; consulted by Flush().
u64 m_copy_fence_counter = 0;
// Size in bytes of m_buffer.
u32 m_buffer_size = 0;
// Set by CopyFromTexture(), cleared by the next Map() once the CPU cache was invalidated.
bool m_needs_cache_invalidate = false;
};