GS: Use 16-bit indices instead of 32-bit

Save some bandwidth.
This commit is contained in:
Stenzek 2023-04-11 00:11:21 +10:00 committed by refractionpcsx2
parent 72f70d4789
commit 839b482cb5
22 changed files with 173 additions and 191 deletions

View File

@ -2638,31 +2638,31 @@ void GSState::GrowVertexBuffer()
{
const u32 maxcount = std::max<u32>(m_vertex.maxcount * 3 / 2, 10000);
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 32);
GSVertex* vertex = static_cast<GSVertex*>(_aligned_malloc(sizeof(GSVertex) * maxcount, 32));
// Worst case index list is a list of points with vs expansion, 6 indices per point
u32* index = (u32*)_aligned_malloc(sizeof(u32) * maxcount * 6, 32);
u16* index = static_cast<u16*>(_aligned_malloc(sizeof(u16) * maxcount * 6, 32));
if (vertex == NULL || index == NULL)
if (!vertex || !index)
{
const u32 vert_byte_count = sizeof(GSVertex) * maxcount;
const u32 idx_byte_count = sizeof(u32) * maxcount * 3;
const u32 idx_byte_count = sizeof(u16) * maxcount * 3;
Console.Error("GS: failed to allocate %zu bytes for verticles and %zu for indices.",
Console.Error("GS: failed to allocate %zu bytes for vertices and %zu for indices.",
vert_byte_count, idx_byte_count);
throw GSError();
}
if (m_vertex.buff != NULL)
if (m_vertex.buff)
{
memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
std::memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
_aligned_free(m_vertex.buff);
}
if (m_index.buff != NULL)
if (m_index.buff)
{
memcpy(index, m_index.buff, sizeof(u32) * m_index.tail);
std::memcpy(index, m_index.buff, sizeof(u16) * m_index.tail);
_aligned_free(m_index.buff);
}
@ -3063,21 +3063,24 @@ static constexpr u32 MaxVerticesForPrim(u32 prim)
{
switch (prim)
{
// Four indices per 1 vertex.
case GS_POINTLIST:
case GS_INVALID:
// Needed due to expansion in hardware renderers.
// Indices are shifted left by 2 to form quads.
case GS_LINELIST:
case GS_LINESTRIP:
return (std::numeric_limits<u16>::max() / 4) - 4;
// Four indices per two vertices.
case GS_SPRITE:
return (std::numeric_limits<u16>::max() / 2) - 2;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
default:
return 0;
return (std::numeric_limits<u16>::max() - 3);
}
}
@ -3229,19 +3232,19 @@ __forceinline void GSState::VertexKick(u32 skip)
m_backed_up_ctx = m_env.PRIM.CTXT;
}
u32* RESTRICT buff = &m_index.buff[m_index.tail];
u16* RESTRICT buff = &m_index.buff[m_index.tail];
switch (prim)
{
case GS_POINTLIST:
buff[0] = head + 0;
buff[0] = static_cast<u16>(head + 0);
m_vertex.head = head + 1;
m_vertex.next = head + 1;
m_index.tail += 1;
break;
case GS_LINELIST:
buff[0] = head + (index_swap ? 1 : 0);
buff[1] = head + (index_swap ? 0 : 1);
buff[0] = static_cast<u16>(head + (index_swap ? 1 : 0));
buff[1] = static_cast<u16>(head + (index_swap ? 0 : 1));
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;
@ -3254,16 +3257,16 @@ __forceinline void GSState::VertexKick(u32 skip)
head = next;
m_vertex.tail = next + 2;
}
buff[0] = head + (index_swap ? 1 : 0);
buff[1] = head + (index_swap ? 0 : 1);
buff[0] = static_cast<u16>(head + (index_swap ? 1 : 0));
buff[1] = static_cast<u16>(head + (index_swap ? 0 : 1));
m_vertex.head = head + 1;
m_vertex.next = head + 2;
m_index.tail += 2;
break;
case GS_TRIANGLELIST:
buff[0] = head + (index_swap ? 2 : 0);
buff[1] = head + 1;
buff[2] = head + (index_swap ? 0 : 2);
buff[0] = static_cast<u16>(head + (index_swap ? 2 : 0));
buff[1] = static_cast<u16>(head + 1);
buff[2] = static_cast<u16>(head + (index_swap ? 0 : 2));
m_vertex.head = head + 3;
m_vertex.next = head + 3;
m_index.tail += 3;
@ -3277,24 +3280,24 @@ __forceinline void GSState::VertexKick(u32 skip)
head = next;
m_vertex.tail = next + 3;
}
buff[0] = head + (index_swap ? 2 : 0);
buff[1] = head + 1;
buff[2] = head + (index_swap ? 0 : 2);
buff[0] = static_cast<u16>(head + (index_swap ? 2 : 0));
buff[1] = static_cast<u16>(head + 1);
buff[2] = static_cast<u16>(head + (index_swap ? 0 : 2));
m_vertex.head = head + 1;
m_vertex.next = head + 3;
m_index.tail += 3;
break;
case GS_TRIANGLEFAN:
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
buff[0] = index_swap ? (tail - 1) : (head + 0);
buff[1] = tail - 2;
buff[2] = index_swap ? (head + 0) : (tail - 1);
buff[0] = static_cast<u16>(index_swap ? (tail - 1) : (head + 0));
buff[1] = static_cast<u16>(tail - 2);
buff[2] = static_cast<u16>(index_swap ? (head + 0) : (tail - 1));
m_vertex.next = tail;
m_index.tail += 3;
break;
case GS_SPRITE:
buff[0] = head + 0;
buff[1] = head + 1;
buff[0] = static_cast<u16>(head + 0);
buff[1] = static_cast<u16>(head + 1);
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;

View File

@ -158,7 +158,7 @@ protected:
struct
{
u32* buff;
u16* buff;
u32 tail;
} m_index = {};

View File

@ -28,6 +28,16 @@ class alignas(16) GSVector4i
{
}
// constexpr constructor from eight 16-bit lane values.
// s0..s7 are stored, in argument order, into the I16 member.
// The leading cxpr_init_tag parameter is unnamed and unused; it only selects
// this overload.
constexpr GSVector4i(cxpr_init_tag, short s0, short s1, short s2, short s3, short s4, short s5, short s6, short s7)
: I16{s0, s1, s2, s3, s4, s5, s6, s7}
{
}
// constexpr constructor from sixteen 8-bit lane values.
// b0..b15 are stored, in argument order, into the I8 member.
// The leading cxpr_init_tag parameter is unnamed and unused; it only selects
// this overload.
// NOTE(review): the parameters are plain `char`, whose signedness is
// implementation-defined — confirm callers only pass values that fit either way.
constexpr GSVector4i(cxpr_init_tag, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
: I8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}
{
}
public:
union
{
@ -62,6 +72,16 @@ public:
return GSVector4i(cxpr_init, x, x, x, x);
}
// Builds a GSVector4i in a constant expression from eight 16-bit values.
// s0..s7 are forwarded unchanged, in order, to the cxpr_init constructor that
// takes eight shorts.
constexpr static GSVector4i cxpr16(short s0, short s1, short s2, short s3, short s4, short s5, short s6, short s7)
{
return GSVector4i(cxpr_init, s0, s1, s2, s3, s4, s5, s6, s7);
}
// Builds a GSVector4i in a constant expression from sixteen 8-bit values.
// b0..b15 are forwarded unchanged, in order, to the cxpr_init constructor that
// takes sixteen chars.
constexpr static GSVector4i cxpr8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
{
return GSVector4i(cxpr_init, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15);
}
__forceinline GSVector4i(int x, int y, int z, int w)
{
m = _mm_set_epi32(w, z, y, x);
@ -2001,6 +2021,10 @@ public:
return v;
}
// Returns a vector with `value` replicated into all eight 16-bit lanes.
__forceinline static GSVector4i broadcast16(u16 value)
{
// _mm_set1_epi16 takes a signed short; cast explicitly so the u16 -> short
// reinterpretation is visible rather than an implicit narrowing conversion.
return GSVector4i(_mm_set1_epi16(static_cast<short>(value)));
}
// Returns a vector with all bits cleared (wraps _mm_setzero_si128).
__forceinline static GSVector4i zero() { return GSVector4i(_mm_setzero_si128()); }

View File

@ -18,6 +18,7 @@
#include "GS/GSGL.h"
#include "GS/GS.h"
#include "Host.h"
#include "common/Align.h"
#include "common/StringUtil.h"
#include "imgui.h"
@ -165,9 +166,9 @@ std::string GSDevice::GetFullscreenModeString(u32 width, u32 height, float refre
void GSDevice::GenerateExpansionIndexBuffer(void* buffer)
{
static constexpr u32 MAX_INDEX = std::numeric_limits<u16>::max();
static constexpr u32 MAX_INDEX = EXPAND_BUFFER_SIZE / 6 / sizeof(u16);
u32* idx_buffer = static_cast<u32*>(buffer);
u16* idx_buffer = static_cast<u16*>(buffer);
for (u32 i = 0; i < MAX_INDEX; i++)
{
const u32 base = i * 4;

View File

@ -627,7 +627,7 @@ struct alignas(16) GSHWDrawConfig
GSTexture* tex; ///< Source texture
GSTexture* pal; ///< Palette texture
const GSVertex* verts;///< Vertices to draw
const u32* indices; ///< Indices to draw
const u16* indices; ///< Indices to draw
u32 nverts; ///< Number of vertices
u32 nindices; ///< Number of indices
u32 indices_per_prim; ///< Number of indices that make up one primitive
@ -749,7 +749,7 @@ protected:
static constexpr float MAD_SENSITIVITY = 0.08f;
static constexpr u32 MAX_POOLED_TEXTURES = 300;
static constexpr u32 NUM_CAS_CONSTANTS = 12; // 8 plus src offset x/y, 16 byte alignment
static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u32) * std::numeric_limits<u16>::max() * 6;
static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u16) * 65532 * 6;
WindowInfo m_window_info;
VsyncMode m_vsync_mode = VsyncMode::Off;

View File

@ -24,7 +24,7 @@ GSVertexTrace::GSVertexTrace(const GSState* state, bool provoking_vertex_first)
MULTI_ISA_SELECT(GSVertexTracePopulateFunctions)(*this, provoking_vertex_first);
}
void GSVertexTrace::Update(const void* vertex, const u32* index, int v_count, int i_count, GS_PRIM_CLASS primclass)
void GSVertexTrace::Update(const void* vertex, const u16* index, int v_count, int i_count, GS_PRIM_CLASS primclass)
{
if (i_count == 0)
return;
@ -43,7 +43,7 @@ void GSVertexTrace::Update(const void* vertex, const u32* index, int v_count, in
// that feel big enough.
if (!fst && !m_accurate_stq && m_min.t.z > 1e30)
{
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
Console.Warning("Vertex Trace: float overflow detected ! min %e max %e", m_min.t.z, m_max.t.z);
m_accurate_stq = true;
}

View File

@ -49,7 +49,7 @@ public:
protected:
const GSState* m_state;
typedef void (*FindMinMaxPtr)(GSVertexTrace& vt, const void* vertex, const u32* index, int count);
typedef void (*FindMinMaxPtr)(GSVertexTrace& vt, const void* vertex, const u16* index, int count);
FindMinMaxPtr m_fmm[2][2][2][2][4];
@ -77,7 +77,7 @@ public:
public:
GSVertexTrace(const GSState* state, bool provoking_vertex_first);
void Update(const void* vertex, const u32* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
void Update(const void* vertex, const u16* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
bool IsLinear() const { return m_filter.opt_linear; }
bool IsRealLinear() const { return m_filter.linear; }

View File

@ -22,7 +22,7 @@ class CURRENT_ISA::GSVertexTraceFMM
static constexpr GSVector4 s_minmax = GSVector4::cxpr(FLT_MAX, -FLT_MAX, 0.f, 0.f);
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color, bool flat_swapped>
static void FindMinMax(GSVertexTrace& vt, const void* vertex, const u32* index, int count);
static void FindMinMax(GSVertexTrace& vt, const void* vertex, const u16* index, int count);
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color>
static constexpr GSVertexTrace::FindMinMaxPtr GetFMM(bool provoking_vertex_first);
@ -76,7 +76,7 @@ void GSVertexTraceFMM::Populate(GSVertexTrace& vt, bool provoking_vertex_first)
}
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color, bool flat_swapped>
void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u32* index, int count)
void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u16* index, int count)
{
const GSDrawingContext* context = vt.m_state->m_context;

View File

@ -1440,7 +1440,7 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect
const u32 vertex_reserve_size = num_rects * 4;
const u32 index_reserve_size = num_rects * 6;
GSVertexPT1* verts = static_cast<GSVertexPT1*>(IAMapVertexBuffer(sizeof(GSVertexPT1), vertex_reserve_size));
u32* idx = IAMapIndexBuffer(index_reserve_size);
u16* idx = IAMapIndexBuffer(index_reserve_size);
u32 icount = 0;
u32 vcount = 0;
for (u32 i = 0; i < num_rects; i++)
@ -1712,7 +1712,6 @@ void GSDevice11::RenderImGui()
const UINT vb_stride = sizeof(ImDrawVert);
const UINT vb_offset = 0;
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &vb_stride, &vb_offset);
m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R16_UINT, 0);
IASetInputLayout(m_imgui.il.get());
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get());
@ -1756,16 +1755,8 @@ void GSDevice11::RenderImGui()
m_ctx->Unmap(m_vb.get(), 0);
}
// Bit awkward, because this is using 16-bit indices, not 32-bit.
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
const u32 index_count = static_cast<u32>(cmd_list->IdxBuffer.Size + 1) / 2;
u32* index_map = IAMapIndexBuffer(index_count);
if (!index_map)
continue;
const u32 index_start = m_index.start * 2;
std::memcpy(index_map, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx));
IAUnmapIndexBuffer(index_count);
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
{
@ -1787,14 +1778,13 @@ void GSDevice11::RenderImGui()
m_state.ps_sr_views[0] = static_cast<ID3D11ShaderResourceView*>(pcmd->GetTexID());
PSUpdateShaderState();
m_ctx->DrawIndexed(pcmd->ElemCount, index_start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset);
m_ctx->DrawIndexed(pcmd->ElemCount, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset);
}
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
}
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset);
m_ctx->IASetIndexBuffer(m_state.index_buffer, DXGI_FORMAT_R32_UINT, 0);
}
void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm)
@ -1912,9 +1902,9 @@ bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 cou
return true;
}
u32* GSDevice11::IAMapIndexBuffer(u32 count)
u16* GSDevice11::IAMapIndexBuffer(u32 count)
{
if (count > (INDEX_BUFFER_SIZE / sizeof(u32)))
if (count > (INDEX_BUFFER_SIZE / sizeof(u16)))
return nullptr;
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
@ -1922,7 +1912,7 @@ u32* GSDevice11::IAMapIndexBuffer(u32 count)
m_index.start = m_ib_pos;
m_ib_pos += count;
if (m_ib_pos > (INDEX_BUFFER_SIZE / sizeof(u32)))
if (m_ib_pos > (INDEX_BUFFER_SIZE / sizeof(u16)))
{
m_index.start = 0;
m_ib_pos = count;
@ -1933,7 +1923,7 @@ u32* GSDevice11::IAMapIndexBuffer(u32 count)
if (FAILED(m_ctx->Map(m_ib.get(), 0, type, 0, &m)))
return nullptr;
return static_cast<u32*>(m.pData) + m_index.start;
return static_cast<u16*>(m.pData) + m_index.start;
}
void GSDevice11::IAUnmapIndexBuffer(u32 count)
@ -1944,11 +1934,11 @@ void GSDevice11::IAUnmapIndexBuffer(u32 count)
bool GSDevice11::IASetIndexBuffer(const void* index, u32 count)
{
u32* map = IAMapIndexBuffer(count);
u16* map = IAMapIndexBuffer(count);
if (!map)
return false;
std::memcpy(map, index, count * sizeof(u32));
std::memcpy(map, index, count * sizeof(u16));
IAUnmapIndexBuffer(count);
IASetIndexBuffer(m_ib.get());
return true;
@ -1958,7 +1948,7 @@ void GSDevice11::IASetIndexBuffer(ID3D11Buffer* buffer)
{
if (m_state.index_buffer != buffer)
{
m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R32_UINT, 0);
m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R16_UINT, 0);
m_state.index_buffer = buffer;
}
}

View File

@ -343,7 +343,7 @@ public:
bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count);
bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count);
u32* IAMapIndexBuffer(u32 count);
u16* IAMapIndexBuffer(u32 count);
void IAUnmapIndexBuffer(u32 count);
bool IASetIndexBuffer(const void* index, u32 count);
void IASetIndexBuffer(ID3D11Buffer* buffer);

View File

@ -949,13 +949,13 @@ void GSDevice12::DoMultiStretchRects(
{
// Set up vertices first.
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
{
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
{
pxFailRel("Failed to reserve space for vertices");
}
@ -965,7 +965,7 @@ void GSDevice12::DoMultiStretchRects(
// Don't use primitive restart here, it ends up slower on some drivers.
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
u32* idx = reinterpret_cast<u32*>(m_index_stream_buffer.GetCurrentHostPointer());
u16* idx = reinterpret_cast<u16*>(m_index_stream_buffer.GetCurrentHostPointer());
u32 icount = 0;
u32 vcount = 0;
for (u32 i = 0; i < num_rects; i++)
@ -996,12 +996,12 @@ void GSDevice12::DoMultiStretchRects(
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
m_vertex.count = vcount;
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
m_index.count = icount;
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
m_index_stream_buffer.CommitMemory(icount * sizeof(u32));
m_index_stream_buffer.CommitMemory(icount * sizeof(u16));
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(GSVertexPT1));
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R32_UINT);
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
// Even though we're batching, a cmdbuffer submit could've messed this up.
const GSVector4i rc(dTex->GetRect());
@ -1422,22 +1422,10 @@ void GSDevice12::RenderImGui()
m_vertex_stream_buffer.CommitMemory(size);
}
u32 index_offset;
{
const u32 size = sizeof(ImDrawIdx) * static_cast<u32>(cmd_list->IdxBuffer.Size);
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(ImDrawIdx)))
{
Console.Warning("Skipping ImGui draw because of no vertex buffer space");
return;
}
index_offset = m_index_stream_buffer.GetCurrentOffset() / sizeof(ImDrawIdx);
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), cmd_list->IdxBuffer.Data, size);
m_index_stream_buffer.CommitMemory(size);
}
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(ImDrawVert));
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
{
@ -1468,7 +1456,7 @@ void GSDevice12::RenderImGui()
if (ApplyUtilityState())
{
g_d3d12_context->GetCommandList()->DrawIndexedInstanced(
pcmd->ElemCount, 1, index_offset + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
pcmd->ElemCount, 1, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
}
}
@ -1536,17 +1524,17 @@ void GSDevice12::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
void GSDevice12::IASetIndexBuffer(const void* index, size_t count)
{
const u32 size = sizeof(u32) * static_cast<u32>(count);
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
const u32 size = sizeof(u16) * static_cast<u32>(count);
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
{
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to index buffer");
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
pxFailRel("Failed to reserve space for vertices");
}
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
m_index.count = count;
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R32_UINT);
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
m_index_stream_buffer.CommitMemory(size);
@ -3353,7 +3341,7 @@ void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
{
m_index.start = 0;
m_index.count = config.nindices;
SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R32_UINT);
SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R16_UINT);
}
else
{

View File

@ -258,14 +258,10 @@ void GSRendererHW::Lines2Sprites()
int i = static_cast<int>(count) * 2 - 4;
GSVertex* s = &m_vertex.buff[count - 2];
GSVertex* q = &m_vertex.buff[count * 2 - 4];
u32* RESTRICT index = &m_index.buff[count * 3 - 6];
u16* RESTRICT index = &m_index.buff[count * 3 - 6];
alignas(16) static constexpr std::array<int, 8> tri_normal_indices = {{0, 1, 2, 1, 2, 3}};
alignas(16) static constexpr std::array<int, 8> tri_swapped_indices = {{0, 1, 2, 1, 2, 3}};
const bool index_swap = !g_gs_device->Features().provoking_vertex_last;
const int* tri_indices = index_swap ? tri_swapped_indices.data() : tri_normal_indices.data();
const GSVector4i indices_low(GSVector4i::load<true>(tri_indices));
const GSVector4i indices_high(GSVector4i::loadl(tri_indices + 4));
// Sprites are flat shaded, so the provoking vertex doesn't matter here.
constexpr GSVector4i indices = GSVector4i::cxpr16(0, 1, 2, 1, 2, 3, 0, 0);
for (; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
{
@ -310,9 +306,10 @@ void GSRendererHW::Lines2Sprites()
q[1] = v0;
q[2] = v1;
const GSVector4i i_splat(i);
GSVector4i::store<false>(index, i_splat + indices_low);
GSVector4i::storel(index + 4, i_splat + indices_high);
const GSVector4i this_indices = GSVector4i::broadcast16(i).add16(indices);
const int high = this_indices.extract32<2>();
GSVector4i::storel(index, this_indices);
std::memcpy(&index[4], &high, sizeof(high));
}
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
@ -322,26 +319,30 @@ void GSRendererHW::Lines2Sprites()
void GSRendererHW::ExpandLineIndices()
{
const u32 process_count = (m_index.tail + 3) / 4 * 4;
const u32 process_count = (m_index.tail + 7) / 8 * 8;
const u32 expansion_factor = 3;
m_index.tail *= expansion_factor;
GSVector4i* end = reinterpret_cast<GSVector4i*>(m_index.buff);
GSVector4i* read = reinterpret_cast<GSVector4i*>(m_index.buff + process_count);
GSVector4i* write = reinterpret_cast<GSVector4i*>(m_index.buff + process_count * expansion_factor);
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
constexpr GSVector4i mask0 = GSVector4i::cxpr8(0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5);
constexpr GSVector4i mask1 = GSVector4i::cxpr8(6, 7, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 8, 9);
constexpr GSVector4i mask2 = GSVector4i::cxpr8(10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 12, 13, 14, 15, 14, 15);
constexpr GSVector4i low0 = GSVector4i::cxpr16(0, 1, 2, 1, 2, 3, 0, 1);
constexpr GSVector4i low1 = GSVector4i::cxpr16(2, 1, 2, 3, 0, 1, 2, 1);
constexpr GSVector4i low2 = GSVector4i::cxpr16(2, 3, 0, 1, 2, 1, 2, 3);
while (read > end)
{
read -= 1;
write -= expansion_factor;
const GSVector4i in = read->sll32(2);
write[0] = in.xxyx() | low0;
write[1] = in.yyzz() | low1;
write[2] = in.wzww() | low2;
const GSVector4i in = read->sll16(2);
write[0] = in.shuffle8(mask0) | low0;
write[1] = in.shuffle8(mask1) | low1;
write[2] = in.shuffle8(mask2) | low2;
}
}

View File

@ -2576,7 +2576,7 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
#undef V
static constexpr u32 indices[6] = { 0, 1, 2, 2, 1, 3 };
static constexpr u16 indices[6] = { 0, 1, 2, 2, 1, 3 };
// If we ever do this sort of thing somewhere else, extract this to a helper function.
GSHWDrawConfig config;

View File

@ -2178,7 +2178,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
textureBarrier(enc);
[enc drawIndexedPrimitives:topology
indexCount:count
indexType:MTLIndexTypeUInt32
indexType:MTLIndexTypeUInt16
indexBuffer:buffer
indexBufferOffset:off + p * sizeof(*config.indices)];
p += count;
@ -2200,7 +2200,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
textureBarrier(enc);
[enc drawIndexedPrimitives:topology
indexCount:config.indices_per_prim
indexType:MTLIndexTypeUInt32
indexType:MTLIndexTypeUInt16
indexBuffer:buffer
indexBufferOffset:off + p * sizeof(*config.indices)];
}
@ -2217,7 +2217,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
[enc drawIndexedPrimitives:topology
indexCount:config.nindices
indexType:MTLIndexTypeUInt32
indexType:MTLIndexTypeUInt16
indexBuffer:buffer
indexBufferOffset:off];

View File

@ -876,8 +876,8 @@ void GSDeviceOGL::DrawPrimitive()
void GSDeviceOGL::DrawIndexedPrimitive()
{
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
glDrawElementsBaseVertex(m_draw_topology, static_cast<u32>(m_index.count), GL_UNSIGNED_INT,
reinterpret_cast<void*>(static_cast<u32>(m_index.start) * sizeof(u32)), static_cast<GLint>(m_vertex.start));
glDrawElementsBaseVertex(m_draw_topology, static_cast<u32>(m_index.count), GL_UNSIGNED_SHORT,
reinterpret_cast<void*>(static_cast<u32>(m_index.start) * sizeof(u16)), static_cast<GLint>(m_vertex.start));
}
void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
@ -885,8 +885,8 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
//ASSERT(offset + count <= (int)m_index.count);
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_INT,
reinterpret_cast<void*>((static_cast<u32>(m_index.start) + static_cast<u32>(offset)) * sizeof(u32)),
glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_SHORT,
reinterpret_cast<void*>((static_cast<u32>(m_index.start) + static_cast<u32>(offset)) * sizeof(u16)),
static_cast<GLint>(m_vertex.start));
}
@ -1548,15 +1548,15 @@ void GSDeviceOGL::DrawMultiStretchRects(
void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds)
{
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
auto vertex_map = m_vertex_stream_buffer->Map(sizeof(GSVertexPT1), vertex_reserve_size);
auto index_map = m_index_stream_buffer->Map(sizeof(u32), index_reserve_size);
auto index_map = m_index_stream_buffer->Map(sizeof(u16), index_reserve_size);
m_vertex.start = vertex_map.index_aligned;
m_index.start = index_map.index_aligned;
// Don't use primitive restart here, it ends up slower on some drivers.
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(vertex_map.pointer);
u32* idx = reinterpret_cast<u32*>(index_map.pointer);
u16* idx = reinterpret_cast<u16*>(index_map.pointer);
u32 icount = 0;
u32 vcount = 0;
for (u32 i = 0; i < num_rects; i++)
@ -1587,7 +1587,7 @@ void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rec
m_vertex.count = vcount;
m_index.count = icount;
m_vertex_stream_buffer->Unmap(vcount * sizeof(GSVertexPT1));
m_index_stream_buffer->Unmap(icount * sizeof(u32));
m_index_stream_buffer->Unmap(icount * sizeof(u16));
PSSetShaderResource(0, rects[0].src);
PSSetSamplerState(rects[0].linear ? m_convert.ln : m_convert.pt);
@ -1807,8 +1807,8 @@ void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count)
void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count)
{
const u32 size = static_cast<u32>(count) * sizeof(u32);
auto res = m_index_stream_buffer->Map(sizeof(u32), size);
const u32 size = static_cast<u32>(count) * sizeof(u16);
auto res = m_index_stream_buffer->Map(sizeof(u16), size);
m_index.start = res.index_aligned;
m_index.count = count;
std::memcpy(res.pointer, index, size);
@ -1999,18 +1999,7 @@ void GSDeviceOGL::RenderImGui()
m_vertex_stream_buffer->Unmap(size);
}
// Bit awkward, because this is using 16-bit indices, not 32-bit.
u32 index_start;
{
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
const u32 size = static_cast<u32>(cmd_list->IdxBuffer.Size) * sizeof(ImDrawIdx);
auto res = m_index_stream_buffer->Map(sizeof(u16), size);
index_start = res.index_aligned;
std::memcpy(res.pointer, cmd_list->IdxBuffer.Data, size);
m_index_stream_buffer->Unmap(size);
m_index_stream_buffer->Bind();
}
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
{
@ -2038,7 +2027,7 @@ void GSDeviceOGL::RenderImGui()
}
glDrawElementsBaseVertex(GL_TRIANGLES, (GLsizei)pcmd->ElemCount, GL_UNSIGNED_SHORT,
(void*)(intptr_t)((pcmd->IdxOffset + index_start) * sizeof(ImDrawIdx)), pcmd->VtxOffset + vertex_start);
(void*)(intptr_t)((pcmd->IdxOffset + m_index.start) * sizeof(ImDrawIdx)), pcmd->VtxOffset + vertex_start);
}
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);

View File

@ -161,7 +161,7 @@ typedef GSVector4 VectorF;
#define LOCAL_STEP local.d4
#endif
void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
{
const GSScanlineGlobalData& global = GlobalFromLocal(local);
GSScanlineSelector sel = global.sel;

View File

@ -38,7 +38,7 @@ public:
~GSDrawScanline() override;
/// Function pointer types which we call back into.
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
using DrawScanlinePtr = void(*)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
/// Flushes the code cache, forcing everything to be recompiled.
@ -60,7 +60,7 @@ private:
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, u64, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
static void CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
static void CSetupPrim(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
static void CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
static void CDrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
};

View File

@ -154,10 +154,10 @@ void GSRasterizer::Draw(GSRasterizerData& data)
const GSVertexSW* vertex = data.vertex;
const GSVertexSW* vertex_end = data.vertex + data.vertex_count;
const u32* index = data.index;
const u32* index_end = data.index + data.index_count;
const u16* index = data.index;
const u16* index_end = data.index + data.index_count;
u32 tmp_index[] = {0, 1, 2};
static constexpr u16 tmp_index[] = {0, 1, 2};
bool scissor_test = !data.bbox.eq(data.bbox.rintersect(data.scissor));
@ -261,7 +261,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
}
template <bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count)
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u16* index, int index_count)
{
m_primcount++;
@ -286,7 +286,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
}
else
{
u32 tmp_index[1] = {0};
static constexpr u16 tmp_index[1] = {0};
for (int i = 0; i < vertex_count; i++, vertex++)
{
@ -307,7 +307,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
}
}
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u16* index)
{
m_primcount++;
@ -425,7 +425,7 @@ static const u8 s_ysort[8][4] =
#if _M_SSE >= 0x501
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index)
{
m_primcount++;
@ -606,7 +606,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRIC
#else
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index)
{
m_primcount++;
@ -784,7 +784,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& RESTRICT
#endif
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u16* index)
{
m_primcount++;
@ -1082,7 +1082,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
AddScanlineInfo(e, pixels, left, top);
}
void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge /* = false */)
void GSRasterizer::Flush(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, bool edge /* = false */)
{
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)

View File

@ -38,7 +38,7 @@ public:
u8* buff;
GSVertexSW* vertex;
int vertex_count;
u32* index;
u16* index;
int index_count;
u64 frame;
u64 start;
@ -101,10 +101,10 @@ protected:
__forceinline bool HasEdge() const { return (m_draw_edge != nullptr); }
template <bool scissor_test>
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count);
void DrawLine(const GSVertexSW* vertex, const u32* index);
void DrawTriangle(const GSVertexSW* vertex, const u32* index);
void DrawSprite(const GSVertexSW* vertex, const u32* index);
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u16* index, int index_count);
void DrawLine(const GSVertexSW* vertex, const u16* index);
void DrawTriangle(const GSVertexSW* vertex, const u16* index);
void DrawSprite(const GSVertexSW* vertex, const u16* index);
#if _M_SSE >= 0x501
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0);
@ -115,7 +115,7 @@ protected:
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge = false);
__forceinline void Flush(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, bool edge = false);
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);

View File

@ -343,7 +343,7 @@ void GSRendererSW::Draw()
sd->buff = (u8*)m_vertex_heap.alloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(u32) * m_index.tail, 64);
sd->vertex = (GSVertexSW*)sd->buff;
sd->vertex_count = m_vertex.next;
sd->index = (u32*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
sd->index = (u16*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
sd->index_count = m_index.tail;
sd->scanmsk_value = m_draw_env->SCANMSK.MSK;
@ -354,7 +354,7 @@ void GSRendererSW::Draw()
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex.buff, m_vertex.next);
memcpy(sd->index, m_index.buff, sizeof(u32) * m_index.tail);
std::memcpy(sd->index, m_index.buff, sizeof(u16) * m_index.tail);
GSVector4i scissor = GSVector4i(context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));

View File

@ -210,7 +210,7 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
{
// GSVector4 p = vertex[index[1]].p;
mov(eax, ptr[_index + sizeof(u32) * 1]);
movzx(eax, word[_index + sizeof(u16) * 1]);
shl(eax, 6); // * sizeof(GSVertexSW)
add(rax, _64_vertex);
@ -299,7 +299,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
{
// GSVector4 p = vertex[index[1]].p;
mov(eax, ptr[_index + sizeof(u32) * 1]);
movzx(eax, word[_index + sizeof(u16) * 1]);
shl(eax, 6); // * sizeof(GSVertexSW)
add(rax, _64_vertex);
@ -504,7 +504,7 @@ void GSSetupPrimCodeGenerator2::Color()
if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
{
mov(eax, ptr[_index + sizeof(u32) * last]);
movzx(eax, word[_index + sizeof(u16) * last]);
shl(eax, 6); // * sizeof(GSVertexSW)
add(rax, _64_vertex);
}

View File

@ -955,13 +955,13 @@ void GSDeviceVK::DoMultiStretchRects(
{
// Set up vertices first.
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
{
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
{
pxFailRel("Failed to reserve space for vertices");
}
@ -971,7 +971,7 @@ void GSDeviceVK::DoMultiStretchRects(
// Don't use primitive restart here, it ends up slower on some drivers.
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
u32* idx = reinterpret_cast<u32*>(m_index_stream_buffer.GetCurrentHostPointer());
u16* idx = reinterpret_cast<u16*>(m_index_stream_buffer.GetCurrentHostPointer());
u32 icount = 0;
u32 vcount = 0;
for (u32 i = 0; i < num_rects; i++)
@ -1001,11 +1001,11 @@ void GSDeviceVK::DoMultiStretchRects(
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
m_vertex.count = vcount;
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
m_index.count = icount;
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
m_index_stream_buffer.CommitMemory(icount * sizeof(u32));
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
m_index_stream_buffer.CommitMemory(icount * sizeof(u16));
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
// Even though we're batching, a cmdbuffer submit could've messed this up.
const GSVector4i rc(dTex->GetRect());
@ -1368,21 +1368,21 @@ void GSDeviceVK::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count)
{
const u32 size = sizeof(u32) * static_cast<u32>(count);
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
const u32 size = sizeof(u16) * static_cast<u32>(count);
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
{
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to index buffer");
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
pxFailRel("Failed to reserve space for vertices");
}
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
m_index.count = count;
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
m_index_stream_buffer.CommitMemory(size);
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
}
void GSDeviceVK::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
@ -2314,9 +2314,6 @@ void GSDeviceVK::RenderImGui()
m_dirty_flags |= DIRTY_FLAG_UTILITY_TEXTURE;
}
// imgui uses 16-bit indices
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
// this is for presenting, we don't want to screw with the viewport/scissor set by display
m_dirty_flags &= ~(DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR);
@ -2338,19 +2335,8 @@ void GSDeviceVK::RenderImGui()
m_vertex_stream_buffer.CommitMemory(size);
}
u32 index_offset;
{
const u32 size = sizeof(ImDrawIdx) * static_cast<u32>(cmd_list->IdxBuffer.Size);
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(ImDrawIdx)))
{
Console.Warning("Skipping ImGui draw because of no vertex buffer space");
return;
}
index_offset = m_index_stream_buffer.GetCurrentOffset() / sizeof(ImDrawIdx);
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), cmd_list->IdxBuffer.Data, size);
m_index_stream_buffer.CommitMemory(size);
}
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
{
@ -2374,7 +2360,7 @@ void GSDeviceVK::RenderImGui()
if (ApplyUtilityState())
{
vkCmdDrawIndexed(g_vulkan_context->GetCurrentCommandBuffer(), pcmd->ElemCount, 1,
index_offset + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
}
}
@ -2779,7 +2765,7 @@ void GSDeviceVK::InitializeState()
m_vertex_buffer_offset = 0;
m_index_buffer = m_index_stream_buffer.GetBuffer();
m_index_buffer_offset = 0;
m_index_type = VK_INDEX_TYPE_UINT32;
m_index_type = VK_INDEX_TYPE_UINT16;
m_current_framebuffer = VK_NULL_HANDLE;
m_current_render_pass = VK_NULL_HANDLE;
@ -3848,7 +3834,7 @@ void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
{
m_index.start = 0;
m_index.count = config.nindices;
SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT32);
SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT16);
}
else
{