mirror of https://github.com/PCSX2/pcsx2.git
GS: Use 16-bit indices instead of 32-bit
Save some bandwidth.
This commit is contained in:
parent
72f70d4789
commit
839b482cb5
|
@ -2638,31 +2638,31 @@ void GSState::GrowVertexBuffer()
|
|||
{
|
||||
const u32 maxcount = std::max<u32>(m_vertex.maxcount * 3 / 2, 10000);
|
||||
|
||||
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 32);
|
||||
GSVertex* vertex = static_cast<GSVertex*>(_aligned_malloc(sizeof(GSVertex) * maxcount, 32));
|
||||
// Worst case index list is a list of points with vs expansion, 6 indices per point
|
||||
u32* index = (u32*)_aligned_malloc(sizeof(u32) * maxcount * 6, 32);
|
||||
u16* index = static_cast<u16*>(_aligned_malloc(sizeof(u16) * maxcount * 6, 32));
|
||||
|
||||
if (vertex == NULL || index == NULL)
|
||||
if (!vertex || !index)
|
||||
{
|
||||
const u32 vert_byte_count = sizeof(GSVertex) * maxcount;
|
||||
const u32 idx_byte_count = sizeof(u32) * maxcount * 3;
|
||||
const u32 idx_byte_count = sizeof(u16) * maxcount * 3;
|
||||
|
||||
Console.Error("GS: failed to allocate %zu bytes for verticles and %zu for indices.",
|
||||
Console.Error("GS: failed to allocate %zu bytes for vertices and %zu for indices.",
|
||||
vert_byte_count, idx_byte_count);
|
||||
|
||||
throw GSError();
|
||||
}
|
||||
|
||||
if (m_vertex.buff != NULL)
|
||||
if (m_vertex.buff)
|
||||
{
|
||||
memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
|
||||
std::memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
|
||||
|
||||
_aligned_free(m_vertex.buff);
|
||||
}
|
||||
|
||||
if (m_index.buff != NULL)
|
||||
if (m_index.buff)
|
||||
{
|
||||
memcpy(index, m_index.buff, sizeof(u32) * m_index.tail);
|
||||
std::memcpy(index, m_index.buff, sizeof(u16) * m_index.tail);
|
||||
|
||||
_aligned_free(m_index.buff);
|
||||
}
|
||||
|
@ -3063,21 +3063,24 @@ static constexpr u32 MaxVerticesForPrim(u32 prim)
|
|||
{
|
||||
switch (prim)
|
||||
{
|
||||
// Four indices per 1 vertex.
|
||||
case GS_POINTLIST:
|
||||
case GS_INVALID:
|
||||
// Needed due to expansion in hardware renderers.
|
||||
|
||||
// Indices are shifted left by 2 to form quads.
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
return (std::numeric_limits<u16>::max() / 4) - 4;
|
||||
|
||||
// Four indices per two vertices.
|
||||
case GS_SPRITE:
|
||||
return (std::numeric_limits<u16>::max() / 2) - 2;
|
||||
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
default:
|
||||
return 0;
|
||||
return (std::numeric_limits<u16>::max() - 3);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3229,19 +3232,19 @@ __forceinline void GSState::VertexKick(u32 skip)
|
|||
m_backed_up_ctx = m_env.PRIM.CTXT;
|
||||
}
|
||||
|
||||
u32* RESTRICT buff = &m_index.buff[m_index.tail];
|
||||
u16* RESTRICT buff = &m_index.buff[m_index.tail];
|
||||
|
||||
switch (prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
buff[0] = head + 0;
|
||||
buff[0] = static_cast<u16>(head + 0);
|
||||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 1;
|
||||
m_index.tail += 1;
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
buff[0] = head + (index_swap ? 1 : 0);
|
||||
buff[1] = head + (index_swap ? 0 : 1);
|
||||
buff[0] = static_cast<u16>(head + (index_swap ? 1 : 0));
|
||||
buff[1] = static_cast<u16>(head + (index_swap ? 0 : 1));
|
||||
m_vertex.head = head + 2;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
|
@ -3254,16 +3257,16 @@ __forceinline void GSState::VertexKick(u32 skip)
|
|||
head = next;
|
||||
m_vertex.tail = next + 2;
|
||||
}
|
||||
buff[0] = head + (index_swap ? 1 : 0);
|
||||
buff[1] = head + (index_swap ? 0 : 1);
|
||||
buff[0] = static_cast<u16>(head + (index_swap ? 1 : 0));
|
||||
buff[1] = static_cast<u16>(head + (index_swap ? 0 : 1));
|
||||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
buff[0] = head + (index_swap ? 2 : 0);
|
||||
buff[1] = head + 1;
|
||||
buff[2] = head + (index_swap ? 0 : 2);
|
||||
buff[0] = static_cast<u16>(head + (index_swap ? 2 : 0));
|
||||
buff[1] = static_cast<u16>(head + 1);
|
||||
buff[2] = static_cast<u16>(head + (index_swap ? 0 : 2));
|
||||
m_vertex.head = head + 3;
|
||||
m_vertex.next = head + 3;
|
||||
m_index.tail += 3;
|
||||
|
@ -3277,24 +3280,24 @@ __forceinline void GSState::VertexKick(u32 skip)
|
|||
head = next;
|
||||
m_vertex.tail = next + 3;
|
||||
}
|
||||
buff[0] = head + (index_swap ? 2 : 0);
|
||||
buff[1] = head + 1;
|
||||
buff[2] = head + (index_swap ? 0 : 2);
|
||||
buff[0] = static_cast<u16>(head + (index_swap ? 2 : 0));
|
||||
buff[1] = static_cast<u16>(head + 1);
|
||||
buff[2] = static_cast<u16>(head + (index_swap ? 0 : 2));
|
||||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 3;
|
||||
m_index.tail += 3;
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
|
||||
buff[0] = index_swap ? (tail - 1) : (head + 0);
|
||||
buff[1] = tail - 2;
|
||||
buff[2] = index_swap ? (head + 0) : (tail - 1);
|
||||
buff[0] = static_cast<u16>(index_swap ? (tail - 1) : (head + 0));
|
||||
buff[1] = static_cast<u16>(tail - 2);
|
||||
buff[2] = static_cast<u16>(index_swap ? (head + 0) : (tail - 1));
|
||||
m_vertex.next = tail;
|
||||
m_index.tail += 3;
|
||||
break;
|
||||
case GS_SPRITE:
|
||||
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
buff[0] = static_cast<u16>(head + 0);
|
||||
buff[1] = static_cast<u16>(head + 1);
|
||||
m_vertex.head = head + 2;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
|
|
|
@ -158,7 +158,7 @@ protected:
|
|||
|
||||
struct
|
||||
{
|
||||
u32* buff;
|
||||
u16* buff;
|
||||
u32 tail;
|
||||
} m_index = {};
|
||||
|
||||
|
|
|
@ -28,6 +28,16 @@ class alignas(16) GSVector4i
|
|||
{
|
||||
}
|
||||
|
||||
// Constant-expression constructor taking eight shorts: s0..s7 are stored,
// in argument order, into the I16 member. The cxpr_init_tag parameter is
// unnamed, so it cannot be read in the body and can only serve to select
// this overload.
constexpr GSVector4i(cxpr_init_tag, short s0, short s1, short s2, short s3, short s4, short s5, short s6, short s7)
|
||||
: I16{s0, s1, s2, s3, s4, s5, s6, s7}
|
||||
{
|
||||
}
|
||||
|
||||
// Constant-expression constructor taking sixteen chars: b0..b15 are stored,
// in argument order, into the I8 member. The cxpr_init_tag parameter is
// unnamed, so it can only serve to select this overload.
// NOTE(review): the parameters are plain char, whose signedness is
// implementation-defined -- confirm callers only pass values that are
// representable either way.
constexpr GSVector4i(cxpr_init_tag, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
|
||||
: I8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
union
|
||||
{
|
||||
|
@ -62,6 +72,16 @@ public:
|
|||
return GSVector4i(cxpr_init, x, x, x, x);
|
||||
}
|
||||
|
||||
// Builds a GSVector4i in a constant expression from eight shorts by
// forwarding them, unchanged and in order, together with cxpr_init to the
// tagged constructor.
constexpr static GSVector4i cxpr16(short s0, short s1, short s2, short s3, short s4, short s5, short s6, short s7)
|
||||
{
|
||||
return GSVector4i(cxpr_init, s0, s1, s2, s3, s4, s5, s6, s7);
|
||||
}
|
||||
|
||||
// Builds a GSVector4i in a constant expression from sixteen chars by
// forwarding them, unchanged and in order, together with cxpr_init to the
// tagged constructor.
constexpr static GSVector4i cxpr8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
|
||||
{
|
||||
return GSVector4i(cxpr_init, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15);
|
||||
}
|
||||
|
||||
__forceinline GSVector4i(int x, int y, int z, int w)
|
||||
{
|
||||
m = _mm_set_epi32(w, z, y, x);
|
||||
|
@ -2001,6 +2021,10 @@ public:
|
|||
return v;
|
||||
}
|
||||
|
||||
// Returns a GSVector4i wrapping _mm_set1_epi16(value), i.e. with `value`
// replicated into every 16-bit lane of the vector.
// `value` is declared u16 while the intrinsic takes a signed 16-bit
// argument, so it is converted implicitly at the call.
__forceinline static GSVector4i broadcast16(u16 value)
|
||||
{
|
||||
return GSVector4i(_mm_set1_epi16(value));
|
||||
}
|
||||
|
||||
__forceinline static GSVector4i zero() { return GSVector4i(_mm_setzero_si128()); }
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "GS/GSGL.h"
|
||||
#include "GS/GS.h"
|
||||
#include "Host.h"
|
||||
#include "common/Align.h"
|
||||
#include "common/StringUtil.h"
|
||||
|
||||
#include "imgui.h"
|
||||
|
@ -165,9 +166,9 @@ std::string GSDevice::GetFullscreenModeString(u32 width, u32 height, float refre
|
|||
|
||||
void GSDevice::GenerateExpansionIndexBuffer(void* buffer)
|
||||
{
|
||||
static constexpr u32 MAX_INDEX = std::numeric_limits<u16>::max();
|
||||
static constexpr u32 MAX_INDEX = EXPAND_BUFFER_SIZE / 6 / sizeof(u16);
|
||||
|
||||
u32* idx_buffer = static_cast<u32*>(buffer);
|
||||
u16* idx_buffer = static_cast<u16*>(buffer);
|
||||
for (u32 i = 0; i < MAX_INDEX; i++)
|
||||
{
|
||||
const u32 base = i * 4;
|
||||
|
|
|
@ -627,7 +627,7 @@ struct alignas(16) GSHWDrawConfig
|
|||
GSTexture* tex; ///< Source texture
|
||||
GSTexture* pal; ///< Palette texture
|
||||
const GSVertex* verts;///< Vertices to draw
|
||||
const u32* indices; ///< Indices to draw
|
||||
const u16* indices; ///< Indices to draw
|
||||
u32 nverts; ///< Number of vertices
|
||||
u32 nindices; ///< Number of indices
|
||||
u32 indices_per_prim; ///< Number of indices that make up one primitive
|
||||
|
@ -749,7 +749,7 @@ protected:
|
|||
static constexpr float MAD_SENSITIVITY = 0.08f;
|
||||
static constexpr u32 MAX_POOLED_TEXTURES = 300;
|
||||
static constexpr u32 NUM_CAS_CONSTANTS = 12; // 8 plus src offset x/y, 16 byte alignment
|
||||
static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u32) * std::numeric_limits<u16>::max() * 6;
|
||||
static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u16) * 65532 * 6;
|
||||
|
||||
WindowInfo m_window_info;
|
||||
VsyncMode m_vsync_mode = VsyncMode::Off;
|
||||
|
|
|
@ -24,7 +24,7 @@ GSVertexTrace::GSVertexTrace(const GSState* state, bool provoking_vertex_first)
|
|||
MULTI_ISA_SELECT(GSVertexTracePopulateFunctions)(*this, provoking_vertex_first);
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const void* vertex, const u32* index, int v_count, int i_count, GS_PRIM_CLASS primclass)
|
||||
void GSVertexTrace::Update(const void* vertex, const u16* index, int v_count, int i_count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
if (i_count == 0)
|
||||
return;
|
||||
|
@ -43,7 +43,7 @@ void GSVertexTrace::Update(const void* vertex, const u32* index, int v_count, in
|
|||
// that feel big enough.
|
||||
if (!fst && !m_accurate_stq && m_min.t.z > 1e30)
|
||||
{
|
||||
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
|
||||
Console.Warning("Vertex Trace: float overflow detected ! min %e max %e", m_min.t.z, m_max.t.z);
|
||||
m_accurate_stq = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ public:
|
|||
protected:
|
||||
const GSState* m_state;
|
||||
|
||||
typedef void (*FindMinMaxPtr)(GSVertexTrace& vt, const void* vertex, const u32* index, int count);
|
||||
typedef void (*FindMinMaxPtr)(GSVertexTrace& vt, const void* vertex, const u16* index, int count);
|
||||
|
||||
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
||||
|
||||
|
@ -77,7 +77,7 @@ public:
|
|||
public:
|
||||
GSVertexTrace(const GSState* state, bool provoking_vertex_first);
|
||||
|
||||
void Update(const void* vertex, const u32* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
|
||||
void Update(const void* vertex, const u16* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
|
||||
|
||||
bool IsLinear() const { return m_filter.opt_linear; }
|
||||
bool IsRealLinear() const { return m_filter.linear; }
|
||||
|
|
|
@ -22,7 +22,7 @@ class CURRENT_ISA::GSVertexTraceFMM
|
|||
static constexpr GSVector4 s_minmax = GSVector4::cxpr(FLT_MAX, -FLT_MAX, 0.f, 0.f);
|
||||
|
||||
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color, bool flat_swapped>
|
||||
static void FindMinMax(GSVertexTrace& vt, const void* vertex, const u32* index, int count);
|
||||
static void FindMinMax(GSVertexTrace& vt, const void* vertex, const u16* index, int count);
|
||||
|
||||
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color>
|
||||
static constexpr GSVertexTrace::FindMinMaxPtr GetFMM(bool provoking_vertex_first);
|
||||
|
@ -76,7 +76,7 @@ void GSVertexTraceFMM::Populate(GSVertexTrace& vt, bool provoking_vertex_first)
|
|||
}
|
||||
|
||||
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color, bool flat_swapped>
|
||||
void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u32* index, int count)
|
||||
void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u16* index, int count)
|
||||
{
|
||||
const GSDrawingContext* context = vt.m_state->m_context;
|
||||
|
||||
|
|
|
@ -1440,7 +1440,7 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect
|
|||
const u32 vertex_reserve_size = num_rects * 4;
|
||||
const u32 index_reserve_size = num_rects * 6;
|
||||
GSVertexPT1* verts = static_cast<GSVertexPT1*>(IAMapVertexBuffer(sizeof(GSVertexPT1), vertex_reserve_size));
|
||||
u32* idx = IAMapIndexBuffer(index_reserve_size);
|
||||
u16* idx = IAMapIndexBuffer(index_reserve_size);
|
||||
u32 icount = 0;
|
||||
u32 vcount = 0;
|
||||
for (u32 i = 0; i < num_rects; i++)
|
||||
|
@ -1712,7 +1712,6 @@ void GSDevice11::RenderImGui()
|
|||
const UINT vb_stride = sizeof(ImDrawVert);
|
||||
const UINT vb_offset = 0;
|
||||
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &vb_stride, &vb_offset);
|
||||
m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R16_UINT, 0);
|
||||
IASetInputLayout(m_imgui.il.get());
|
||||
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get());
|
||||
|
@ -1756,16 +1755,8 @@ void GSDevice11::RenderImGui()
|
|||
m_ctx->Unmap(m_vb.get(), 0);
|
||||
}
|
||||
|
||||
// Bit awkward, because this is using 16-bit indices, not 32-bit.
|
||||
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
||||
const u32 index_count = static_cast<u32>(cmd_list->IdxBuffer.Size + 1) / 2;
|
||||
u32* index_map = IAMapIndexBuffer(index_count);
|
||||
if (!index_map)
|
||||
continue;
|
||||
|
||||
const u32 index_start = m_index.start * 2;
|
||||
std::memcpy(index_map, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx));
|
||||
IAUnmapIndexBuffer(index_count);
|
||||
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
||||
|
||||
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
||||
{
|
||||
|
@ -1787,14 +1778,13 @@ void GSDevice11::RenderImGui()
|
|||
m_state.ps_sr_views[0] = static_cast<ID3D11ShaderResourceView*>(pcmd->GetTexID());
|
||||
PSUpdateShaderState();
|
||||
|
||||
m_ctx->DrawIndexed(pcmd->ElemCount, index_start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset);
|
||||
m_ctx->DrawIndexed(pcmd->ElemCount, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset);
|
||||
}
|
||||
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
||||
}
|
||||
|
||||
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset);
|
||||
m_ctx->IASetIndexBuffer(m_state.index_buffer, DXGI_FORMAT_R32_UINT, 0);
|
||||
}
|
||||
|
||||
void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm)
|
||||
|
@ -1912,9 +1902,9 @@ bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 cou
|
|||
return true;
|
||||
}
|
||||
|
||||
u32* GSDevice11::IAMapIndexBuffer(u32 count)
|
||||
u16* GSDevice11::IAMapIndexBuffer(u32 count)
|
||||
{
|
||||
if (count > (INDEX_BUFFER_SIZE / sizeof(u32)))
|
||||
if (count > (INDEX_BUFFER_SIZE / sizeof(u16)))
|
||||
return nullptr;
|
||||
|
||||
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
||||
|
@ -1922,7 +1912,7 @@ u32* GSDevice11::IAMapIndexBuffer(u32 count)
|
|||
m_index.start = m_ib_pos;
|
||||
m_ib_pos += count;
|
||||
|
||||
if (m_ib_pos > (INDEX_BUFFER_SIZE / sizeof(u32)))
|
||||
if (m_ib_pos > (INDEX_BUFFER_SIZE / sizeof(u16)))
|
||||
{
|
||||
m_index.start = 0;
|
||||
m_ib_pos = count;
|
||||
|
@ -1933,7 +1923,7 @@ u32* GSDevice11::IAMapIndexBuffer(u32 count)
|
|||
if (FAILED(m_ctx->Map(m_ib.get(), 0, type, 0, &m)))
|
||||
return nullptr;
|
||||
|
||||
return static_cast<u32*>(m.pData) + m_index.start;
|
||||
return static_cast<u16*>(m.pData) + m_index.start;
|
||||
}
|
||||
|
||||
void GSDevice11::IAUnmapIndexBuffer(u32 count)
|
||||
|
@ -1944,11 +1934,11 @@ void GSDevice11::IAUnmapIndexBuffer(u32 count)
|
|||
|
||||
bool GSDevice11::IASetIndexBuffer(const void* index, u32 count)
|
||||
{
|
||||
u32* map = IAMapIndexBuffer(count);
|
||||
u16* map = IAMapIndexBuffer(count);
|
||||
if (!map)
|
||||
return false;
|
||||
|
||||
std::memcpy(map, index, count * sizeof(u32));
|
||||
std::memcpy(map, index, count * sizeof(u16));
|
||||
IAUnmapIndexBuffer(count);
|
||||
IASetIndexBuffer(m_ib.get());
|
||||
return true;
|
||||
|
@ -1958,7 +1948,7 @@ void GSDevice11::IASetIndexBuffer(ID3D11Buffer* buffer)
|
|||
{
|
||||
if (m_state.index_buffer != buffer)
|
||||
{
|
||||
m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R32_UINT, 0);
|
||||
m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R16_UINT, 0);
|
||||
m_state.index_buffer = buffer;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -343,7 +343,7 @@ public:
|
|||
bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count);
|
||||
bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count);
|
||||
|
||||
u32* IAMapIndexBuffer(u32 count);
|
||||
u16* IAMapIndexBuffer(u32 count);
|
||||
void IAUnmapIndexBuffer(u32 count);
|
||||
bool IASetIndexBuffer(const void* index, u32 count);
|
||||
void IASetIndexBuffer(ID3D11Buffer* buffer);
|
||||
|
|
|
@ -949,13 +949,13 @@ void GSDevice12::DoMultiStretchRects(
|
|||
{
|
||||
// Set up vertices first.
|
||||
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
|
||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
|
||||
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
|
||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
||||
{
|
||||
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
|
||||
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
|
||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
||||
{
|
||||
pxFailRel("Failed to reserve space for vertices");
|
||||
}
|
||||
|
@ -965,7 +965,7 @@ void GSDevice12::DoMultiStretchRects(
|
|||
// Don't use primitive restart here, it ends up slower on some drivers.
|
||||
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
|
||||
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
|
||||
u32* idx = reinterpret_cast<u32*>(m_index_stream_buffer.GetCurrentHostPointer());
|
||||
u16* idx = reinterpret_cast<u16*>(m_index_stream_buffer.GetCurrentHostPointer());
|
||||
u32 icount = 0;
|
||||
u32 vcount = 0;
|
||||
for (u32 i = 0; i < num_rects; i++)
|
||||
|
@ -996,12 +996,12 @@ void GSDevice12::DoMultiStretchRects(
|
|||
|
||||
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
|
||||
m_vertex.count = vcount;
|
||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
|
||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
||||
m_index.count = icount;
|
||||
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
|
||||
m_index_stream_buffer.CommitMemory(icount * sizeof(u32));
|
||||
m_index_stream_buffer.CommitMemory(icount * sizeof(u16));
|
||||
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(GSVertexPT1));
|
||||
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R32_UINT);
|
||||
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
|
||||
|
||||
// Even though we're batching, a cmdbuffer submit could've messed this up.
|
||||
const GSVector4i rc(dTex->GetRect());
|
||||
|
@ -1422,22 +1422,10 @@ void GSDevice12::RenderImGui()
|
|||
m_vertex_stream_buffer.CommitMemory(size);
|
||||
}
|
||||
|
||||
u32 index_offset;
|
||||
{
|
||||
const u32 size = sizeof(ImDrawIdx) * static_cast<u32>(cmd_list->IdxBuffer.Size);
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(ImDrawIdx)))
|
||||
{
|
||||
Console.Warning("Skipping ImGui draw because of no vertex buffer space");
|
||||
return;
|
||||
}
|
||||
|
||||
index_offset = m_index_stream_buffer.GetCurrentOffset() / sizeof(ImDrawIdx);
|
||||
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), cmd_list->IdxBuffer.Data, size);
|
||||
m_index_stream_buffer.CommitMemory(size);
|
||||
}
|
||||
|
||||
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(ImDrawVert));
|
||||
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
|
||||
|
||||
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
||||
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
||||
|
||||
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
||||
{
|
||||
|
@ -1468,7 +1456,7 @@ void GSDevice12::RenderImGui()
|
|||
if (ApplyUtilityState())
|
||||
{
|
||||
g_d3d12_context->GetCommandList()->DrawIndexedInstanced(
|
||||
pcmd->ElemCount, 1, index_offset + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
||||
pcmd->ElemCount, 1, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1536,17 +1524,17 @@ void GSDevice12::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
|||
|
||||
void GSDevice12::IASetIndexBuffer(const void* index, size_t count)
|
||||
{
|
||||
const u32 size = sizeof(u32) * static_cast<u32>(count);
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
|
||||
const u32 size = sizeof(u16) * static_cast<u32>(count);
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
||||
{
|
||||
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to index buffer");
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
||||
pxFailRel("Failed to reserve space for vertices");
|
||||
}
|
||||
|
||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
|
||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
||||
m_index.count = count;
|
||||
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R32_UINT);
|
||||
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
|
||||
|
||||
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
|
||||
m_index_stream_buffer.CommitMemory(size);
|
||||
|
@ -3353,7 +3341,7 @@ void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
|||
{
|
||||
m_index.start = 0;
|
||||
m_index.count = config.nindices;
|
||||
SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R32_UINT);
|
||||
SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R16_UINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -258,14 +258,10 @@ void GSRendererHW::Lines2Sprites()
|
|||
int i = static_cast<int>(count) * 2 - 4;
|
||||
GSVertex* s = &m_vertex.buff[count - 2];
|
||||
GSVertex* q = &m_vertex.buff[count * 2 - 4];
|
||||
u32* RESTRICT index = &m_index.buff[count * 3 - 6];
|
||||
u16* RESTRICT index = &m_index.buff[count * 3 - 6];
|
||||
|
||||
alignas(16) static constexpr std::array<int, 8> tri_normal_indices = {{0, 1, 2, 1, 2, 3}};
|
||||
alignas(16) static constexpr std::array<int, 8> tri_swapped_indices = {{0, 1, 2, 1, 2, 3}};
|
||||
const bool index_swap = !g_gs_device->Features().provoking_vertex_last;
|
||||
const int* tri_indices = index_swap ? tri_swapped_indices.data() : tri_normal_indices.data();
|
||||
const GSVector4i indices_low(GSVector4i::load<true>(tri_indices));
|
||||
const GSVector4i indices_high(GSVector4i::loadl(tri_indices + 4));
|
||||
// Sprites are flat shaded, so the provoking vertex doesn't matter here.
|
||||
constexpr GSVector4i indices = GSVector4i::cxpr16(0, 1, 2, 1, 2, 3, 0, 0);
|
||||
|
||||
for (; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
|
||||
{
|
||||
|
@ -310,9 +306,10 @@ void GSRendererHW::Lines2Sprites()
|
|||
q[1] = v0;
|
||||
q[2] = v1;
|
||||
|
||||
const GSVector4i i_splat(i);
|
||||
GSVector4i::store<false>(index, i_splat + indices_low);
|
||||
GSVector4i::storel(index + 4, i_splat + indices_high);
|
||||
const GSVector4i this_indices = GSVector4i::broadcast16(i).add16(indices);
|
||||
const int high = this_indices.extract32<2>();
|
||||
GSVector4i::storel(index, this_indices);
|
||||
std::memcpy(&index[4], &high, sizeof(high));
|
||||
}
|
||||
|
||||
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
|
||||
|
@ -322,26 +319,30 @@ void GSRendererHW::Lines2Sprites()
|
|||
|
||||
void GSRendererHW::ExpandLineIndices()
|
||||
{
|
||||
const u32 process_count = (m_index.tail + 3) / 4 * 4;
|
||||
const u32 process_count = (m_index.tail + 7) / 8 * 8;
|
||||
const u32 expansion_factor = 3;
|
||||
m_index.tail *= expansion_factor;
|
||||
GSVector4i* end = reinterpret_cast<GSVector4i*>(m_index.buff);
|
||||
GSVector4i* read = reinterpret_cast<GSVector4i*>(m_index.buff + process_count);
|
||||
GSVector4i* write = reinterpret_cast<GSVector4i*>(m_index.buff + process_count * expansion_factor);
|
||||
|
||||
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
|
||||
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
|
||||
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
|
||||
constexpr GSVector4i mask0 = GSVector4i::cxpr8(0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5);
|
||||
constexpr GSVector4i mask1 = GSVector4i::cxpr8(6, 7, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 8, 9);
|
||||
constexpr GSVector4i mask2 = GSVector4i::cxpr8(10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 12, 13, 14, 15, 14, 15);
|
||||
|
||||
constexpr GSVector4i low0 = GSVector4i::cxpr16(0, 1, 2, 1, 2, 3, 0, 1);
|
||||
constexpr GSVector4i low1 = GSVector4i::cxpr16(2, 1, 2, 3, 0, 1, 2, 1);
|
||||
constexpr GSVector4i low2 = GSVector4i::cxpr16(2, 3, 0, 1, 2, 1, 2, 3);
|
||||
|
||||
while (read > end)
|
||||
{
|
||||
read -= 1;
|
||||
write -= expansion_factor;
|
||||
|
||||
const GSVector4i in = read->sll32(2);
|
||||
write[0] = in.xxyx() | low0;
|
||||
write[1] = in.yyzz() | low1;
|
||||
write[2] = in.wzww() | low2;
|
||||
const GSVector4i in = read->sll16(2);
|
||||
write[0] = in.shuffle8(mask0) | low0;
|
||||
write[1] = in.shuffle8(mask1) | low1;
|
||||
write[2] = in.shuffle8(mask2) | low2;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2576,7 +2576,7 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
|
|||
|
||||
#undef V
|
||||
|
||||
static constexpr u32 indices[6] = { 0, 1, 2, 2, 1, 3 };
|
||||
static constexpr u16 indices[6] = { 0, 1, 2, 2, 1, 3 };
|
||||
|
||||
// If we ever do this sort of thing somewhere else, extract this to a helper function.
|
||||
GSHWDrawConfig config;
|
||||
|
|
|
@ -2178,7 +2178,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
|
|||
textureBarrier(enc);
|
||||
[enc drawIndexedPrimitives:topology
|
||||
indexCount:count
|
||||
indexType:MTLIndexTypeUInt32
|
||||
indexType:MTLIndexTypeUInt16
|
||||
indexBuffer:buffer
|
||||
indexBufferOffset:off + p * sizeof(*config.indices)];
|
||||
p += count;
|
||||
|
@ -2200,7 +2200,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
|
|||
textureBarrier(enc);
|
||||
[enc drawIndexedPrimitives:topology
|
||||
indexCount:config.indices_per_prim
|
||||
indexType:MTLIndexTypeUInt32
|
||||
indexType:MTLIndexTypeUInt16
|
||||
indexBuffer:buffer
|
||||
indexBufferOffset:off + p * sizeof(*config.indices)];
|
||||
}
|
||||
|
@ -2217,7 +2217,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
|
|||
|
||||
[enc drawIndexedPrimitives:topology
|
||||
indexCount:config.nindices
|
||||
indexType:MTLIndexTypeUInt32
|
||||
indexType:MTLIndexTypeUInt16
|
||||
indexBuffer:buffer
|
||||
indexBufferOffset:off];
|
||||
|
||||
|
|
|
@ -876,8 +876,8 @@ void GSDeviceOGL::DrawPrimitive()
|
|||
void GSDeviceOGL::DrawIndexedPrimitive()
|
||||
{
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
||||
glDrawElementsBaseVertex(m_draw_topology, static_cast<u32>(m_index.count), GL_UNSIGNED_INT,
|
||||
reinterpret_cast<void*>(static_cast<u32>(m_index.start) * sizeof(u32)), static_cast<GLint>(m_vertex.start));
|
||||
glDrawElementsBaseVertex(m_draw_topology, static_cast<u32>(m_index.count), GL_UNSIGNED_SHORT,
|
||||
reinterpret_cast<void*>(static_cast<u32>(m_index.start) * sizeof(u16)), static_cast<GLint>(m_vertex.start));
|
||||
}
|
||||
|
||||
void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
|
||||
|
@ -885,8 +885,8 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
|
|||
//ASSERT(offset + count <= (int)m_index.count);
|
||||
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
||||
glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_INT,
|
||||
reinterpret_cast<void*>((static_cast<u32>(m_index.start) + static_cast<u32>(offset)) * sizeof(u32)),
|
||||
glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_SHORT,
|
||||
reinterpret_cast<void*>((static_cast<u32>(m_index.start) + static_cast<u32>(offset)) * sizeof(u16)),
|
||||
static_cast<GLint>(m_vertex.start));
|
||||
}
|
||||
|
||||
|
@ -1548,15 +1548,15 @@ void GSDeviceOGL::DrawMultiStretchRects(
|
|||
void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds)
|
||||
{
|
||||
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
|
||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
|
||||
auto vertex_map = m_vertex_stream_buffer->Map(sizeof(GSVertexPT1), vertex_reserve_size);
|
||||
auto index_map = m_index_stream_buffer->Map(sizeof(u32), index_reserve_size);
|
||||
auto index_map = m_index_stream_buffer->Map(sizeof(u16), index_reserve_size);
|
||||
m_vertex.start = vertex_map.index_aligned;
|
||||
m_index.start = index_map.index_aligned;
|
||||
|
||||
// Don't use primitive restart here, it ends up slower on some drivers.
|
||||
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(vertex_map.pointer);
|
||||
u32* idx = reinterpret_cast<u32*>(index_map.pointer);
|
||||
u16* idx = reinterpret_cast<u16*>(index_map.pointer);
|
||||
u32 icount = 0;
|
||||
u32 vcount = 0;
|
||||
for (u32 i = 0; i < num_rects; i++)
|
||||
|
@ -1587,7 +1587,7 @@ void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rec
|
|||
m_vertex.count = vcount;
|
||||
m_index.count = icount;
|
||||
m_vertex_stream_buffer->Unmap(vcount * sizeof(GSVertexPT1));
|
||||
m_index_stream_buffer->Unmap(icount * sizeof(u32));
|
||||
m_index_stream_buffer->Unmap(icount * sizeof(u16));
|
||||
|
||||
PSSetShaderResource(0, rects[0].src);
|
||||
PSSetSamplerState(rects[0].linear ? m_convert.ln : m_convert.pt);
|
||||
|
@ -1807,8 +1807,8 @@ void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count)
|
|||
|
||||
void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count)
|
||||
{
|
||||
const u32 size = static_cast<u32>(count) * sizeof(u32);
|
||||
auto res = m_index_stream_buffer->Map(sizeof(u32), size);
|
||||
const u32 size = static_cast<u32>(count) * sizeof(u16);
|
||||
auto res = m_index_stream_buffer->Map(sizeof(u16), size);
|
||||
m_index.start = res.index_aligned;
|
||||
m_index.count = count;
|
||||
std::memcpy(res.pointer, index, size);
|
||||
|
@ -1999,18 +1999,7 @@ void GSDeviceOGL::RenderImGui()
|
|||
m_vertex_stream_buffer->Unmap(size);
|
||||
}
|
||||
|
||||
// Bit awkward, because this is using 16-bit indices, not 32-bit.
|
||||
u32 index_start;
|
||||
{
|
||||
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
||||
|
||||
const u32 size = static_cast<u32>(cmd_list->IdxBuffer.Size) * sizeof(ImDrawIdx);
|
||||
auto res = m_index_stream_buffer->Map(sizeof(u16), size);
|
||||
index_start = res.index_aligned;
|
||||
std::memcpy(res.pointer, cmd_list->IdxBuffer.Data, size);
|
||||
m_index_stream_buffer->Unmap(size);
|
||||
m_index_stream_buffer->Bind();
|
||||
}
|
||||
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
||||
|
||||
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
||||
{
|
||||
|
@@ -2038,7 +2027,7 @@ void GSDeviceOGL::RenderImGui()
|
|||
}
|
||||
|
||||
glDrawElementsBaseVertex(GL_TRIANGLES, (GLsizei)pcmd->ElemCount, GL_UNSIGNED_SHORT,
|
||||
(void*)(intptr_t)((pcmd->IdxOffset + index_start) * sizeof(ImDrawIdx)), pcmd->VtxOffset + vertex_start);
|
||||
(void*)(intptr_t)((pcmd->IdxOffset + m_index.start) * sizeof(ImDrawIdx)), pcmd->VtxOffset + vertex_start);
|
||||
}
|
||||
|
||||
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
||||
|
|
|
@@ -161,7 +161,7 @@ typedef GSVector4 VectorF;
|
|||
#define LOCAL_STEP local.d4
|
||||
#endif
|
||||
|
||||
void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
|
||||
void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
|
||||
{
|
||||
const GSScanlineGlobalData& global = GlobalFromLocal(local);
|
||||
GSScanlineSelector sel = global.sel;
|
||||
|
|
|
@@ -38,7 +38,7 @@ public:
|
|||
~GSDrawScanline() override;
|
||||
|
||||
/// Function pointer types which we call back into.
|
||||
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||
using DrawScanlinePtr = void(*)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||
|
||||
/// Flushes the code cache, forcing everything to be recompiled.
|
||||
|
@@ -60,7 +60,7 @@ private:
|
|||
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, u64, SetupPrimPtr> m_sp_map;
|
||||
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
|
||||
|
||||
static void CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||
static void CSetupPrim(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||
static void CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||
static void CDrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||
};
|
||||
|
|
|
@@ -154,10 +154,10 @@ void GSRasterizer::Draw(GSRasterizerData& data)
|
|||
const GSVertexSW* vertex = data.vertex;
|
||||
const GSVertexSW* vertex_end = data.vertex + data.vertex_count;
|
||||
|
||||
const u32* index = data.index;
|
||||
const u32* index_end = data.index + data.index_count;
|
||||
const u16* index = data.index;
|
||||
const u16* index_end = data.index + data.index_count;
|
||||
|
||||
u32 tmp_index[] = {0, 1, 2};
|
||||
static constexpr u16 tmp_index[] = {0, 1, 2};
|
||||
|
||||
bool scissor_test = !data.bbox.eq(data.bbox.rintersect(data.scissor));
|
||||
|
||||
|
@@ -261,7 +261,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
|
|||
}
|
||||
|
||||
template <bool scissor_test>
|
||||
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count)
|
||||
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u16* index, int index_count)
|
||||
{
|
||||
m_primcount++;
|
||||
|
||||
|
@@ -286,7 +286,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
|||
}
|
||||
else
|
||||
{
|
||||
u32 tmp_index[1] = {0};
|
||||
static constexpr u16 tmp_index[1] = {0};
|
||||
|
||||
for (int i = 0; i < vertex_count; i++, vertex++)
|
||||
{
|
||||
|
@@ -307,7 +307,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
|||
}
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
|
||||
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u16* index)
|
||||
{
|
||||
m_primcount++;
|
||||
|
||||
|
@@ -425,7 +425,7 @@ static const u8 s_ysort[8][4] =
|
|||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
|
||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index)
|
||||
{
|
||||
m_primcount++;
|
||||
|
||||
|
@@ -606,7 +606,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRIC
|
|||
|
||||
#else
|
||||
|
||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
|
||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index)
|
||||
{
|
||||
m_primcount++;
|
||||
|
||||
|
@@ -784,7 +784,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& RESTRICT
|
|||
|
||||
#endif
|
||||
|
||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
|
||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u16* index)
|
||||
{
|
||||
m_primcount++;
|
||||
|
||||
|
@@ -1082,7 +1082,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
|
|||
AddScanlineInfo(e, pixels, left, top);
|
||||
}
|
||||
|
||||
void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge /* = false */)
|
||||
void GSRasterizer::Flush(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, bool edge /* = false */)
|
||||
{
|
||||
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
|
||||
|
||||
|
|
|
@@ -38,7 +38,7 @@ public:
|
|||
u8* buff;
|
||||
GSVertexSW* vertex;
|
||||
int vertex_count;
|
||||
u32* index;
|
||||
u16* index;
|
||||
int index_count;
|
||||
u64 frame;
|
||||
u64 start;
|
||||
|
@@ -101,10 +101,10 @@ protected:
|
|||
__forceinline bool HasEdge() const { return (m_draw_edge != nullptr); }
|
||||
|
||||
template <bool scissor_test>
|
||||
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count);
|
||||
void DrawLine(const GSVertexSW* vertex, const u32* index);
|
||||
void DrawTriangle(const GSVertexSW* vertex, const u32* index);
|
||||
void DrawSprite(const GSVertexSW* vertex, const u32* index);
|
||||
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u16* index, int index_count);
|
||||
void DrawLine(const GSVertexSW* vertex, const u16* index);
|
||||
void DrawTriangle(const GSVertexSW* vertex, const u16* index);
|
||||
void DrawSprite(const GSVertexSW* vertex, const u16* index);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0);
|
||||
|
@@ -115,7 +115,7 @@ protected:
|
|||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
||||
|
||||
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
||||
__forceinline void Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge = false);
|
||||
__forceinline void Flush(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, bool edge = false);
|
||||
|
||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
|
|
|
@@ -343,7 +343,7 @@ void GSRendererSW::Draw()
|
|||
sd->buff = (u8*)m_vertex_heap.alloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(u32) * m_index.tail, 64);
|
||||
sd->vertex = (GSVertexSW*)sd->buff;
|
||||
sd->vertex_count = m_vertex.next;
|
||||
sd->index = (u32*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
|
||||
sd->index = (u16*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
|
||||
sd->index_count = m_index.tail;
|
||||
sd->scanmsk_value = m_draw_env->SCANMSK.MSK;
|
||||
|
||||
|
@@ -354,7 +354,7 @@ void GSRendererSW::Draw()
|
|||
|
||||
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex.buff, m_vertex.next);
|
||||
|
||||
memcpy(sd->index, m_index.buff, sizeof(u32) * m_index.tail);
|
||||
std::memcpy(sd->index, m_index.buff, sizeof(u16) * m_index.tail);
|
||||
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||
|
|
|
@@ -210,7 +210,7 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
|
|||
{
|
||||
// GSVector4 p = vertex[index[1]].p;
|
||||
|
||||
mov(eax, ptr[_index + sizeof(u32) * 1]);
|
||||
movzx(eax, word[_index + sizeof(u16) * 1]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
add(rax, _64_vertex);
|
||||
|
||||
|
@@ -299,7 +299,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
|||
{
|
||||
// GSVector4 p = vertex[index[1]].p;
|
||||
|
||||
mov(eax, ptr[_index + sizeof(u32) * 1]);
|
||||
movzx(eax, word[_index + sizeof(u16) * 1]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
add(rax, _64_vertex);
|
||||
|
||||
|
@@ -504,7 +504,7 @@ void GSSetupPrimCodeGenerator2::Color()
|
|||
|
||||
if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
|
||||
{
|
||||
mov(eax, ptr[_index + sizeof(u32) * last]);
|
||||
movzx(eax, word[_index + sizeof(u16) * last]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
add(rax, _64_vertex);
|
||||
}
|
||||
|
|
|
@@ -955,13 +955,13 @@ void GSDeviceVK::DoMultiStretchRects(
|
|||
{
|
||||
// Set up vertices first.
|
||||
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
|
||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
|
||||
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
|
||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
||||
{
|
||||
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
|
||||
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
|
||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
||||
{
|
||||
pxFailRel("Failed to reserve space for vertices");
|
||||
}
|
||||
|
@@ -971,7 +971,7 @@ void GSDeviceVK::DoMultiStretchRects(
|
|||
// Don't use primitive restart here, it ends up slower on some drivers.
|
||||
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
|
||||
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
|
||||
u32* idx = reinterpret_cast<u32*>(m_index_stream_buffer.GetCurrentHostPointer());
|
||||
u16* idx = reinterpret_cast<u16*>(m_index_stream_buffer.GetCurrentHostPointer());
|
||||
u32 icount = 0;
|
||||
u32 vcount = 0;
|
||||
for (u32 i = 0; i < num_rects; i++)
|
||||
|
@@ -1001,11 +1001,11 @@ void GSDeviceVK::DoMultiStretchRects(
|
|||
|
||||
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
|
||||
m_vertex.count = vcount;
|
||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
|
||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
||||
m_index.count = icount;
|
||||
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
|
||||
m_index_stream_buffer.CommitMemory(icount * sizeof(u32));
|
||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
|
||||
m_index_stream_buffer.CommitMemory(icount * sizeof(u16));
|
||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
|
||||
|
||||
// Even though we're batching, a cmdbuffer submit could've messed this up.
|
||||
const GSVector4i rc(dTex->GetRect());
|
||||
|
@@ -1368,21 +1368,21 @@ void GSDeviceVK::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
|||
|
||||
// Uploads `count` indices from `index` into m_index_stream_buffer, records where
// they landed in m_index.start / m_index.count, and passes the stream buffer to
// SetIndexBuffer().
// NOTE(review): this span is a rendered diff, so several statements appear twice.
// Going by the commit title ("Use 16-bit indices instead of 32-bit"), the
// u32 / VK_INDEX_TYPE_UINT32 line of each pair is the removed side and the
// u16 / VK_INDEX_TYPE_UINT16 line is its replacement.
void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count)
|
||||
{
|
||||
// Upload size in bytes; note that `count` is narrowed to u32 by the cast.
const u32 size = sizeof(u32) * static_cast<u32>(count);
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
|
||||
const u32 size = sizeof(u16) * static_cast<u32>(count);
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
||||
{
|
||||
// First reservation failed: call ExecuteCommandBufferAndRestartRenderPass()
// and retry once (presumably this frees stream-buffer space; confirm).
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to index buffer");
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
||||
// NOTE(review): the message says "vertices" although this path reserves
// index-buffer space; looks copy-pasted, confirm before changing.
pxFailRel("Failed to reserve space for vertices");
|
||||
}
|
||||
|
||||
// Current stream-buffer offset divided by the index size gives the first
// index of this upload (assumes GetCurrentOffset() is in bytes; confirm).
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
|
||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
||||
m_index.count = count;
|
||||
|
||||
// Copy the caller's indices to the buffer's current host pointer, then commit
// exactly `size` bytes.
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
|
||||
m_index_stream_buffer.CommitMemory(size);
|
||||
|
||||
// The index type passed here has to match the element size used above.
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
|
||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
|
||||
}
|
||||
|
||||
void GSDeviceVK::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
|
||||
|
@@ -2314,9 +2314,6 @@ void GSDeviceVK::RenderImGui()
|
|||
m_dirty_flags |= DIRTY_FLAG_UTILITY_TEXTURE;
|
||||
}
|
||||
|
||||
// imgui uses 16-bit indices
|
||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
|
||||
|
||||
// this is for presenting, we don't want to screw with the viewport/scissor set by display
|
||||
m_dirty_flags &= ~(DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR);
|
||||
|
||||
|
@@ -2338,19 +2335,8 @@ void GSDeviceVK::RenderImGui()
|
|||
m_vertex_stream_buffer.CommitMemory(size);
|
||||
}
|
||||
|
||||
u32 index_offset;
|
||||
{
|
||||
const u32 size = sizeof(ImDrawIdx) * static_cast<u32>(cmd_list->IdxBuffer.Size);
|
||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(ImDrawIdx)))
|
||||
{
|
||||
Console.Warning("Skipping ImGui draw because of no vertex buffer space");
|
||||
return;
|
||||
}
|
||||
|
||||
index_offset = m_index_stream_buffer.GetCurrentOffset() / sizeof(ImDrawIdx);
|
||||
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), cmd_list->IdxBuffer.Data, size);
|
||||
m_index_stream_buffer.CommitMemory(size);
|
||||
}
|
||||
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
||||
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
||||
|
||||
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
||||
{
|
||||
|
@@ -2374,7 +2360,7 @@ void GSDeviceVK::RenderImGui()
|
|||
if (ApplyUtilityState())
|
||||
{
|
||||
vkCmdDrawIndexed(g_vulkan_context->GetCurrentCommandBuffer(), pcmd->ElemCount, 1,
|
||||
index_offset + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
||||
m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -2779,7 +2765,7 @@ void GSDeviceVK::InitializeState()
|
|||
m_vertex_buffer_offset = 0;
|
||||
m_index_buffer = m_index_stream_buffer.GetBuffer();
|
||||
m_index_buffer_offset = 0;
|
||||
m_index_type = VK_INDEX_TYPE_UINT32;
|
||||
m_index_type = VK_INDEX_TYPE_UINT16;
|
||||
m_current_framebuffer = VK_NULL_HANDLE;
|
||||
m_current_render_pass = VK_NULL_HANDLE;
|
||||
|
||||
|
@@ -3848,7 +3834,7 @@ void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
|||
{
|
||||
m_index.start = 0;
|
||||
m_index.count = config.nindices;
|
||||
SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT32);
|
||||
SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT16);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue