mirror of https://github.com/PCSX2/pcsx2.git
GS: Use 16-bit indices instead of 32-bit
Save some bandwidth.
This commit is contained in:
parent
72f70d4789
commit
839b482cb5
|
@ -2638,31 +2638,31 @@ void GSState::GrowVertexBuffer()
|
||||||
{
|
{
|
||||||
const u32 maxcount = std::max<u32>(m_vertex.maxcount * 3 / 2, 10000);
|
const u32 maxcount = std::max<u32>(m_vertex.maxcount * 3 / 2, 10000);
|
||||||
|
|
||||||
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 32);
|
GSVertex* vertex = static_cast<GSVertex*>(_aligned_malloc(sizeof(GSVertex) * maxcount, 32));
|
||||||
// Worst case index list is a list of points with vs expansion, 6 indices per point
|
// Worst case index list is a list of points with vs expansion, 6 indices per point
|
||||||
u32* index = (u32*)_aligned_malloc(sizeof(u32) * maxcount * 6, 32);
|
u16* index = static_cast<u16*>(_aligned_malloc(sizeof(u16) * maxcount * 6, 32));
|
||||||
|
|
||||||
if (vertex == NULL || index == NULL)
|
if (!vertex || !index)
|
||||||
{
|
{
|
||||||
const u32 vert_byte_count = sizeof(GSVertex) * maxcount;
|
const u32 vert_byte_count = sizeof(GSVertex) * maxcount;
|
||||||
const u32 idx_byte_count = sizeof(u32) * maxcount * 3;
|
const u32 idx_byte_count = sizeof(u16) * maxcount * 3;
|
||||||
|
|
||||||
Console.Error("GS: failed to allocate %zu bytes for verticles and %zu for indices.",
|
Console.Error("GS: failed to allocate %zu bytes for vertices and %zu for indices.",
|
||||||
vert_byte_count, idx_byte_count);
|
vert_byte_count, idx_byte_count);
|
||||||
|
|
||||||
throw GSError();
|
throw GSError();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_vertex.buff != NULL)
|
if (m_vertex.buff)
|
||||||
{
|
{
|
||||||
memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
|
std::memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
|
||||||
|
|
||||||
_aligned_free(m_vertex.buff);
|
_aligned_free(m_vertex.buff);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_index.buff != NULL)
|
if (m_index.buff)
|
||||||
{
|
{
|
||||||
memcpy(index, m_index.buff, sizeof(u32) * m_index.tail);
|
std::memcpy(index, m_index.buff, sizeof(u16) * m_index.tail);
|
||||||
|
|
||||||
_aligned_free(m_index.buff);
|
_aligned_free(m_index.buff);
|
||||||
}
|
}
|
||||||
|
@ -3063,21 +3063,24 @@ static constexpr u32 MaxVerticesForPrim(u32 prim)
|
||||||
{
|
{
|
||||||
switch (prim)
|
switch (prim)
|
||||||
{
|
{
|
||||||
|
// Four indices per 1 vertex.
|
||||||
case GS_POINTLIST:
|
case GS_POINTLIST:
|
||||||
case GS_INVALID:
|
case GS_INVALID:
|
||||||
// Needed due to expansion in hardware renderers.
|
|
||||||
|
// Indices are shifted left by 2 to form quads.
|
||||||
|
case GS_LINELIST:
|
||||||
|
case GS_LINESTRIP:
|
||||||
return (std::numeric_limits<u16>::max() / 4) - 4;
|
return (std::numeric_limits<u16>::max() / 4) - 4;
|
||||||
|
|
||||||
|
// Four indices per two vertices.
|
||||||
case GS_SPRITE:
|
case GS_SPRITE:
|
||||||
return (std::numeric_limits<u16>::max() / 2) - 2;
|
return (std::numeric_limits<u16>::max() / 2) - 2;
|
||||||
|
|
||||||
case GS_LINELIST:
|
|
||||||
case GS_LINESTRIP:
|
|
||||||
case GS_TRIANGLELIST:
|
case GS_TRIANGLELIST:
|
||||||
case GS_TRIANGLESTRIP:
|
case GS_TRIANGLESTRIP:
|
||||||
case GS_TRIANGLEFAN:
|
case GS_TRIANGLEFAN:
|
||||||
default:
|
default:
|
||||||
return 0;
|
return (std::numeric_limits<u16>::max() - 3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3229,19 +3232,19 @@ __forceinline void GSState::VertexKick(u32 skip)
|
||||||
m_backed_up_ctx = m_env.PRIM.CTXT;
|
m_backed_up_ctx = m_env.PRIM.CTXT;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32* RESTRICT buff = &m_index.buff[m_index.tail];
|
u16* RESTRICT buff = &m_index.buff[m_index.tail];
|
||||||
|
|
||||||
switch (prim)
|
switch (prim)
|
||||||
{
|
{
|
||||||
case GS_POINTLIST:
|
case GS_POINTLIST:
|
||||||
buff[0] = head + 0;
|
buff[0] = static_cast<u16>(head + 0);
|
||||||
m_vertex.head = head + 1;
|
m_vertex.head = head + 1;
|
||||||
m_vertex.next = head + 1;
|
m_vertex.next = head + 1;
|
||||||
m_index.tail += 1;
|
m_index.tail += 1;
|
||||||
break;
|
break;
|
||||||
case GS_LINELIST:
|
case GS_LINELIST:
|
||||||
buff[0] = head + (index_swap ? 1 : 0);
|
buff[0] = static_cast<u16>(head + (index_swap ? 1 : 0));
|
||||||
buff[1] = head + (index_swap ? 0 : 1);
|
buff[1] = static_cast<u16>(head + (index_swap ? 0 : 1));
|
||||||
m_vertex.head = head + 2;
|
m_vertex.head = head + 2;
|
||||||
m_vertex.next = head + 2;
|
m_vertex.next = head + 2;
|
||||||
m_index.tail += 2;
|
m_index.tail += 2;
|
||||||
|
@ -3254,16 +3257,16 @@ __forceinline void GSState::VertexKick(u32 skip)
|
||||||
head = next;
|
head = next;
|
||||||
m_vertex.tail = next + 2;
|
m_vertex.tail = next + 2;
|
||||||
}
|
}
|
||||||
buff[0] = head + (index_swap ? 1 : 0);
|
buff[0] = static_cast<u16>(head + (index_swap ? 1 : 0));
|
||||||
buff[1] = head + (index_swap ? 0 : 1);
|
buff[1] = static_cast<u16>(head + (index_swap ? 0 : 1));
|
||||||
m_vertex.head = head + 1;
|
m_vertex.head = head + 1;
|
||||||
m_vertex.next = head + 2;
|
m_vertex.next = head + 2;
|
||||||
m_index.tail += 2;
|
m_index.tail += 2;
|
||||||
break;
|
break;
|
||||||
case GS_TRIANGLELIST:
|
case GS_TRIANGLELIST:
|
||||||
buff[0] = head + (index_swap ? 2 : 0);
|
buff[0] = static_cast<u16>(head + (index_swap ? 2 : 0));
|
||||||
buff[1] = head + 1;
|
buff[1] = static_cast<u16>(head + 1);
|
||||||
buff[2] = head + (index_swap ? 0 : 2);
|
buff[2] = static_cast<u16>(head + (index_swap ? 0 : 2));
|
||||||
m_vertex.head = head + 3;
|
m_vertex.head = head + 3;
|
||||||
m_vertex.next = head + 3;
|
m_vertex.next = head + 3;
|
||||||
m_index.tail += 3;
|
m_index.tail += 3;
|
||||||
|
@ -3277,24 +3280,24 @@ __forceinline void GSState::VertexKick(u32 skip)
|
||||||
head = next;
|
head = next;
|
||||||
m_vertex.tail = next + 3;
|
m_vertex.tail = next + 3;
|
||||||
}
|
}
|
||||||
buff[0] = head + (index_swap ? 2 : 0);
|
buff[0] = static_cast<u16>(head + (index_swap ? 2 : 0));
|
||||||
buff[1] = head + 1;
|
buff[1] = static_cast<u16>(head + 1);
|
||||||
buff[2] = head + (index_swap ? 0 : 2);
|
buff[2] = static_cast<u16>(head + (index_swap ? 0 : 2));
|
||||||
m_vertex.head = head + 1;
|
m_vertex.head = head + 1;
|
||||||
m_vertex.next = head + 3;
|
m_vertex.next = head + 3;
|
||||||
m_index.tail += 3;
|
m_index.tail += 3;
|
||||||
break;
|
break;
|
||||||
case GS_TRIANGLEFAN:
|
case GS_TRIANGLEFAN:
|
||||||
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
|
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
|
||||||
buff[0] = index_swap ? (tail - 1) : (head + 0);
|
buff[0] = static_cast<u16>(index_swap ? (tail - 1) : (head + 0));
|
||||||
buff[1] = tail - 2;
|
buff[1] = static_cast<u16>(tail - 2);
|
||||||
buff[2] = index_swap ? (head + 0) : (tail - 1);
|
buff[2] = static_cast<u16>(index_swap ? (head + 0) : (tail - 1));
|
||||||
m_vertex.next = tail;
|
m_vertex.next = tail;
|
||||||
m_index.tail += 3;
|
m_index.tail += 3;
|
||||||
break;
|
break;
|
||||||
case GS_SPRITE:
|
case GS_SPRITE:
|
||||||
buff[0] = head + 0;
|
buff[0] = static_cast<u16>(head + 0);
|
||||||
buff[1] = head + 1;
|
buff[1] = static_cast<u16>(head + 1);
|
||||||
m_vertex.head = head + 2;
|
m_vertex.head = head + 2;
|
||||||
m_vertex.next = head + 2;
|
m_vertex.next = head + 2;
|
||||||
m_index.tail += 2;
|
m_index.tail += 2;
|
||||||
|
|
|
@ -158,7 +158,7 @@ protected:
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
u32* buff;
|
u16* buff;
|
||||||
u32 tail;
|
u32 tail;
|
||||||
} m_index = {};
|
} m_index = {};
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,16 @@ class alignas(16) GSVector4i
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr GSVector4i(cxpr_init_tag, short s0, short s1, short s2, short s3, short s4, short s5, short s6, short s7)
|
||||||
|
: I16{s0, s1, s2, s3, s4, s5, s6, s7}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr GSVector4i(cxpr_init_tag, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
|
||||||
|
: I8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
|
@ -62,6 +72,16 @@ public:
|
||||||
return GSVector4i(cxpr_init, x, x, x, x);
|
return GSVector4i(cxpr_init, x, x, x, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr static GSVector4i cxpr16(short s0, short s1, short s2, short s3, short s4, short s5, short s6, short s7)
|
||||||
|
{
|
||||||
|
return GSVector4i(cxpr_init, s0, s1, s2, s3, s4, s5, s6, s7);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr static GSVector4i cxpr8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
|
||||||
|
{
|
||||||
|
return GSVector4i(cxpr_init, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15);
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline GSVector4i(int x, int y, int z, int w)
|
__forceinline GSVector4i(int x, int y, int z, int w)
|
||||||
{
|
{
|
||||||
m = _mm_set_epi32(w, z, y, x);
|
m = _mm_set_epi32(w, z, y, x);
|
||||||
|
@ -2001,6 +2021,10 @@ public:
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector4i broadcast16(u16 value)
|
||||||
|
{
|
||||||
|
return GSVector4i(_mm_set1_epi16(value));
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline static GSVector4i zero() { return GSVector4i(_mm_setzero_si128()); }
|
__forceinline static GSVector4i zero() { return GSVector4i(_mm_setzero_si128()); }
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include "GS/GSGL.h"
|
#include "GS/GSGL.h"
|
||||||
#include "GS/GS.h"
|
#include "GS/GS.h"
|
||||||
#include "Host.h"
|
#include "Host.h"
|
||||||
|
#include "common/Align.h"
|
||||||
#include "common/StringUtil.h"
|
#include "common/StringUtil.h"
|
||||||
|
|
||||||
#include "imgui.h"
|
#include "imgui.h"
|
||||||
|
@ -165,9 +166,9 @@ std::string GSDevice::GetFullscreenModeString(u32 width, u32 height, float refre
|
||||||
|
|
||||||
void GSDevice::GenerateExpansionIndexBuffer(void* buffer)
|
void GSDevice::GenerateExpansionIndexBuffer(void* buffer)
|
||||||
{
|
{
|
||||||
static constexpr u32 MAX_INDEX = std::numeric_limits<u16>::max();
|
static constexpr u32 MAX_INDEX = EXPAND_BUFFER_SIZE / 6 / sizeof(u16);
|
||||||
|
|
||||||
u32* idx_buffer = static_cast<u32*>(buffer);
|
u16* idx_buffer = static_cast<u16*>(buffer);
|
||||||
for (u32 i = 0; i < MAX_INDEX; i++)
|
for (u32 i = 0; i < MAX_INDEX; i++)
|
||||||
{
|
{
|
||||||
const u32 base = i * 4;
|
const u32 base = i * 4;
|
||||||
|
|
|
@ -627,7 +627,7 @@ struct alignas(16) GSHWDrawConfig
|
||||||
GSTexture* tex; ///< Source texture
|
GSTexture* tex; ///< Source texture
|
||||||
GSTexture* pal; ///< Palette texture
|
GSTexture* pal; ///< Palette texture
|
||||||
const GSVertex* verts;///< Vertices to draw
|
const GSVertex* verts;///< Vertices to draw
|
||||||
const u32* indices; ///< Indices to draw
|
const u16* indices; ///< Indices to draw
|
||||||
u32 nverts; ///< Number of vertices
|
u32 nverts; ///< Number of vertices
|
||||||
u32 nindices; ///< Number of indices
|
u32 nindices; ///< Number of indices
|
||||||
u32 indices_per_prim; ///< Number of indices that make up one primitive
|
u32 indices_per_prim; ///< Number of indices that make up one primitive
|
||||||
|
@ -749,7 +749,7 @@ protected:
|
||||||
static constexpr float MAD_SENSITIVITY = 0.08f;
|
static constexpr float MAD_SENSITIVITY = 0.08f;
|
||||||
static constexpr u32 MAX_POOLED_TEXTURES = 300;
|
static constexpr u32 MAX_POOLED_TEXTURES = 300;
|
||||||
static constexpr u32 NUM_CAS_CONSTANTS = 12; // 8 plus src offset x/y, 16 byte alignment
|
static constexpr u32 NUM_CAS_CONSTANTS = 12; // 8 plus src offset x/y, 16 byte alignment
|
||||||
static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u32) * std::numeric_limits<u16>::max() * 6;
|
static constexpr u32 EXPAND_BUFFER_SIZE = sizeof(u16) * 65532 * 6;
|
||||||
|
|
||||||
WindowInfo m_window_info;
|
WindowInfo m_window_info;
|
||||||
VsyncMode m_vsync_mode = VsyncMode::Off;
|
VsyncMode m_vsync_mode = VsyncMode::Off;
|
||||||
|
|
|
@ -24,7 +24,7 @@ GSVertexTrace::GSVertexTrace(const GSState* state, bool provoking_vertex_first)
|
||||||
MULTI_ISA_SELECT(GSVertexTracePopulateFunctions)(*this, provoking_vertex_first);
|
MULTI_ISA_SELECT(GSVertexTracePopulateFunctions)(*this, provoking_vertex_first);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSVertexTrace::Update(const void* vertex, const u32* index, int v_count, int i_count, GS_PRIM_CLASS primclass)
|
void GSVertexTrace::Update(const void* vertex, const u16* index, int v_count, int i_count, GS_PRIM_CLASS primclass)
|
||||||
{
|
{
|
||||||
if (i_count == 0)
|
if (i_count == 0)
|
||||||
return;
|
return;
|
||||||
|
@ -43,7 +43,7 @@ void GSVertexTrace::Update(const void* vertex, const u32* index, int v_count, in
|
||||||
// that feel big enough.
|
// that feel big enough.
|
||||||
if (!fst && !m_accurate_stq && m_min.t.z > 1e30)
|
if (!fst && !m_accurate_stq && m_min.t.z > 1e30)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Vertex Trace: float overflow detected ! min %e max %e\n", m_min.t.z, m_max.t.z);
|
Console.Warning("Vertex Trace: float overflow detected ! min %e max %e", m_min.t.z, m_max.t.z);
|
||||||
m_accurate_stq = true;
|
m_accurate_stq = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ public:
|
||||||
protected:
|
protected:
|
||||||
const GSState* m_state;
|
const GSState* m_state;
|
||||||
|
|
||||||
typedef void (*FindMinMaxPtr)(GSVertexTrace& vt, const void* vertex, const u32* index, int count);
|
typedef void (*FindMinMaxPtr)(GSVertexTrace& vt, const void* vertex, const u16* index, int count);
|
||||||
|
|
||||||
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ public:
|
||||||
public:
|
public:
|
||||||
GSVertexTrace(const GSState* state, bool provoking_vertex_first);
|
GSVertexTrace(const GSState* state, bool provoking_vertex_first);
|
||||||
|
|
||||||
void Update(const void* vertex, const u32* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
|
void Update(const void* vertex, const u16* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
|
||||||
|
|
||||||
bool IsLinear() const { return m_filter.opt_linear; }
|
bool IsLinear() const { return m_filter.opt_linear; }
|
||||||
bool IsRealLinear() const { return m_filter.linear; }
|
bool IsRealLinear() const { return m_filter.linear; }
|
||||||
|
|
|
@ -22,7 +22,7 @@ class CURRENT_ISA::GSVertexTraceFMM
|
||||||
static constexpr GSVector4 s_minmax = GSVector4::cxpr(FLT_MAX, -FLT_MAX, 0.f, 0.f);
|
static constexpr GSVector4 s_minmax = GSVector4::cxpr(FLT_MAX, -FLT_MAX, 0.f, 0.f);
|
||||||
|
|
||||||
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color, bool flat_swapped>
|
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color, bool flat_swapped>
|
||||||
static void FindMinMax(GSVertexTrace& vt, const void* vertex, const u32* index, int count);
|
static void FindMinMax(GSVertexTrace& vt, const void* vertex, const u16* index, int count);
|
||||||
|
|
||||||
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color>
|
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color>
|
||||||
static constexpr GSVertexTrace::FindMinMaxPtr GetFMM(bool provoking_vertex_first);
|
static constexpr GSVertexTrace::FindMinMaxPtr GetFMM(bool provoking_vertex_first);
|
||||||
|
@ -76,7 +76,7 @@ void GSVertexTraceFMM::Populate(GSVertexTrace& vt, bool provoking_vertex_first)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color, bool flat_swapped>
|
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color, bool flat_swapped>
|
||||||
void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u32* index, int count)
|
void GSVertexTraceFMM::FindMinMax(GSVertexTrace& vt, const void* vertex, const u16* index, int count)
|
||||||
{
|
{
|
||||||
const GSDrawingContext* context = vt.m_state->m_context;
|
const GSDrawingContext* context = vt.m_state->m_context;
|
||||||
|
|
||||||
|
|
|
@ -1440,7 +1440,7 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect
|
||||||
const u32 vertex_reserve_size = num_rects * 4;
|
const u32 vertex_reserve_size = num_rects * 4;
|
||||||
const u32 index_reserve_size = num_rects * 6;
|
const u32 index_reserve_size = num_rects * 6;
|
||||||
GSVertexPT1* verts = static_cast<GSVertexPT1*>(IAMapVertexBuffer(sizeof(GSVertexPT1), vertex_reserve_size));
|
GSVertexPT1* verts = static_cast<GSVertexPT1*>(IAMapVertexBuffer(sizeof(GSVertexPT1), vertex_reserve_size));
|
||||||
u32* idx = IAMapIndexBuffer(index_reserve_size);
|
u16* idx = IAMapIndexBuffer(index_reserve_size);
|
||||||
u32 icount = 0;
|
u32 icount = 0;
|
||||||
u32 vcount = 0;
|
u32 vcount = 0;
|
||||||
for (u32 i = 0; i < num_rects; i++)
|
for (u32 i = 0; i < num_rects; i++)
|
||||||
|
@ -1712,7 +1712,6 @@ void GSDevice11::RenderImGui()
|
||||||
const UINT vb_stride = sizeof(ImDrawVert);
|
const UINT vb_stride = sizeof(ImDrawVert);
|
||||||
const UINT vb_offset = 0;
|
const UINT vb_offset = 0;
|
||||||
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &vb_stride, &vb_offset);
|
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &vb_stride, &vb_offset);
|
||||||
m_ctx->IASetIndexBuffer(m_ib.get(), DXGI_FORMAT_R16_UINT, 0);
|
|
||||||
IASetInputLayout(m_imgui.il.get());
|
IASetInputLayout(m_imgui.il.get());
|
||||||
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||||
VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get());
|
VSSetShader(m_imgui.vs.get(), m_imgui.vs_cb.get());
|
||||||
|
@ -1756,16 +1755,8 @@ void GSDevice11::RenderImGui()
|
||||||
m_ctx->Unmap(m_vb.get(), 0);
|
m_ctx->Unmap(m_vb.get(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bit awkward, because this is using 16-bit indices, not 32-bit.
|
|
||||||
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
||||||
const u32 index_count = static_cast<u32>(cmd_list->IdxBuffer.Size + 1) / 2;
|
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
||||||
u32* index_map = IAMapIndexBuffer(index_count);
|
|
||||||
if (!index_map)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const u32 index_start = m_index.start * 2;
|
|
||||||
std::memcpy(index_map, cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size * sizeof(ImDrawIdx));
|
|
||||||
IAUnmapIndexBuffer(index_count);
|
|
||||||
|
|
||||||
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
||||||
{
|
{
|
||||||
|
@ -1787,14 +1778,13 @@ void GSDevice11::RenderImGui()
|
||||||
m_state.ps_sr_views[0] = static_cast<ID3D11ShaderResourceView*>(pcmd->GetTexID());
|
m_state.ps_sr_views[0] = static_cast<ID3D11ShaderResourceView*>(pcmd->GetTexID());
|
||||||
PSUpdateShaderState();
|
PSUpdateShaderState();
|
||||||
|
|
||||||
m_ctx->DrawIndexed(pcmd->ElemCount, index_start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset);
|
m_ctx->DrawIndexed(pcmd->ElemCount, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset);
|
m_ctx->IASetVertexBuffers(0, 1, m_vb.addressof(), &m_state.vb_stride, &vb_offset);
|
||||||
m_ctx->IASetIndexBuffer(m_state.index_buffer, DXGI_FORMAT_R32_UINT, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm)
|
void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm)
|
||||||
|
@ -1912,9 +1902,9 @@ bool GSDevice11::IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 cou
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32* GSDevice11::IAMapIndexBuffer(u32 count)
|
u16* GSDevice11::IAMapIndexBuffer(u32 count)
|
||||||
{
|
{
|
||||||
if (count > (INDEX_BUFFER_SIZE / sizeof(u32)))
|
if (count > (INDEX_BUFFER_SIZE / sizeof(u16)))
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
||||||
|
@ -1922,7 +1912,7 @@ u32* GSDevice11::IAMapIndexBuffer(u32 count)
|
||||||
m_index.start = m_ib_pos;
|
m_index.start = m_ib_pos;
|
||||||
m_ib_pos += count;
|
m_ib_pos += count;
|
||||||
|
|
||||||
if (m_ib_pos > (INDEX_BUFFER_SIZE / sizeof(u32)))
|
if (m_ib_pos > (INDEX_BUFFER_SIZE / sizeof(u16)))
|
||||||
{
|
{
|
||||||
m_index.start = 0;
|
m_index.start = 0;
|
||||||
m_ib_pos = count;
|
m_ib_pos = count;
|
||||||
|
@ -1933,7 +1923,7 @@ u32* GSDevice11::IAMapIndexBuffer(u32 count)
|
||||||
if (FAILED(m_ctx->Map(m_ib.get(), 0, type, 0, &m)))
|
if (FAILED(m_ctx->Map(m_ib.get(), 0, type, 0, &m)))
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
return static_cast<u32*>(m.pData) + m_index.start;
|
return static_cast<u16*>(m.pData) + m_index.start;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice11::IAUnmapIndexBuffer(u32 count)
|
void GSDevice11::IAUnmapIndexBuffer(u32 count)
|
||||||
|
@ -1944,11 +1934,11 @@ void GSDevice11::IAUnmapIndexBuffer(u32 count)
|
||||||
|
|
||||||
bool GSDevice11::IASetIndexBuffer(const void* index, u32 count)
|
bool GSDevice11::IASetIndexBuffer(const void* index, u32 count)
|
||||||
{
|
{
|
||||||
u32* map = IAMapIndexBuffer(count);
|
u16* map = IAMapIndexBuffer(count);
|
||||||
if (!map)
|
if (!map)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
std::memcpy(map, index, count * sizeof(u32));
|
std::memcpy(map, index, count * sizeof(u16));
|
||||||
IAUnmapIndexBuffer(count);
|
IAUnmapIndexBuffer(count);
|
||||||
IASetIndexBuffer(m_ib.get());
|
IASetIndexBuffer(m_ib.get());
|
||||||
return true;
|
return true;
|
||||||
|
@ -1958,7 +1948,7 @@ void GSDevice11::IASetIndexBuffer(ID3D11Buffer* buffer)
|
||||||
{
|
{
|
||||||
if (m_state.index_buffer != buffer)
|
if (m_state.index_buffer != buffer)
|
||||||
{
|
{
|
||||||
m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R32_UINT, 0);
|
m_ctx->IASetIndexBuffer(buffer, DXGI_FORMAT_R16_UINT, 0);
|
||||||
m_state.index_buffer = buffer;
|
m_state.index_buffer = buffer;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -343,7 +343,7 @@ public:
|
||||||
bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count);
|
bool IASetVertexBuffer(const void* vertex, u32 stride, u32 count);
|
||||||
bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count);
|
bool IASetExpandVertexBuffer(const void* vertex, u32 stride, u32 count);
|
||||||
|
|
||||||
u32* IAMapIndexBuffer(u32 count);
|
u16* IAMapIndexBuffer(u32 count);
|
||||||
void IAUnmapIndexBuffer(u32 count);
|
void IAUnmapIndexBuffer(u32 count);
|
||||||
bool IASetIndexBuffer(const void* index, u32 count);
|
bool IASetIndexBuffer(const void* index, u32 count);
|
||||||
void IASetIndexBuffer(ID3D11Buffer* buffer);
|
void IASetIndexBuffer(ID3D11Buffer* buffer);
|
||||||
|
|
|
@ -949,13 +949,13 @@ void GSDevice12::DoMultiStretchRects(
|
||||||
{
|
{
|
||||||
// Set up vertices first.
|
// Set up vertices first.
|
||||||
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
||||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
|
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
|
||||||
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
||||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
|
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
||||||
{
|
{
|
||||||
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
|
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
|
||||||
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
||||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
|
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
||||||
{
|
{
|
||||||
pxFailRel("Failed to reserve space for vertices");
|
pxFailRel("Failed to reserve space for vertices");
|
||||||
}
|
}
|
||||||
|
@ -965,7 +965,7 @@ void GSDevice12::DoMultiStretchRects(
|
||||||
// Don't use primitive restart here, it ends up slower on some drivers.
|
// Don't use primitive restart here, it ends up slower on some drivers.
|
||||||
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
|
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
|
||||||
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
|
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
|
||||||
u32* idx = reinterpret_cast<u32*>(m_index_stream_buffer.GetCurrentHostPointer());
|
u16* idx = reinterpret_cast<u16*>(m_index_stream_buffer.GetCurrentHostPointer());
|
||||||
u32 icount = 0;
|
u32 icount = 0;
|
||||||
u32 vcount = 0;
|
u32 vcount = 0;
|
||||||
for (u32 i = 0; i < num_rects; i++)
|
for (u32 i = 0; i < num_rects; i++)
|
||||||
|
@ -996,12 +996,12 @@ void GSDevice12::DoMultiStretchRects(
|
||||||
|
|
||||||
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
|
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
|
||||||
m_vertex.count = vcount;
|
m_vertex.count = vcount;
|
||||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
|
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
||||||
m_index.count = icount;
|
m_index.count = icount;
|
||||||
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
|
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
|
||||||
m_index_stream_buffer.CommitMemory(icount * sizeof(u32));
|
m_index_stream_buffer.CommitMemory(icount * sizeof(u16));
|
||||||
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(GSVertexPT1));
|
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(GSVertexPT1));
|
||||||
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R32_UINT);
|
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
|
||||||
|
|
||||||
// Even though we're batching, a cmdbuffer submit could've messed this up.
|
// Even though we're batching, a cmdbuffer submit could've messed this up.
|
||||||
const GSVector4i rc(dTex->GetRect());
|
const GSVector4i rc(dTex->GetRect());
|
||||||
|
@ -1422,22 +1422,10 @@ void GSDevice12::RenderImGui()
|
||||||
m_vertex_stream_buffer.CommitMemory(size);
|
m_vertex_stream_buffer.CommitMemory(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 index_offset;
|
|
||||||
{
|
|
||||||
const u32 size = sizeof(ImDrawIdx) * static_cast<u32>(cmd_list->IdxBuffer.Size);
|
|
||||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(ImDrawIdx)))
|
|
||||||
{
|
|
||||||
Console.Warning("Skipping ImGui draw because of no vertex buffer space");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
index_offset = m_index_stream_buffer.GetCurrentOffset() / sizeof(ImDrawIdx);
|
|
||||||
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), cmd_list->IdxBuffer.Data, size);
|
|
||||||
m_index_stream_buffer.CommitMemory(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(ImDrawVert));
|
SetVertexBuffer(m_vertex_stream_buffer.GetGPUPointer(), m_vertex_stream_buffer.GetSize(), sizeof(ImDrawVert));
|
||||||
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
|
|
||||||
|
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
||||||
|
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
||||||
|
|
||||||
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
||||||
{
|
{
|
||||||
|
@ -1468,7 +1456,7 @@ void GSDevice12::RenderImGui()
|
||||||
if (ApplyUtilityState())
|
if (ApplyUtilityState())
|
||||||
{
|
{
|
||||||
g_d3d12_context->GetCommandList()->DrawIndexedInstanced(
|
g_d3d12_context->GetCommandList()->DrawIndexedInstanced(
|
||||||
pcmd->ElemCount, 1, index_offset + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
pcmd->ElemCount, 1, m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1536,17 +1524,17 @@ void GSDevice12::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
||||||
|
|
||||||
void GSDevice12::IASetIndexBuffer(const void* index, size_t count)
|
void GSDevice12::IASetIndexBuffer(const void* index, size_t count)
|
||||||
{
|
{
|
||||||
const u32 size = sizeof(u32) * static_cast<u32>(count);
|
const u32 size = sizeof(u16) * static_cast<u32>(count);
|
||||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
|
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
||||||
{
|
{
|
||||||
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to index buffer");
|
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to index buffer");
|
||||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
|
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
||||||
pxFailRel("Failed to reserve space for vertices");
|
pxFailRel("Failed to reserve space for vertices");
|
||||||
}
|
}
|
||||||
|
|
||||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
|
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
||||||
m_index.count = count;
|
m_index.count = count;
|
||||||
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R32_UINT);
|
SetIndexBuffer(m_index_stream_buffer.GetGPUPointer(), m_index_stream_buffer.GetSize(), DXGI_FORMAT_R16_UINT);
|
||||||
|
|
||||||
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
|
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
|
||||||
m_index_stream_buffer.CommitMemory(size);
|
m_index_stream_buffer.CommitMemory(size);
|
||||||
|
@ -3353,7 +3341,7 @@ void GSDevice12::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
||||||
{
|
{
|
||||||
m_index.start = 0;
|
m_index.start = 0;
|
||||||
m_index.count = config.nindices;
|
m_index.count = config.nindices;
|
||||||
SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R32_UINT);
|
SetIndexBuffer(m_expand_index_buffer->GetGPUVirtualAddress(), EXPAND_BUFFER_SIZE, DXGI_FORMAT_R16_UINT);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -258,14 +258,10 @@ void GSRendererHW::Lines2Sprites()
|
||||||
int i = static_cast<int>(count) * 2 - 4;
|
int i = static_cast<int>(count) * 2 - 4;
|
||||||
GSVertex* s = &m_vertex.buff[count - 2];
|
GSVertex* s = &m_vertex.buff[count - 2];
|
||||||
GSVertex* q = &m_vertex.buff[count * 2 - 4];
|
GSVertex* q = &m_vertex.buff[count * 2 - 4];
|
||||||
u32* RESTRICT index = &m_index.buff[count * 3 - 6];
|
u16* RESTRICT index = &m_index.buff[count * 3 - 6];
|
||||||
|
|
||||||
alignas(16) static constexpr std::array<int, 8> tri_normal_indices = {{0, 1, 2, 1, 2, 3}};
|
// Sprites are flat shaded, so the provoking vertex doesn't matter here.
|
||||||
alignas(16) static constexpr std::array<int, 8> tri_swapped_indices = {{0, 1, 2, 1, 2, 3}};
|
constexpr GSVector4i indices = GSVector4i::cxpr16(0, 1, 2, 1, 2, 3, 0, 0);
|
||||||
const bool index_swap = !g_gs_device->Features().provoking_vertex_last;
|
|
||||||
const int* tri_indices = index_swap ? tri_swapped_indices.data() : tri_normal_indices.data();
|
|
||||||
const GSVector4i indices_low(GSVector4i::load<true>(tri_indices));
|
|
||||||
const GSVector4i indices_high(GSVector4i::loadl(tri_indices + 4));
|
|
||||||
|
|
||||||
for (; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
|
for (; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
|
||||||
{
|
{
|
||||||
|
@ -310,9 +306,10 @@ void GSRendererHW::Lines2Sprites()
|
||||||
q[1] = v0;
|
q[1] = v0;
|
||||||
q[2] = v1;
|
q[2] = v1;
|
||||||
|
|
||||||
const GSVector4i i_splat(i);
|
const GSVector4i this_indices = GSVector4i::broadcast16(i).add16(indices);
|
||||||
GSVector4i::store<false>(index, i_splat + indices_low);
|
const int high = this_indices.extract32<2>();
|
||||||
GSVector4i::storel(index + 4, i_splat + indices_high);
|
GSVector4i::storel(index, this_indices);
|
||||||
|
std::memcpy(&index[4], &high, sizeof(high));
|
||||||
}
|
}
|
||||||
|
|
||||||
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
|
m_vertex.head = m_vertex.tail = m_vertex.next = count * 2;
|
||||||
|
@ -322,26 +319,30 @@ void GSRendererHW::Lines2Sprites()
|
||||||
|
|
||||||
void GSRendererHW::ExpandLineIndices()
|
void GSRendererHW::ExpandLineIndices()
|
||||||
{
|
{
|
||||||
const u32 process_count = (m_index.tail + 3) / 4 * 4;
|
const u32 process_count = (m_index.tail + 7) / 8 * 8;
|
||||||
const u32 expansion_factor = 3;
|
const u32 expansion_factor = 3;
|
||||||
m_index.tail *= expansion_factor;
|
m_index.tail *= expansion_factor;
|
||||||
GSVector4i* end = reinterpret_cast<GSVector4i*>(m_index.buff);
|
GSVector4i* end = reinterpret_cast<GSVector4i*>(m_index.buff);
|
||||||
GSVector4i* read = reinterpret_cast<GSVector4i*>(m_index.buff + process_count);
|
GSVector4i* read = reinterpret_cast<GSVector4i*>(m_index.buff + process_count);
|
||||||
GSVector4i* write = reinterpret_cast<GSVector4i*>(m_index.buff + process_count * expansion_factor);
|
GSVector4i* write = reinterpret_cast<GSVector4i*>(m_index.buff + process_count * expansion_factor);
|
||||||
|
|
||||||
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
|
constexpr GSVector4i mask0 = GSVector4i::cxpr8(0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5);
|
||||||
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
|
constexpr GSVector4i mask1 = GSVector4i::cxpr8(6, 7, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 8, 9);
|
||||||
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
|
constexpr GSVector4i mask2 = GSVector4i::cxpr8(10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 12, 13, 14, 15, 14, 15);
|
||||||
|
|
||||||
|
constexpr GSVector4i low0 = GSVector4i::cxpr16(0, 1, 2, 1, 2, 3, 0, 1);
|
||||||
|
constexpr GSVector4i low1 = GSVector4i::cxpr16(2, 1, 2, 3, 0, 1, 2, 1);
|
||||||
|
constexpr GSVector4i low2 = GSVector4i::cxpr16(2, 3, 0, 1, 2, 1, 2, 3);
|
||||||
|
|
||||||
while (read > end)
|
while (read > end)
|
||||||
{
|
{
|
||||||
read -= 1;
|
read -= 1;
|
||||||
write -= expansion_factor;
|
write -= expansion_factor;
|
||||||
|
|
||||||
const GSVector4i in = read->sll32(2);
|
const GSVector4i in = read->sll16(2);
|
||||||
write[0] = in.xxyx() | low0;
|
write[0] = in.shuffle8(mask0) | low0;
|
||||||
write[1] = in.yyzz() | low1;
|
write[1] = in.shuffle8(mask1) | low1;
|
||||||
write[2] = in.wzww() | low2;
|
write[2] = in.shuffle8(mask2) | low2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2576,7 +2576,7 @@ bool GSTextureCache::ShuffleMove(u32 BP, u32 BW, u32 PSM, int sx, int sy, int dx
|
||||||
|
|
||||||
#undef V
|
#undef V
|
||||||
|
|
||||||
static constexpr u32 indices[6] = { 0, 1, 2, 2, 1, 3 };
|
static constexpr u16 indices[6] = { 0, 1, 2, 2, 1, 3 };
|
||||||
|
|
||||||
// If we ever do this sort of thing somewhere else, extract this to a helper function.
|
// If we ever do this sort of thing somewhere else, extract this to a helper function.
|
||||||
GSHWDrawConfig config;
|
GSHWDrawConfig config;
|
||||||
|
|
|
@ -2178,7 +2178,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
|
||||||
textureBarrier(enc);
|
textureBarrier(enc);
|
||||||
[enc drawIndexedPrimitives:topology
|
[enc drawIndexedPrimitives:topology
|
||||||
indexCount:count
|
indexCount:count
|
||||||
indexType:MTLIndexTypeUInt32
|
indexType:MTLIndexTypeUInt16
|
||||||
indexBuffer:buffer
|
indexBuffer:buffer
|
||||||
indexBufferOffset:off + p * sizeof(*config.indices)];
|
indexBufferOffset:off + p * sizeof(*config.indices)];
|
||||||
p += count;
|
p += count;
|
||||||
|
@ -2200,7 +2200,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
|
||||||
textureBarrier(enc);
|
textureBarrier(enc);
|
||||||
[enc drawIndexedPrimitives:topology
|
[enc drawIndexedPrimitives:topology
|
||||||
indexCount:config.indices_per_prim
|
indexCount:config.indices_per_prim
|
||||||
indexType:MTLIndexTypeUInt32
|
indexType:MTLIndexTypeUInt16
|
||||||
indexBuffer:buffer
|
indexBuffer:buffer
|
||||||
indexBufferOffset:off + p * sizeof(*config.indices)];
|
indexBufferOffset:off + p * sizeof(*config.indices)];
|
||||||
}
|
}
|
||||||
|
@ -2217,7 +2217,7 @@ void GSDeviceMTL::SendHWDraw(GSHWDrawConfig& config, id<MTLRenderCommandEncoder>
|
||||||
|
|
||||||
[enc drawIndexedPrimitives:topology
|
[enc drawIndexedPrimitives:topology
|
||||||
indexCount:config.nindices
|
indexCount:config.nindices
|
||||||
indexType:MTLIndexTypeUInt32
|
indexType:MTLIndexTypeUInt16
|
||||||
indexBuffer:buffer
|
indexBuffer:buffer
|
||||||
indexBufferOffset:off];
|
indexBufferOffset:off];
|
||||||
|
|
||||||
|
|
|
@ -876,8 +876,8 @@ void GSDeviceOGL::DrawPrimitive()
|
||||||
void GSDeviceOGL::DrawIndexedPrimitive()
|
void GSDeviceOGL::DrawIndexedPrimitive()
|
||||||
{
|
{
|
||||||
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
||||||
glDrawElementsBaseVertex(m_draw_topology, static_cast<u32>(m_index.count), GL_UNSIGNED_INT,
|
glDrawElementsBaseVertex(m_draw_topology, static_cast<u32>(m_index.count), GL_UNSIGNED_SHORT,
|
||||||
reinterpret_cast<void*>(static_cast<u32>(m_index.start) * sizeof(u32)), static_cast<GLint>(m_vertex.start));
|
reinterpret_cast<void*>(static_cast<u32>(m_index.start) * sizeof(u16)), static_cast<GLint>(m_vertex.start));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
|
void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
|
||||||
|
@ -885,8 +885,8 @@ void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
|
||||||
//ASSERT(offset + count <= (int)m_index.count);
|
//ASSERT(offset + count <= (int)m_index.count);
|
||||||
|
|
||||||
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
g_perfmon.Put(GSPerfMon::DrawCalls, 1);
|
||||||
glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_INT,
|
glDrawElementsBaseVertex(m_draw_topology, count, GL_UNSIGNED_SHORT,
|
||||||
reinterpret_cast<void*>((static_cast<u32>(m_index.start) + static_cast<u32>(offset)) * sizeof(u32)),
|
reinterpret_cast<void*>((static_cast<u32>(m_index.start) + static_cast<u32>(offset)) * sizeof(u16)),
|
||||||
static_cast<GLint>(m_vertex.start));
|
static_cast<GLint>(m_vertex.start));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1548,15 +1548,15 @@ void GSDeviceOGL::DrawMultiStretchRects(
|
||||||
void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds)
|
void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds)
|
||||||
{
|
{
|
||||||
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
||||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
|
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
|
||||||
auto vertex_map = m_vertex_stream_buffer->Map(sizeof(GSVertexPT1), vertex_reserve_size);
|
auto vertex_map = m_vertex_stream_buffer->Map(sizeof(GSVertexPT1), vertex_reserve_size);
|
||||||
auto index_map = m_index_stream_buffer->Map(sizeof(u32), index_reserve_size);
|
auto index_map = m_index_stream_buffer->Map(sizeof(u16), index_reserve_size);
|
||||||
m_vertex.start = vertex_map.index_aligned;
|
m_vertex.start = vertex_map.index_aligned;
|
||||||
m_index.start = index_map.index_aligned;
|
m_index.start = index_map.index_aligned;
|
||||||
|
|
||||||
// Don't use primitive restart here, it ends up slower on some drivers.
|
// Don't use primitive restart here, it ends up slower on some drivers.
|
||||||
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(vertex_map.pointer);
|
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(vertex_map.pointer);
|
||||||
u32* idx = reinterpret_cast<u32*>(index_map.pointer);
|
u16* idx = reinterpret_cast<u16*>(index_map.pointer);
|
||||||
u32 icount = 0;
|
u32 icount = 0;
|
||||||
u32 vcount = 0;
|
u32 vcount = 0;
|
||||||
for (u32 i = 0; i < num_rects; i++)
|
for (u32 i = 0; i < num_rects; i++)
|
||||||
|
@ -1587,7 +1587,7 @@ void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rec
|
||||||
m_vertex.count = vcount;
|
m_vertex.count = vcount;
|
||||||
m_index.count = icount;
|
m_index.count = icount;
|
||||||
m_vertex_stream_buffer->Unmap(vcount * sizeof(GSVertexPT1));
|
m_vertex_stream_buffer->Unmap(vcount * sizeof(GSVertexPT1));
|
||||||
m_index_stream_buffer->Unmap(icount * sizeof(u32));
|
m_index_stream_buffer->Unmap(icount * sizeof(u16));
|
||||||
|
|
||||||
PSSetShaderResource(0, rects[0].src);
|
PSSetShaderResource(0, rects[0].src);
|
||||||
PSSetSamplerState(rects[0].linear ? m_convert.ln : m_convert.pt);
|
PSSetSamplerState(rects[0].linear ? m_convert.ln : m_convert.pt);
|
||||||
|
@ -1807,8 +1807,8 @@ void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count)
|
||||||
|
|
||||||
void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count)
|
void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count)
|
||||||
{
|
{
|
||||||
const u32 size = static_cast<u32>(count) * sizeof(u32);
|
const u32 size = static_cast<u32>(count) * sizeof(u16);
|
||||||
auto res = m_index_stream_buffer->Map(sizeof(u32), size);
|
auto res = m_index_stream_buffer->Map(sizeof(u16), size);
|
||||||
m_index.start = res.index_aligned;
|
m_index.start = res.index_aligned;
|
||||||
m_index.count = count;
|
m_index.count = count;
|
||||||
std::memcpy(res.pointer, index, size);
|
std::memcpy(res.pointer, index, size);
|
||||||
|
@ -1999,18 +1999,7 @@ void GSDeviceOGL::RenderImGui()
|
||||||
m_vertex_stream_buffer->Unmap(size);
|
m_vertex_stream_buffer->Unmap(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bit awkward, because this is using 16-bit indices, not 32-bit.
|
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
||||||
u32 index_start;
|
|
||||||
{
|
|
||||||
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
|
||||||
|
|
||||||
const u32 size = static_cast<u32>(cmd_list->IdxBuffer.Size) * sizeof(ImDrawIdx);
|
|
||||||
auto res = m_index_stream_buffer->Map(sizeof(u16), size);
|
|
||||||
index_start = res.index_aligned;
|
|
||||||
std::memcpy(res.pointer, cmd_list->IdxBuffer.Data, size);
|
|
||||||
m_index_stream_buffer->Unmap(size);
|
|
||||||
m_index_stream_buffer->Bind();
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
||||||
{
|
{
|
||||||
|
@ -2038,7 +2027,7 @@ void GSDeviceOGL::RenderImGui()
|
||||||
}
|
}
|
||||||
|
|
||||||
glDrawElementsBaseVertex(GL_TRIANGLES, (GLsizei)pcmd->ElemCount, GL_UNSIGNED_SHORT,
|
glDrawElementsBaseVertex(GL_TRIANGLES, (GLsizei)pcmd->ElemCount, GL_UNSIGNED_SHORT,
|
||||||
(void*)(intptr_t)((pcmd->IdxOffset + index_start) * sizeof(ImDrawIdx)), pcmd->VtxOffset + vertex_start);
|
(void*)(intptr_t)((pcmd->IdxOffset + m_index.start) * sizeof(ImDrawIdx)), pcmd->VtxOffset + vertex_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
g_perfmon.Put(GSPerfMon::DrawCalls, cmd_list->CmdBuffer.Size);
|
||||||
|
|
|
@ -161,7 +161,7 @@ typedef GSVector4 VectorF;
|
||||||
#define LOCAL_STEP local.d4
|
#define LOCAL_STEP local.d4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
|
void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local)
|
||||||
{
|
{
|
||||||
const GSScanlineGlobalData& global = GlobalFromLocal(local);
|
const GSScanlineGlobalData& global = GlobalFromLocal(local);
|
||||||
GSScanlineSelector sel = global.sel;
|
GSScanlineSelector sel = global.sel;
|
||||||
|
|
|
@ -38,7 +38,7 @@ public:
|
||||||
~GSDrawScanline() override;
|
~GSDrawScanline() override;
|
||||||
|
|
||||||
/// Function pointer types which we call back into.
|
/// Function pointer types which we call back into.
|
||||||
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||||
using DrawScanlinePtr = void(*)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
using DrawScanlinePtr = void(*)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||||
|
|
||||||
/// Flushes the code cache, forcing everything to be recompiled.
|
/// Flushes the code cache, forcing everything to be recompiled.
|
||||||
|
@ -60,7 +60,7 @@ private:
|
||||||
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, u64, SetupPrimPtr> m_sp_map;
|
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, u64, SetupPrimPtr> m_sp_map;
|
||||||
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
|
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
|
||||||
|
|
||||||
static void CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
static void CSetupPrim(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
|
||||||
static void CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
static void CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||||
static void CDrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
static void CDrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
|
||||||
};
|
};
|
||||||
|
|
|
@ -154,10 +154,10 @@ void GSRasterizer::Draw(GSRasterizerData& data)
|
||||||
const GSVertexSW* vertex = data.vertex;
|
const GSVertexSW* vertex = data.vertex;
|
||||||
const GSVertexSW* vertex_end = data.vertex + data.vertex_count;
|
const GSVertexSW* vertex_end = data.vertex + data.vertex_count;
|
||||||
|
|
||||||
const u32* index = data.index;
|
const u16* index = data.index;
|
||||||
const u32* index_end = data.index + data.index_count;
|
const u16* index_end = data.index + data.index_count;
|
||||||
|
|
||||||
u32 tmp_index[] = {0, 1, 2};
|
static constexpr u16 tmp_index[] = {0, 1, 2};
|
||||||
|
|
||||||
bool scissor_test = !data.bbox.eq(data.bbox.rintersect(data.scissor));
|
bool scissor_test = !data.bbox.eq(data.bbox.rintersect(data.scissor));
|
||||||
|
|
||||||
|
@ -261,7 +261,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool scissor_test>
|
template <bool scissor_test>
|
||||||
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count)
|
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u16* index, int index_count)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -286,7 +286,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
u32 tmp_index[1] = {0};
|
static constexpr u16 tmp_index[1] = {0};
|
||||||
|
|
||||||
for (int i = 0; i < vertex_count; i++, vertex++)
|
for (int i = 0; i < vertex_count; i++, vertex++)
|
||||||
{
|
{
|
||||||
|
@ -307,7 +307,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
|
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u16* index)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -425,7 +425,7 @@ static const u8 s_ysort[8][4] =
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
|
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -606,7 +606,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRIC
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
|
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u16* index)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -784,7 +784,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& RESTRICT
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
|
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u16* index)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -1082,7 +1082,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
|
||||||
AddScanlineInfo(e, pixels, left, top);
|
AddScanlineInfo(e, pixels, left, top);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge /* = false */)
|
void GSRasterizer::Flush(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, bool edge /* = false */)
|
||||||
{
|
{
|
||||||
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
|
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@ public:
|
||||||
u8* buff;
|
u8* buff;
|
||||||
GSVertexSW* vertex;
|
GSVertexSW* vertex;
|
||||||
int vertex_count;
|
int vertex_count;
|
||||||
u32* index;
|
u16* index;
|
||||||
int index_count;
|
int index_count;
|
||||||
u64 frame;
|
u64 frame;
|
||||||
u64 start;
|
u64 start;
|
||||||
|
@ -101,10 +101,10 @@ protected:
|
||||||
__forceinline bool HasEdge() const { return (m_draw_edge != nullptr); }
|
__forceinline bool HasEdge() const { return (m_draw_edge != nullptr); }
|
||||||
|
|
||||||
template <bool scissor_test>
|
template <bool scissor_test>
|
||||||
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count);
|
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u16* index, int index_count);
|
||||||
void DrawLine(const GSVertexSW* vertex, const u32* index);
|
void DrawLine(const GSVertexSW* vertex, const u16* index);
|
||||||
void DrawTriangle(const GSVertexSW* vertex, const u32* index);
|
void DrawTriangle(const GSVertexSW* vertex, const u16* index);
|
||||||
void DrawSprite(const GSVertexSW* vertex, const u32* index);
|
void DrawSprite(const GSVertexSW* vertex, const u16* index);
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0);
|
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0);
|
||||||
|
@ -115,7 +115,7 @@ protected:
|
||||||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
||||||
|
|
||||||
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
__forceinline void Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge = false);
|
__forceinline void Flush(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, bool edge = false);
|
||||||
|
|
||||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
|
|
|
@ -343,7 +343,7 @@ void GSRendererSW::Draw()
|
||||||
sd->buff = (u8*)m_vertex_heap.alloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(u32) * m_index.tail, 64);
|
sd->buff = (u8*)m_vertex_heap.alloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(u32) * m_index.tail, 64);
|
||||||
sd->vertex = (GSVertexSW*)sd->buff;
|
sd->vertex = (GSVertexSW*)sd->buff;
|
||||||
sd->vertex_count = m_vertex.next;
|
sd->vertex_count = m_vertex.next;
|
||||||
sd->index = (u32*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
|
sd->index = (u16*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
|
||||||
sd->index_count = m_index.tail;
|
sd->index_count = m_index.tail;
|
||||||
sd->scanmsk_value = m_draw_env->SCANMSK.MSK;
|
sd->scanmsk_value = m_draw_env->SCANMSK.MSK;
|
||||||
|
|
||||||
|
@ -354,7 +354,7 @@ void GSRendererSW::Draw()
|
||||||
|
|
||||||
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex.buff, m_vertex.next);
|
GSVertexSW::s_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div](m_context, sd->vertex, m_vertex.buff, m_vertex.next);
|
||||||
|
|
||||||
memcpy(sd->index, m_index.buff, sizeof(u32) * m_index.tail);
|
std::memcpy(sd->index, m_index.buff, sizeof(u16) * m_index.tail);
|
||||||
|
|
||||||
GSVector4i scissor = GSVector4i(context->scissor.in);
|
GSVector4i scissor = GSVector4i(context->scissor.in);
|
||||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||||
|
|
|
@ -210,7 +210,7 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
|
||||||
{
|
{
|
||||||
// GSVector4 p = vertex[index[1]].p;
|
// GSVector4 p = vertex[index[1]].p;
|
||||||
|
|
||||||
mov(eax, ptr[_index + sizeof(u32) * 1]);
|
movzx(eax, word[_index + sizeof(u16) * 1]);
|
||||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||||
add(rax, _64_vertex);
|
add(rax, _64_vertex);
|
||||||
|
|
||||||
|
@ -299,7 +299,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
||||||
{
|
{
|
||||||
// GSVector4 p = vertex[index[1]].p;
|
// GSVector4 p = vertex[index[1]].p;
|
||||||
|
|
||||||
mov(eax, ptr[_index + sizeof(u32) * 1]);
|
movzx(eax, word[_index + sizeof(u16) * 1]);
|
||||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||||
add(rax, _64_vertex);
|
add(rax, _64_vertex);
|
||||||
|
|
||||||
|
@ -504,7 +504,7 @@ void GSSetupPrimCodeGenerator2::Color()
|
||||||
|
|
||||||
if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
|
if (!(m_sel.prim == GS_SPRITE_CLASS && (m_en.z || m_en.f))) // if this is a sprite, the last vertex was already loaded in Depth()
|
||||||
{
|
{
|
||||||
mov(eax, ptr[_index + sizeof(u32) * last]);
|
movzx(eax, word[_index + sizeof(u16) * last]);
|
||||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||||
add(rax, _64_vertex);
|
add(rax, _64_vertex);
|
||||||
}
|
}
|
||||||
|
|
|
@ -955,13 +955,13 @@ void GSDeviceVK::DoMultiStretchRects(
|
||||||
{
|
{
|
||||||
// Set up vertices first.
|
// Set up vertices first.
|
||||||
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
const u32 vertex_reserve_size = num_rects * 4 * sizeof(GSVertexPT1);
|
||||||
const u32 index_reserve_size = num_rects * 6 * sizeof(u32);
|
const u32 index_reserve_size = num_rects * 6 * sizeof(u16);
|
||||||
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
||||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
|
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
||||||
{
|
{
|
||||||
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
|
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to vertex buffer");
|
||||||
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
if (!m_vertex_stream_buffer.ReserveMemory(vertex_reserve_size, sizeof(GSVertexPT1)) ||
|
||||||
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u32)))
|
!m_index_stream_buffer.ReserveMemory(index_reserve_size, sizeof(u16)))
|
||||||
{
|
{
|
||||||
pxFailRel("Failed to reserve space for vertices");
|
pxFailRel("Failed to reserve space for vertices");
|
||||||
}
|
}
|
||||||
|
@ -971,7 +971,7 @@ void GSDeviceVK::DoMultiStretchRects(
|
||||||
// Don't use primitive restart here, it ends up slower on some drivers.
|
// Don't use primitive restart here, it ends up slower on some drivers.
|
||||||
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
|
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
|
||||||
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
|
GSVertexPT1* verts = reinterpret_cast<GSVertexPT1*>(m_vertex_stream_buffer.GetCurrentHostPointer());
|
||||||
u32* idx = reinterpret_cast<u32*>(m_index_stream_buffer.GetCurrentHostPointer());
|
u16* idx = reinterpret_cast<u16*>(m_index_stream_buffer.GetCurrentHostPointer());
|
||||||
u32 icount = 0;
|
u32 icount = 0;
|
||||||
u32 vcount = 0;
|
u32 vcount = 0;
|
||||||
for (u32 i = 0; i < num_rects; i++)
|
for (u32 i = 0; i < num_rects; i++)
|
||||||
|
@ -1001,11 +1001,11 @@ void GSDeviceVK::DoMultiStretchRects(
|
||||||
|
|
||||||
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
|
m_vertex.start = m_vertex_stream_buffer.GetCurrentOffset() / sizeof(GSVertexPT1);
|
||||||
m_vertex.count = vcount;
|
m_vertex.count = vcount;
|
||||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
|
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
||||||
m_index.count = icount;
|
m_index.count = icount;
|
||||||
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
|
m_vertex_stream_buffer.CommitMemory(vcount * sizeof(GSVertexPT1));
|
||||||
m_index_stream_buffer.CommitMemory(icount * sizeof(u32));
|
m_index_stream_buffer.CommitMemory(icount * sizeof(u16));
|
||||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
|
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
|
||||||
|
|
||||||
// Even though we're batching, a cmdbuffer submit could've messed this up.
|
// Even though we're batching, a cmdbuffer submit could've messed this up.
|
||||||
const GSVector4i rc(dTex->GetRect());
|
const GSVector4i rc(dTex->GetRect());
|
||||||
|
@ -1368,21 +1368,21 @@ void GSDeviceVK::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
||||||
|
|
||||||
void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count)
|
void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count)
|
||||||
{
|
{
|
||||||
const u32 size = sizeof(u32) * static_cast<u32>(count);
|
const u32 size = sizeof(u16) * static_cast<u32>(count);
|
||||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
|
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
||||||
{
|
{
|
||||||
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to index buffer");
|
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to index buffer");
|
||||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u32)))
|
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(u16)))
|
||||||
pxFailRel("Failed to reserve space for vertices");
|
pxFailRel("Failed to reserve space for vertices");
|
||||||
}
|
}
|
||||||
|
|
||||||
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u32);
|
m_index.start = m_index_stream_buffer.GetCurrentOffset() / sizeof(u16);
|
||||||
m_index.count = count;
|
m_index.count = count;
|
||||||
|
|
||||||
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
|
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), index, size);
|
||||||
m_index_stream_buffer.CommitMemory(size);
|
m_index_stream_buffer.CommitMemory(size);
|
||||||
|
|
||||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT32);
|
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceVK::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
|
void GSDeviceVK::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
|
||||||
|
@ -2314,9 +2314,6 @@ void GSDeviceVK::RenderImGui()
|
||||||
m_dirty_flags |= DIRTY_FLAG_UTILITY_TEXTURE;
|
m_dirty_flags |= DIRTY_FLAG_UTILITY_TEXTURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// imgui uses 16-bit indices
|
|
||||||
SetIndexBuffer(m_index_stream_buffer.GetBuffer(), 0, VK_INDEX_TYPE_UINT16);
|
|
||||||
|
|
||||||
// this is for presenting, we don't want to screw with the viewport/scissor set by display
|
// this is for presenting, we don't want to screw with the viewport/scissor set by display
|
||||||
m_dirty_flags &= ~(DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR);
|
m_dirty_flags &= ~(DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR);
|
||||||
|
|
||||||
|
@ -2338,19 +2335,8 @@ void GSDeviceVK::RenderImGui()
|
||||||
m_vertex_stream_buffer.CommitMemory(size);
|
m_vertex_stream_buffer.CommitMemory(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 index_offset;
|
static_assert(sizeof(ImDrawIdx) == sizeof(u16));
|
||||||
{
|
IASetIndexBuffer(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size);
|
||||||
const u32 size = sizeof(ImDrawIdx) * static_cast<u32>(cmd_list->IdxBuffer.Size);
|
|
||||||
if (!m_index_stream_buffer.ReserveMemory(size, sizeof(ImDrawIdx)))
|
|
||||||
{
|
|
||||||
Console.Warning("Skipping ImGui draw because of no vertex buffer space");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
index_offset = m_index_stream_buffer.GetCurrentOffset() / sizeof(ImDrawIdx);
|
|
||||||
std::memcpy(m_index_stream_buffer.GetCurrentHostPointer(), cmd_list->IdxBuffer.Data, size);
|
|
||||||
m_index_stream_buffer.CommitMemory(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++)
|
||||||
{
|
{
|
||||||
|
@ -2374,7 +2360,7 @@ void GSDeviceVK::RenderImGui()
|
||||||
if (ApplyUtilityState())
|
if (ApplyUtilityState())
|
||||||
{
|
{
|
||||||
vkCmdDrawIndexed(g_vulkan_context->GetCurrentCommandBuffer(), pcmd->ElemCount, 1,
|
vkCmdDrawIndexed(g_vulkan_context->GetCurrentCommandBuffer(), pcmd->ElemCount, 1,
|
||||||
index_offset + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
m_index.start + pcmd->IdxOffset, vertex_offset + pcmd->VtxOffset, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2779,7 +2765,7 @@ void GSDeviceVK::InitializeState()
|
||||||
m_vertex_buffer_offset = 0;
|
m_vertex_buffer_offset = 0;
|
||||||
m_index_buffer = m_index_stream_buffer.GetBuffer();
|
m_index_buffer = m_index_stream_buffer.GetBuffer();
|
||||||
m_index_buffer_offset = 0;
|
m_index_buffer_offset = 0;
|
||||||
m_index_type = VK_INDEX_TYPE_UINT32;
|
m_index_type = VK_INDEX_TYPE_UINT16;
|
||||||
m_current_framebuffer = VK_NULL_HANDLE;
|
m_current_framebuffer = VK_NULL_HANDLE;
|
||||||
m_current_render_pass = VK_NULL_HANDLE;
|
m_current_render_pass = VK_NULL_HANDLE;
|
||||||
|
|
||||||
|
@ -3848,7 +3834,7 @@ void GSDeviceVK::UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config)
|
||||||
{
|
{
|
||||||
m_index.start = 0;
|
m_index.start = 0;
|
||||||
m_index.count = config.nindices;
|
m_index.count = config.nindices;
|
||||||
SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT32);
|
SetIndexBuffer(m_expand_index_buffer, 0, VK_INDEX_TYPE_UINT16);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue