GPU/HW: Switch to indexed draws

Stenzek 2024-03-01 15:33:00 +10:00
parent b34742cdd2
commit 250fb56838
2 changed files with 153 additions and 121 deletions
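Indexed draws store each unique vertex once and describe triangles with 16-bit indices into the vertex buffer. A minimal sketch of the saving for a single quad (illustrative names, not from this codebase):

#include <array>
#include <cstdint>

struct Vertex { float x, y; };

// Non-indexed: a quad is two triangles, six vertices, two of them duplicated.
static const std::array<Vertex, 6> quad_non_indexed = {{
  {0, 0}, {1, 0}, {0, 1},  // first triangle
  {0, 1}, {1, 0}, {1, 1},  // second triangle repeats two corners
}};

// Indexed: four unique vertices plus six 16-bit indices naming the corners.
static const std::array<Vertex, 4> quad_vertices = {{{0, 0}, {1, 0}, {0, 1}, {1, 1}}};
static const std::array<uint16_t, 6> quad_indices = {{0, 1, 2, 2, 1, 3}};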

src/core/gpu_hw.cpp

@ -174,19 +174,6 @@ ALWAYS_INLINE void GPU_HW::BatchVertex::SetUVLimits(u32 min_u, u32 max_u, u32 mi
uv_limits = PackUVLimits(min_u, max_u, min_v, max_v);
}
ALWAYS_INLINE void GPU_HW::AddVertex(const BatchVertex& v)
{
std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex));
m_batch_current_vertex_ptr++;
}
template<typename... Args>
ALWAYS_INLINE void GPU_HW::AddNewVertex(Args&&... args)
{
m_batch_current_vertex_ptr->Set(std::forward<Args>(args)...);
m_batch_current_vertex_ptr++;
}
const Threading::Thread* GPU_HW::GetSWThread() const
{
return m_sw_renderer ? m_sw_renderer->GetThread() : nullptr;
@ -247,7 +234,8 @@ void GPU_HW::Reset(bool clear_vram)
{
GPU::Reset(clear_vram);
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
if (m_batch_vertex_ptr)
UnmapGPUBuffer(0, 0);
if (m_sw_renderer)
m_sw_renderer->Reset();
@ -305,7 +293,8 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di
// invalidate the whole VRAM read texture when loading state
if (sw.IsReading())
{
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
if (m_batch_vertex_ptr)
UnmapGPUBuffer(0, 0);
SetFullVRAMDirtyRectangle();
ResetBatchVertexDepth();
}
@ -1373,29 +1362,36 @@ void GPU_HW::SetScissor()
g_gpu_device->SetScissor(left, top, right - left, bottom - top);
}
void GPU_HW::MapBatchVertexPointer(u32 required_vertices)
void GPU_HW::MapGPUBuffer(u32 required_vertices, u32 required_indices)
{
DebugAssert(!m_batch_start_vertex_ptr);
DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr);
void* map;
u32 space;
g_gpu_device->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &map, &space, &m_batch_base_vertex);
void* vb_map;
u32 vb_space;
g_gpu_device->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &vb_map, &vb_space, &m_batch_base_vertex);
m_batch_vertex_ptr = static_cast<BatchVertex*>(vb_map);
m_batch_vertex_space = Truncate16(std::min<u32>(vb_space, std::numeric_limits<u16>::max()));
m_batch_start_vertex_ptr = static_cast<BatchVertex*>(map);
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + space;
u32 ib_space;
g_gpu_device->MapIndexBuffer(required_indices, &m_batch_index_ptr, &ib_space, &m_batch_base_index);
m_batch_index_space = Truncate16(std::min<u32>(ib_space, std::numeric_limits<u16>::max()));
}
void GPU_HW::UnmapBatchVertexPointer(u32 used_vertices)
void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices)
{
DebugAssert(m_batch_start_vertex_ptr);
DebugAssert(m_batch_vertex_ptr && m_batch_index_ptr);
g_gpu_device->UnmapVertexBuffer(sizeof(BatchVertex), used_vertices);
m_batch_start_vertex_ptr = nullptr;
m_batch_end_vertex_ptr = nullptr;
m_batch_current_vertex_ptr = nullptr;
g_gpu_device->UnmapIndexBuffer(used_indices);
m_batch_vertex_ptr = nullptr;
m_batch_vertex_count = 0;
m_batch_vertex_space = 0;
m_batch_index_ptr = nullptr;
m_batch_index_count = 0;
m_batch_index_space = 0;
}
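The clamping above exists because the index buffer holds 16-bit indices: a batch can address at most 65535 vertices, however large the mapped region is. A stand-alone restatement of the clamp (assuming Truncate16 is a narrowing cast to u16):

#include <algorithm>
#include <cstdint>
#include <limits>

// Usable batch space is capped at 65535 entries even when the device maps
// a larger region, because every index is a u16.
inline uint16_t ClampBatchSpace(uint32_t mapped_space) {
  return static_cast<uint16_t>(
      std::min<uint32_t>(mapped_space, std::numeric_limits<uint16_t>::max()));
}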
void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u32 base_vertex)
ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index,
u32 base_vertex)
{
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
const u8 depth_test = m_batch.use_depth_buffer ? static_cast<u8>(2) : BoolToUInt8(m_batch.check_mask_before_draw);
@ -1403,7 +1399,7 @@ void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u3
m_batch_pipelines[depth_test][static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)][static_cast<u8>(
m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]
.get());
g_gpu_device->Draw(num_vertices, base_vertex);
g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex);
}
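DrawIndexed takes an index count, a start position in the index buffer, and a base vertex that is added to every fetched index, so indices written relative to the batch stay valid wherever the mapping landed. Conceptually (an illustration of the addressing, not the backend implementation):

#include <cstdint>
#include <vector>

// How an indexed draw resolves vertex i of a draw call: the base vertex is
// added to every 16-bit index fetched starting at base_index.
template <typename Vertex>
const Vertex& FetchIndexedVertex(const std::vector<Vertex>& vertices,
                                 const std::vector<uint16_t>& indices,
                                 uint32_t base_index, uint32_t base_vertex,
                                 uint32_t i) {
  return vertices[base_vertex + indices[base_index + i]];
}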
void GPU_HW::ClearDisplay()
@ -1529,7 +1525,7 @@ void GPU_HW::SetBatchDepthBuffer(bool enabled)
if (m_batch.use_depth_buffer == enabled)
return;
if (GetBatchVertexCount() > 0)
if (m_batch_index_count > 0)
{
FlushRender();
EnsureVertexBufferSpaceForCurrentCommand();
@ -1549,7 +1545,7 @@ void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices)
if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold)
{
if (GetBatchVertexCount() > 0)
if (m_batch_index_count > 0)
{
FlushRender();
EnsureVertexBufferSpaceForCurrentCommand();
@ -1576,16 +1572,17 @@ u32 GPU_HW::GetAdaptiveDownsamplingMipLevels() const
void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
{
DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6);
const float dx = x1 - x0;
const float dy = y1 - y0;
std::array<BatchVertex, 4> output;
if (dx == 0.0f && dy == 0.0f)
{
// Degenerate, render a point.
output[0].Set(x0, y0, depth, 1.0f, col0, 0, 0, 0);
output[1].Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0);
output[2].Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
output[3].Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
(m_batch_vertex_ptr++)->Set(x0, y0, depth, 1.0f, col0, 0, 0, 0);
(m_batch_vertex_ptr++)->Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0);
(m_batch_vertex_ptr++)->Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
(m_batch_vertex_ptr++)->Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0);
}
else
{
@ -1649,18 +1646,24 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
const float ox1 = x1 + pad_x1;
const float oy1 = y1 + pad_y1;
output[0].Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0);
output[1].Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0);
output[2].Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0);
output[3].Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0);
(m_batch_vertex_ptr++)->Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0);
(m_batch_vertex_ptr++)->Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0);
(m_batch_vertex_ptr++)->Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0);
(m_batch_vertex_ptr++)->Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0);
}
AddVertex(output[0]);
AddVertex(output[1]);
AddVertex(output[2]);
AddVertex(output[3]);
AddVertex(output[2]);
AddVertex(output[1]);
const u32 start_index = m_batch_vertex_count;
m_batch_vertex_count += 4;
m_batch_vertex_space -= 4;
*(m_batch_index_ptr++) = Truncate16(start_index + 0);
*(m_batch_index_ptr++) = Truncate16(start_index + 1);
*(m_batch_index_ptr++) = Truncate16(start_index + 2);
*(m_batch_index_ptr++) = Truncate16(start_index + 3);
*(m_batch_index_ptr++) = Truncate16(start_index + 2);
*(m_batch_index_ptr++) = Truncate16(start_index + 1);
m_batch_index_count += 6;
m_batch_index_space -= 6;
}
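Each segment is expanded into a screen-space quad: four vertices, plus six indices forming triangles (0,1,2) and (3,2,1). A stand-alone sketch of that index emission (the relative vertex layout is the one DrawLine builds above):

#include <cstdint>

// Writes the six indices the line path uses for one quad, relative to the
// first of its four vertices; returns the advanced write cursor.
inline uint16_t* EmitLineQuadIndices(uint16_t* out, uint32_t start_index) {
  static const uint16_t offsets[6] = {0, 1, 2, 3, 2, 1};
  for (const uint16_t off : offsets)
    *out++ = static_cast<uint16_t>(start_index + off);
  return out;
}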
void GPU_HW::LoadVertices()
@ -1676,8 +1679,6 @@ void GPU_HW::LoadVertices()
{
case GPUPrimitive::Polygon:
{
DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
@ -1732,9 +1733,27 @@ void GPU_HW::LoadVertices()
if (m_compute_uv_range && textured)
ComputePolygonUVLimits(texpage, vertices.data(), num_vertices);
if (!IsDrawingAreaIsValid())
if (!IsDrawingAreaIsValid()) [[unlikely]]
return;
const u32 start_index = m_batch_vertex_count;
if (rc.quad_polygon)
{
DebugAssert(m_batch_vertex_space >= 4);
std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4);
m_batch_vertex_ptr += 4;
m_batch_vertex_count += 4;
m_batch_vertex_space -= 4;
}
else
{
DebugAssert(m_batch_vertex_space >= 3);
std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
m_batch_vertex_ptr += 3;
m_batch_vertex_count += 3;
m_batch_vertex_space -= 3;
}
// Cull polygons which are too large.
const auto [min_x_12, max_x_12] = MinMax(native_vertex_positions[1][0], native_vertex_positions[2][0]);
const auto [min_y_12, max_y_12] = MinMax(native_vertex_positions[1][1], native_vertex_positions[2][1]);
@ -1763,8 +1782,12 @@ void GPU_HW::LoadVertices()
native_vertex_positions[2][0], native_vertex_positions[2][1], rc.shading_enable,
rc.texture_enable, rc.transparency_enable);
std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
m_batch_current_vertex_ptr += 3;
DebugAssert(m_batch_index_space >= 3);
*(m_batch_index_ptr++) = Truncate16(start_index);
*(m_batch_index_ptr++) = Truncate16(start_index + 1);
*(m_batch_index_ptr++) = Truncate16(start_index + 2);
m_batch_index_count += 3;
m_batch_index_space -= 3;
}
// quads
@ -1797,9 +1820,12 @@ void GPU_HW::LoadVertices()
native_vertex_positions[3][0], native_vertex_positions[3][1], rc.shading_enable,
rc.texture_enable, rc.transparency_enable);
AddVertex(vertices[2]);
AddVertex(vertices[1]);
AddVertex(vertices[3]);
DebugAssert(m_batch_index_space >= 3);
*(m_batch_index_ptr++) = Truncate16(start_index + 2);
*(m_batch_index_ptr++) = Truncate16(start_index + 1);
*(m_batch_index_ptr++) = Truncate16(start_index + 3);
m_batch_index_count += 3;
m_batch_index_space -= 3;
}
}
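PS1 quads arrive in a "Z" vertex order, so the triangles (0,1,2) and (2,1,3) share the 1-2 edge; with indices the shared corners cost two bytes each instead of a repeated vertex. A rough cost comparison, assuming a 32-byte BatchVertex (the actual size isn't stated here):

#include <cstdint>

// One quad, 32-byte vertices (assumed):
//   non-indexed: 6 * 32 = 192 bytes of vertex data;
//   indexed:     4 * 32 + 6 * 2 = 140 bytes, plus fewer vertex shader
//                invocations when the post-transform cache hits.
static const uint16_t kQuadSplit[6] = {0, 1, 2, 2, 1, 3};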
@ -1808,7 +1834,8 @@ void GPU_HW::LoadVertices()
GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices);
FillDrawCommand(cmd, rc);
for (u32 i = 0; i < num_vertices; i++)
const u32 sw_num_vertices = rc.quad_polygon ? 4 : 3;
for (u32 i = 0; i < sw_num_vertices; i++)
{
GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i];
vert->x = native_vertex_positions[i][0];
@ -1863,12 +1890,13 @@ void GPU_HW::LoadVertices()
break;
}
if (!IsDrawingAreaIsValid())
if (!IsDrawingAreaIsValid()) [[unlikely]]
return;
// we can split the rectangle up into potentially 8 quads
SetBatchDepthBuffer(false);
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE &&
m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE);
// Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
u16 tex_top = orig_tex_top;
@ -1890,13 +1918,26 @@ void GPU_HW::LoadVertices()
CheckForTexPageOverlap(texpage, tex_left, tex_top, tex_right - 1, tex_bottom - 1);
AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
const u32 base_vertex = m_batch_vertex_count;
(m_batch_vertex_ptr++)
->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
(m_batch_vertex_ptr++)
->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
(m_batch_vertex_ptr++)
->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
(m_batch_vertex_ptr++)
->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
m_batch_vertex_count += 4;
m_batch_vertex_space -= 4;
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
*(m_batch_index_ptr++) = Truncate16(base_vertex + 0);
*(m_batch_index_ptr++) = Truncate16(base_vertex + 1);
*(m_batch_index_ptr++) = Truncate16(base_vertex + 2);
*(m_batch_index_ptr++) = Truncate16(base_vertex + 2);
*(m_batch_index_ptr++) = Truncate16(base_vertex + 1);
*(m_batch_index_ptr++) = Truncate16(base_vertex + 3);
m_batch_index_count += 6;
m_batch_index_space -= 6;
x_offset += quad_width;
tex_left = 0;
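Texture coordinates wrap within a 256x256 page, so oversized rectangles are diced at page boundaries and each cell becomes one indexed quad. A sketch of the resulting cell count (the loop above walks the cells directly; this closed form is an illustration, with u0/v0 assumed already reduced modulo 256):

#include <cstdint>

// Number of page-aligned cells covering a rectangle whose texture window
// starts at (u0, v0) inside a 256x256 page; each cell costs 4 vertices
// and 6 indices in the batch.
constexpr uint32_t NumRectCells(uint32_t width, uint32_t height,
                                uint32_t u0, uint32_t v0) {
  const uint32_t cols = (u0 + width + 255) / 256;
  const uint32_t rows = (v0 + height + 255) / 256;
  return cols * rows;
}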
@ -1937,7 +1978,7 @@ void GPU_HW::LoadVertices()
if (!rc.polyline)
{
DebugAssert(GetBatchVertexSpace() >= 2);
DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6);
u32 start_color, end_color;
GPUVertexPosition start_pos, end_pos;
@ -1955,7 +1996,7 @@ void GPU_HW::LoadVertices()
end_pos.bits = FifoPop();
}
if (!IsDrawingAreaIsValid())
if (!IsDrawingAreaIsValid()) [[unlikely]]
return;
s32 start_x = start_pos.x + m_drawing_offset.x;
@ -1996,9 +2037,9 @@ void GPU_HW::LoadVertices()
{
// Multiply by two because we don't use line strips.
const u32 num_vertices = GetPolyLineVertexCount();
DebugAssert(GetBatchVertexSpace() >= (num_vertices * 2));
DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
if (!IsDrawingAreaIsValid())
if (!IsDrawingAreaIsValid()) [[unlikely]]
return;
const bool shaded = rc.shading_enable;
@ -2181,7 +2222,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u
if (update_drawn || update_written)
{
if (GetBatchVertexCount() > 0)
if (m_batch_index_count > 0)
{
FlushRender();
EnsureVertexBufferSpaceForCurrentCommand();
@ -2194,7 +2235,7 @@ ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(u32 texpage, u32 min_u
ALWAYS_INLINE bool GPU_HW::IsFlushed() const
{
return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr;
return (m_batch_index_count == 0);
}
GPU_HW::InterlacedRenderMode GPU_HW::GetInterlacedRenderMode() const
@ -2228,44 +2269,44 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode trans
(transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering)))));
}
ALWAYS_INLINE u32 GPU_HW::GetBatchVertexSpace() const
void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices)
{
return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr);
}
ALWAYS_INLINE u32 GPU_HW::GetBatchVertexCount() const
if (m_batch_vertex_ptr)
{
return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr);
}
void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
{
if (m_batch_current_vertex_ptr)
{
if (GetBatchVertexSpace() >= required_vertices)
if (m_batch_vertex_space >= required_vertices && m_batch_index_space >= required_indices)
return;
FlushRender();
}
MapBatchVertexPointer(required_vertices);
MapGPUBuffer(required_vertices, required_indices);
}
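The shape of EnsureVertexBufferSpace is a reserve-or-flush idiom: write in place while both mapped buffers have room, otherwise submit and remap. A stand-alone sketch with a stand-in flush hook:

#include <cstdint>

struct BatchCursor {
  bool mapped = false;
  uint16_t vertex_space = 0;
  uint16_t index_space = 0;
};

// Returns true when the caller must (re)map buffers before writing; the
// flush hook stands in for FlushRender().
template <typename FlushFn>
bool NeedsRemap(BatchCursor& b, uint32_t verts, uint32_t idxs, FlushFn&& flush) {
  if (b.mapped) {
    if (b.vertex_space >= verts && b.index_space >= idxs)
      return false;  // enough room in the current mapping
    flush();         // submit pending geometry; the mapping is released
  }
  return true;       // caller maps fresh buffers sized for the command
}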
void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
{
u32 required_vertices;
u32 required_indices;
switch (m_render_command.primitive)
{
case GPUPrimitive::Polygon:
required_vertices = m_render_command.quad_polygon ? 6 : 3;
required_vertices = 4; // assume quad, in case of expansion
required_indices = 6;
break;
case GPUPrimitive::Rectangle:
required_vertices = MAX_VERTICES_FOR_RECTANGLE;
required_vertices = MAX_VERTICES_FOR_RECTANGLE; // TODO: Wrong
required_indices = MAX_VERTICES_FOR_RECTANGLE;
break;
case GPUPrimitive::Line:
default:
required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 6u) : 6u;
{
// assume expansion
const u32 vert_count = m_render_command.polyline ? GetPolyLineVertexCount() : 2;
required_vertices = vert_count * 4;
required_indices = vert_count * 6;
}
break;
default:
UnreachableCode();
}
// can we fit these vertices in the current depth buffer range?
@ -2273,16 +2314,11 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
{
// implies FlushRender()
ResetBatchVertexDepth();
}
else if (m_batch_current_vertex_ptr)
{
if (GetBatchVertexSpace() >= required_vertices)
MapGPUBuffer(required_vertices, required_indices);
return;
FlushRender();
}
MapBatchVertexPointer(required_vertices);
EnsureVertexBufferSpace(required_vertices, required_indices);
}
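These are worst-case reservations per incoming command: a polygon reserves quad-sized space even when a triangle arrives, and each polyline point is budgeted as a full expanded quad (the rectangle reservation carries the commit's own TODO). For instance, a 10-point polyline reserves 40 vertices and 60 indices. As a sketch (names here are illustrative):

#include <cstdint>

struct Reservation { uint32_t vertices, indices; };

// Worst-case space reserved per command kind, mirroring the switch above;
// 'points' is the polyline vertex count, or 2 for a single line.
constexpr Reservation PolygonReservation() { return {4, 6}; }
constexpr Reservation LineReservation(uint32_t points) { return {points * 4, points * 6}; }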
void GPU_HW::ResetBatchVertexDepth()
@ -2769,7 +2805,7 @@ void GPU_HW::DispatchRenderCommand()
EnsureVertexBufferSpaceForCurrentCommand();
if (GetBatchVertexCount() == 0)
if (m_batch_index_count == 0)
{
// transparency mode change
if (transparency_mode != GPUTransparencyMode::Disabled &&
@ -2842,14 +2878,13 @@ void GPU_HW::DispatchRenderCommand()
void GPU_HW::FlushRender()
{
if (!m_batch_current_vertex_ptr)
if (m_batch_index_count == 0)
return;
const u32 vertex_count = GetBatchVertexCount();
UnmapBatchVertexPointer(vertex_count);
if (vertex_count == 0)
return;
const u32 base_vertex = m_batch_base_vertex;
const u32 base_index = m_batch_base_index;
const u32 index_count = m_batch_index_count;
UnmapGPUBuffer(m_batch_vertex_count, m_batch_index_count);
#ifdef _DEBUG
GL_SCOPE_FMT("Hardware Draw {}", ++s_draw_number);
@ -2870,19 +2905,19 @@ void GPU_HW::FlushRender()
{
if (NeedsTwoPassRendering())
{
DrawBatchVertices(BatchRenderMode::OnlyOpaque, vertex_count, m_batch_base_vertex);
DrawBatchVertices(BatchRenderMode::OnlyTransparent, vertex_count, m_batch_base_vertex);
DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex);
DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex);
}
else
{
DrawBatchVertices(m_batch.GetRenderMode(), vertex_count, m_batch_base_vertex);
DrawBatchVertices(m_batch.GetRenderMode(), index_count, base_index, base_vertex);
}
}
if (m_wireframe_mode != GPUWireframeMode::Disabled)
{
g_gpu_device->SetPipeline(m_wireframe_pipeline.get());
g_gpu_device->Draw(vertex_count, m_batch_base_vertex);
g_gpu_device->DrawIndexed(index_count, base_index, base_vertex);
}
}
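One payoff is visible right here: the opaque pass, the transparent pass, and the wireframe overlay all reissue the same index range without touching buffer contents. The pattern, with stand-in device and pipeline types:

#include <cstdint>

struct Pipeline {};
struct Device {
  void SetPipeline(Pipeline*) { /* bind pipeline: no-op stand-in */ }
  void DrawIndexed(uint32_t num_indices, uint32_t base_index,
                   uint32_t base_vertex) { /* submit: no-op stand-in */ }
};

// One unmapped batch drawn twice under different pipelines; only the
// pipeline binding changes between submissions.
inline void DrawBatchTwice(Device& dev, Pipeline* opaque, Pipeline* transparent,
                           uint32_t num_indices, uint32_t base_index,
                           uint32_t base_vertex) {
  dev.SetPipeline(opaque);
  dev.DrawIndexed(num_indices, base_index, base_vertex);
  dev.SetPipeline(transparent);
  dev.DrawIndexed(num_indices, base_index, base_vertex);
}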

src/core/gpu_hw.h

@ -132,11 +132,6 @@ private:
void LoadVertices();
void AddVertex(const BatchVertex& v);
template<typename... Args>
void AddNewVertex(Args&&... args);
void PrintSettingsToLog();
void CheckSettings();
@ -144,9 +139,9 @@ private:
void UpdateDepthBufferFromMaskBit();
void ClearDepthBuffer();
void SetScissor();
void MapBatchVertexPointer(u32 required_vertices);
void UnmapBatchVertexPointer(u32 used_vertices);
void DrawBatchVertices(BatchRenderMode render_mode, u32 num_vertices, u32 base_vertex);
void MapGPUBuffer(u32 required_vertices, u32 required_indices);
void UnmapGPUBuffer(u32 used_vertices, u32 used_indices);
void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex);
u32 CalculateResolutionScale() const;
GPUDownsampleMode GetDownsampleMode(u32 resolution_scale) const;
@ -160,9 +155,7 @@ private:
void CheckForTexPageOverlap(u32 texpage, u32 min_u, u32 min_v, u32 max_u, u32 max_v);
bool IsFlushed() const;
u32 GetBatchVertexSpace() const;
u32 GetBatchVertexCount() const;
void EnsureVertexBufferSpace(u32 required_vertices);
void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices);
void EnsureVertexBufferSpaceForCurrentCommand();
void ResetBatchVertexDepth();
@ -225,10 +218,14 @@ private:
std::unique_ptr<GPU_SW_Backend> m_sw_renderer;
BatchVertex* m_batch_start_vertex_ptr = nullptr;
BatchVertex* m_batch_end_vertex_ptr = nullptr;
BatchVertex* m_batch_current_vertex_ptr = nullptr;
BatchVertex* m_batch_vertex_ptr = nullptr;
u16* m_batch_index_ptr = nullptr;
u32 m_batch_base_vertex = 0;
u32 m_batch_base_index = 0;
u16 m_batch_vertex_count = 0;
u16 m_batch_index_count = 0;
u16 m_batch_vertex_space = 0;
u16 m_batch_index_space = 0;
s32 m_current_depth = 0;
float m_last_depth_z = 1.0f;
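Taken together, the new members form a compact batch cursor; an annotated restatement (field meanings inferred from the usage in the implementation file above):

#include <cstdint>

struct BatchVertex;  // as declared earlier in this header

// The indexed-batch cursor in one place. u16 widths suffice because 16-bit
// indices cap a batch at 65535 vertices.
struct BatchCursorState {
  BatchVertex* vertex_ptr = nullptr;  // write position in the mapped vertex buffer
  uint16_t* index_ptr = nullptr;      // write position in the mapped index buffer
  uint32_t base_vertex = 0;           // device offset of the mapping's first vertex
  uint32_t base_index = 0;            // device offset of the mapping's first index
  uint16_t vertex_count = 0;          // vertices written since the last flush
  uint16_t index_count = 0;           // indices written since the last flush
  uint16_t vertex_space = 0;          // remaining room, clamped to 65535
  uint16_t index_space = 0;
};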