GS/SW: Move code buffer into main memory map

This commit is contained in:
Stenzek 2023-01-25 20:51:23 +10:00 committed by refractionpcsx2
parent 3d84443bcf
commit 609a44aaf2
22 changed files with 557 additions and 669 deletions

View File

@ -494,7 +494,6 @@ set(pcsx2GSSources
GS/GSAlignedClass.cpp
GS/GSCapture.cpp
GS/GSClut.cpp
GS/GSCodeBuffer.cpp
GS/GSCrc.cpp
GS/GSDrawingContext.cpp
GS/GSDump.cpp
@ -534,7 +533,6 @@ set(pcsx2GSHeaders
GS/GSBlock.h
GS/GSCapture.h
GS/GSClut.h
GS/GSCodeBuffer.h
GS/GSCrc.h
GS/GSDrawingContext.h
GS/GSDrawingEnvironment.h
@ -985,10 +983,6 @@ set(pcsx2RecordingHeaders
${rec_src}/Utilities/InputRecordingLogger.h
)
# System headers
set(pcsx2SystemHeaders
System/RecTypes.h)
# Windows sources
set(pcsx2WindowsSources
CDVD/Windows/DriveUtility.cpp
@ -1114,7 +1108,6 @@ target_sources(PCSX2 PRIVATE
${pcsx2FrontendHeaders}
${pcsx2ps2Sources}
${pcsx2ps2Headers}
${pcsx2SystemHeaders}
)
# platform sources

View File

@ -1,71 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "GSCodeBuffer.h"
#include "GSExtra.h"
#include "common/General.h"
GSCodeBuffer::GSCodeBuffer(size_t blocksize)
: m_blocksize(blocksize)
, m_pos(0)
, m_reserved(0)
, m_ptr(NULL)
{
}
// Unmaps every block that was recorded in m_buffers by GetBuffer().
GSCodeBuffer::~GSCodeBuffer()
{
	for (void* block : m_buffers)
		HostSys::Munmap(block, m_blocksize);
}
void* GSCodeBuffer::GetBuffer(size_t size)
{
pxAssert(size < m_blocksize);
pxAssert(m_reserved == 0);
size = (size + 15) & ~15;
if (m_ptr == NULL || m_pos + size > m_blocksize)
{
m_ptr = (u8*)HostSys::Mmap(nullptr, m_blocksize, PageProtectionMode().All());
if (!m_ptr)
pxFailRel("Failed to allocate GS code buffer");
m_pos = 0;
m_buffers.push_back(m_ptr);
}
u8* ptr = &m_ptr[m_pos];
m_reserved = size;
return ptr;
}
// Finishes the reservation made by GetBuffer(): advances the write position past the
// `size` bytes actually used (rounded up to a multiple of 16) and clears the reservation.
void GSCodeBuffer::ReleaseBuffer(size_t size)
{
	pxAssert(size <= m_reserved);

	const size_t used_end = m_pos + size;
	m_pos = (used_end + 15) & ~static_cast<size_t>(15);
	pxAssert(m_pos < m_blocksize);

	m_reserved = 0;
}

View File

@ -1,31 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Hands out memory for generated code from fixed-size mapped blocks.
// Usage is GetBuffer(max_size) -> write code -> ReleaseBuffer(actual_size); only one
// reservation may be outstanding at a time. All mapped blocks are unmapped on destruction.
class GSCodeBuffer
{
	std::vector<void*> m_buffers; // every block mapped so far; unmapped by the destructor
	size_t m_blocksize;           // size in bytes of each mapped block
	size_t m_pos, m_reserved;     // write offset in the current block / size of the outstanding reservation (0 if none)
	u8* m_ptr;                    // current block, or null before the first GetBuffer() call

public:
	GSCodeBuffer(size_t blocksize = 4096 * 64); // 256k
	virtual ~GSCodeBuffer();

	// The destructor unmaps every entry of m_buffers, so a copied object would unmap
	// the same blocks a second time. Copying is therefore disallowed.
	GSCodeBuffer(const GSCodeBuffer&) = delete;
	GSCodeBuffer& operator=(const GSCodeBuffer&) = delete;

	void* GetBuffer(size_t size);
	void ReleaseBuffer(size_t size);
};

View File

@ -15,9 +15,9 @@
#pragma once
#include "GS/GSCodeBuffer.h"
#include "GS/GSExtra.h"
#include "GS/Renderers/SW/GSScanlineEnvironment.h"
#include "System.h"
#include "common/emitter/tools.h"
template <class KEY, class VALUE>
@ -147,28 +147,25 @@ class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
{
std::string m_name;
std::unordered_map<u64, VALUE> m_cgmap;
GSCodeBuffer m_cb;
size_t m_total_code_size;
enum { MAX_SIZE = 8192 };
public:
GSCodeGeneratorFunctionMap(const char* name)
GSCodeGeneratorFunctionMap(std::string name)
: m_name(name)
, m_total_code_size(0)
{
}
~GSCodeGeneratorFunctionMap()
~GSCodeGeneratorFunctionMap() = default;
void Clear()
{
#ifdef _DEBUG
fprintf(stderr, "%s generated %zu bytes of instruction\n", m_name.c_str(), m_total_code_size);
#endif
m_cgmap.clear();
}
VALUE GetDefaultFunction(KEY key)
{
VALUE ret = NULL;
VALUE ret = nullptr;
auto i = m_cgmap.find(key);
@ -178,22 +175,19 @@ public:
}
else
{
void* code_ptr = m_cb.GetBuffer(MAX_SIZE);
CG* cg = new CG(key, code_ptr, MAX_SIZE);
ASSERT(cg->getSize() < MAX_SIZE);
u8* code_ptr = GetVmMemory().GSCode().Reserve(MAX_SIZE);
CG cg(key, code_ptr, MAX_SIZE);
ASSERT(cg.getSize() < MAX_SIZE);
#if 0
fprintf(stderr, "%s Location:%p Size:%zu Key:%llx\n", m_name.c_str(), code_ptr, cg->getSize(), (u64)key);
fprintf(stderr, "%s Location:%p Size:%zu Key:%llx\n", m_name.c_str(), code_ptr, cg.getSize(), (u64)key);
GSScanlineSelector sel(key);
sel.Print();
#endif
m_total_code_size += cg->getSize();
GetVmMemory().GSCode().Commit(cg.getSize());
m_cb.ReleaseBuffer(cg->getSize());
ret = (VALUE)cg->getCode();
ret = (VALUE)cg.getCode();
m_cgmap[key] = ret;
@ -211,8 +205,8 @@ public:
ml.method_id = iJIT_GetNewMethodID();
ml.method_name = (char*)name.c_str();
ml.method_load_address = (void*)cg->getCode();
ml.method_size = (unsigned int)cg->getSize();
ml.method_load_address = (void*)cg.getCode();
ml.method_size = (unsigned int)cg.getSize();
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
/*
@ -224,7 +218,7 @@ public:
fputc(0xBB, fp); fputc(0x6F, fp); fputc(0x00, fp); fputc(0x00, fp); fputc(0x00, fp);
fputc(0x64, fp); fputc(0x67, fp); fputc(0x90, fp);
fwrite(cg->getCode(), cg->getSize(), 1, fp);
fwrite(cg.getCode(), cg.getSize(), 1, fp);
fputc(0xBB, fp); fputc(0xDE, fp); fputc(0x00, fp); fputc(0x00, fp); fputc(0x00, fp);
fputc(0x64, fp); fputc(0x67, fp); fputc(0x90, fp);
@ -236,8 +230,6 @@ public:
}
#endif
delete cg;
}
return ret;

View File

@ -120,7 +120,6 @@ private:
// software sprite renderer state
std::vector<GSVertexSW> m_sw_vertex_buffer;
std::unique_ptr<GSTextureCacheSW::Texture> m_sw_texture[7 + 1];
std::unique_ptr<GSVirtualAlignedClass<32>> m_sw_draw_scanline;
std::unique_ptr<GSVirtualAlignedClass<32>> m_sw_rasterizer;
public:

View File

@ -16,7 +16,7 @@
#include "GSRendererHW.h"
#include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/Renderers/SW/GSDrawScanline.h"
#include "GS/Renderers/SW/GSRasterizer.h"
class CURRENT_ISA::GSRendererHWFunctions
{
@ -44,7 +44,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc)
const GSDrawingEnvironment& env = hw.m_env;
const GS_PRIM_CLASS primclass = vt.m_primclass;
GSDrawScanline::SharedData data;
GSRasterizerData data;
GSScanlineGlobalData& gd = data.global;
u32 clut_storage[256];
@ -549,13 +549,9 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc)
}
if (!hw.m_sw_rasterizer)
{
hw.m_sw_draw_scanline = std::make_unique<GSDrawScanline>();
hw.m_sw_rasterizer = std::make_unique<GSRasterizer>(static_cast<GSDrawScanline*>(hw.m_sw_draw_scanline.get()), 0, 1);
}
hw.m_sw_rasterizer = std::make_unique<GSSingleRasterizer>();
static_cast<GSDrawScanline*>(hw.m_sw_draw_scanline.get())->SetupDraw(data);
static_cast<GSRasterizer*>(hw.m_sw_rasterizer.get())->Draw(data);
static_cast<GSSingleRasterizer*>(hw.m_sw_rasterizer.get())->Draw(data);
if (invalidate_tc)
hw.m_tc->InvalidateVideoMem(context->offset.fb, bbox);

View File

@ -14,8 +14,11 @@
*/
#include "PrecompiledHeader.h"
#include "GSDrawScanline.h"
#include "GSTextureCacheSW.h"
#include "GS/Renderers/SW/GSDrawScanline.h"
#include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/Renderers/SW/GSScanlineEnvironment.h"
#include "GS/Renderers/SW/GSRasterizer.h"
#include "GS/config.h"
#if MULTI_ISA_COMPILE_ONCE
// Lack of a better home
@ -33,13 +36,21 @@ GSDrawScanline::GSDrawScanline()
: m_sp_map("GSSetupPrim")
, m_ds_map("GSDrawScanline")
{
GetVmMemory().GSCode().AllowModification();
GetVmMemory().GSCode().Reset();
}
GSDrawScanline::~GSDrawScanline() = default;
// Reports how many bytes of code were generated (only when non-zero) and then calls
// ForbidModification() on the GS code region, pairing with the AllowModification()
// call made in the constructor.
GSDrawScanline::~GSDrawScanline()
{
// Stay quiet when nothing was generated.
if (const size_t used = GetVmMemory().GSCode().GetMemoryUsed(); used > 0)
DevCon.WriteLn("SW JIT generated %zu bytes of code", used);
GetVmMemory().GSCode().ForbidModification();
}
void GSDrawScanline::BeginDraw(const GSRasterizerData& data, GSScanlineLocalData& local)
{
const GSScanlineGlobalData& global = static_cast<const SharedData&>(data).global;
const GSScanlineGlobalData& global = data.global;
local.gd = &global;
if (global.sel.mmin && global.sel.lcm)
@ -67,12 +78,22 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData& data, GSScanlineLocalData
}
}
void GSDrawScanline::SetupDraw(GSRasterizerData& data)
// Flushes the code cache: logs a warning, clears both function maps and resets the
// GS code region.
void GSDrawScanline::ResetCodeCache()
{
Console.Warning("GS Software JIT cache overflow, resetting.");
// Drop the cached function pointers before the code region itself is reset.
m_sp_map.Clear();
m_ds_map.Clear();
GetVmMemory().GSCode().Reset();
}
bool GSDrawScanline::SetupDraw(GSRasterizerData& data)
{
const GSScanlineGlobalData& global = data.global;
#ifdef ENABLE_JIT_RASTERIZER
SharedData& sdata = static_cast<SharedData&>(data);
const GSScanlineGlobalData& global = sdata.global;
sdata.ds = m_ds_map[global.sel];
// Look up the scanline function for this selector. A null result is the rare failure
// case, so the hint wraps the whole null test (same form as the draw_edge check).
data.draw_scanline = m_ds_map[global.sel];
if (unlikely(!data.draw_scanline))
return false;
if (global.sel.aa1)
{
@ -82,11 +103,13 @@ void GSDrawScanline::SetupDraw(GSRasterizerData& data)
sel.zwrite = 0;
sel.edge = 1;
sdata.de = m_ds_map[sel];
data.draw_edge = m_ds_map[sel];
if (unlikely(!data.draw_edge))
return false;
}
else
{
sdata.de = nullptr;
data.draw_edge = nullptr;
}
// doesn't need all bits => less functions generated
@ -107,7 +130,12 @@ void GSDrawScanline::SetupDraw(GSRasterizerData& data)
sel.zequal = global.sel.zequal;
sel.notest = global.sel.notest;
sdata.sp = m_sp_map[sel];
return (data.setup_prim = m_sp_map[sel]) != nullptr;
#else
data.setup_prim = &GSDrawScanline::CSetupPrim;
data.draw_scanline = &GSDrawScanline::CDrawScanline;
data.draw_edge = global.sel.aa1 ? &GSDrawScanline::CDrawEdge : nullptr;
return true;
#endif
}
@ -116,6 +144,11 @@ void GSDrawScanline::UpdateDrawStats(u64 frame, u64 ticks, int actual, int total
m_ds_map.UpdateStats(frame, ticks, actual, total, prims);
}
// Forwards to PrintStats() on the draw-scanline function map.
void GSDrawScanline::PrintStats()
{
m_ds_map.PrintStats();
}
#if _M_SSE >= 0x501
typedef GSVector8i VectorI;
typedef GSVector8 VectorF;
@ -310,6 +343,111 @@ void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, cons
}
}
// Runs the alpha test selected by global.sel.atst on `ga` against global.aref and folds
// the resulting per-lane mask into `test`, `fm` or `zm` according to global.sel.afail.
// Returns false only in AFAIL_KEEP mode when every lane of `test` ends up set;
// returns true in all other cases.
template <class T>
__ri static bool TestAlpha(T& test, T& fm, T& zm, const T& ga, const GSScanlineGlobalData& global)
{
GSScanlineSelector sel = global.sel;
// Early-outs: skip the test when the masks this fail mode would update (see the last
// switch) belong to writes the selector already has disabled.
switch (sel.afail)
{
case AFAIL_FB_ONLY:
if (!sel.zwrite)
return true;
break;
case AFAIL_ZB_ONLY:
if (!sel.fwrite)
return true;
break;
case AFAIL_RGB_ONLY:
// NOTE(review): fpsm == 1 is presumably the format without a stored alpha byte — confirm.
if (!sel.zwrite && sel.fpsm == 1)
return true;
break;
}
// t is the mask of lanes that FAIL the test: each comparison below is the inverse of
// the named test. LESS/LEQUAL and GEQUAL/GREATER share a comparison; presumably aref
// is pre-adjusted by the caller to make that correct — TODO confirm.
T t;
switch (sel.atst)
{
case ATST_NEVER:
t = GSVector4i::xffffffff();
break;
case ATST_ALWAYS:
return true;
case ATST_LESS:
case ATST_LEQUAL:
t = (ga >> 16) > T(global.aref);
break;
case ATST_EQUAL:
t = (ga >> 16) != T(global.aref);
break;
case ATST_GEQUAL:
case ATST_GREATER:
t = (ga >> 16) < T(global.aref);
break;
case ATST_NOTEQUAL:
t = (ga >> 16) == T(global.aref);
break;
default:
__assume(0);
}
// Apply the fail mask to whichever output the fail mode designates.
switch (sel.afail)
{
case AFAIL_KEEP:
test |= t;
// Every lane is now set in `test`, so tell the caller via the false return.
if (test.alltrue())
return false;
break;
case AFAIL_FB_ONLY:
zm |= t;
break;
case AFAIL_ZB_ONLY:
fm |= t;
break;
case AFAIL_RGB_ONLY:
zm |= t;
fm |= t & T::xff000000(); // fpsm 16 bit => & 0xffff8000?
break;
default:
__assume(0);
}
return true;
}
static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; // columnTable16[0]
// Stores lane `i` of `src` into global.vm. The destination byte offset is
// addr * 2 + s_offsets[i] * 2. `psm` selects the store:
//   0 - full 32-bit store
//   1 - 32-bit store that keeps the destination's top byte
//   2 - 16-bit store taken from U16[i * 2]
// Any other psm value writes nothing.
template <class T>
__ri static void WritePixel(const T& src, int addr, int i, u32 psm, const GSScanlineGlobalData& global)
{
	u8* target = (u8*)global.vm + addr * 2 + s_offsets[i] * 2;

	if (psm == 0)
	{
		*(u32*)target = src.U32[i];
	}
	else if (psm == 1)
	{
		const u32 kept_top_byte = *(u32*)target & 0xff000000;
		*(u32*)target = (src.U32[i] & 0xffffff) | kept_top_byte;
	}
	else if (psm == 2)
	{
		*(u16*)target = src.U16[i * 2];
	}
}
void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
const GSScanlineGlobalData& global = GlobalFromLocal(local);
@ -1587,108 +1725,150 @@ void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertex
}
}
template <class T>
bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga, const GSScanlineGlobalData& global)
void GSDrawScanline::CDrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
GSScanlineSelector sel = global.sel;
// This sucks. But so does not jitting!
// Work on a private copy of the global data with the edge selector bits set, and point
// local.gd at it for the duration of the draw. local.gd is already a pointer to the
// struct, so it is passed to memcpy as-is; taking its address would copy the bytes of
// the pointer member (and whatever follows it) instead of the pointed-to data.
const GSScanlineGlobalData* old_gd = local.gd;
GSScanlineGlobalData gd;
std::memcpy(&gd, local.gd, sizeof(gd));
gd.sel.zwrite = 0;
gd.sel.edge = 1;
local.gd = &gd;
switch (sel.afail)
{
case AFAIL_FB_ONLY:
if (!sel.zwrite)
return true;
break;
CDrawScanline(pixels, left, top, scan, local);
case AFAIL_ZB_ONLY:
if (!sel.fwrite)
return true;
break;
case AFAIL_RGB_ONLY:
if (!sel.zwrite && sel.fpsm == 1)
return true;
break;
}
T t;
switch (sel.atst)
{
case ATST_NEVER:
t = GSVector4i::xffffffff();
break;
case ATST_ALWAYS:
return true;
case ATST_LESS:
case ATST_LEQUAL:
t = (ga >> 16) > T(global.aref);
break;
case ATST_EQUAL:
t = (ga >> 16) != T(global.aref);
break;
case ATST_GEQUAL:
case ATST_GREATER:
t = (ga >> 16) < T(global.aref);
break;
case ATST_NOTEQUAL:
t = (ga >> 16) == T(global.aref);
break;
default:
__assume(0);
}
switch (sel.afail)
{
case AFAIL_KEEP:
test |= t;
if (test.alltrue())
return false;
break;
case AFAIL_FB_ONLY:
zm |= t;
break;
case AFAIL_ZB_ONLY:
fm |= t;
break;
case AFAIL_RGB_ONLY:
zm |= t;
fm |= t & T::xff000000(); // fpsm 16 bit => & 0xffff8000?
break;
default:
__assume(0);
}
return true;
local.gd = old_gd;
}
static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; // columnTable16[0]
template <class T>
void GSDrawScanline::WritePixel(const T& src, int addr, int i, u32 psm, const GSScanlineGlobalData& global)
template <class T, bool masked>
__ri static void FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local)
{
u8* dst = (u8*)global.vm + addr * 2 + s_offsets[i] * 2;
if (r.x >= r.z)
return;
switch (psm)
T* vm = (T*)GlobalFromLocal(local).vm;
for (int y = r.y; y < r.w; y++)
{
case 0:
*(u32*)dst = src.U32[i];
break;
case 1:
*(u32*)dst = (src.U32[i] & 0xffffff) | (*(u32*)dst & 0xff000000);
break;
case 2:
*(u16*)dst = src.U16[i * 2];
break;
auto pa = off.paMulti(vm, 0, y);
for (int x = r.x; x < r.z; x++)
{
T& d = *pa.value(x);
d = (T)(!masked ? c : (c | (d & m)));
}
}
}
#if _M_SSE >= 0x501
// Fills rectangle `r` with `c`, walking it in steps of 8 rows and 8 * 4 / sizeof(T)
// columns. Each step writes eight consecutive GSVector8i values starting at the address
// off.pa(x, y) yields. When `masked` is set, destination bits selected by `m` are kept
// (c | (old & m)); otherwise `c` is stored directly. Does nothing if r.x >= r.z.
// DrawRectT passes a rectangle aligned to this step size.
template <class T, bool masked>
__ri static void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m, GSScanlineLocalData& local)
{
if (r.x >= r.z)
return;
T* vm = (T*)GlobalFromLocal(local).vm;
for (int y = r.y; y < r.w; y += 8)
{
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{
GSVector8i* RESTRICT p = (GSVector8i*)&vm[off.pa(x, y)];
p[0] = !masked ? c : (c | (p[0] & m));
p[1] = !masked ? c : (c | (p[1] & m));
p[2] = !masked ? c : (c | (p[2] & m));
p[3] = !masked ? c : (c | (p[3] & m));
p[4] = !masked ? c : (c | (p[4] & m));
p[5] = !masked ? c : (c | (p[5] & m));
p[6] = !masked ? c : (c | (p[6] & m));
p[7] = !masked ? c : (c | (p[7] & m));
}
}
}
#else
// Fills rectangle `r` with `c`, walking it in steps of 8 rows and 8 * 4 / sizeof(T)
// columns. Each step writes sixteen consecutive GSVector4i values starting at the
// address paMulti(vm, 0, y).value(x) yields. When `masked` is set, destination bits
// selected by `m` are kept (c | (old & m)); otherwise `c` is stored directly.
// Does nothing if r.x >= r.z. DrawRectT passes a rectangle aligned to this step size.
template <class T, bool masked>
__ri static void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m, GSScanlineLocalData& local)
{
if (r.x >= r.z)
return;
T* vm = (T*)GlobalFromLocal(local).vm;
for (int y = r.y; y < r.w; y += 8)
{
auto pa = off.paMulti(vm, 0, y);
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{
GSVector4i* RESTRICT p = (GSVector4i*)pa.value(x);
// Four stores per iteration, four iterations: 16 vectors in total.
for (int i = 0; i < 16; i += 4)
{
p[i + 0] = !masked ? c : (c | (p[i + 0] & m));
p[i + 1] = !masked ? c : (c | (p[i + 1] & m));
p[i + 2] = !masked ? c : (c | (p[i + 2] & m));
p[i + 3] = !masked ? c : (c | (p[i + 3] & m));
}
}
}
}
#endif
// Fills rectangle `r` with colour `c` under write mask `m` (set mask bits keep the
// destination). The part of `r` that aligns inward to a (8 * 4 / sizeof(T)) x 8 grid is
// filled with FillBlock; the remaining border strips, or the whole rectangle when no
// aligned interior exists, are filled with FillRect.
template <class T, bool masked>
__ri static void DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local)
{
// Every bit is masked off, so no pixel would change.
if (m == 0xffffffff)
return;
#if _M_SSE >= 0x501
GSVector8i color((int)c);
GSVector8i mask((int)m);
#else
GSVector4i color((int)c);
GSVector4i mask((int)m);
#endif
// 16-bit targets: replicate the low 16 bits of the scalar colour/mask into both halves.
// NOTE(review): xxzzlh() is presumably the vector equivalent — confirm.
if (sizeof(T) == sizeof(u16))
{
color = color.xxzzlh();
mask = mask.xxzzlh();
c = (c & 0xffff) | (c << 16);
m = (m & 0xffff) | (m << 16);
}
// Pre-clear the masked bits of the colour so the fill helpers can simply OR it in.
color = color.andnot(mask);
c = c & (~m);
if (masked)
ASSERT(mask.U32[0] != 0);
// br is r aligned inward to the block grid.
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
if (!br.rempty())
{
// Strips above and below the aligned interior.
FillRect<T, masked>(off, GSVector4i(r.x, r.y, r.z, br.y), c, m, local);
FillRect<T, masked>(off, GSVector4i(r.x, br.w, r.z, r.w), c, m, local);
if (r.x < br.x || br.z < r.z)
{
// Strips left and right of the aligned interior.
FillRect<T, masked>(off, GSVector4i(r.x, br.y, br.x, br.w), c, m, local);
FillRect<T, masked>(off, GSVector4i(br.z, br.y, r.z, br.w), c, m, local);
}
FillBlock<T, masked>(off, br, color, mask, local);
}
else
{
FillRect<T, masked>(off, r, c, m, local);
}
}
@ -1777,135 +1957,3 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v, GSScanli
}
}
}
// Fills rectangle `r` with colour `c` under write mask `m` (set mask bits keep the
// destination). The part of `r` that aligns inward to a (8 * 4 / sizeof(T)) x 8 grid is
// filled with FillBlock; the remaining border strips, or the whole rectangle when no
// aligned interior exists, are filled with FillRect.
template <class T, bool masked>
void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local)
{
// Every bit is masked off, so no pixel would change.
if (m == 0xffffffff)
return;
#if _M_SSE >= 0x501
GSVector8i color((int)c);
GSVector8i mask((int)m);
#else
GSVector4i color((int)c);
GSVector4i mask((int)m);
#endif
// 16-bit targets: replicate the low 16 bits of the scalar colour/mask into both halves.
// NOTE(review): xxzzlh() is presumably the vector equivalent — confirm.
if (sizeof(T) == sizeof(u16))
{
color = color.xxzzlh();
mask = mask.xxzzlh();
c = (c & 0xffff) | (c << 16);
m = (m & 0xffff) | (m << 16);
}
// Pre-clear the masked bits of the colour so the fill helpers can simply OR it in.
color = color.andnot(mask);
c = c & (~m);
if (masked)
ASSERT(mask.U32[0] != 0);
// br is r aligned inward to the block grid.
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
if (!br.rempty())
{
// Strips above and below the aligned interior.
FillRect<T, masked>(off, GSVector4i(r.x, r.y, r.z, br.y), c, m, local);
FillRect<T, masked>(off, GSVector4i(r.x, br.w, r.z, r.w), c, m, local);
if (r.x < br.x || br.z < r.z)
{
// Strips left and right of the aligned interior.
FillRect<T, masked>(off, GSVector4i(r.x, br.y, br.x, br.w), c, m, local);
FillRect<T, masked>(off, GSVector4i(br.z, br.y, r.z, br.w), c, m, local);
}
FillBlock<T, masked>(off, br, color, mask, local);
}
else
{
FillRect<T, masked>(off, r, c, m, local);
}
}
// Pixel-by-pixel fill of rectangle `r` (x in [r.x, r.z), y in [r.y, r.w)).
// With `masked` set, destination bits selected by `m` are kept and `c` is OR-ed in;
// otherwise `c` is stored directly. Does nothing when the rectangle has no columns.
template <class T, bool masked>
void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local)
{
	const bool has_columns = r.x < r.z;
	if (!has_columns)
		return;

	T* base = (T*)GlobalFromLocal(local).vm;

	for (int row = r.y; row < r.w; ++row)
	{
		// Address helper for this row; value(col) yields the pixel's location.
		auto row_addr = off.paMulti(base, 0, row);

		for (int col = r.x; col < r.z; ++col)
		{
			T& pixel = *row_addr.value(col);
			if (masked)
				pixel = (T)(c | (pixel & m));
			else
				pixel = (T)c;
		}
	}
}
#if _M_SSE >= 0x501
// Fills rectangle `r` with `c`, walking it in steps of 8 rows and 8 * 4 / sizeof(T)
// columns. Each step writes eight consecutive GSVector8i values starting at the address
// off.pa(x, y) yields. When `masked` is set, destination bits selected by `m` are kept
// (c | (old & m)); otherwise `c` is stored directly. Does nothing if r.x >= r.z.
template <class T, bool masked>
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m, GSScanlineLocalData& local)
{
if (r.x >= r.z)
return;
T* vm = (T*)GlobalFromLocal(local).vm;
for (int y = r.y; y < r.w; y += 8)
{
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{
GSVector8i* RESTRICT p = (GSVector8i*)&vm[off.pa(x, y)];
p[0] = !masked ? c : (c | (p[0] & m));
p[1] = !masked ? c : (c | (p[1] & m));
p[2] = !masked ? c : (c | (p[2] & m));
p[3] = !masked ? c : (c | (p[3] & m));
p[4] = !masked ? c : (c | (p[4] & m));
p[5] = !masked ? c : (c | (p[5] & m));
p[6] = !masked ? c : (c | (p[6] & m));
p[7] = !masked ? c : (c | (p[7] & m));
}
}
}
#else
// Fills rectangle `r` with `c`, walking it in steps of 8 rows and 8 * 4 / sizeof(T)
// columns. Each step writes sixteen consecutive GSVector4i values starting at the
// address paMulti(vm, 0, y).value(x) yields. When `masked` is set, destination bits
// selected by `m` are kept (c | (old & m)); otherwise `c` is stored directly.
// Does nothing if r.x >= r.z.
template <class T, bool masked>
void GSDrawScanline::FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m, GSScanlineLocalData& local)
{
if (r.x >= r.z)
return;
T* vm = (T*)GlobalFromLocal(local).vm;
for (int y = r.y; y < r.w; y += 8)
{
auto pa = off.paMulti(vm, 0, y);
for (int x = r.x; x < r.z; x += 8 * 4 / sizeof(T))
{
GSVector4i* RESTRICT p = (GSVector4i*)pa.value(x);
// Four stores per iteration, four iterations: 16 vectors in total.
for (int i = 0; i < 16; i += 4)
{
p[i + 0] = !masked ? c : (c | (p[i + 0] & m));
p[i + 1] = !masked ? c : (c | (p[i + 1] & m));
p[i + 2] = !masked ? c : (c | (p[i + 2] & m));
p[i + 3] = !masked ? c : (c | (p[i + 3] & m));
}
}
}
}
#endif

View File

@ -16,136 +16,53 @@
#pragma once
#include "GS/GSState.h"
#include "GS/Renderers/SW/GSRasterizer.h"
#include "GS/Renderers/SW/GSScanlineEnvironment.h"
#include "GS/Renderers/SW/GSSetupPrimCodeGenerator.h"
#include "GS/Renderers/SW/GSDrawScanlineCodeGenerator.h"
#include "GS/config.h"
#include <cstring>
struct GSScanlineLocalData;
MULTI_ISA_UNSHARED_START
class GSRasterizerData;
class GSSetupPrimCodeGenerator;
class GSDrawScanlineCodeGenerator;
class GSDrawScanline : public GSVirtualAlignedClass<32>
{
public:
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
using DrawScanlinePtr = void(*)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
class SharedData : public GSRasterizerData
{
public:
GSScanlineGlobalData global;
#ifdef ENABLE_JIT_RASTERIZER
SetupPrimPtr sp;
DrawScanlinePtr ds;
DrawScanlinePtr de;
#endif
};
protected:
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, u64, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
template <class T, bool masked>
static void DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local);
template <class T, bool masked>
static __forceinline void FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m, GSScanlineLocalData& local);
#if _M_SSE >= 0x501
template <class T, bool masked>
static __forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector8i& c, const GSVector8i& m, GSScanlineLocalData& local);
#else
template <class T, bool masked>
static __forceinline void FillBlock(const GSOffset& off, const GSVector4i& r, const GSVector4i& c, const GSVector4i& m, GSScanlineLocalData& local);
#endif
friend GSSetupPrimCodeGenerator;
friend GSDrawScanlineCodeGenerator;
public:
GSDrawScanline();
~GSDrawScanline() override;
void SetupDraw(GSRasterizerData& data);
void UpdateDrawStats(u64 frame, u64 ticks, int actual, int total, int prims);
/// Function pointer types which we call back into.
using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
using DrawScanlinePtr = void(*)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
/// Flushes the code cache, forcing everything to be recompiled.
void ResetCodeCache();
/// Populates function pointers. If this returns false, we ran out of code space.
bool SetupDraw(GSRasterizerData& data);
/// Draw pre-calculations, computed per-thread.
static void BeginDraw(const GSRasterizerData& data, GSScanlineLocalData& local);
/// Not currently jitted.
static void DrawRect(const GSVector4i& r, const GSVertexSW& v, GSScanlineLocalData& local);
void UpdateDrawStats(u64 frame, u64 ticks, int actual, int total, int prims);
void PrintStats();
private:
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, u64, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
static void CSetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, GSScanlineLocalData& local);
static void CDrawScanline(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
template<class T> static bool TestAlpha(T& test, T& fm, T& zm, const T& ga, const GSScanlineGlobalData& global);
template<class T> static void WritePixel(const T& src, int addr, int i, u32 psm, const GSScanlineGlobalData& global);
#ifdef ENABLE_JIT_RASTERIZER
__forceinline static void SetupPrim(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index,
const GSVertexSW& dscan, GSScanlineLocalData& local)
{
static_cast<const SharedData&>(data).sp(vertex, index, dscan, local);
}
__forceinline static void DrawScanline(
const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
static_cast<const SharedData&>(data).ds(pixels, left, top, scan, local);
}
__forceinline static void DrawEdge(
const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
static_cast<const SharedData&>(data).de(pixels, left, top, scan, local);
}
__forceinline static bool HasEdge(const GSRasterizerData& data)
{
return static_cast<const SharedData&>(data).de != nullptr;
}
#else
__forceinline static void SetupPrim(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index,
const GSVertexSW& dscan, GSScanlineLocalData& local)
{
CSetupPrim(vertex, index, dscan, local);
}
__forceinline static void DrawScanline(
const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
CDrawScanline(pixels, left, top, scan, local);
}
// Non-JIT edge draw: runs CDrawScanline with a temporary copy of the global data whose
// selector has zwrite cleared and edge set, then restores local.gd.
// `data` is unused here; it is kept so the signature matches the JIT variant.
__forceinline static void DrawEdge(
	const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local)
{
	// This sucks. But so does not jitting!
	const GSScanlineGlobalData* old_gd = local.gd;
	GSScanlineGlobalData gd;
	// local.gd is already a pointer to the struct; copy from it, not from the address
	// of the pointer member.
	std::memcpy(&gd, local.gd, sizeof(gd));
	gd.sel.zwrite = 0;
	gd.sel.edge = 1;
	local.gd = &gd;
	CDrawScanline(pixels, left, top, scan, local);
	local.gd = old_gd;
}
__forceinline static bool HasEdge(const SharedData& data)
{
return static_cast<const SharedData&>(data).global.sel.aa1;
}
#endif
// Not currently jitted.
void DrawRect(const GSVector4i& r, const GSVertexSW& v, GSScanlineLocalData& local);
void PrintStats()
{
m_ds_map.PrintStats();
}
static void CDrawEdge(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local);
};
MULTI_ISA_UNSHARED_END

View File

@ -91,8 +91,7 @@ GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(u64 key, void* code, si
if (shouldUseCDrawScanline(key))
{
mov(rax, reinterpret_cast<size_t>(GSDrawScanline::CDrawScanline)); // TODO: Get rid of once we move to memory map
jmp(rax);
jmp(reinterpret_cast<const void*>(&GSDrawScanline::CDrawScanline));
return;
}

View File

@ -146,7 +146,10 @@ void GSRasterizer::Draw(GSRasterizerData& data)
if constexpr (ENABLE_DRAW_STATS)
data.start = __rdtsc();
GSDrawScanline::BeginDraw(static_cast<GSDrawScanline::SharedData&>(data), m_local);
m_setup_prim = data.setup_prim;
m_draw_scanline = data.draw_scanline;
m_draw_edge = data.draw_edge;
GSDrawScanline::BeginDraw(data, m_local);
const GSVertexSW* vertex = data.vertex;
const GSVertexSW* vertex_end = data.vertex + data.vertex_count;
@ -169,11 +172,11 @@ void GSRasterizer::Draw(GSRasterizerData& data)
if (scissor_test)
{
DrawPoint<true>(data, vertex, index);
DrawPoint<true>(vertex, data.vertex_count, index, data.index_count);
}
else
{
DrawPoint<false>(data, vertex, index);
DrawPoint<false>(vertex, data.vertex_count, index, data.index_count);
}
break;
@ -184,7 +187,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
{
do
{
DrawLine(data, vertex, index);
DrawLine(vertex, index);
index += 2;
} while (index < index_end);
}
@ -192,7 +195,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
{
do
{
DrawLine(data, vertex, tmp_index);
DrawLine(vertex, tmp_index);
vertex += 2;
} while (vertex < vertex_end);
}
@ -205,7 +208,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
{
do
{
DrawTriangle(data, vertex, index);
DrawTriangle(vertex, index);
index += 3;
} while (index < index_end);
}
@ -213,7 +216,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
{
do
{
DrawTriangle(data, vertex, tmp_index);
DrawTriangle(vertex, tmp_index);
vertex += 3;
} while (vertex < vertex_end);
}
@ -226,7 +229,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
{
do
{
DrawSprite(data, vertex, index);
DrawSprite(vertex, index);
index += 2;
} while (index < index_end);
}
@ -234,7 +237,7 @@ void GSRasterizer::Draw(GSRasterizerData& data)
{
do
{
DrawSprite(data, vertex, tmp_index);
DrawSprite(vertex, tmp_index);
vertex += 2;
} while (vertex < vertex_end);
}
@ -258,13 +261,10 @@ void GSRasterizer::Draw(GSRasterizerData& data)
}
template <bool scissor_test>
void GSRasterizer::DrawPoint(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count)
{
m_primcount++;
const int vertex_count = data.vertex_count;
const int index_count = data.index_count;
if (index)
{
for (int i = 0; i < index_count; i++, index++)
@ -277,9 +277,9 @@ void GSRasterizer::DrawPoint(const GSRasterizerData& data, const GSVertexSW* ver
{
if (IsOneOfMyScanlines(p.y))
{
GSDrawScanline::SetupPrim(data, vertex, index, GSVertexSW::zero(), m_local);
m_setup_prim(vertex, index, GSVertexSW::zero(), m_local);
DrawScanline(data, 1, p.x, p.y, v);
DrawScanline(1, p.x, p.y, v);
}
}
}
@ -298,16 +298,16 @@ void GSRasterizer::DrawPoint(const GSRasterizerData& data, const GSVertexSW* ver
{
if (IsOneOfMyScanlines(p.y))
{
GSDrawScanline::SetupPrim(data, vertex, tmp_index, GSVertexSW::zero(), m_local);
m_setup_prim(vertex, tmp_index, GSVertexSW::zero(), m_local);
DrawScanline(data, 1, p.x, p.y, v);
DrawScanline(1, p.x, p.y, v);
}
}
}
}
}
void GSRasterizer::DrawLine(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
@ -320,12 +320,12 @@ void GSRasterizer::DrawLine(const GSRasterizerData& data, const GSVertexSW* vert
int i = (dp < dp.yxwz()).mask() & 1; // |dx| <= |dy|
if (GSDrawScanline::HasEdge(data))
if (HasEdge())
{
DrawEdge(data, v0, v1, dv, i, 0);
DrawEdge(data, v0, v1, dv, i, 1);
DrawEdge(v0, v1, dv, i, 0);
DrawEdge(v0, v1, dv, i, 1);
Flush(data, vertex, index, GSVertexSW::zero(), true);
Flush(vertex, index, GSVertexSW::zero(), true);
return;
}
@ -366,9 +366,9 @@ void GSRasterizer::DrawLine(const GSRasterizerData& data, const GSVertexSW* vert
scan += dscan * (l - scan.p).xxxx();
GSDrawScanline::SetupPrim(data, vertex, index, dscan, m_local);
m_setup_prim(vertex, index, dscan, m_local);
DrawScanline(data, pixels, left, p.y, scan);
DrawScanline(pixels, left, p.y, scan);
}
}
}
@ -407,7 +407,7 @@ void GSRasterizer::DrawLine(const GSRasterizerData& data, const GSVertexSW* vert
m_edge.count = e - m_edge.buff;
Flush(data, vertex, index, GSVertexSW::zero());
Flush(vertex, index, GSVertexSW::zero());
}
}
@ -425,7 +425,7 @@ static const u8 s_ysort[8][4] =
#if _M_SSE >= 0x501
void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
@ -509,7 +509,7 @@ void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW*
edge.p.y = vertex[i[m2]].p.x;
dedge.p = ddx[!m2 << 1].yzzw(dedge.p);
DrawTriangleSection(data, tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
}
}
else
@ -521,7 +521,7 @@ void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW*
edge.p.y = edge.p.x;
dedge.p = ddx[m2].xyzw(dedge.p);
DrawTriangleSection(data, tb.x, tb.z, edge, dedge, dscan, v0.p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
}
if (tb.y < tb.w)
@ -531,13 +531,13 @@ void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW*
edge.p = (v0.p.xxxx() + ddx[m2] * dv0.p.yyyy()).xyzw(edge.p);
dedge.p = ddx[!m2 << 1].yzzw(dedge.p);
DrawTriangleSection(data, tb.y, tb.w, edge, dedge, dscan, v1.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
}
}
Flush(data, vertex, index, (GSVertexSW&)dscan);
Flush(vertex, index, (GSVertexSW&)dscan);
if (GSDrawScanline::HasEdge(data))
if (HasEdge())
{
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
@ -546,15 +546,15 @@ void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW*
int orientation = a.mask();
int side = ((a | b) ^ c).mask() ^ 2; // evil
DrawEdge(data, (GSVertexSW&)v0, (GSVertexSW&)v1, (GSVertexSW&)dv0, orientation & 1, side & 1);
DrawEdge(data, (GSVertexSW&)v0, (GSVertexSW&)v2, (GSVertexSW&)dv1, orientation & 2, side & 2);
DrawEdge(data, (GSVertexSW&)v1, (GSVertexSW&)v2, (GSVertexSW&)dv2, orientation & 4, side & 4);
DrawEdge((GSVertexSW&)v0, (GSVertexSW&)v1, (GSVertexSW&)dv0, orientation & 1, side & 1);
DrawEdge((GSVertexSW&)v0, (GSVertexSW&)v2, (GSVertexSW&)dv1, orientation & 2, side & 2);
DrawEdge((GSVertexSW&)v1, (GSVertexSW&)v2, (GSVertexSW&)dv2, orientation & 4, side & 4);
Flush(data, vertex, index, GSVertexSW::zero(), true);
Flush(vertex, index, GSVertexSW::zero(), true);
}
}
void GSRasterizer::DrawTriangleSection(const GSRasterizerData& data, int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0)
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0)
{
ASSERT(top < bottom);
ASSERT(edge.p.x <= edge.p.y);
@ -606,7 +606,7 @@ void GSRasterizer::DrawTriangleSection(const GSRasterizerData& data, int top, in
#else
void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
@ -688,7 +688,7 @@ void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW*
edge.p.y = vertex[i[m2]].p.x;
dedge.p = ddx[!m2 << 1].yzzw(dedge.p);
DrawTriangleSection(data, tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, vertex[i[1 - m2]].p);
}
}
else
@ -700,7 +700,7 @@ void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW*
edge.p.y = edge.p.x;
dedge.p = ddx[m2].xyzw(dedge.p);
DrawTriangleSection(data, tb.x, tb.z, edge, dedge, dscan, v0.p);
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, v0.p);
}
if (tb.y < tb.w)
@ -710,13 +710,13 @@ void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW*
edge.p = (v0.p.xxxx() + ddx[m2] * dv0.p.yyyy()).xyzw(edge.p);
dedge.p = ddx[!m2 << 1].yzzw(dedge.p);
DrawTriangleSection(data, tb.y, tb.w, edge, dedge, dscan, v1.p);
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v1.p);
}
}
Flush(data, vertex, index, dscan);
Flush(vertex, index, dscan);
if (GSDrawScanline::HasEdge(data))
if (HasEdge())
{
GSVector4 a = dx.abs() < dy.abs(); // |dx| <= |dy|
GSVector4 b = dx < GSVector4::zero(); // dx < 0
@ -725,15 +725,15 @@ void GSRasterizer::DrawTriangle(const GSRasterizerData& data, const GSVertexSW*
int orientation = a.mask();
int side = ((a | b) ^ c).mask() ^ 2; // evil
DrawEdge(data, v0, v1, dv0, orientation & 1, side & 1);
DrawEdge(data, v0, v2, dv1, orientation & 2, side & 2);
DrawEdge(data, v1, v2, dv2, orientation & 4, side & 4);
DrawEdge(v0, v1, dv0, orientation & 1, side & 1);
DrawEdge(v0, v2, dv1, orientation & 2, side & 2);
DrawEdge(v1, v2, dv2, orientation & 4, side & 4);
Flush(data, vertex, index, GSVertexSW::zero(), true);
Flush(vertex, index, GSVertexSW::zero(), true);
}
}
void GSRasterizer::DrawTriangleSection(const GSRasterizerData& data, int top, int bottom, GSVertexSW& RESTRICT edge, const GSVertexSW& RESTRICT dedge, const GSVertexSW& RESTRICT dscan, const GSVector4& RESTRICT p0)
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& RESTRICT edge, const GSVertexSW& RESTRICT dedge, const GSVertexSW& RESTRICT dscan, const GSVector4& RESTRICT p0)
{
ASSERT(top < bottom);
ASSERT(edge.p.x <= edge.p.y);
@ -784,7 +784,7 @@ void GSRasterizer::DrawTriangleSection(const GSRasterizerData& data, int top, in
#endif
void GSRasterizer::DrawSprite(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index)
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
{
m_primcount++;
@ -811,12 +811,11 @@ void GSRasterizer::DrawSprite(const GSRasterizerData& data, const GSVertexSW* ve
GSVertexSW scan = v[0];
// TODO: Double check IsSolidRect() works.
if ((m_scanmsk_value & 2) == 0 && m_local.gd->sel.IsSolidRect())
{
if (m_threads == 1)
{
m_ds->DrawRect(r, scan, m_local);
GSDrawScanline::DrawRect(r, scan, m_local);
int pixels = r.width() * r.height();
@ -833,7 +832,7 @@ void GSRasterizer::DrawSprite(const GSRasterizerData& data, const GSVertexSW* ve
r.top = top;
r.bottom = std::min<int>((top + (1 << m_thread_height)) & ~((1 << m_thread_height) - 1), bottom);
m_ds->DrawRect(r, scan, m_local);
GSDrawScanline::DrawRect(r, scan, m_local);
int pixels = r.width() * r.height();
@ -862,13 +861,13 @@ void GSRasterizer::DrawSprite(const GSRasterizerData& data, const GSVertexSW* ve
scan.t = (scan.t + dt * prestep).xyzw(scan.t);
GSDrawScanline::SetupPrim(data, vertex, index, dscan, m_local);
m_setup_prim(vertex, index, dscan, m_local);
while (1)
{
if (IsOneOfMyScanlines(r.top))
{
DrawScanline(data, r.width(), r.left, r.top, scan);
DrawScanline(r.width(), r.left, r.top, scan);
}
if (++r.top >= r.bottom)
@ -878,7 +877,7 @@ void GSRasterizer::DrawSprite(const GSRasterizerData& data, const GSVertexSW* ve
}
}
void GSRasterizer::DrawEdge(const GSRasterizerData& data, const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side)
void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side)
{
// orientation:
// - true: |dv.p.y| > |dv.p.x|
@ -1083,7 +1082,7 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
AddScanlineInfo(e, pixels, left, top);
}
void GSRasterizer::Flush(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge /* = false */)
void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge /* = false */)
{
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
@ -1091,7 +1090,7 @@ void GSRasterizer::Flush(const GSRasterizerData& data, const GSVertexSW* vertex,
if (count > 0)
{
m_ds->SetupPrim(data, vertex, index, dscan, m_local);
m_setup_prim(vertex, index, dscan, m_local);
const GSVertexSW* RESTRICT e = m_edge.buff;
const GSVertexSW* RESTRICT ee = e + count;
@ -1104,7 +1103,7 @@ void GSRasterizer::Flush(const GSRasterizerData& data, const GSVertexSW* vertex,
int left = e->_pad.I32[1];
int top = e->_pad.I32[2];
DrawScanline(data, pixels, left, top, *e++);
DrawScanline(pixels, left, top, *e++);
} while (e < ee);
}
else
@ -1115,7 +1114,7 @@ void GSRasterizer::Flush(const GSRasterizerData& data, const GSVertexSW* vertex,
int left = e->_pad.I32[1];
int top = e->_pad.I32[2];
DrawEdge(data, pixels, left, top, *e++);
DrawEdge(pixels, left, top, *e++);
} while (e < ee);
}
@ -1129,7 +1128,7 @@ void GSRasterizer::Flush(const GSRasterizerData& data, const GSVertexSW* vertex,
#define PIXELS_PER_LOOP 4
#endif
void GSRasterizer::DrawScanline(const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan)
void GSRasterizer::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
{
if ((m_scanmsk_value & 2) && (m_scanmsk_value & 1) == (top & 1)) return;
m_pixels.actual += pixels;
@ -1138,10 +1137,10 @@ void GSRasterizer::DrawScanline(const GSRasterizerData& data, int pixels, int le
ASSERT(m_pixels.actual <= m_pixels.total);
GSDrawScanline::DrawScanline(data, pixels, left, top, scan, m_local);
m_draw_scanline(pixels, left, top, scan, m_local);
}
void GSRasterizer::DrawEdge(const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan)
void GSRasterizer::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
{
if ((m_scanmsk_value & 2) && (m_scanmsk_value & 1) == (top & 1)) return;
m_pixels.actual += 1;
@ -1149,27 +1148,32 @@ void GSRasterizer::DrawEdge(const GSRasterizerData& data, int pixels, int left,
ASSERT(m_pixels.actual <= m_pixels.total);
GSDrawScanline::DrawEdge(data, pixels, left, top, scan, m_local);
m_draw_edge(pixels, left, top, scan, m_local);
}
//
GSSingleRasterizer::GSSingleRasterizer()
: m_r(&m_ds, 0, 1)
{
m_ds = std::make_unique<GSDrawScanline>();
m_r = std::make_unique<GSRasterizer>(m_ds.get(), 0, 1);
}
GSSingleRasterizer::~GSSingleRasterizer()
{
m_r.reset();
m_ds.reset();
}
GSSingleRasterizer::~GSSingleRasterizer() = default;
void GSSingleRasterizer::Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data)
{
m_ds->SetupDraw(*data.get());
m_r->Draw(*data.get());
Draw(*data.get());
}
// Rasterizes one batch on the calling thread.
//
// SetupDraw() is treated as returning a success flag. When it reports failure,
// the draw-scanline code cache is reset and SetupDraw() is attempted once more;
// the result of that second attempt is not checked here.
void GSSingleRasterizer::Draw(GSRasterizerData& data)
{
	// Failure is the rare path, so the branch hint has to wrap the whole negated
	// condition (same form as GSRasterizerList::Queue). Writing `!unlikely(x)`
	// would instead tell the compiler that success is the unlikely outcome.
	if (unlikely(!m_ds.SetupDraw(data)))
	{
		m_ds.ResetCodeCache();
		m_ds.SetupDraw(data);
	}

	m_r.Draw(data);
}
void GSSingleRasterizer::Sync()
@ -1183,20 +1187,18 @@ bool GSSingleRasterizer::IsSynced() const
int GSSingleRasterizer::GetPixels(bool reset /*= true*/)
{
return m_r->GetPixels(reset);
return m_r.GetPixels(reset);
}
void GSSingleRasterizer::PrintStats()
{
m_ds->PrintStats();
m_ds.PrintStats();
}
//
GSRasterizerList::GSRasterizerList(int threads)
{
m_ds = std::make_unique<GSDrawScanline>();
m_thread_height = compute_best_thread_height(threads);
const int rows = (2048 >> m_thread_height) + 16;
@ -1213,12 +1215,6 @@ GSRasterizerList::GSRasterizerList(int threads)
GSRasterizerList::~GSRasterizerList()
{
PerformanceMetrics::SetGSSWThreadCount(0);
// Destruct draw scanline last.
m_workers.clear();
m_r.clear();
m_ds.reset();
_aligned_free(m_scanline);
}
@ -1252,7 +1248,12 @@ void GSRasterizerList::Queue(const GSRingHeap::SharedPtr<GSRasterizerData>& data
{
GSVector4i r = data->bbox.rintersect(data->scissor);
m_ds->SetupDraw(*data.get());
if (unlikely(!m_ds.SetupDraw(*data.get())))
{
Sync();
m_ds.ResetCodeCache();
m_ds.SetupDraw(*data.get());
}
ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048);
@ -1316,7 +1317,7 @@ std::unique_ptr<IRasterizer> GSRasterizerList::Create(int threads)
for (int i = 0; i < threads; i++)
{
rl->m_r.push_back(std::unique_ptr<GSRasterizer>(new GSRasterizer(rl->m_ds.get(), i, threads)));
rl->m_r.push_back(std::unique_ptr<GSRasterizer>(new GSRasterizer(&rl->m_ds, i, threads)));
auto& r = *rl->m_r[i];
rl->m_workers.push_back(std::unique_ptr<GSWorker>(new GSWorker([i]() { GSRasterizerList::OnWorkerStartup(i); },
[&r](GSRingHeap::SharedPtr<GSRasterizerData>& item) { r.Draw(*item.get()); },

View File

@ -16,7 +16,7 @@
#pragma once
#include "GS/Renderers/SW/GSVertexSW.h"
#include "GS/Renderers/Common/GSFunctionMap.h"
#include "GS/Renderers/SW/GSDrawScanline.h"
#include "GS/GSAlignedClass.h"
#include "GS/GSPerfMon.h"
#include "GS/GSThread_CXX11.h"
@ -47,6 +47,12 @@ public:
int counter;
u8 scanmsk_value;
GSScanlineGlobalData global;
GSDrawScanline::SetupPrimPtr setup_prim;
GSDrawScanline::DrawScanlinePtr draw_scanline;
GSDrawScanline::DrawScanlinePtr draw_edge;
GSRasterizerData()
: scissor(GSVector4i::zero())
, bbox(GSVector4i::zero())
@ -87,30 +93,33 @@ protected:
struct { int sum, actual, total; } m_pixels;
int m_primcount;
// For the current draw.
GSScanlineLocalData m_local = {};
GSDrawScanline::SetupPrimPtr m_setup_prim = nullptr;
GSDrawScanline::DrawScanlinePtr m_draw_scanline = nullptr;
GSDrawScanline::DrawScanlinePtr m_draw_edge = nullptr;
// TODO: Make data pointer a class member?
// Or, at the very least, pull the function pointers out.
__forceinline bool HasEdge() const { return (m_draw_edge != nullptr); }
template <bool scissor_test>
void DrawPoint(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index);
void DrawLine(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index);
void DrawTriangle(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index);
void DrawSprite(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index);
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count);
void DrawLine(const GSVertexSW* vertex, const u32* index);
void DrawTriangle(const GSVertexSW* vertex, const u32* index);
void DrawSprite(const GSVertexSW* vertex, const u32* index);
#if _M_SSE >= 0x501
__forceinline void DrawTriangleSection(const GSRasterizerData& data, int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& RESTRICT edge, const GSVertexSW2& RESTRICT dedge, const GSVertexSW2& RESTRICT dscan, const GSVector4& RESTRICT p0);
#else
__forceinline void DrawTriangleSection(const GSRasterizerData& data, int top, int bottom, GSVertexSW& RESTRICT edge, const GSVertexSW& RESTRICT dedge, const GSVertexSW& RESTRICT dscan, const GSVector4& RESTRICT p0);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& RESTRICT edge, const GSVertexSW& RESTRICT dedge, const GSVertexSW& RESTRICT dscan, const GSVector4& RESTRICT p0);
#endif
void DrawEdge(const GSRasterizerData& data, const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSRasterizerData& data, const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge = false);
__forceinline void Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge = false);
__forceinline void DrawScanline(const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void DrawEdge(const GSRasterizerData& data, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
public:
GSRasterizer(GSDrawScanline* ds, int id, int threads);
@ -148,10 +157,11 @@ public:
int GetPixels(bool reset = true) override;
void PrintStats() override;
void Draw(GSRasterizerData& data);
private:
// TODO: Get rid of indirection here
std::unique_ptr<GSDrawScanline> m_ds;
std::unique_ptr<GSRasterizer> m_r;
GSDrawScanline m_ds;
GSRasterizer m_r;
};
class GSRasterizerList final : public IRasterizer
@ -159,7 +169,7 @@ class GSRasterizerList final : public IRasterizer
protected:
using GSWorker = GSJobQueue<GSRingHeap::SharedPtr<GSRasterizerData>, 65536>;
std::unique_ptr<GSDrawScanline> m_ds;
GSDrawScanline m_ds;
// Worker threads depend on the rasterizers, so don't change the order.
std::vector<std::unique_ptr<GSRasterizer>> m_r;

View File

@ -15,8 +15,8 @@
#pragma once
#include "GSTextureCacheSW.h"
#include "GSDrawScanline.h"
#include "GS/Renderers/SW/GSTextureCacheSW.h"
#include "GS/Renderers/SW/GSRasterizer.h"
#include "GS/GSRingHeap.h"
#include "GS/MultiISA.h"
@ -25,7 +25,7 @@ MULTI_ISA_UNSHARED_START
class GSRendererSW final : public GSRenderer
{
public:
class SharedData : public GSDrawScanline::SharedData
class SharedData : public GSRasterizerData
{
struct alignas(16) TextureLevel
{

View File

@ -22,8 +22,6 @@
#include "Elfheader.h"
#include "System/RecTypes.h"
#include "common/Align.h"
#include "common/MemsetFast.inl"
#include "common/Perf.h"
@ -133,6 +131,36 @@ RecompiledCodeReserve& RecompiledCodeReserve::SetProfilerName(std::string name)
return *this;
}
// Constructs the reserve, handing the fixed label "GS Software Renderer" to the
// RecompiledCodeReserve base constructor. No memory is assigned here; that
// happens in Assign().
GSCodeReserve::GSCodeReserve()
	: RecompiledCodeReserve(std::string("GS Software Renderer"))
{
}

// Nothing extra to tear down beyond what the base class handles.
GSCodeReserve::~GSCodeReserve() = default;
// Binds this reserve to the software-renderer slice of the given allocator.
// The slice position and length are fixed by HostMemoryMap::SWrecOffset and
// HostMemoryMap::SWrecSize.
void GSCodeReserve::Assign(VirtualMemoryManagerPtr allocator)
{
	const size_t region_offset = HostMemoryMap::SWrecOffset;
	const size_t region_size = HostMemoryMap::SWrecSize;

	RecompiledCodeReserve::Assign(std::move(allocator), region_offset, region_size);
}
// Resets the underlying code reserve and rewinds the usage counter, so the next
// Reserve() hands out the start of the region again.
void GSCodeReserve::Reset()
{
	// Base-class reset first, then forget everything committed so far.
	RecompiledCodeReserve::Reset();

	m_memory_used = size_t{0};
}
// Returns the address of the first unused byte in the region.
//
// `size` is only used for the capacity assertion; the usage counter is NOT
// advanced here. Call Commit() afterwards to claim the bytes.
u8* GSCodeReserve::Reserve(size_t size)
{
	pxAssert((m_memory_used + size) <= m_size);

	u8* const write_ptr = &m_baseptr[m_memory_used];
	return write_ptr;
}
// Marks `size` further bytes of the region as used, moving the position that
// the next Reserve() call will return.
void GSCodeReserve::Commit(size_t size)
{
	// Same capacity check as Reserve(): the committed total must fit the region.
	pxAssert((m_memory_used + size) <= m_size);

	m_memory_used = m_memory_used + size;
}
#include "svnrev.h"
Pcsx2Config EmuConfig;
@ -221,9 +249,9 @@ namespace HostMemoryMap
// For debuggers
extern "C" {
#ifdef _WIN32
_declspec(dllexport) uptr EEmem, IOPmem, VUmem, EErec, IOPrec, VIF0rec, VIF1rec, mVU0rec, mVU1rec, bumpAllocator;
_declspec(dllexport) uptr EEmem, IOPmem, VUmem, EErec, IOPrec, VIF0rec, VIF1rec, mVU0rec, mVU1rec, SWjit, bumpAllocator;
#else
__attribute__((visibility("default"), used)) uptr EEmem, IOPmem, VUmem, EErec, IOPrec, VIF0rec, VIF1rec, mVU0rec, mVU1rec, bumpAllocator;
__attribute__((visibility("default"), used)) uptr EEmem, IOPmem, VUmem, EErec, IOPrec, VIF0rec, VIF1rec, mVU0rec, mVU1rec, SWjit, bumpAllocator;
#endif
}
} // namespace HostMemoryMap
@ -301,7 +329,9 @@ bool SysMainMemory::Allocate()
m_ee.Assign(MainMemory());
m_iop.Assign(MainMemory());
m_vu.Assign(MainMemory());
vtlb_Core_Alloc();
return true;
}
@ -315,6 +345,7 @@ void SysMainMemory::Reset()
m_vu.Reset();
// Note: newVif is reset as part of other VIF structures.
// Software is reset on the GS thread.
}
void SysMainMemory::Release()
@ -356,10 +387,14 @@ SysCpuProviderPack::SysCpuProviderPack()
dVifReserve(0);
dVifReserve(1);
}
GetVmMemory().GSCode().Assign(GetVmMemory().CodeMemory());
}
SysCpuProviderPack::~SysCpuProviderPack()
{
GetVmMemory().GSCode().Release();
if (newVifDynaRec)
{
dVifRelease(1);

View File

@ -28,7 +28,6 @@
typedef SafeArray<u8> VmStateBuffer;
class BaseVUmicroCPU;
class RecompiledCodeReserve;
// This is a table of default virtual map addresses for ps2vm components. These locations
// are provided and used to assist in debugging and possibly hacking; as it makes it possible
@ -64,7 +63,7 @@ namespace HostMemoryMap
//////////////////////////////////////////////////////////////////////////
// Code
//////////////////////////////////////////////////////////////////////////
static const u32 CodeSize = 0x0F100000; // 241 mb
static const u32 CodeSize = 0x13100000; // 305 mb
// EE recompiler code cache area (64mb)
static const u32 EErecOffset = 0x00000000;
@ -86,8 +85,67 @@ namespace HostMemoryMap
// SSE-optimized VIF unpack functions (1mb)
static const u32 VIFUnpackRecOffset = 0x0F000000;
// Software Renderer JIT buffer (64mb)
static const u32 SWrecOffset = 0x0F100000;
static const u32 SWrecSize = 0x04000000;
}
// --------------------------------------------------------------------------------------
// RecompiledCodeReserve
// --------------------------------------------------------------------------------------
// A recompiled code reserve is a simple sequential-growth block of memory which is auto-
// cleared to INT 3 (0xcc) as needed.
//
class RecompiledCodeReserve : public VirtualMemoryReserve
{
	using _parent = VirtualMemoryReserve;

protected:
	// Name associated with this reserve for profiling.
	// NOTE(review): presumably consumed by _registerProfiler() - confirm in the implementation.
	std::string m_profiler_name;

public:
	RecompiledCodeReserve(std::string name);
	~RecompiledCodeReserve();

	// Attaches the reserve to a [offset, offset + size) window of the allocator.
	void Assign(VirtualMemoryManagerPtr allocator, size_t offset, size_t size);
	void Reset();

	// Returns *this so that calls can be chained.
	RecompiledCodeReserve& SetProfilerName(std::string name);

	void ForbidModification();
	void AllowModification();

	// Implicit access to the base address of the reserved block.
	operator u8*() { return m_baseptr; }
	operator const u8*() const { return m_baseptr; }

protected:
	void _registerProfiler();
};
// --------------------------------------------------------------------------------------
// GSCodeReserve
// --------------------------------------------------------------------------------------
// Stores code buffers for the GS software JIT.
class GSCodeReserve : public RecompiledCodeReserve
{
public:
	GSCodeReserve();
	~GSCodeReserve();

	// Number of bytes claimed through Commit() since the last Reset().
	size_t GetMemoryUsed() const
	{
		return m_memory_used;
	}

	// Binds the reserve to the software-renderer window of the allocator.
	void Assign(VirtualMemoryManagerPtr allocator);

	// Resets the base reserve and rewinds the usage counter to zero.
	void Reset();

	// Reserve() returns the current write position without advancing it;
	// Commit() advances the position by the number of bytes actually used.
	u8* Reserve(size_t size);
	void Commit(size_t size);

private:
	// Offset of the next free byte from the start of the region.
	size_t m_memory_used{0};
};
// --------------------------------------------------------------------------------------
// SysMainMemory
// --------------------------------------------------------------------------------------
@ -104,6 +162,8 @@ protected:
iopMemoryReserve m_iop;
vuMemoryReserve m_vu;
GSCodeReserve m_gs_code;
public:
SysMainMemory();
~SysMainMemory();
@ -117,6 +177,8 @@ public:
const iopMemoryReserve& IOPMemory() const { return m_iop; }
const vuMemoryReserve& VUMemory() const { return m_vu; }
GSCodeReserve& GSCode() { return m_gs_code; }
bool Allocate();
void Reset();
void Release();

View File

@ -1,50 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2010 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "common/PageFaultSource.h"
// --------------------------------------------------------------------------------------
// RecompiledCodeReserve
// --------------------------------------------------------------------------------------
// A recompiled code reserve is a simple sequential-growth block of memory which is auto-
// cleared to INT 3 (0xcc) as needed.
//
class RecompiledCodeReserve : public VirtualMemoryReserve
{
typedef VirtualMemoryReserve _parent;
protected:
std::string m_profiler_name;
public:
RecompiledCodeReserve(std::string name);
~RecompiledCodeReserve();
void Assign(VirtualMemoryManagerPtr allocator, size_t offset, size_t size);
void Reset();
RecompiledCodeReserve& SetProfilerName(std::string name);
void ForbidModification();
void AllowModification();
operator u8*() { return m_baseptr; }
operator const u8*() const { return m_baseptr; }
protected:
void _registerProfiler();
};

View File

@ -310,8 +310,8 @@ void VMManager::Internal::ReleaseMemory()
std::vector<u8>().swap(s_no_interlacing_cheats_data);
s_no_interlacing_cheats_loaded = false;
s_vm_memory.reset();
s_cpu_provider_pack.reset();
s_vm_memory.reset();
}
SysMainMemory& GetVmMemory()

View File

@ -280,7 +280,6 @@
<ClCompile Include="GS\GSBlock.cpp" />
<ClCompile Include="GS\GSCapture.cpp" />
<ClCompile Include="GS\GSClut.cpp" />
<ClCompile Include="GS\GSCodeBuffer.cpp" />
<ClCompile Include="GS\GSCrc.cpp" />
<ClCompile Include="GS\Renderers\Common\GSDevice.cpp" />
<ClCompile Include="GS\Renderers\DX11\GSDevice11.cpp" />
@ -618,7 +617,6 @@
<ClInclude Include="GS\GSBlock.h" />
<ClInclude Include="GS\GSCapture.h" />
<ClInclude Include="GS\GSClut.h" />
<ClInclude Include="GS\GSCodeBuffer.h" />
<ClInclude Include="GS\GSCrc.h" />
<ClInclude Include="GS\Renderers\Common\GSDevice.h" />
<ClInclude Include="GS\Renderers\DX11\GSDevice11.h" />
@ -715,7 +713,6 @@
<ClInclude Include="SingleRegisterTypes.h" />
<ClInclude Include="System.h" />
<ClInclude Include="System\SysThreads.h" />
<ClInclude Include="System\RecTypes.h" />
<ClInclude Include="Counters.h" />
<ClInclude Include="Dmac.h" />
<ClInclude Include="Hardware.h" />

View File

@ -992,9 +992,6 @@
<ClCompile Include="GS\GSClut.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>
<ClCompile Include="GS\GSCodeBuffer.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>
<ClCompile Include="GS\GSCrc.cpp">
<Filter>System\Ps2\GS</Filter>
</ClCompile>
@ -1426,9 +1423,6 @@
<ClInclude Include="System\SysThreads.h">
<Filter>System\Include</Filter>
</ClInclude>
<ClInclude Include="System\RecTypes.h">
<Filter>System\Include</Filter>
</ClInclude>
<ClInclude Include="Counters.h">
<Filter>System\Ps2\EmotionEngine</Filter>
</ClInclude>
@ -1903,9 +1897,6 @@
<ClInclude Include="GS\GSClut.h">
<Filter>System\Ps2\GS</Filter>
</ClInclude>
<ClInclude Include="GS\GSCodeBuffer.h">
<Filter>System\Ps2\GS</Filter>
</ClInclude>
<ClInclude Include="GS\GSCrc.h">
<Filter>System\Ps2\GS</Filter>
</ClInclude>

View File

@ -23,7 +23,7 @@
#include "iR3000A.h"
#include "R3000A.h"
#include "BaseblockEx.h"
#include "System/RecTypes.h"
#include "System.h"
#include "R5900OpcodeTables.h"
#include "IopBios.h"
#include "IopHw.h"

View File

@ -24,7 +24,7 @@
#include "iR5900.h"
#include "iR5900Analysis.h"
#include "BaseblockEx.h"
#include "System/RecTypes.h"
#include "System.h"
#include "vtlb.h"

View File

@ -30,7 +30,7 @@ using namespace x86Emitter;
#include "Gif_Unit.h"
#include "iR5900.h"
#include "R5900OpcodeTables.h"
#include "System/RecTypes.h"
#include "System.h"
#include "common/emitter/x86emitter.h"
#include "microVU_Misc.h"
#include "microVU_IR.h"

View File

@ -19,7 +19,7 @@
#include "VU.h"
#include "common/emitter/x86emitter.h"
#include "System/RecTypes.h"
#include "System.h"
using namespace x86Emitter;