mirror of https://github.com/PCSX2/pcsx2.git
GS: Switch integer typedefs to match rest of pcsx2
Had to capitalize the names of some struct params that had the same names
This commit is contained in:
parent
2351431d71
commit
b74be70ffc
|
@ -194,7 +194,6 @@ static const int __pagesize = PCSX2_PAGESIZE;
|
||||||
#define __fc __fastcall
|
#define __fc __fastcall
|
||||||
|
|
||||||
// Makes sure that if anyone includes xbyak, it doesn't do anything bad
|
// Makes sure that if anyone includes xbyak, it doesn't do anything bad
|
||||||
#define MIE_INTEGER_TYPE_DEFINED
|
|
||||||
#define XBYAK_ENABLE_OMITTED_OPERAND
|
#define XBYAK_ENABLE_OMITTED_OPERAND
|
||||||
|
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
|
|
|
@ -46,7 +46,7 @@ static HRESULT s_hr = E_FAIL;
|
||||||
#undef None
|
#undef None
|
||||||
|
|
||||||
static GSRenderer* s_gs = NULL;
|
static GSRenderer* s_gs = NULL;
|
||||||
static uint8* s_basemem = NULL;
|
static u8* s_basemem = NULL;
|
||||||
static int s_vsync = 0;
|
static int s_vsync = 0;
|
||||||
static bool s_exclusive = true;
|
static bool s_exclusive = true;
|
||||||
static std::string s_renderer_name;
|
static std::string s_renderer_name;
|
||||||
|
@ -59,7 +59,7 @@ static int s_new_gs_window_width = 0;
|
||||||
static int s_new_gs_window_height = 0;
|
static int s_new_gs_window_height = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GSsetBaseMem(uint8* mem)
|
void GSsetBaseMem(u8* mem)
|
||||||
{
|
{
|
||||||
s_basemem = mem;
|
s_basemem = mem;
|
||||||
|
|
||||||
|
@ -267,19 +267,19 @@ int _GSopen(const WindowInfo& wi, const char* title, GSRendererType renderer, in
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSosdLog(const char* utf8, uint32 color)
|
void GSosdLog(const char* utf8, u32 color)
|
||||||
{
|
{
|
||||||
if (s_gs && s_gs->m_dev)
|
if (s_gs && s_gs->m_dev)
|
||||||
s_gs->m_dev->m_osd.Log(utf8);
|
s_gs->m_dev->m_osd.Log(utf8);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSosdMonitor(const char* key, const char* value, uint32 color)
|
void GSosdMonitor(const char* key, const char* value, u32 color)
|
||||||
{
|
{
|
||||||
if (s_gs && s_gs->m_dev)
|
if (s_gs && s_gs->m_dev)
|
||||||
s_gs->m_dev->m_osd.Monitor(key, value);
|
s_gs->m_dev->m_osd.Monitor(key, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
int GSopen2(const WindowInfo& wi, uint32 flags)
|
int GSopen2(const WindowInfo& wi, u32 flags)
|
||||||
{
|
{
|
||||||
static bool stored_toggle_state = false;
|
static bool stored_toggle_state = false;
|
||||||
const bool toggle_state = !!(flags & 4);
|
const bool toggle_state = !!(flags & 4);
|
||||||
|
@ -344,7 +344,7 @@ void GSreset()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSgifSoftReset(uint32 mask)
|
void GSgifSoftReset(u32 mask)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -355,7 +355,7 @@ void GSgifSoftReset(uint32 mask)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSwriteCSR(uint32 csr)
|
void GSwriteCSR(u32 csr)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -366,7 +366,7 @@ void GSwriteCSR(uint32 csr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSinitReadFIFO(uint8* mem)
|
void GSinitReadFIFO(u8* mem)
|
||||||
{
|
{
|
||||||
GL_PERF("Init Read FIFO1");
|
GL_PERF("Init Read FIFO1");
|
||||||
try
|
try
|
||||||
|
@ -382,7 +382,7 @@ void GSinitReadFIFO(uint8* mem)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSreadFIFO(uint8* mem)
|
void GSreadFIFO(u8* mem)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -397,7 +397,7 @@ void GSreadFIFO(uint8* mem)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSinitReadFIFO2(uint8* mem, uint32 size)
|
void GSinitReadFIFO2(u8* mem, u32 size)
|
||||||
{
|
{
|
||||||
GL_PERF("Init Read FIFO2");
|
GL_PERF("Init Read FIFO2");
|
||||||
try
|
try
|
||||||
|
@ -413,7 +413,7 @@ void GSinitReadFIFO2(uint8* mem, uint32 size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSreadFIFO2(uint8* mem, uint32 size)
|
void GSreadFIFO2(u8* mem, u32 size)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -428,7 +428,7 @@ void GSreadFIFO2(uint8* mem, uint32 size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSgifTransfer(const uint8* mem, uint32 size)
|
void GSgifTransfer(const u8* mem, u32 size)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -439,33 +439,33 @@ void GSgifTransfer(const uint8* mem, uint32 size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSgifTransfer1(uint8* mem, uint32 addr)
|
void GSgifTransfer1(u8* mem, u32 addr)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
s_gs->Transfer<0>(const_cast<uint8*>(mem) + addr, (0x4000 - addr) / 16);
|
s_gs->Transfer<0>(const_cast<u8*>(mem) + addr, (0x4000 - addr) / 16);
|
||||||
}
|
}
|
||||||
catch (GSRecoverableError)
|
catch (GSRecoverableError)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSgifTransfer2(uint8* mem, uint32 size)
|
void GSgifTransfer2(u8* mem, u32 size)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
s_gs->Transfer<1>(const_cast<uint8*>(mem), size);
|
s_gs->Transfer<1>(const_cast<u8*>(mem), size);
|
||||||
}
|
}
|
||||||
catch (GSRecoverableError)
|
catch (GSRecoverableError)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSgifTransfer3(uint8* mem, uint32 size)
|
void GSgifTransfer3(u8* mem, u32 size)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
s_gs->Transfer<2>(const_cast<uint8*>(mem), size);
|
s_gs->Transfer<2>(const_cast<u8*>(mem), size);
|
||||||
}
|
}
|
||||||
catch (GSRecoverableError)
|
catch (GSRecoverableError)
|
||||||
{
|
{
|
||||||
|
@ -487,7 +487,7 @@ void GSvsync(int field)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 GSmakeSnapshot(char* path)
|
u32 GSmakeSnapshot(char* path)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -629,7 +629,7 @@ void GSendRecording()
|
||||||
pt(" - Capture ended\n");
|
pt(" - Capture ended\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSsetGameCRC(uint32 crc, int options)
|
void GSsetGameCRC(u32 crc, int options)
|
||||||
{
|
{
|
||||||
s_gs->SetGameCRC(crc, options);
|
s_gs->SetGameCRC(crc, options);
|
||||||
}
|
}
|
||||||
|
@ -746,7 +746,7 @@ void vmfree(void* ptr, size_t size)
|
||||||
}
|
}
|
||||||
|
|
||||||
static HANDLE s_fh = NULL;
|
static HANDLE s_fh = NULL;
|
||||||
static uint8* s_Next[8];
|
static u8* s_Next[8];
|
||||||
|
|
||||||
void* fifo_alloc(size_t size, size_t repeat)
|
void* fifo_alloc(size_t size, size_t repeat)
|
||||||
{
|
{
|
||||||
|
@ -770,8 +770,8 @@ void* fifo_alloc(size_t size, size_t repeat)
|
||||||
void* fifo = MapViewOfFile(s_fh, FILE_MAP_ALL_ACCESS, 0, 0, size);
|
void* fifo = MapViewOfFile(s_fh, FILE_MAP_ALL_ACCESS, 0, 0, size);
|
||||||
for (size_t i = 1; i < repeat; i++)
|
for (size_t i = 1; i < repeat; i++)
|
||||||
{
|
{
|
||||||
void* base = (uint8*)fifo + size * i;
|
void* base = (u8*)fifo + size * i;
|
||||||
s_Next[i] = (uint8*)MapViewOfFileEx(s_fh, FILE_MAP_ALL_ACCESS, 0, 0, size, base);
|
s_Next[i] = (u8*)MapViewOfFileEx(s_fh, FILE_MAP_ALL_ACCESS, 0, 0, size, base);
|
||||||
errorID = ::GetLastError();
|
errorID = ::GetLastError();
|
||||||
if (s_Next[i] != base)
|
if (s_Next[i] != base)
|
||||||
{
|
{
|
||||||
|
@ -881,8 +881,8 @@ void* fifo_alloc(size_t size, size_t repeat)
|
||||||
|
|
||||||
for (size_t i = 1; i < repeat; i++)
|
for (size_t i = 1; i < repeat; i++)
|
||||||
{
|
{
|
||||||
void* base = (uint8*)fifo + size * i;
|
void* base = (u8*)fifo + size * i;
|
||||||
uint8* next = (uint8*)mmap(base, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, s_shm_fd, 0);
|
u8* next = (u8*)mmap(base, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, s_shm_fd, 0);
|
||||||
if (next != base)
|
if (next != base)
|
||||||
fprintf(stderr, "Fail to mmap contiguous segment\n");
|
fprintf(stderr, "Fail to mmap contiguous segment\n");
|
||||||
}
|
}
|
||||||
|
@ -1081,16 +1081,16 @@ void GSApp::Init()
|
||||||
m_section = "Settings";
|
m_section = "Settings";
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
m_gs_renderers.push_back(GSSetting(static_cast<uint32>(GSRendererType::DX1011_HW), "Direct3D 11", ""));
|
m_gs_renderers.push_back(GSSetting(static_cast<u32>(GSRendererType::DX1011_HW), "Direct3D 11", ""));
|
||||||
m_gs_renderers.push_back(GSSetting(static_cast<uint32>(GSRendererType::OGL_HW), "OpenGL", ""));
|
m_gs_renderers.push_back(GSSetting(static_cast<u32>(GSRendererType::OGL_HW), "OpenGL", ""));
|
||||||
m_gs_renderers.push_back(GSSetting(static_cast<uint32>(GSRendererType::OGL_SW), "Software", ""));
|
m_gs_renderers.push_back(GSSetting(static_cast<u32>(GSRendererType::OGL_SW), "Software", ""));
|
||||||
#else // Linux
|
#else // Linux
|
||||||
m_gs_renderers.push_back(GSSetting(static_cast<uint32>(GSRendererType::OGL_HW), "OpenGL", ""));
|
m_gs_renderers.push_back(GSSetting(static_cast<u32>(GSRendererType::OGL_HW), "OpenGL", ""));
|
||||||
m_gs_renderers.push_back(GSSetting(static_cast<uint32>(GSRendererType::OGL_SW), "Software", ""));
|
m_gs_renderers.push_back(GSSetting(static_cast<u32>(GSRendererType::OGL_SW), "Software", ""));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The null renderer goes third, it has use for benchmarking purposes in a release build
|
// The null renderer goes third, it has use for benchmarking purposes in a release build
|
||||||
m_gs_renderers.push_back(GSSetting(static_cast<uint32>(GSRendererType::Null), "Null", ""));
|
m_gs_renderers.push_back(GSSetting(static_cast<u32>(GSRendererType::Null), "Null", ""));
|
||||||
|
|
||||||
m_gs_interlace.push_back(GSSetting(0, "None", ""));
|
m_gs_interlace.push_back(GSSetting(0, "None", ""));
|
||||||
m_gs_interlace.push_back(GSSetting(1, "Weave tff", "saw-tooth"));
|
m_gs_interlace.push_back(GSSetting(1, "Weave tff", "saw-tooth"));
|
||||||
|
@ -1120,14 +1120,14 @@ void GSApp::Init()
|
||||||
m_gs_dithering.push_back(GSSetting(2, "Unscaled", "Default"));
|
m_gs_dithering.push_back(GSSetting(2, "Unscaled", "Default"));
|
||||||
m_gs_dithering.push_back(GSSetting(1, "Scaled", ""));
|
m_gs_dithering.push_back(GSSetting(1, "Scaled", ""));
|
||||||
|
|
||||||
m_gs_bifilter.push_back(GSSetting(static_cast<uint32>(BiFiltering::Nearest), "Nearest", ""));
|
m_gs_bifilter.push_back(GSSetting(static_cast<u32>(BiFiltering::Nearest), "Nearest", ""));
|
||||||
m_gs_bifilter.push_back(GSSetting(static_cast<uint32>(BiFiltering::Forced_But_Sprite), "Bilinear", "Forced excluding sprite"));
|
m_gs_bifilter.push_back(GSSetting(static_cast<u32>(BiFiltering::Forced_But_Sprite), "Bilinear", "Forced excluding sprite"));
|
||||||
m_gs_bifilter.push_back(GSSetting(static_cast<uint32>(BiFiltering::Forced), "Bilinear", "Forced"));
|
m_gs_bifilter.push_back(GSSetting(static_cast<u32>(BiFiltering::Forced), "Bilinear", "Forced"));
|
||||||
m_gs_bifilter.push_back(GSSetting(static_cast<uint32>(BiFiltering::PS2), "Bilinear", "PS2"));
|
m_gs_bifilter.push_back(GSSetting(static_cast<u32>(BiFiltering::PS2), "Bilinear", "PS2"));
|
||||||
|
|
||||||
m_gs_trifilter.push_back(GSSetting(static_cast<uint32>(TriFiltering::None), "None", "Default"));
|
m_gs_trifilter.push_back(GSSetting(static_cast<u32>(TriFiltering::None), "None", "Default"));
|
||||||
m_gs_trifilter.push_back(GSSetting(static_cast<uint32>(TriFiltering::PS2), "Trilinear", ""));
|
m_gs_trifilter.push_back(GSSetting(static_cast<u32>(TriFiltering::PS2), "Trilinear", ""));
|
||||||
m_gs_trifilter.push_back(GSSetting(static_cast<uint32>(TriFiltering::Forced), "Trilinear", "Ultra/Slow"));
|
m_gs_trifilter.push_back(GSSetting(static_cast<u32>(TriFiltering::Forced), "Trilinear", "Ultra/Slow"));
|
||||||
|
|
||||||
m_gs_generic_list.push_back(GSSetting(-1, "Automatic", "Default"));
|
m_gs_generic_list.push_back(GSSetting(-1, "Automatic", "Default"));
|
||||||
m_gs_generic_list.push_back(GSSetting(0, "Force-Disabled", ""));
|
m_gs_generic_list.push_back(GSSetting(0, "Force-Disabled", ""));
|
||||||
|
@ -1202,7 +1202,7 @@ void GSApp::Init()
|
||||||
m_default_configuration["capture_threads"] = "4";
|
m_default_configuration["capture_threads"] = "4";
|
||||||
m_default_configuration["CaptureHeight"] = "480";
|
m_default_configuration["CaptureHeight"] = "480";
|
||||||
m_default_configuration["CaptureWidth"] = "640";
|
m_default_configuration["CaptureWidth"] = "640";
|
||||||
m_default_configuration["crc_hack_level"] = std::to_string(static_cast<int8>(CRCHackLevel::Automatic));
|
m_default_configuration["crc_hack_level"] = std::to_string(static_cast<s8>(CRCHackLevel::Automatic));
|
||||||
m_default_configuration["CrcHacksExclusions"] = "";
|
m_default_configuration["CrcHacksExclusions"] = "";
|
||||||
m_default_configuration["debug_glsl_shader"] = "0";
|
m_default_configuration["debug_glsl_shader"] = "0";
|
||||||
m_default_configuration["debug_opengl"] = "0";
|
m_default_configuration["debug_opengl"] = "0";
|
||||||
|
@ -1211,7 +1211,7 @@ void GSApp::Init()
|
||||||
m_default_configuration["dump"] = "0";
|
m_default_configuration["dump"] = "0";
|
||||||
m_default_configuration["extrathreads"] = "2";
|
m_default_configuration["extrathreads"] = "2";
|
||||||
m_default_configuration["extrathreads_height"] = "4";
|
m_default_configuration["extrathreads_height"] = "4";
|
||||||
m_default_configuration["filter"] = std::to_string(static_cast<int8>(BiFiltering::PS2));
|
m_default_configuration["filter"] = std::to_string(static_cast<s8>(BiFiltering::PS2));
|
||||||
m_default_configuration["force_texture_clear"] = "0";
|
m_default_configuration["force_texture_clear"] = "0";
|
||||||
m_default_configuration["fxaa"] = "0";
|
m_default_configuration["fxaa"] = "0";
|
||||||
m_default_configuration["interlace"] = "7";
|
m_default_configuration["interlace"] = "7";
|
||||||
|
@ -1288,7 +1288,7 @@ void GSApp::Init()
|
||||||
m_default_configuration["UserHacks_TCOffsetX"] = "0";
|
m_default_configuration["UserHacks_TCOffsetX"] = "0";
|
||||||
m_default_configuration["UserHacks_TCOffsetY"] = "0";
|
m_default_configuration["UserHacks_TCOffsetY"] = "0";
|
||||||
m_default_configuration["UserHacks_TextureInsideRt"] = "0";
|
m_default_configuration["UserHacks_TextureInsideRt"] = "0";
|
||||||
m_default_configuration["UserHacks_TriFilter"] = std::to_string(static_cast<int8>(TriFiltering::None));
|
m_default_configuration["UserHacks_TriFilter"] = std::to_string(static_cast<s8>(TriFiltering::None));
|
||||||
m_default_configuration["UserHacks_WildHack"] = "0";
|
m_default_configuration["UserHacks_WildHack"] = "0";
|
||||||
m_default_configuration["wrap_gs_mem"] = "0";
|
m_default_configuration["wrap_gs_mem"] = "0";
|
||||||
m_default_configuration["vsync"] = "0";
|
m_default_configuration["vsync"] = "0";
|
||||||
|
|
1042
pcsx2/GS/GS.h
1042
pcsx2/GS/GS.h
File diff suppressed because it is too large
Load Diff
|
@ -47,11 +47,11 @@ class GSBlock
|
||||||
static const GSVector4i m_uw8hmask3;
|
static const GSVector4i m_uw8hmask3;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
template <int i, int alignment, uint32 mask>
|
template <int i, int alignment, u32 mask>
|
||||||
__forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
__forceinline static void WriteColumn32(u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
const uint8* RESTRICT s0 = &src[srcpitch * 0];
|
const u8* RESTRICT s0 = &src[srcpitch * 0];
|
||||||
const uint8* RESTRICT s1 = &src[srcpitch * 1];
|
const u8* RESTRICT s1 = &src[srcpitch * 1];
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -173,12 +173,12 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i, int alignment>
|
template <int i, int alignment>
|
||||||
__forceinline static void WriteColumn16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
__forceinline static void WriteColumn16(u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
const uint8* RESTRICT s0 = &src[srcpitch * 0];
|
const u8* RESTRICT s0 = &src[srcpitch * 0];
|
||||||
const uint8* RESTRICT s1 = &src[srcpitch * 1];
|
const u8* RESTRICT s1 = &src[srcpitch * 1];
|
||||||
|
|
||||||
// for(int j = 0; j < 16; j++) {((uint16*)s0)[j] = columnTable16[0][j]; ((uint16*)s1)[j] = columnTable16[1][j];}
|
// for(int j = 0; j < 16; j++) {((u16*)s0)[j] = columnTable16[0][j]; ((u16*)s1)[j] = columnTable16[1][j];}
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -247,7 +247,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i, int alignment>
|
template <int i, int alignment>
|
||||||
__forceinline static void WriteColumn8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
__forceinline static void WriteColumn8(u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
// TODO: read unaligned as WriteColumn32 does and try saving a few shuffles
|
// TODO: read unaligned as WriteColumn32 does and try saving a few shuffles
|
||||||
|
|
||||||
|
@ -310,7 +310,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i, int alignment>
|
template <int i, int alignment>
|
||||||
__forceinline static void WriteColumn4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
__forceinline static void WriteColumn4(u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
//printf("WriteColumn4\n");
|
//printf("WriteColumn4\n");
|
||||||
|
|
||||||
|
@ -345,8 +345,8 @@ public:
|
||||||
((GSVector4i*)dst)[i * 4 + 3] = v3;
|
((GSVector4i*)dst)[i * 4 + 3] = v3;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int alignment, uint32 mask>
|
template <int alignment, u32 mask>
|
||||||
static void WriteColumn32(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
static void WriteColumn32(int y, u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
switch ((y >> 1) & 3)
|
switch ((y >> 1) & 3)
|
||||||
{
|
{
|
||||||
|
@ -359,7 +359,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int alignment>
|
template <int alignment>
|
||||||
static void WriteColumn16(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
static void WriteColumn16(int y, u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
switch ((y >> 1) & 3)
|
switch ((y >> 1) & 3)
|
||||||
{
|
{
|
||||||
|
@ -372,7 +372,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int alignment>
|
template <int alignment>
|
||||||
static void WriteColumn8(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
static void WriteColumn8(int y, u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
switch ((y >> 2) & 3)
|
switch ((y >> 2) & 3)
|
||||||
{
|
{
|
||||||
|
@ -385,7 +385,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int alignment>
|
template <int alignment>
|
||||||
static void WriteColumn4(int y, uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
static void WriteColumn4(int y, u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
switch ((y >> 2) & 3)
|
switch ((y >> 2) & 3)
|
||||||
{
|
{
|
||||||
|
@ -397,8 +397,8 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int alignment, uint32 mask>
|
template <int alignment, u32 mask>
|
||||||
static void WriteBlock32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
static void WriteBlock32(u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
WriteColumn32<0, alignment, mask>(dst, src, srcpitch);
|
WriteColumn32<0, alignment, mask>(dst, src, srcpitch);
|
||||||
src += srcpitch * 2;
|
src += srcpitch * 2;
|
||||||
|
@ -410,7 +410,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int alignment>
|
template <int alignment>
|
||||||
static void WriteBlock16(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
static void WriteBlock16(u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
WriteColumn16<0, alignment>(dst, src, srcpitch);
|
WriteColumn16<0, alignment>(dst, src, srcpitch);
|
||||||
src += srcpitch * 2;
|
src += srcpitch * 2;
|
||||||
|
@ -422,7 +422,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int alignment>
|
template <int alignment>
|
||||||
static void WriteBlock8(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
static void WriteBlock8(u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
WriteColumn8<0, alignment>(dst, src, srcpitch);
|
WriteColumn8<0, alignment>(dst, src, srcpitch);
|
||||||
src += srcpitch * 4;
|
src += srcpitch * 4;
|
||||||
|
@ -434,7 +434,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int alignment>
|
template <int alignment>
|
||||||
static void WriteBlock4(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
static void WriteBlock4(u8* RESTRICT dst, const u8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
WriteColumn4<0, alignment>(dst, src, srcpitch);
|
WriteColumn4<0, alignment>(dst, src, srcpitch);
|
||||||
src += srcpitch * 4;
|
src += srcpitch * 4;
|
||||||
|
@ -446,7 +446,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i>
|
template <int i>
|
||||||
__forceinline static void ReadColumn32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
__forceinline static void ReadColumn32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -484,7 +484,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i>
|
template <int i>
|
||||||
__forceinline static void ReadColumn16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
__forceinline static void ReadColumn16(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -526,10 +526,10 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i>
|
template <int i>
|
||||||
__forceinline static void ReadColumn8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
__forceinline static void ReadColumn8(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
|
|
||||||
//for(int j = 0; j < 64; j++) ((uint8*)src)[j] = (uint8)j;
|
//for(int j = 0; j < 64; j++) ((u8*)src)[j] = (u8)j;
|
||||||
|
|
||||||
#if 0 //_M_SSE >= 0x501
|
#if 0 //_M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -593,7 +593,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i>
|
template <int i>
|
||||||
__forceinline static void ReadColumn4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
__forceinline static void ReadColumn4(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
//printf("ReadColumn4\n");
|
//printf("ReadColumn4\n");
|
||||||
|
|
||||||
|
@ -628,7 +628,7 @@ public:
|
||||||
GSVector4i::store<true>(&dst[dstpitch * 3], v3);
|
GSVector4i::store<true>(&dst[dstpitch * 3], v3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReadColumn32(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
static void ReadColumn32(int y, const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
switch ((y >> 1) & 3)
|
switch ((y >> 1) & 3)
|
||||||
{
|
{
|
||||||
|
@ -640,7 +640,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReadColumn16(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
static void ReadColumn16(int y, const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
switch ((y >> 1) & 3)
|
switch ((y >> 1) & 3)
|
||||||
{
|
{
|
||||||
|
@ -652,7 +652,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReadColumn8(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
static void ReadColumn8(int y, const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
switch ((y >> 2) & 3)
|
switch ((y >> 2) & 3)
|
||||||
{
|
{
|
||||||
|
@ -664,7 +664,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReadColumn4(int y, const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
static void ReadColumn4(int y, const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
switch ((y >> 2) & 3)
|
switch ((y >> 2) & 3)
|
||||||
{
|
{
|
||||||
|
@ -676,7 +676,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReadBlock32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
static void ReadBlock32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
ReadColumn32<0>(src, dst, dstpitch);
|
ReadColumn32<0>(src, dst, dstpitch);
|
||||||
dst += dstpitch * 2;
|
dst += dstpitch * 2;
|
||||||
|
@ -687,7 +687,7 @@ public:
|
||||||
ReadColumn32<3>(src, dst, dstpitch);
|
ReadColumn32<3>(src, dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReadBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
static void ReadBlock16(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
ReadColumn16<0>(src, dst, dstpitch);
|
ReadColumn16<0>(src, dst, dstpitch);
|
||||||
dst += dstpitch * 2;
|
dst += dstpitch * 2;
|
||||||
|
@ -698,7 +698,7 @@ public:
|
||||||
ReadColumn16<3>(src, dst, dstpitch);
|
ReadColumn16<3>(src, dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReadBlock8(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
static void ReadBlock8(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
ReadColumn8<0>(src, dst, dstpitch);
|
ReadColumn8<0>(src, dst, dstpitch);
|
||||||
dst += dstpitch * 4;
|
dst += dstpitch * 4;
|
||||||
|
@ -709,7 +709,7 @@ public:
|
||||||
ReadColumn8<3>(src, dst, dstpitch);
|
ReadColumn8<3>(src, dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ReadBlock4(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
static void ReadBlock4(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
ReadColumn4<0>(src, dst, dstpitch);
|
ReadColumn4<0>(src, dst, dstpitch);
|
||||||
dst += dstpitch * 4;
|
dst += dstpitch * 4;
|
||||||
|
@ -720,7 +720,7 @@ public:
|
||||||
ReadColumn4<3>(src, dst, dstpitch);
|
ReadColumn4<3>(src, dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ReadBlock4P(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
__forceinline static void ReadBlock4P(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
//printf("ReadBlock4P\n");
|
//printf("ReadBlock4P\n");
|
||||||
|
|
||||||
|
@ -784,12 +784,12 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ReadBlock8HP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
__forceinline static void ReadBlock8HP(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
uint8* RESTRICT d0 = &dst[dstpitch * 0];
|
u8* RESTRICT d0 = &dst[dstpitch * 0];
|
||||||
uint8* RESTRICT d1 = &dst[dstpitch * 4];
|
u8* RESTRICT d1 = &dst[dstpitch * 4];
|
||||||
|
|
||||||
const GSVector8i* s = (const GSVector8i*)src;
|
const GSVector8i* s = (const GSVector8i*)src;
|
||||||
|
|
||||||
|
@ -855,12 +855,12 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ReadBlock4HLP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
__forceinline static void ReadBlock4HLP(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
uint8* RESTRICT d0 = &dst[dstpitch * 0];
|
u8* RESTRICT d0 = &dst[dstpitch * 0];
|
||||||
uint8* RESTRICT d1 = &dst[dstpitch * 4];
|
u8* RESTRICT d1 = &dst[dstpitch * 4];
|
||||||
|
|
||||||
const GSVector8i* s = (const GSVector8i*)src;
|
const GSVector8i* s = (const GSVector8i*)src;
|
||||||
|
|
||||||
|
@ -929,12 +929,12 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ReadBlock4HHP(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch)
|
__forceinline static void ReadBlock4HHP(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
uint8* RESTRICT d0 = &dst[dstpitch * 0];
|
u8* RESTRICT d0 = &dst[dstpitch * 0];
|
||||||
uint8* RESTRICT d1 = &dst[dstpitch * 4];
|
u8* RESTRICT d1 = &dst[dstpitch * 4];
|
||||||
|
|
||||||
const GSVector8i* s = (const GSVector8i*)src;
|
const GSVector8i* s = (const GSVector8i*)src;
|
||||||
|
|
||||||
|
@ -1013,7 +1013,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool AEM>
|
template <bool AEM>
|
||||||
static void ExpandBlock24(const uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
|
static void ExpandBlock24(const u32* RESTRICT src, u8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -1061,7 +1061,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool AEM>
|
template <bool AEM>
|
||||||
static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs
|
static void ExpandBlock16(const u16* RESTRICT src, u8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -1101,7 +1101,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ExpandBlock8_32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 16; j++, dst += dstpitch)
|
for (int j = 0; j < 16; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1109,7 +1109,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock8_16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ExpandBlock8_16(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 16; j++, dst += dstpitch)
|
for (int j = 0; j < 16; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1117,7 +1117,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock4_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint64* RESTRICT pal)
|
__forceinline static void ExpandBlock4_32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u64* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 16; j++, dst += dstpitch)
|
for (int j = 0; j < 16; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1125,7 +1125,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock4_16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint64* RESTRICT pal)
|
__forceinline static void ExpandBlock4_16(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u64* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 16; j++, dst += dstpitch)
|
for (int j = 0; j < 16; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1133,7 +1133,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock8H_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ExpandBlock8H_32(u32* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 8; j++, dst += dstpitch)
|
for (int j = 0; j < 8; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1144,7 +1144,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock8H_16(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ExpandBlock8H_16(u32* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 8; j++, dst += dstpitch)
|
for (int j = 0; j < 8; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1158,7 +1158,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock4HL_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ExpandBlock4HL_32(u32* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 8; j++, dst += dstpitch)
|
for (int j = 0; j < 8; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1169,7 +1169,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock4HL_16(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ExpandBlock4HL_16(u32* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 8; j++, dst += dstpitch)
|
for (int j = 0; j < 8; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1182,7 +1182,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock4HH_32(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ExpandBlock4HH_32(u32* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 8; j++, dst += dstpitch)
|
for (int j = 0; j < 8; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1193,7 +1193,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ExpandBlock4HH_16(uint32* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ExpandBlock4HH_16(u32* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < 8; j++, dst += dstpitch)
|
for (int j = 0; j < 8; j++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
|
@ -1206,14 +1206,14 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void UnpackAndWriteBlock24(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst)
|
__forceinline static void UnpackAndWriteBlock24(const u8* RESTRICT src, int srcpitch, u8* RESTRICT dst)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
const uint8* RESTRICT s0 = &src[srcpitch * 0];
|
const u8* RESTRICT s0 = &src[srcpitch * 0];
|
||||||
const uint8* RESTRICT s1 = &src[srcpitch * 1];
|
const u8* RESTRICT s1 = &src[srcpitch * 1];
|
||||||
const uint8* RESTRICT s2 = &src[srcpitch * 2];
|
const u8* RESTRICT s2 = &src[srcpitch * 2];
|
||||||
const uint8* RESTRICT s3 = &src[srcpitch * 3];
|
const u8* RESTRICT s3 = &src[srcpitch * 3];
|
||||||
|
|
||||||
GSVector8i v0, v1, v2, v3, v4, v5, v6;
|
GSVector8i v0, v1, v2, v3, v4, v5, v6;
|
||||||
GSVector8i mask = GSVector8i::x00ffffff();
|
GSVector8i mask = GSVector8i::x00ffffff();
|
||||||
|
@ -1293,7 +1293,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void UnpackAndWriteBlock8H(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst)
|
__forceinline static void UnpackAndWriteBlock8H(const u8* RESTRICT src, int srcpitch, u8* RESTRICT dst)
|
||||||
{
|
{
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
@ -1367,13 +1367,13 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void UnpackAndWriteBlock4HL(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst)
|
__forceinline static void UnpackAndWriteBlock4HL(const u8* RESTRICT src, int srcpitch, u8* RESTRICT dst)
|
||||||
{
|
{
|
||||||
//printf("4HL\n");
|
//printf("4HL\n");
|
||||||
|
|
||||||
if (0)
|
if (0)
|
||||||
{
|
{
|
||||||
uint8* s = (uint8*)src;
|
u8* s = (u8*)src;
|
||||||
for (int j = 0; j < 8; j++, s += srcpitch)
|
for (int j = 0; j < 8; j++, s += srcpitch)
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
s[i] = (columnTable32[j][i * 2] & 0x0f) | (columnTable32[j][i * 2 + 1] << 4);
|
s[i] = (columnTable32[j][i * 2] & 0x0f) | (columnTable32[j][i * 2 + 1] << 4);
|
||||||
|
@ -1385,7 +1385,7 @@ public:
|
||||||
GSVector8i v0, v1, v2, v3;
|
GSVector8i v0, v1, v2, v3;
|
||||||
GSVector8i mask(0x0f000000);
|
GSVector8i mask(0x0f000000);
|
||||||
|
|
||||||
v6 = GSVector4i(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]);
|
v6 = GSVector4i(*(u32*)&src[srcpitch * 0], *(u32*)&src[srcpitch * 2], *(u32*)&src[srcpitch * 1], *(u32*)&src[srcpitch * 3]);
|
||||||
|
|
||||||
v4 = v6.upl8(v6 >> 4);
|
v4 = v6.upl8(v6 >> 4);
|
||||||
v5 = v6.uph8(v6 >> 4);
|
v5 = v6.uph8(v6 >> 4);
|
||||||
|
@ -1405,7 +1405,7 @@ public:
|
||||||
|
|
||||||
src += srcpitch * 4;
|
src += srcpitch * 4;
|
||||||
|
|
||||||
v6 = GSVector4i(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]);
|
v6 = GSVector4i(*(u32*)&src[srcpitch * 0], *(u32*)&src[srcpitch * 2], *(u32*)&src[srcpitch * 1], *(u32*)&src[srcpitch * 3]);
|
||||||
|
|
||||||
v4 = v6.upl8(v6 >> 4);
|
v4 = v6.upl8(v6 >> 4);
|
||||||
v5 = v6.uph8(v6 >> 4);
|
v5 = v6.uph8(v6 >> 4);
|
||||||
|
@ -1434,7 +1434,7 @@ public:
|
||||||
|
|
||||||
for (int i = 0; i < 2; i++, src += srcpitch * 4)
|
for (int i = 0; i < 2; i++, src += srcpitch * 4)
|
||||||
{
|
{
|
||||||
GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 3]);
|
GSVector4i v(*(u32*)&src[srcpitch * 0], *(u32*)&src[srcpitch * 1], *(u32*)&src[srcpitch * 2], *(u32*)&src[srcpitch * 3]);
|
||||||
|
|
||||||
v4 = v.upl8(v >> 4);
|
v4 = v.upl8(v >> 4);
|
||||||
v5 = v.uph8(v >> 4);
|
v5 = v.uph8(v >> 4);
|
||||||
|
@ -1463,7 +1463,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void UnpackAndWriteBlock4HH(const uint8* RESTRICT src, int srcpitch, uint8* RESTRICT dst)
|
__forceinline static void UnpackAndWriteBlock4HH(const u8* RESTRICT src, int srcpitch, u8* RESTRICT dst)
|
||||||
{
|
{
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
@ -1472,7 +1472,7 @@ public:
|
||||||
GSVector8i v0, v1, v2, v3;
|
GSVector8i v0, v1, v2, v3;
|
||||||
GSVector8i mask = GSVector8i::xf0000000();
|
GSVector8i mask = GSVector8i::xf0000000();
|
||||||
|
|
||||||
v6 = GSVector4i(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]);
|
v6 = GSVector4i(*(u32*)&src[srcpitch * 0], *(u32*)&src[srcpitch * 2], *(u32*)&src[srcpitch * 1], *(u32*)&src[srcpitch * 3]);
|
||||||
|
|
||||||
v4 = (v6 << 4).upl8(v6);
|
v4 = (v6 << 4).upl8(v6);
|
||||||
v5 = (v6 << 4).uph8(v6);
|
v5 = (v6 << 4).uph8(v6);
|
||||||
|
@ -1492,7 +1492,7 @@ public:
|
||||||
|
|
||||||
src += srcpitch * 4;
|
src += srcpitch * 4;
|
||||||
|
|
||||||
v6 = GSVector4i(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 3]);
|
v6 = GSVector4i(*(u32*)&src[srcpitch * 0], *(u32*)&src[srcpitch * 2], *(u32*)&src[srcpitch * 1], *(u32*)&src[srcpitch * 3]);
|
||||||
|
|
||||||
v4 = (v6 << 4).upl8(v6);
|
v4 = (v6 << 4).upl8(v6);
|
||||||
v5 = (v6 << 4).uph8(v6);
|
v5 = (v6 << 4).uph8(v6);
|
||||||
|
@ -1521,7 +1521,7 @@ public:
|
||||||
|
|
||||||
for (int i = 0; i < 2; i++, src += srcpitch * 4)
|
for (int i = 0; i < 2; i++, src += srcpitch * 4)
|
||||||
{
|
{
|
||||||
GSVector4i v(*(uint32*)&src[srcpitch * 0], *(uint32*)&src[srcpitch * 1], *(uint32*)&src[srcpitch * 2], *(uint32*)&src[srcpitch * 3]);
|
GSVector4i v(*(u32*)&src[srcpitch * 0], *(u32*)&src[srcpitch * 1], *(u32*)&src[srcpitch * 2], *(u32*)&src[srcpitch * 3]);
|
||||||
|
|
||||||
v4 = (v << 4).upl8(v);
|
v4 = (v << 4).upl8(v);
|
||||||
v5 = (v << 4).uph8(v);
|
v5 = (v << 4).uph8(v);
|
||||||
|
@ -1551,7 +1551,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool AEM>
|
template <bool AEM>
|
||||||
__forceinline static void ReadAndExpandBlock24(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
|
__forceinline static void ReadAndExpandBlock24(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -1628,7 +1628,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool AEM>
|
template <bool AEM>
|
||||||
__forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
|
__forceinline static void ReadAndExpandBlock16(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -1656,16 +1656,16 @@ public:
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
alignas(32) uint16 block[16 * 8];
|
alignas(32) u16 block[16 * 8];
|
||||||
|
|
||||||
ReadBlock16(src, (uint8*)block, sizeof(block) / 8);
|
ReadBlock16(src, (u8*)block, sizeof(block) / 8);
|
||||||
|
|
||||||
ExpandBlock16<AEM>(block, dst, dstpitch, TEXA);
|
ExpandBlock16<AEM>(block, dst, dstpitch, TEXA);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static void ReadAndExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ReadAndExpandBlock8_32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
//printf("ReadAndExpandBlock8_32\n");
|
//printf("ReadAndExpandBlock8_32\n");
|
||||||
|
|
||||||
|
@ -1714,7 +1714,7 @@ public:
|
||||||
|
|
||||||
// TODO: ReadAndExpandBlock8_16
|
// TODO: ReadAndExpandBlock8_16
|
||||||
|
|
||||||
__forceinline static void ReadAndExpandBlock4_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint64* RESTRICT pal)
|
__forceinline static void ReadAndExpandBlock4_32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u64* RESTRICT pal)
|
||||||
{
|
{
|
||||||
//printf("ReadAndExpandBlock4_32\n");
|
//printf("ReadAndExpandBlock4_32\n");
|
||||||
|
|
||||||
|
@ -1779,7 +1779,7 @@ public:
|
||||||
|
|
||||||
// TODO: ReadAndExpandBlock4_16
|
// TODO: ReadAndExpandBlock4_16
|
||||||
|
|
||||||
__forceinline static void ReadAndExpandBlock8H_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ReadAndExpandBlock8H_32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
//printf("ReadAndExpandBlock8H_32\n");
|
//printf("ReadAndExpandBlock8H_32\n");
|
||||||
|
|
||||||
|
@ -1810,7 +1810,7 @@ public:
|
||||||
|
|
||||||
// TODO: ReadAndExpandBlock8H_16
|
// TODO: ReadAndExpandBlock8H_16
|
||||||
|
|
||||||
__forceinline static void ReadAndExpandBlock4HL_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ReadAndExpandBlock4HL_32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
//printf("ReadAndExpandBlock4HL_32\n");
|
//printf("ReadAndExpandBlock4HL_32\n");
|
||||||
const GSVector4i* s = (const GSVector4i*)src;
|
const GSVector4i* s = (const GSVector4i*)src;
|
||||||
|
@ -1840,7 +1840,7 @@ public:
|
||||||
|
|
||||||
// TODO: ReadAndExpandBlock4HL_16
|
// TODO: ReadAndExpandBlock4HL_16
|
||||||
|
|
||||||
__forceinline static void ReadAndExpandBlock4HH_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
__forceinline static void ReadAndExpandBlock4HH_32(const u8* RESTRICT src, u8* RESTRICT dst, int dstpitch, const u32* RESTRICT pal)
|
||||||
{
|
{
|
||||||
//printf("ReadAndExpandBlock4HH_32\n");
|
//printf("ReadAndExpandBlock4HH_32\n");
|
||||||
|
|
||||||
|
|
|
@ -126,7 +126,7 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
|
||||||
vih.bmiHeader.biPlanes = 1;
|
vih.bmiHeader.biPlanes = 1;
|
||||||
vih.bmiHeader.biBitCount = 16;
|
vih.bmiHeader.biBitCount = 16;
|
||||||
vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 2;
|
vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 2;
|
||||||
mt.SetFormat((uint8*)&vih, sizeof(vih));
|
mt.SetFormat((u8*)&vih, sizeof(vih));
|
||||||
|
|
||||||
m_mts.push_back(mt);
|
m_mts.push_back(mt);
|
||||||
|
|
||||||
|
@ -139,7 +139,7 @@ GSSource : public CBaseFilter, private CCritSec, public IGSSource
|
||||||
vih.bmiHeader.biPlanes = 1;
|
vih.bmiHeader.biPlanes = 1;
|
||||||
vih.bmiHeader.biBitCount = 32;
|
vih.bmiHeader.biBitCount = 32;
|
||||||
vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 4;
|
vih.bmiHeader.biSizeImage = m_size.x * m_size.y * 4;
|
||||||
mt.SetFormat((uint8*)&vih, sizeof(vih));
|
mt.SetFormat((u8*)&vih, sizeof(vih));
|
||||||
|
|
||||||
if (colorspace == 1)
|
if (colorspace == 1)
|
||||||
m_mts.insert(m_mts.begin(), mt);
|
m_mts.insert(m_mts.begin(), mt);
|
||||||
|
@ -272,8 +272,8 @@ public:
|
||||||
|
|
||||||
const CMediaType& mt = m_output->CurrentMediaType();
|
const CMediaType& mt = m_output->CurrentMediaType();
|
||||||
|
|
||||||
uint8* src = (uint8*)bits;
|
u8* src = (u8*)bits;
|
||||||
uint8* dst = NULL;
|
u8* dst = NULL;
|
||||||
|
|
||||||
sample->GetPointer(&dst);
|
sample->GetPointer(&dst);
|
||||||
|
|
||||||
|
@ -300,8 +300,8 @@ public:
|
||||||
|
|
||||||
for (int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
|
for (int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
|
||||||
{
|
{
|
||||||
uint32* s = (uint32*)src;
|
u32* s = (u32*)src;
|
||||||
uint16* d = (uint16*)dst;
|
u16* d = (u16*)dst;
|
||||||
|
|
||||||
for (int i = 0; i < w; i += 2)
|
for (int i = 0; i < w; i += 2)
|
||||||
{
|
{
|
||||||
|
@ -314,7 +314,7 @@ public:
|
||||||
|
|
||||||
GSVector4 c = lo.hadd(hi) + offset;
|
GSVector4 c = lo.hadd(hi) + offset;
|
||||||
|
|
||||||
*((uint32*)&d[i]) = GSVector4i(c).rgba32();
|
*((u32*)&d[i]) = GSVector4i(c).rgba32();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -568,8 +568,8 @@ bool GSCapture::DeliverFrame(const void* bits, int pitch, bool rgba)
|
||||||
#elif defined(__unix__)
|
#elif defined(__unix__)
|
||||||
|
|
||||||
std::string out_file = m_out_dir + format("/frame.%010d.png", m_frame);
|
std::string out_file = m_out_dir + format("/frame.%010d.png", m_frame);
|
||||||
//GSPng::Save(GSPng::RGB_PNG, out_file, (uint8*)bits, m_size.x, m_size.y, pitch, m_compression_level);
|
//GSPng::Save(GSPng::RGB_PNG, out_file, (u8*)bits, m_size.x, m_size.y, pitch, m_compression_level);
|
||||||
m_workers[m_frame % m_threads]->Push(std::make_shared<GSPng::Transaction>(GSPng::RGB_PNG, out_file, static_cast<const uint8*>(bits), m_size.x, m_size.y, pitch, m_compression_level));
|
m_workers[m_frame % m_threads]->Push(std::make_shared<GSPng::Transaction>(GSPng::RGB_PNG, out_file, static_cast<const u8*>(bits), m_size.x, m_size.y, pitch, m_compression_level));
|
||||||
|
|
||||||
m_frame++;
|
m_frame++;
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ class GSCapture
|
||||||
std::recursive_mutex m_lock;
|
std::recursive_mutex m_lock;
|
||||||
bool m_capturing;
|
bool m_capturing;
|
||||||
GSVector2i m_size;
|
GSVector2i m_size;
|
||||||
uint64 m_frame;
|
u64 m_frame;
|
||||||
std::string m_out_dir;
|
std::string m_out_dir;
|
||||||
int m_threads;
|
int m_threads;
|
||||||
|
|
||||||
|
|
|
@ -22,11 +22,11 @@
|
||||||
GSClut::GSClut(GSLocalMemory* mem)
|
GSClut::GSClut(GSLocalMemory* mem)
|
||||||
: m_mem(mem)
|
: m_mem(mem)
|
||||||
{
|
{
|
||||||
uint8* p = (uint8*)vmalloc(CLUT_ALLOC_SIZE, false);
|
u8* p = (u8*)vmalloc(CLUT_ALLOC_SIZE, false);
|
||||||
|
|
||||||
m_clut = (uint16*)&p[0]; // 1k + 1k for mirrored area simulating wrapping memory
|
m_clut = (u16*)&p[0]; // 1k + 1k for mirrored area simulating wrapping memory
|
||||||
m_buff32 = (uint32*)&p[2048]; // 1k
|
m_buff32 = (u32*)&p[2048]; // 1k
|
||||||
m_buff64 = (uint64*)&p[4096]; // 2k
|
m_buff64 = (u64*)&p[4096]; // 2k
|
||||||
m_write.dirty = true;
|
m_write.dirty = true;
|
||||||
m_read.dirty = true;
|
m_read.dirty = true;
|
||||||
|
|
||||||
|
@ -107,7 +107,7 @@ void GSClut::Invalidate()
|
||||||
m_write.dirty = true;
|
m_write.dirty = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSClut::Invalidate(uint32 block)
|
void GSClut::Invalidate(u32 block)
|
||||||
{
|
{
|
||||||
if (block == m_write.TEX0.CBP)
|
if (block == m_write.TEX0.CBP)
|
||||||
{
|
{
|
||||||
|
@ -163,34 +163,34 @@ void GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT32_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
WriteCLUT_T32_I8_CSM1((uint32*)m_mem->BlockPtr32(0, 0, TEX0.CBP, 1), m_clut, (TEX0.CSA & 15));
|
WriteCLUT_T32_I8_CSM1((u32*)m_mem->BlockPtr32(0, 0, TEX0.CBP, 1), m_clut, (TEX0.CSA & 15));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT32_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
WriteCLUT_T32_I4_CSM1((uint32*)m_mem->BlockPtr32(0, 0, TEX0.CBP, 1), m_clut + ((TEX0.CSA & 15) << 4));
|
WriteCLUT_T32_I4_CSM1((u32*)m_mem->BlockPtr32(0, 0, TEX0.CBP, 1), m_clut + ((TEX0.CSA & 15) << 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSClut::WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT16_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
WriteCLUT_T16_I8_CSM1((uint16*)m_mem->BlockPtr16(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
|
WriteCLUT_T16_I8_CSM1((u16*)m_mem->BlockPtr16(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSClut::WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT16_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
WriteCLUT_T16_I4_CSM1((uint16*)m_mem->BlockPtr16(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
|
WriteCLUT_T16_I4_CSM1((u16*)m_mem->BlockPtr16(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSClut::WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT16S_I8_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
WriteCLUT_T16_I8_CSM1((uint16*)m_mem->BlockPtr16S(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
|
WriteCLUT_T16_I8_CSM1((u16*)m_mem->BlockPtr16S(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
void GSClut::WriteCLUT16S_I4_CSM1(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
{
|
{
|
||||||
WriteCLUT_T16_I4_CSM1((uint16*)m_mem->BlockPtr16S(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
|
WriteCLUT_T16_I4_CSM1((u16*)m_mem->BlockPtr16S(0, 0, TEX0.CBP, 1), m_clut + (TEX0.CSA << 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int n>
|
template <int n>
|
||||||
|
@ -199,14 +199,14 @@ void GSClut::WriteCLUT32_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCL
|
||||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT32);
|
||||||
auto pa = off.paMulti(m_mem->m_vm32, TEXCLUT.COU << 4, TEXCLUT.COV);
|
auto pa = off.paMulti(m_mem->m_vm32, TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
|
|
||||||
uint16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
|
u16* RESTRICT clut = m_clut + ((TEX0.CSA & 15) << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
uint32 c = *pa.value(i);
|
u32 c = *pa.value(i);
|
||||||
|
|
||||||
clut[i] = (uint16)(c & 0xffff);
|
clut[i] = (u16)(c & 0xffff);
|
||||||
clut[i + 256] = (uint16)(c >> 16);
|
clut[i + 256] = (u16)(c >> 16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,7 +216,7 @@ void GSClut::WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCL
|
||||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16);
|
||||||
auto pa = off.paMulti(m_mem->m_vm16, TEXCLUT.COU << 4, TEXCLUT.COV);
|
auto pa = off.paMulti(m_mem->m_vm16, TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
|
|
||||||
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
u16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
|
@ -230,7 +230,7 @@ void GSClut::WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXC
|
||||||
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
|
GSOffset off = GSOffset::fromKnownPSM(TEX0.CBP, TEXCLUT.CBW, PSM_PSMCT16S);
|
||||||
auto pa = off.paMulti(m_mem->m_vm16, TEXCLUT.COU << 4, TEXCLUT.COV);
|
auto pa = off.paMulti(m_mem->m_vm16, TEXCLUT.COU << 4, TEXCLUT.COV);
|
||||||
|
|
||||||
uint16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
u16* RESTRICT clut = m_clut + (TEX0.CSA << 4);
|
||||||
|
|
||||||
for (int i = 0; i < n; i++)
|
for (int i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
|
@ -252,7 +252,7 @@ void GSClut::Read(const GIFRegTEX0& TEX0)
|
||||||
m_read.TEX0 = TEX0;
|
m_read.TEX0 = TEX0;
|
||||||
m_read.dirty = false;
|
m_read.dirty = false;
|
||||||
|
|
||||||
uint16* clut = m_clut;
|
u16* clut = m_clut;
|
||||||
|
|
||||||
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
||||||
{
|
{
|
||||||
|
@ -301,7 +301,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||||
m_read.dirty = false;
|
m_read.dirty = false;
|
||||||
m_read.adirty = true;
|
m_read.adirty = true;
|
||||||
|
|
||||||
uint16* clut = m_clut;
|
u16* clut = m_clut;
|
||||||
|
|
||||||
if (TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
if (TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
|
||||||
{
|
{
|
||||||
|
@ -317,7 +317,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||||
clut += (TEX0.CSA & 15) << 4;
|
clut += (TEX0.CSA & 15) << 4;
|
||||||
// TODO: merge these functions
|
// TODO: merge these functions
|
||||||
ReadCLUT_T32_I4(clut, m_buff32);
|
ReadCLUT_T32_I4(clut, m_buff32);
|
||||||
ExpandCLUT64_T32_I8(m_buff32, (uint64*)m_buff64); // sw renderer does not need m_buff64 anymore
|
ExpandCLUT64_T32_I8(m_buff32, (u64*)m_buff64); // sw renderer does not need m_buff64 anymore
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -336,7 +336,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||||
clut += TEX0.CSA << 4;
|
clut += TEX0.CSA << 4;
|
||||||
// TODO: merge these functions
|
// TODO: merge these functions
|
||||||
Expand16(clut, m_buff32, 16, TEXA);
|
Expand16(clut, m_buff32, 16, TEXA);
|
||||||
ExpandCLUT64_T32_I8(m_buff32, (uint64*)m_buff64); // sw renderer does not need m_buff64 anymore
|
ExpandCLUT64_T32_I8(m_buff32, (u64*)m_buff64); // sw renderer does not need m_buff64 anymore
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -412,7 +412,7 @@ void GSClut::GetAlphaMinMax32(int& amin_out, int& amax_out)
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
void GSClut::WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut, uint16 offset)
|
void GSClut::WriteCLUT_T32_I8_CSM1(const u32* RESTRICT src, u16* RESTRICT clut, u16 offset)
|
||||||
{
|
{
|
||||||
// This is required when CSA is offset from the base of the CLUT so we point to the right data
|
// This is required when CSA is offset from the base of the CLUT so we point to the right data
|
||||||
for (int i = offset; i < 16; i ++)
|
for (int i = offset; i < 16; i ++)
|
||||||
|
@ -425,7 +425,7 @@ void GSClut::WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void GSClut::WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut)
|
__forceinline void GSClut::WriteCLUT_T32_I4_CSM1(const u32* RESTRICT src, u16* RESTRICT clut)
|
||||||
{
|
{
|
||||||
// 1 block
|
// 1 block
|
||||||
|
|
||||||
|
@ -466,7 +466,7 @@ __forceinline void GSClut::WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uin
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSClut::WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut)
|
void GSClut::WriteCLUT_T16_I8_CSM1(const u16* RESTRICT src, u16* RESTRICT clut)
|
||||||
{
|
{
|
||||||
// 2 blocks
|
// 2 blocks
|
||||||
|
|
||||||
|
@ -491,7 +491,7 @@ void GSClut::WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT src, uint16* RESTRICT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void GSClut::WriteCLUT_T16_I4_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut)
|
__forceinline void GSClut::WriteCLUT_T16_I4_CSM1(const u16* RESTRICT src, u16* RESTRICT clut)
|
||||||
{
|
{
|
||||||
// 1 block (half)
|
// 1 block (half)
|
||||||
|
|
||||||
|
@ -501,7 +501,7 @@ __forceinline void GSClut::WriteCLUT_T16_I4_CSM1(const uint16* RESTRICT src, uin
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSClut::ReadCLUT_T32_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst, int offset)
|
void GSClut::ReadCLUT_T32_I8(const u16* RESTRICT clut, u32* RESTRICT dst, int offset)
|
||||||
{
|
{
|
||||||
// Okay this deserves a small explanation
|
// Okay this deserves a small explanation
|
||||||
// T32 I8 can address up to 256 colors however the offset can be "more than zero" when reading
|
// T32 I8 can address up to 256 colors however the offset can be "more than zero" when reading
|
||||||
|
@ -516,7 +516,7 @@ void GSClut::ReadCLUT_T32_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void GSClut::ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst)
|
__forceinline void GSClut::ReadCLUT_T32_I4(const u16* RESTRICT clut, u32* RESTRICT dst)
|
||||||
{
|
{
|
||||||
GSVector4i* s = (GSVector4i*)clut;
|
GSVector4i* s = (GSVector4i*)clut;
|
||||||
GSVector4i* d = (GSVector4i*)dst;
|
GSVector4i* d = (GSVector4i*)dst;
|
||||||
|
@ -535,7 +535,7 @@ __forceinline void GSClut::ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32*
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
__forceinline void GSClut::ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst32, uint64* RESTRICT dst64)
|
__forceinline void GSClut::ReadCLUT_T32_I4(const u16* RESTRICT clut, u32* RESTRICT dst32, u64* RESTRICT dst64)
|
||||||
{
|
{
|
||||||
GSVector4i* s = (GSVector4i*)clut;
|
GSVector4i* s = (GSVector4i*)clut;
|
||||||
GSVector4i* d32 = (GSVector4i*)dst32;
|
GSVector4i* d32 = (GSVector4i*)dst32;
|
||||||
|
@ -561,7 +561,7 @@ __forceinline void GSClut::ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32*
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
void GSClut::ReadCLUT_T16_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst)
|
void GSClut::ReadCLUT_T16_I8(const u16* RESTRICT clut, u32* RESTRICT dst)
|
||||||
{
|
{
|
||||||
for(int i = 0; i < 256; i += 16)
|
for(int i = 0; i < 256; i += 16)
|
||||||
{
|
{
|
||||||
|
@ -571,7 +571,7 @@ void GSClut::ReadCLUT_T16_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
__forceinline void GSClut::ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst)
|
__forceinline void GSClut::ReadCLUT_T16_I4(const u16* RESTRICT clut, u32* RESTRICT dst)
|
||||||
{
|
{
|
||||||
GSVector4i* s = (GSVector4i*)clut;
|
GSVector4i* s = (GSVector4i*)clut;
|
||||||
GSVector4i* d = (GSVector4i*)dst;
|
GSVector4i* d = (GSVector4i*)dst;
|
||||||
|
@ -587,7 +587,7 @@ __forceinline void GSClut::ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32*
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
__forceinline void GSClut::ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst32, uint64* RESTRICT dst64)
|
__forceinline void GSClut::ReadCLUT_T16_I4(const u16* RESTRICT clut, u32* RESTRICT dst32, u64* RESTRICT dst64)
|
||||||
{
|
{
|
||||||
GSVector4i* s = (GSVector4i*)clut;
|
GSVector4i* s = (GSVector4i*)clut;
|
||||||
GSVector4i* d32 = (GSVector4i*)dst32;
|
GSVector4i* d32 = (GSVector4i*)dst32;
|
||||||
|
@ -613,7 +613,7 @@ __forceinline void GSClut::ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32*
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GSClut::ExpandCLUT64_T32_I8(const uint32* RESTRICT src, uint64* RESTRICT dst)
|
void GSClut::ExpandCLUT64_T32_I8(const u32* RESTRICT src, u64* RESTRICT dst)
|
||||||
{
|
{
|
||||||
GSVector4i* s = (GSVector4i*)src;
|
GSVector4i* s = (GSVector4i*)src;
|
||||||
GSVector4i* d = (GSVector4i*)dst;
|
GSVector4i* d = (GSVector4i*)dst;
|
||||||
|
@ -656,7 +656,7 @@ __forceinline void GSClut::ExpandCLUT64_T32(const GSVector4i& hi, const GSVector
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
void GSClut::ExpandCLUT64_T16_I8(const uint32* RESTRICT src, uint64* RESTRICT dst)
|
void GSClut::ExpandCLUT64_T16_I8(const u32* RESTRICT src, u64* RESTRICT dst)
|
||||||
{
|
{
|
||||||
GSVector4i* s = (GSVector4i*)src;
|
GSVector4i* s = (GSVector4i*)src;
|
||||||
GSVector4i* d = (GSVector4i*)dst;
|
GSVector4i* d = (GSVector4i*)dst;
|
||||||
|
@ -705,7 +705,7 @@ CONSTINIT const GSVector4i GSClut::m_bm = GSVector4i::cxpr(0x00007c00);
|
||||||
CONSTINIT const GSVector4i GSClut::m_gm = GSVector4i::cxpr(0x000003e0);
|
CONSTINIT const GSVector4i GSClut::m_gm = GSVector4i::cxpr(0x000003e0);
|
||||||
CONSTINIT const GSVector4i GSClut::m_rm = GSVector4i::cxpr(0x0000001f);
|
CONSTINIT const GSVector4i GSClut::m_rm = GSVector4i::cxpr(0x0000001f);
|
||||||
|
|
||||||
void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA)
|
void GSClut::Expand16(const u16* RESTRICT src, u32* RESTRICT dst, int w, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
ASSERT((w & 7) == 0);
|
ASSERT((w & 7) == 0);
|
||||||
|
|
||||||
|
|
|
@ -30,10 +30,10 @@ class alignas(32) GSClut : public GSAlignedClass<32>
|
||||||
|
|
||||||
GSLocalMemory* m_mem;
|
GSLocalMemory* m_mem;
|
||||||
|
|
||||||
uint32 m_CBP[2];
|
u32 m_CBP[2];
|
||||||
uint16* m_clut;
|
u16* m_clut;
|
||||||
uint32* m_buff32;
|
u32* m_buff32;
|
||||||
uint64* m_buff64;
|
u64* m_buff64;
|
||||||
|
|
||||||
struct alignas(32) WriteState
|
struct alignas(32) WriteState
|
||||||
{
|
{
|
||||||
|
@ -74,42 +74,42 @@ class alignas(32) GSClut : public GSAlignedClass<32>
|
||||||
|
|
||||||
void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
||||||
|
|
||||||
static void WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut, uint16 offset);
|
static void WriteCLUT_T32_I8_CSM1(const u32* RESTRICT src, u16* RESTRICT clut, u16 offset);
|
||||||
static void WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut);
|
static void WriteCLUT_T32_I4_CSM1(const u32* RESTRICT src, u16* RESTRICT clut);
|
||||||
static void WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut);
|
static void WriteCLUT_T16_I8_CSM1(const u16* RESTRICT src, u16* RESTRICT clut);
|
||||||
static void WriteCLUT_T16_I4_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut);
|
static void WriteCLUT_T16_I4_CSM1(const u16* RESTRICT src, u16* RESTRICT clut);
|
||||||
static void ReadCLUT_T32_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst, int offset);
|
static void ReadCLUT_T32_I8(const u16* RESTRICT clut, u32* RESTRICT dst, int offset);
|
||||||
static void ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst);
|
static void ReadCLUT_T32_I4(const u16* RESTRICT clut, u32* RESTRICT dst);
|
||||||
//static void ReadCLUT_T32_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst32, uint64* RESTRICT dst64);
|
//static void ReadCLUT_T32_I4(const u16* RESTRICT clut, u32* RESTRICT dst32, u64* RESTRICT dst64);
|
||||||
//static void ReadCLUT_T16_I8(const uint16* RESTRICT clut, uint32* RESTRICT dst);
|
//static void ReadCLUT_T16_I8(const u16* RESTRICT clut, u32* RESTRICT dst);
|
||||||
//static void ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst);
|
//static void ReadCLUT_T16_I4(const u16* RESTRICT clut, u32* RESTRICT dst);
|
||||||
//static void ReadCLUT_T16_I4(const uint16* RESTRICT clut, uint32* RESTRICT dst32, uint64* RESTRICT dst64);
|
//static void ReadCLUT_T16_I4(const u16* RESTRICT clut, u32* RESTRICT dst32, u64* RESTRICT dst64);
|
||||||
public:
|
public:
|
||||||
static void ExpandCLUT64_T32_I8(const uint32* RESTRICT src, uint64* RESTRICT dst);
|
static void ExpandCLUT64_T32_I8(const u32* RESTRICT src, u64* RESTRICT dst);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static void ExpandCLUT64_T32(const GSVector4i& hi, const GSVector4i& lo0, const GSVector4i& lo1, const GSVector4i& lo2, const GSVector4i& lo3, GSVector4i* dst);
|
static void ExpandCLUT64_T32(const GSVector4i& hi, const GSVector4i& lo0, const GSVector4i& lo1, const GSVector4i& lo2, const GSVector4i& lo3, GSVector4i* dst);
|
||||||
static void ExpandCLUT64_T32(const GSVector4i& hi, const GSVector4i& lo, GSVector4i* dst);
|
static void ExpandCLUT64_T32(const GSVector4i& hi, const GSVector4i& lo, GSVector4i* dst);
|
||||||
//static void ExpandCLUT64_T16_I8(const uint32* RESTRICT src, uint64* RESTRICT dst);
|
//static void ExpandCLUT64_T16_I8(const u32* RESTRICT src, u64* RESTRICT dst);
|
||||||
static void ExpandCLUT64_T16(const GSVector4i& hi, const GSVector4i& lo0, const GSVector4i& lo1, const GSVector4i& lo2, const GSVector4i& lo3, GSVector4i* dst);
|
static void ExpandCLUT64_T16(const GSVector4i& hi, const GSVector4i& lo0, const GSVector4i& lo1, const GSVector4i& lo2, const GSVector4i& lo3, GSVector4i* dst);
|
||||||
static void ExpandCLUT64_T16(const GSVector4i& hi, const GSVector4i& lo, GSVector4i* dst);
|
static void ExpandCLUT64_T16(const GSVector4i& hi, const GSVector4i& lo, GSVector4i* dst);
|
||||||
|
|
||||||
static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
|
static void Expand16(const u16* RESTRICT src, u32* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSClut(GSLocalMemory* mem);
|
GSClut(GSLocalMemory* mem);
|
||||||
virtual ~GSClut();
|
virtual ~GSClut();
|
||||||
|
|
||||||
void Invalidate();
|
void Invalidate();
|
||||||
void Invalidate(uint32 block);
|
void Invalidate(u32 block);
|
||||||
bool WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
bool WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
||||||
void Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
void Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
||||||
//void Read(const GIFRegTEX0& TEX0);
|
//void Read(const GIFRegTEX0& TEX0);
|
||||||
void Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
void Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
||||||
void GetAlphaMinMax32(int& amin, int& amax);
|
void GetAlphaMinMax32(int& amin, int& amax);
|
||||||
|
|
||||||
uint32 operator[](size_t i) const { return m_buff32[i]; }
|
u32 operator[](size_t i) const { return m_buff32[i]; }
|
||||||
|
|
||||||
operator const uint32*() const { return m_buff32; }
|
operator const u32*() const { return m_buff32; }
|
||||||
operator const uint64*() const { return m_buff64; }
|
operator const u64*() const { return m_buff64; }
|
||||||
};
|
};
|
||||||
|
|
|
@ -42,14 +42,14 @@ void* GSCodeBuffer::GetBuffer(size_t size)
|
||||||
|
|
||||||
if (m_ptr == NULL || m_pos + size > m_blocksize)
|
if (m_ptr == NULL || m_pos + size > m_blocksize)
|
||||||
{
|
{
|
||||||
m_ptr = (uint8*)vmalloc(m_blocksize, true);
|
m_ptr = (u8*)vmalloc(m_blocksize, true);
|
||||||
|
|
||||||
m_pos = 0;
|
m_pos = 0;
|
||||||
|
|
||||||
m_buffers.push_back(m_ptr);
|
m_buffers.push_back(m_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8* ptr = &m_ptr[m_pos];
|
u8* ptr = &m_ptr[m_pos];
|
||||||
|
|
||||||
m_reserved = size;
|
m_reserved = size;
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ class GSCodeBuffer
|
||||||
std::vector<void*> m_buffers;
|
std::vector<void*> m_buffers;
|
||||||
size_t m_blocksize;
|
size_t m_blocksize;
|
||||||
size_t m_pos, m_reserved;
|
size_t m_pos, m_reserved;
|
||||||
uint8* m_ptr;
|
u8* m_ptr;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSCodeBuffer(size_t blocksize = 4096 * 64); // 256k
|
GSCodeBuffer(size_t blocksize = 4096 * 64); // 256k
|
||||||
|
|
|
@ -505,7 +505,7 @@ const CRC::Game CRC::m_games[] =
|
||||||
{0xB1BE3E51, Whiplash, EU, 0},
|
{0xB1BE3E51, Whiplash, EU, 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
std::map<uint32, const CRC::Game*> CRC::m_map;
|
std::map<u32, const CRC::Game*> CRC::m_map;
|
||||||
|
|
||||||
std::string ToLower(std::string str)
|
std::string ToLower(std::string str)
|
||||||
{
|
{
|
||||||
|
@ -517,14 +517,14 @@ std::string ToLower(std::string str)
|
||||||
// The list is case insensitive and order insensitive.
|
// The list is case insensitive and order insensitive.
|
||||||
// E.g. Disable all CRC hacks: CrcHacksExclusions=all
|
// E.g. Disable all CRC hacks: CrcHacksExclusions=all
|
||||||
// E.g. Disable hacks for these CRCs: CrcHacksExclusions=0x0F0C4A9C, 0x0EE5646B, 0x7ACF7E03
|
// E.g. Disable hacks for these CRCs: CrcHacksExclusions=0x0F0C4A9C, 0x0EE5646B, 0x7ACF7E03
|
||||||
bool IsCrcExcluded(std::string exclusionList, uint32 crc)
|
bool IsCrcExcluded(std::string exclusionList, u32 crc)
|
||||||
{
|
{
|
||||||
std::string target = format("0x%08x", crc);
|
std::string target = format("0x%08x", crc);
|
||||||
exclusionList = ToLower(exclusionList);
|
exclusionList = ToLower(exclusionList);
|
||||||
return exclusionList.find(target) != std::string::npos || exclusionList.find("all") != std::string::npos;
|
return exclusionList.find(target) != std::string::npos || exclusionList.find("all") != std::string::npos;
|
||||||
}
|
}
|
||||||
|
|
||||||
const CRC::Game& CRC::Lookup(uint32 crc)
|
const CRC::Game& CRC::Lookup(u32 crc)
|
||||||
{
|
{
|
||||||
printf("GS Lookup CRC:%08X\n", crc);
|
printf("GS Lookup CRC:%08X\n", crc);
|
||||||
if (m_map.empty())
|
if (m_map.empty())
|
||||||
|
|
|
@ -170,16 +170,16 @@ public:
|
||||||
|
|
||||||
struct Game
|
struct Game
|
||||||
{
|
{
|
||||||
uint32 crc;
|
u32 crc;
|
||||||
Title title;
|
Title title;
|
||||||
Region region;
|
Region region;
|
||||||
uint32 flags;
|
u32 flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static const Game m_games[];
|
static const Game m_games[];
|
||||||
static std::map<uint32, const Game*> m_map;
|
static std::map<u32, const Game*> m_map;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static const Game& Lookup(uint32 crc);
|
static const Game& Lookup(u32 crc);
|
||||||
};
|
};
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
#ifdef __clang__
|
#ifdef __clang__
|
||||||
// Ignore format for this file, as it spams a lot of warnings about uint64 and %llu.
|
// Ignore format for this file, as it spams a lot of warnings about u64 and %llu.
|
||||||
#pragma clang diagnostic ignored "-Wformat"
|
#pragma clang diagnostic ignored "-Wformat"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -102,10 +102,10 @@ public:
|
||||||
{
|
{
|
||||||
ASSERT(XYOFFSET.OFX <= 0xf800 && XYOFFSET.OFY <= 0xf800);
|
ASSERT(XYOFFSET.OFX <= 0xf800 && XYOFFSET.OFY <= 0xf800);
|
||||||
|
|
||||||
scissor.ex.u16[0] = (uint16)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX - 0x8000);
|
scissor.ex.U16[0] = (u16)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX - 0x8000);
|
||||||
scissor.ex.u16[1] = (uint16)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY - 0x8000);
|
scissor.ex.U16[1] = (u16)((SCISSOR.SCAY0 << 4) + XYOFFSET.OFY - 0x8000);
|
||||||
scissor.ex.u16[2] = (uint16)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX - 0x8000);
|
scissor.ex.U16[2] = (u16)((SCISSOR.SCAX1 << 4) + XYOFFSET.OFX - 0x8000);
|
||||||
scissor.ex.u16[3] = (uint16)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY - 0x8000);
|
scissor.ex.U16[3] = (u16)((SCISSOR.SCAY1 << 4) + XYOFFSET.OFY - 0x8000);
|
||||||
|
|
||||||
scissor.ofex = GSVector4(
|
scissor.ofex = GSVector4(
|
||||||
(int)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX),
|
(int)((SCISSOR.SCAX0 << 4) + XYOFFSET.OFX),
|
||||||
|
|
|
@ -31,7 +31,7 @@ GSDumpBase::~GSDumpBase()
|
||||||
fclose(m_gs);
|
fclose(m_gs);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDumpBase::AddHeader(uint32 crc, const freezeData& fd, const GSPrivRegSet* regs)
|
void GSDumpBase::AddHeader(u32 crc, const freezeData& fd, const GSPrivRegSet* regs)
|
||||||
{
|
{
|
||||||
AppendRawData(&crc, 4);
|
AppendRawData(&crc, 4);
|
||||||
AppendRawData(&fd.size, 4);
|
AppendRawData(&fd.size, 4);
|
||||||
|
@ -39,18 +39,18 @@ void GSDumpBase::AddHeader(uint32 crc, const freezeData& fd, const GSPrivRegSet*
|
||||||
AppendRawData(regs, sizeof(*regs));
|
AppendRawData(regs, sizeof(*regs));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDumpBase::Transfer(int index, const uint8* mem, size_t size)
|
void GSDumpBase::Transfer(int index, const u8* mem, size_t size)
|
||||||
{
|
{
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
AppendRawData(0);
|
AppendRawData(0);
|
||||||
AppendRawData(static_cast<uint8>(index));
|
AppendRawData(static_cast<u8>(index));
|
||||||
AppendRawData(&size, 4);
|
AppendRawData(&size, 4);
|
||||||
AppendRawData(mem, size);
|
AppendRawData(mem, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDumpBase::ReadFIFO(uint32 size)
|
void GSDumpBase::ReadFIFO(u32 size)
|
||||||
{
|
{
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return;
|
return;
|
||||||
|
@ -69,7 +69,7 @@ bool GSDumpBase::VSync(int field, bool last, const GSPrivRegSet* regs)
|
||||||
AppendRawData(regs, sizeof(*regs));
|
AppendRawData(regs, sizeof(*regs));
|
||||||
|
|
||||||
AppendRawData(1);
|
AppendRawData(1);
|
||||||
AppendRawData(static_cast<uint8>(field));
|
AppendRawData(static_cast<u8>(field));
|
||||||
|
|
||||||
if (last)
|
if (last)
|
||||||
m_extra_frames--;
|
m_extra_frames--;
|
||||||
|
@ -91,7 +91,7 @@ void GSDumpBase::Write(const void* data, size_t size)
|
||||||
// GSDump implementation
|
// GSDump implementation
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
GSDump::GSDump(const std::string& fn, uint32 crc, const freezeData& fd, const GSPrivRegSet* regs)
|
GSDump::GSDump(const std::string& fn, u32 crc, const freezeData& fd, const GSPrivRegSet* regs)
|
||||||
: GSDumpBase(fn + ".gs")
|
: GSDumpBase(fn + ".gs")
|
||||||
{
|
{
|
||||||
AddHeader(crc, fd, regs);
|
AddHeader(crc, fd, regs);
|
||||||
|
@ -102,7 +102,7 @@ void GSDump::AppendRawData(const void* data, size_t size)
|
||||||
Write(data, size);
|
Write(data, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDump::AppendRawData(uint8 c)
|
void GSDump::AppendRawData(u8 c)
|
||||||
{
|
{
|
||||||
Write(&c, 1);
|
Write(&c, 1);
|
||||||
}
|
}
|
||||||
|
@ -111,7 +111,7 @@ void GSDump::AppendRawData(uint8 c)
|
||||||
// GSDumpXz implementation
|
// GSDumpXz implementation
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
GSDumpXz::GSDumpXz(const std::string& fn, uint32 crc, const freezeData& fd, const GSPrivRegSet* regs)
|
GSDumpXz::GSDumpXz(const std::string& fn, u32 crc, const freezeData& fd, const GSPrivRegSet* regs)
|
||||||
: GSDumpBase(fn + ".gs.xz")
|
: GSDumpBase(fn + ".gs.xz")
|
||||||
{
|
{
|
||||||
m_strm = LZMA_STREAM_INIT;
|
m_strm = LZMA_STREAM_INIT;
|
||||||
|
@ -150,7 +150,7 @@ void GSDumpXz::AppendRawData(const void* data, size_t size)
|
||||||
Flush();
|
Flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDumpXz::AppendRawData(uint8 c)
|
void GSDumpXz::AppendRawData(u8 c)
|
||||||
{
|
{
|
||||||
m_in_buff.push_back(c);
|
m_in_buff.push_back(c);
|
||||||
}
|
}
|
||||||
|
@ -170,7 +170,7 @@ void GSDumpXz::Flush()
|
||||||
|
|
||||||
void GSDumpXz::Compress(lzma_action action, lzma_ret expected_status)
|
void GSDumpXz::Compress(lzma_action action, lzma_ret expected_status)
|
||||||
{
|
{
|
||||||
std::vector<uint8> out_buff(1024 * 1024);
|
std::vector<u8> out_buff(1024 * 1024);
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
m_strm.next_out = out_buff.data();
|
m_strm.next_out = out_buff.data();
|
||||||
|
|
|
@ -45,28 +45,28 @@ class GSDumpBase
|
||||||
FILE* m_gs;
|
FILE* m_gs;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void AddHeader(uint32 crc, const freezeData& fd, const GSPrivRegSet* regs);
|
void AddHeader(u32 crc, const freezeData& fd, const GSPrivRegSet* regs);
|
||||||
void Write(const void* data, size_t size);
|
void Write(const void* data, size_t size);
|
||||||
|
|
||||||
virtual void AppendRawData(const void* data, size_t size) = 0;
|
virtual void AppendRawData(const void* data, size_t size) = 0;
|
||||||
virtual void AppendRawData(uint8 c) = 0;
|
virtual void AppendRawData(u8 c) = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDumpBase(const std::string& fn);
|
GSDumpBase(const std::string& fn);
|
||||||
virtual ~GSDumpBase();
|
virtual ~GSDumpBase();
|
||||||
|
|
||||||
void ReadFIFO(uint32 size);
|
void ReadFIFO(u32 size);
|
||||||
void Transfer(int index, const uint8* mem, size_t size);
|
void Transfer(int index, const u8* mem, size_t size);
|
||||||
bool VSync(int field, bool last, const GSPrivRegSet* regs);
|
bool VSync(int field, bool last, const GSPrivRegSet* regs);
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSDump final : public GSDumpBase
|
class GSDump final : public GSDumpBase
|
||||||
{
|
{
|
||||||
void AppendRawData(const void* data, size_t size) final;
|
void AppendRawData(const void* data, size_t size) final;
|
||||||
void AppendRawData(uint8 c) final;
|
void AppendRawData(u8 c) final;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDump(const std::string& fn, uint32 crc, const freezeData& fd, const GSPrivRegSet* regs);
|
GSDump(const std::string& fn, u32 crc, const freezeData& fd, const GSPrivRegSet* regs);
|
||||||
virtual ~GSDump() = default;
|
virtual ~GSDump() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -74,14 +74,14 @@ class GSDumpXz final : public GSDumpBase
|
||||||
{
|
{
|
||||||
lzma_stream m_strm;
|
lzma_stream m_strm;
|
||||||
|
|
||||||
std::vector<uint8> m_in_buff;
|
std::vector<u8> m_in_buff;
|
||||||
|
|
||||||
void Flush();
|
void Flush();
|
||||||
void Compress(lzma_action action, lzma_ret expected_status);
|
void Compress(lzma_action action, lzma_ret expected_status);
|
||||||
void AppendRawData(const void* data, size_t size);
|
void AppendRawData(const void* data, size_t size);
|
||||||
void AppendRawData(uint8 c);
|
void AppendRawData(u8 c);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDumpXz(const std::string& fn, uint32 crc, const freezeData& fd, const GSPrivRegSet* regs);
|
GSDumpXz(const std::string& fn, u32 crc, const freezeData& fd, const GSPrivRegSet* regs);
|
||||||
virtual ~GSDumpXz();
|
virtual ~GSDumpXz();
|
||||||
};
|
};
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
#include "GS.h"
|
#include "GS.h"
|
||||||
|
|
||||||
template <typename Fn>
|
template <typename Fn>
|
||||||
static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector4i& r, uint8* dst, int dstpitch, int bpp, Fn&& fn)
|
static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector4i& r, u8* dst, int dstpitch, int bpp, Fn&& fn)
|
||||||
{
|
{
|
||||||
ASSERT(off.isBlockAligned(r));
|
ASSERT(off.isBlockAligned(r));
|
||||||
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
|
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
|
||||||
|
@ -32,8 +32,8 @@ static void foreachBlock(const GSOffset& off, GSLocalMemory* mem, const GSVector
|
||||||
{
|
{
|
||||||
for (int x = 0; bn.blkX() < right; bn.nextBlockX(), x += xAdd)
|
for (int x = 0; bn.blkX() < right; bn.nextBlockX(), x += xAdd)
|
||||||
{
|
{
|
||||||
const uint8* src = mem->BlockPtr(bn.value());
|
const u8* src = mem->BlockPtr(bn.value());
|
||||||
uint8* read_dst = dst + x;
|
u8* read_dst = dst + x;
|
||||||
fn(read_dst, src);
|
fn(read_dst, src);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -70,19 +70,19 @@ GSLocalMemory::GSLocalMemory()
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_use_fifo_alloc)
|
if (m_use_fifo_alloc)
|
||||||
m_vm8 = (uint8*)fifo_alloc(m_vmsize, 4);
|
m_vm8 = (u8*)fifo_alloc(m_vmsize, 4);
|
||||||
else
|
else
|
||||||
m_vm8 = nullptr;
|
m_vm8 = nullptr;
|
||||||
|
|
||||||
// Either we don't use fifo alloc or we get an error.
|
// Either we don't use fifo alloc or we get an error.
|
||||||
if (m_vm8 == nullptr)
|
if (m_vm8 == nullptr)
|
||||||
{
|
{
|
||||||
m_vm8 = (uint8*)vmalloc(m_vmsize * 4, false);
|
m_vm8 = (u8*)vmalloc(m_vmsize * 4, false);
|
||||||
m_use_fifo_alloc = false;
|
m_use_fifo_alloc = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_vm16 = (uint16*)m_vm8;
|
m_vm16 = (u16*)m_vm8;
|
||||||
m_vm32 = (uint32*)m_vm8;
|
m_vm32 = (u32*)m_vm8;
|
||||||
|
|
||||||
memset(m_vm8, 0, m_vmsize);
|
memset(m_vm8, 0, m_vmsize);
|
||||||
|
|
||||||
|
@ -341,20 +341,20 @@ GSLocalMemory::~GSLocalMemory()
|
||||||
|
|
||||||
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
||||||
{
|
{
|
||||||
uint32 fbp = FRAME.Block();
|
u32 fbp = FRAME.Block();
|
||||||
uint32 zbp = ZBUF.Block();
|
u32 zbp = ZBUF.Block();
|
||||||
uint32 fpsm = FRAME.PSM;
|
u32 fpsm = FRAME.PSM;
|
||||||
uint32 zpsm = ZBUF.PSM;
|
u32 zpsm = ZBUF.PSM;
|
||||||
uint32 bw = FRAME.FBW;
|
u32 bw = FRAME.FBW;
|
||||||
|
|
||||||
ASSERT(m_psm[fpsm].trbpp > 8 || m_psm[zpsm].trbpp > 8);
|
ASSERT(m_psm[fpsm].trbpp > 8 || m_psm[zpsm].trbpp > 8);
|
||||||
|
|
||||||
// "(psm & 0x0f) ^ ((psm & 0xf0) >> 2)" creates 4 bit unique identifiers for render target formats (only)
|
// "(psm & 0x0f) ^ ((psm & 0xf0) >> 2)" creates 4 bit unique identifiers for render target formats (only)
|
||||||
|
|
||||||
uint32 fpsm_hash = (fpsm & 0x0f) ^ ((fpsm & 0x30) >> 2);
|
u32 fpsm_hash = (fpsm & 0x0f) ^ ((fpsm & 0x30) >> 2);
|
||||||
uint32 zpsm_hash = (zpsm & 0x0f) ^ ((zpsm & 0x30) >> 2);
|
u32 zpsm_hash = (zpsm & 0x0f) ^ ((zpsm & 0x30) >> 2);
|
||||||
|
|
||||||
uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28);
|
u32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28);
|
||||||
|
|
||||||
auto it = m_pomap.find(hash);
|
auto it = m_pomap.find(hash);
|
||||||
|
|
||||||
|
@ -394,20 +394,20 @@ GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIF
|
||||||
|
|
||||||
GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
||||||
{
|
{
|
||||||
uint32 fbp = FRAME.Block();
|
u32 fbp = FRAME.Block();
|
||||||
uint32 zbp = ZBUF.Block();
|
u32 zbp = ZBUF.Block();
|
||||||
uint32 fpsm = FRAME.PSM;
|
u32 fpsm = FRAME.PSM;
|
||||||
uint32 zpsm = ZBUF.PSM;
|
u32 zpsm = ZBUF.PSM;
|
||||||
uint32 bw = FRAME.FBW;
|
u32 bw = FRAME.FBW;
|
||||||
|
|
||||||
ASSERT(m_psm[fpsm].trbpp > 8 || m_psm[zpsm].trbpp > 8);
|
ASSERT(m_psm[fpsm].trbpp > 8 || m_psm[zpsm].trbpp > 8);
|
||||||
|
|
||||||
// "(psm & 0x0f) ^ ((psm & 0xf0) >> 2)" creates 4 bit unique identifiers for render target formats (only)
|
// "(psm & 0x0f) ^ ((psm & 0xf0) >> 2)" creates 4 bit unique identifiers for render target formats (only)
|
||||||
|
|
||||||
uint32 fpsm_hash = (fpsm & 0x0f) ^ ((fpsm & 0x30) >> 2);
|
u32 fpsm_hash = (fpsm & 0x0f) ^ ((fpsm & 0x30) >> 2);
|
||||||
uint32 zpsm_hash = (zpsm & 0x0f) ^ ((zpsm & 0x30) >> 2);
|
u32 zpsm_hash = (zpsm & 0x0f) ^ ((zpsm & 0x30) >> 2);
|
||||||
|
|
||||||
uint32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28);
|
u32 hash = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fpsm_hash << 24) | (zpsm_hash << 28);
|
||||||
|
|
||||||
auto it = m_po4map.find(hash);
|
auto it = m_po4map.find(hash);
|
||||||
|
|
||||||
|
@ -449,7 +449,7 @@ static bool cmp_vec2x(const GSVector2i& a, const GSVector2i& b) { return a.x < b
|
||||||
|
|
||||||
std::vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
|
std::vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
|
||||||
{
|
{
|
||||||
uint64 hash = TEX0.u64 & 0x3ffffffffull; // TBP0 TBW PSM TW TH
|
u64 hash = TEX0.U64 & 0x3ffffffffull; // TBP0 TBW PSM TW TH
|
||||||
|
|
||||||
auto it = m_p2tmap.find(hash);
|
auto it = m_p2tmap.find(hash);
|
||||||
|
|
||||||
|
@ -466,34 +466,34 @@ std::vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
|
||||||
GSOffset off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
GSOffset off = GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
GSOffset::BNHelper bn = off.bnMulti(0, 0);
|
GSOffset::BNHelper bn = off.bnMulti(0, 0);
|
||||||
|
|
||||||
std::unordered_map<uint32, std::unordered_set<uint32>> tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
|
std::unordered_map<u32, std::unordered_set<u32>> tmp; // key = page, value = y:x, 7 bits each, max 128x128 tiles for the worst case (1024x1024 32bpp 8x8 blocks)
|
||||||
|
|
||||||
for (; bn.blkY() < (th >> off.blockShiftY()); bn.nextBlockY())
|
for (; bn.blkY() < (th >> off.blockShiftY()); bn.nextBlockY())
|
||||||
{
|
{
|
||||||
for (; bn.blkX() < (tw >> off.blockShiftX()); bn.nextBlockX())
|
for (; bn.blkX() < (tw >> off.blockShiftX()); bn.nextBlockX())
|
||||||
{
|
{
|
||||||
uint32 page = (bn.value() >> 5) % MAX_PAGES;
|
u32 page = (bn.value() >> 5) % MAX_PAGES;
|
||||||
|
|
||||||
tmp[page].insert((bn.blkY() << 7) + bn.blkX());
|
tmp[page].insert((bn.blkY() << 7) + bn.blkX());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against an uint32 array
|
// combine the lower 5 bits of the address into a 9:5 pointer:mask form, so the "valid bits" can be tested against an u32 array
|
||||||
|
|
||||||
auto p2t = new std::vector<GSVector2i>[MAX_PAGES];
|
auto p2t = new std::vector<GSVector2i>[MAX_PAGES];
|
||||||
|
|
||||||
for (const auto& i : tmp)
|
for (const auto& i : tmp)
|
||||||
{
|
{
|
||||||
uint32 page = i.first;
|
u32 page = i.first;
|
||||||
|
|
||||||
auto& tiles = i.second;
|
auto& tiles = i.second;
|
||||||
|
|
||||||
std::unordered_map<uint32, uint32> m;
|
std::unordered_map<u32, u32> m;
|
||||||
|
|
||||||
for (const auto addr : tiles)
|
for (const auto addr : tiles)
|
||||||
{
|
{
|
||||||
uint32 row = addr >> 5;
|
u32 row = addr >> 5;
|
||||||
uint32 col = 1 << (addr & 31);
|
u32 col = 1 << (addr & 31);
|
||||||
|
|
||||||
auto k = m.find(row);
|
auto k = m.find(row);
|
||||||
|
|
||||||
|
@ -528,10 +528,10 @@ std::vector<GSVector2i>* GSLocalMemory::GetPage2TileMap(const GIFRegTEX0& TEX0)
|
||||||
////////////////////
|
////////////////////
|
||||||
|
|
||||||
template <int psm, int bsx, int bsy, int alignment>
|
template <int psm, int bsx, int bsy, int alignment>
|
||||||
void GSLocalMemory::WriteImageColumn(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
void GSLocalMemory::WriteImageColumn(int l, int r, int y, int h, const u8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
||||||
{
|
{
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
const int csy = bsy / 4;
|
const int csy = bsy / 4;
|
||||||
|
|
||||||
|
@ -557,10 +557,10 @@ void GSLocalMemory::WriteImageColumn(int l, int r, int y, int h, const uint8* sr
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int psm, int bsx, int bsy, int alignment>
|
template <int psm, int bsx, int bsy, int alignment>
|
||||||
void GSLocalMemory::WriteImageBlock(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
void GSLocalMemory::WriteImageBlock(int l, int r, int y, int h, const u8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
||||||
{
|
{
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
for (int offset = srcpitch * bsy; h >= bsy; h -= bsy, y += bsy, src += offset)
|
for (int offset = srcpitch * bsy; h >= bsy; h -= bsy, y += bsy, src += offset)
|
||||||
{
|
{
|
||||||
|
@ -584,10 +584,10 @@ void GSLocalMemory::WriteImageBlock(int l, int r, int y, int h, const uint8* src
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int psm, int bsx, int bsy>
|
template <int psm, int bsx, int bsy>
|
||||||
void GSLocalMemory::WriteImageLeftRight(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
void GSLocalMemory::WriteImageLeftRight(int l, int r, int y, int h, const u8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
||||||
{
|
{
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
for (; h > 0; y++, h--, src += srcpitch)
|
for (; h > 0; y++, h--, src += srcpitch)
|
||||||
{
|
{
|
||||||
|
@ -595,14 +595,14 @@ void GSLocalMemory::WriteImageLeftRight(int l, int r, int y, int h, const uint8*
|
||||||
{
|
{
|
||||||
switch (psm)
|
switch (psm)
|
||||||
{
|
{
|
||||||
case PSM_PSMCT32: WritePixel32(x, y, *(uint32*)&src[x * 4], bp, bw); break;
|
case PSM_PSMCT32: WritePixel32(x, y, *(u32*)&src[x * 4], bp, bw); break;
|
||||||
case PSM_PSMCT16: WritePixel16(x, y, *(uint16*)&src[x * 2], bp, bw); break;
|
case PSM_PSMCT16: WritePixel16(x, y, *(u16*)&src[x * 2], bp, bw); break;
|
||||||
case PSM_PSMCT16S: WritePixel16S(x, y, *(uint16*)&src[x * 2], bp, bw); break;
|
case PSM_PSMCT16S: WritePixel16S(x, y, *(u16*)&src[x * 2], bp, bw); break;
|
||||||
case PSM_PSMT8: WritePixel8(x, y, src[x], bp, bw); break;
|
case PSM_PSMT8: WritePixel8(x, y, src[x], bp, bw); break;
|
||||||
case PSM_PSMT4: WritePixel4(x, y, src[x >> 1] >> ((x & 1) << 2), bp, bw); break;
|
case PSM_PSMT4: WritePixel4(x, y, src[x >> 1] >> ((x & 1) << 2), bp, bw); break;
|
||||||
case PSM_PSMZ32: WritePixel32Z(x, y, *(uint32*)&src[x * 4], bp, bw); break;
|
case PSM_PSMZ32: WritePixel32Z(x, y, *(u32*)&src[x * 4], bp, bw); break;
|
||||||
case PSM_PSMZ16: WritePixel16Z(x, y, *(uint16*)&src[x * 2], bp, bw); break;
|
case PSM_PSMZ16: WritePixel16Z(x, y, *(u16*)&src[x * 2], bp, bw); break;
|
||||||
case PSM_PSMZ16S: WritePixel16SZ(x, y, *(uint16*)&src[x * 2], bp, bw); break;
|
case PSM_PSMZ16S: WritePixel16SZ(x, y, *(u16*)&src[x * 2], bp, bw); break;
|
||||||
// TODO
|
// TODO
|
||||||
default: __assume(0);
|
default: __assume(0);
|
||||||
}
|
}
|
||||||
|
@ -611,12 +611,12 @@ void GSLocalMemory::WriteImageLeftRight(int l, int r, int y, int h, const uint8*
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int psm, int bsx, int bsy, int trbpp>
|
template <int psm, int bsx, int bsy, int trbpp>
|
||||||
void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const u8* src, int srcpitch, const GIFRegBITBLTBUF& BITBLTBUF)
|
||||||
{
|
{
|
||||||
alignas(32) uint8 buff[64]; // merge buffer for one column
|
alignas(32) u8 buff[64]; // merge buffer for one column
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
const int csy = bsy / 4;
|
const int csy = bsy / 4;
|
||||||
|
|
||||||
|
@ -630,7 +630,7 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
|
||||||
|
|
||||||
for (int x = l; x < r; x += bsx)
|
for (int x = l; x < r; x += bsx)
|
||||||
{
|
{
|
||||||
uint8* dst = NULL;
|
u8* dst = NULL;
|
||||||
|
|
||||||
switch (psm)
|
switch (psm)
|
||||||
{
|
{
|
||||||
|
@ -719,7 +719,7 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
|
||||||
{
|
{
|
||||||
for (int x = l; x < r; x += bsx)
|
for (int x = l; x < r; x += bsx)
|
||||||
{
|
{
|
||||||
uint8* dst = NULL;
|
u8* dst = NULL;
|
||||||
|
|
||||||
switch (psm)
|
switch (psm)
|
||||||
{
|
{
|
||||||
|
@ -772,7 +772,7 @@ void GSLocalMemory::WriteImageTopBottom(int l, int r, int y, int h, const uint8*
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int psm, int bsx, int bsy, int trbpp>
|
template <int psm, int bsx, int bsy, int trbpp>
|
||||||
void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
void GSLocalMemory::WriteImage(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||||
{
|
{
|
||||||
if (TRXREG.RRW == 0)
|
if (TRXREG.RRW == 0)
|
||||||
return;
|
return;
|
||||||
|
@ -797,7 +797,7 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, const uint8* src, int len, GIFR
|
||||||
|
|
||||||
if (ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row
|
if (ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row
|
||||||
{
|
{
|
||||||
const uint8* s = &src[-l * trbpp >> 3];
|
const u8* s = &src[-l * trbpp >> 3];
|
||||||
|
|
||||||
src += srcpitch * h;
|
src += srcpitch * h;
|
||||||
len -= srcpitch * h;
|
len -= srcpitch * h;
|
||||||
|
@ -890,13 +890,13 @@ static bool IsTopLeftAligned(int dsax, int tx, int ty, int bw, int bh)
|
||||||
return ((dsax & (bw - 1)) == 0 && (tx & (bw - 1)) == 0 && dsax == tx && (ty & (bh - 1)) == 0);
|
return ((dsax & (bw - 1)) == 0 && (tx & (bw - 1)) == 0 && dsax == tx && (ty & (bh - 1)) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
void GSLocalMemory::WriteImage24(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||||
{
|
{
|
||||||
if (TRXREG.RRW == 0)
|
if (TRXREG.RRW == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW * 3;
|
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW * 3;
|
||||||
int th = len / srcpitch;
|
int th = len / srcpitch;
|
||||||
|
@ -925,13 +925,13 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, const uint8* src, int len, GI
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
void GSLocalMemory::WriteImage8H(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||||
{
|
{
|
||||||
if (TRXREG.RRW == 0)
|
if (TRXREG.RRW == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW;
|
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW;
|
||||||
int th = len / srcpitch;
|
int th = len / srcpitch;
|
||||||
|
@ -960,13 +960,13 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, const uint8* src, int len, GI
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||||
{
|
{
|
||||||
if (TRXREG.RRW == 0)
|
if (TRXREG.RRW == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW / 2;
|
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW / 2;
|
||||||
int th = len / srcpitch;
|
int th = len / srcpitch;
|
||||||
|
@ -995,13 +995,13 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, const uint8* src, int len, G
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||||
{
|
{
|
||||||
if (TRXREG.RRW == 0)
|
if (TRXREG.RRW == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW / 2;
|
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW / 2;
|
||||||
int th = len / srcpitch;
|
int th = len / srcpitch;
|
||||||
|
@ -1030,13 +1030,13 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, const uint8* src, int len, G
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
void GSLocalMemory::WriteImage24Z(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||||
{
|
{
|
||||||
if (TRXREG.RRW == 0)
|
if (TRXREG.RRW == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW * 3;
|
int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW * 3;
|
||||||
int th = len / srcpitch;
|
int th = len / srcpitch;
|
||||||
|
@ -1122,17 +1122,17 @@ static void readWriteHelper(VM* vm, int& tx, int& ty, int len, int xinc, int sx,
|
||||||
readWriteHelperImpl(tx, ty, len, xinc, sx, w, [&](int x, int y){ return off.paMulti(vm, x, y); }, std::forward<Fn>(fn));
|
readWriteHelperImpl(tx, ty, len, xinc, sx, w, [&](int x, int y){ return off.paMulti(vm, x, y); }, std::forward<Fn>(fn));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
void GSLocalMemory::WriteImageX(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
|
||||||
{
|
{
|
||||||
if (len <= 0)
|
if (len <= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const uint8* pb = (uint8*)src;
|
const u8* pb = (u8*)src;
|
||||||
const uint16* pw = (uint16*)src;
|
const u16* pw = (u16*)src;
|
||||||
const uint32* pd = (uint32*)src;
|
const u32* pd = (u32*)src;
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.DBP;
|
u32 bp = BITBLTBUF.DBP;
|
||||||
uint32 bw = BITBLTBUF.DBW;
|
u32 bw = BITBLTBUF.DBW;
|
||||||
|
|
||||||
int sx = TRXPOS.DSAX;
|
int sx = TRXPOS.DSAX;
|
||||||
int w = TRXREG.RRW;
|
int w = TRXREG.RRW;
|
||||||
|
@ -1154,7 +1154,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
case PSM_PSMZ24:
|
case PSM_PSMZ24:
|
||||||
readWriteHelper(m_vm32, tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(swizzle32), [&](auto& pa, int x)
|
readWriteHelper(m_vm32, tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(swizzle32), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
WritePixel24(pa.value(x), *(uint32*)pb);
|
WritePixel24(pa.value(x), *(u32*)pb);
|
||||||
pb += 3;
|
pb += 3;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
@ -1217,17 +1217,17 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const
|
void GSLocalMemory::ReadImageX(int& tx, int& ty, u8* dst, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) const
|
||||||
{
|
{
|
||||||
if (len <= 0)
|
if (len <= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
uint8* RESTRICT pb = (uint8*)dst;
|
u8* RESTRICT pb = (u8*)dst;
|
||||||
uint16* RESTRICT pw = (uint16*)dst;
|
u16* RESTRICT pw = (u16*)dst;
|
||||||
uint32* RESTRICT pd = (uint32*)dst;
|
u32* RESTRICT pd = (u32*)dst;
|
||||||
|
|
||||||
uint32 bp = BITBLTBUF.SBP;
|
u32 bp = BITBLTBUF.SBP;
|
||||||
uint32 bw = BITBLTBUF.SBW;
|
u32 bw = BITBLTBUF.SBW;
|
||||||
|
|
||||||
int sx = TRXPOS.SSAX;
|
int sx = TRXPOS.SSAX;
|
||||||
int w = TRXREG.RRW;
|
int w = TRXREG.RRW;
|
||||||
|
@ -1262,7 +1262,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
|
|
||||||
for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8)
|
for (int ex8 = ex - 8; len >= 8 && x <= ex8; len -= 8, x += 8, pd += 8)
|
||||||
{
|
{
|
||||||
uint32* ps = pa.value(x);
|
u32* ps = pa.value(x);
|
||||||
|
|
||||||
GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4));
|
GSVector4i::store<false>(&pd[0], GSVector4i::load(ps + 0, ps + 4));
|
||||||
GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12));
|
GSVector4i::store<false>(&pd[4], GSVector4i::load(ps + 8, ps + 12));
|
||||||
|
@ -1293,10 +1293,10 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
case PSM_PSMZ24:
|
case PSM_PSMZ24:
|
||||||
readWriteHelper(m_vm32, tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(swizzle32), [&](auto& pa, int x)
|
readWriteHelper(m_vm32, tx, ty, len / 3, 1, sx, w, off.assertSizesMatch(swizzle32), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
uint32 c = *pa.value(x);
|
u32 c = *pa.value(x);
|
||||||
pb[0] = (uint8)(c);
|
pb[0] = (u8)(c);
|
||||||
pb[1] = (uint8)(c >> 8);
|
pb[1] = (u8)(c >> 8);
|
||||||
pb[2] = (uint8)(c >> 16);
|
pb[2] = (u8)(c >> 16);
|
||||||
pb += 3;
|
pb += 3;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
@ -1323,8 +1323,8 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
case PSM_PSMT4:
|
case PSM_PSMT4:
|
||||||
readWriteHelper(tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa, int x)
|
readWriteHelper(tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4), [&](GSOffset::PAHelper& pa, int x)
|
||||||
{
|
{
|
||||||
uint8 low = ReadPixel4(pa.value(x));
|
u8 low = ReadPixel4(pa.value(x));
|
||||||
uint8 high = ReadPixel4(pa.value(x + 1));
|
u8 high = ReadPixel4(pa.value(x + 1));
|
||||||
*pb = low | (high << 4);
|
*pb = low | (high << 4);
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
@ -1332,7 +1332,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
case PSM_PSMT8H:
|
case PSM_PSMT8H:
|
||||||
readWriteHelper(m_vm32, tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](auto& pa, int x)
|
readWriteHelper(m_vm32, tx, ty, len, 1, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT8H), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
*pb = (uint8)(*pa.value(x) >> 24);
|
*pb = (u8)(*pa.value(x) >> 24);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
@ -1340,9 +1340,9 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
case PSM_PSMT4HL:
|
case PSM_PSMT4HL:
|
||||||
readWriteHelper(m_vm32, tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](auto& pa, int x)
|
readWriteHelper(m_vm32, tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HL), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
uint32 c0 = *pa.value(x) >> 24 & 0x0f;
|
u32 c0 = *pa.value(x) >> 24 & 0x0f;
|
||||||
uint32 c1 = *pa.value(x + 1) >> 20 & 0xf0;
|
u32 c1 = *pa.value(x + 1) >> 20 & 0xf0;
|
||||||
*pb = (uint8)(c0 | c1);
|
*pb = (u8)(c0 | c1);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
@ -1350,9 +1350,9 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
case PSM_PSMT4HH:
|
case PSM_PSMT4HH:
|
||||||
readWriteHelper(m_vm32, tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](auto& pa, int x)
|
readWriteHelper(m_vm32, tx, ty, len * 2, 2, sx, w, GSOffset::fromKnownPSM(bp, bw, PSM_PSMT4HH), [&](auto& pa, int x)
|
||||||
{
|
{
|
||||||
uint32 c0 = *pa.value(x) >> 28 & 0x0f;
|
u32 c0 = *pa.value(x) >> 28 & 0x0f;
|
||||||
uint32 c1 = *pa.value(x + 1) >> 24 & 0xf0;
|
u32 c1 = *pa.value(x + 1) >> 24 & 0xf0;
|
||||||
*pb = (uint8)(c0 | c1);
|
*pb = (u8)(c0 | c1);
|
||||||
pb++;
|
pb++;
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
|
@ -1361,35 +1361,35 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, uint8* dst, int len, GIFRegBITB
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture32(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock32(src, read_dst, dstpitch);
|
GSBlock::ReadBlock32(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture24(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
if (TEXA.AEM)
|
if (TEXA.AEM)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock24<true>(src, read_dst, dstpitch, TEXA);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock24<false>(src, read_dst, dstpitch, TEXA);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 16, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 16, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock16(src, read_dst, dstpitch);
|
GSBlock::ReadBlock16(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
@ -1398,78 +1398,78 @@ void GSLocalMemory::ReadTextureGPU24(const GSOffset& off, const GSVector4i& r, u
|
||||||
ASSERT(dstpitch >= r.width() * 4);
|
ASSERT(dstpitch >= r.width() * 4);
|
||||||
for (int y = r.top; y < r.bottom; y++)
|
for (int y = r.top; y < r.bottom; y++)
|
||||||
{
|
{
|
||||||
uint8* line = dst + y * dstpitch;
|
u8* line = dst + y * dstpitch;
|
||||||
|
|
||||||
for (int x = r.right; x >= r.left; x--)
|
for (int x = r.right; x >= r.left; x--)
|
||||||
{
|
{
|
||||||
*(uint32*)&line[x * 4] = *(uint32*)&line[x * 3] & 0xFFFFFF;
|
*(u32*)&line[x * 4] = *(u32*)&line[x * 3] & 0xFFFFFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture16(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture16(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
if (TEXA.AEM)
|
if (TEXA.AEM)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock16<true>(src, read_dst, dstpitch, TEXA);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle16), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
|
GSBlock::ReadAndExpandBlock16<false>(src, read_dst, dstpitch, TEXA);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const u32* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off.assertSizesMatch(swizzle8), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle8), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock8_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint64* pal = m_clut;
|
const u64* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off.assertSizesMatch(swizzle4), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle4), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8H(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8H(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const u32* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock8H_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HL(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HL(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const u32* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4HL_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HH(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const uint32* pal = m_clut;
|
const u32* pal = m_clut;
|
||||||
|
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 32, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
|
GSBlock::ReadAndExpandBlock4HH_32(src, read_dst, dstpitch, pal);
|
||||||
});
|
});
|
||||||
|
@ -1477,14 +1477,14 @@ void GSLocalMemory::ReadTexture4HH(const GSOffset& off, const GSVector4i& r, uin
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock32(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
GSBlock::ReadBlock32(BlockPtr(bp), dst, dstpitch);
|
GSBlock::ReadBlock32(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock24(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
|
@ -1498,7 +1498,7 @@ void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock16(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
|
@ -1512,35 +1512,35 @@ void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock8(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
GSBlock::ReadAndExpandBlock8_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
GSBlock::ReadAndExpandBlock8_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
GSBlock::ReadAndExpandBlock4_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
GSBlock::ReadAndExpandBlock4_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock8H(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock8H(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
GSBlock::ReadAndExpandBlock8H_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
GSBlock::ReadAndExpandBlock8H_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4HL(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4HL(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
GSBlock::ReadAndExpandBlock4HL_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
GSBlock::ReadAndExpandBlock4HL_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4HH(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
|
@ -1549,7 +1549,7 @@ void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, con
|
||||||
|
|
||||||
///////////////////
|
///////////////////
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
const psm_t& psm = m_psm[off.psm()];
|
const psm_t& psm = m_psm[off.psm()];
|
||||||
|
|
||||||
|
@ -1566,7 +1566,7 @@ void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8*
|
||||||
|
|
||||||
GSVector4i cr = r.ralign<Align_Inside>(psm.bs);
|
GSVector4i cr = r.ralign<Align_Inside>(psm.bs);
|
||||||
|
|
||||||
bool aligned = ((size_t)(dst + (cr.left - r.left) * sizeof(uint32)) & 0xf) == 0;
|
bool aligned = ((size_t)(dst + (cr.left - r.left) * sizeof(u32)) & 0xf) == 0;
|
||||||
|
|
||||||
if (cr.rempty() || !aligned)
|
if (cr.rempty() || !aligned)
|
||||||
{
|
{
|
||||||
|
@ -1579,7 +1579,7 @@ void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8*
|
||||||
{
|
{
|
||||||
for (int x = r.left, i = 0; x < r.right; x++, i++)
|
for (int x = r.left, i = 0; x < r.right; x++, i++)
|
||||||
{
|
{
|
||||||
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
((u32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1589,7 +1589,7 @@ void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8*
|
||||||
{
|
{
|
||||||
for (int x = r.left, i = 0; x < r.right; x++, i++)
|
for (int x = r.left, i = 0; x < r.right; x++, i++)
|
||||||
{
|
{
|
||||||
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
((u32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1597,7 +1597,7 @@ void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8*
|
||||||
{
|
{
|
||||||
for (int x = r.left, i = 0; x < r.right; x++, i++)
|
for (int x = r.left, i = 0; x < r.right; x++, i++)
|
||||||
{
|
{
|
||||||
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
((u32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1605,18 +1605,18 @@ void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8*
|
||||||
{
|
{
|
||||||
for (int x = r.left, i = 0; x < cr.left; x++, i++)
|
for (int x = r.left, i = 0; x < cr.left; x++, i++)
|
||||||
{
|
{
|
||||||
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
((u32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int x = cr.right, i = x - r.left; x < r.right; x++, i++)
|
for (int x = cr.right, i = x - r.left; x < r.right; x++, i++)
|
||||||
{
|
{
|
||||||
((uint32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
((u32*)dst)[i] = (this->*rt)(x, y, TEX0, TEXA);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!cr.rempty())
|
if (!cr.rempty())
|
||||||
{
|
{
|
||||||
(this->*rtx)(off, cr, dst + (cr.left - r.left) * sizeof(uint32), dstpitch, TEXA);
|
(this->*rtx)(off, cr, dst + (cr.left - r.left) * sizeof(u32), dstpitch, TEXA);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1628,41 +1628,41 @@ void GSLocalMemory::ReadTexture(const GSOffset& off, const GSVector4i& r, uint8*
|
||||||
|
|
||||||
// 32/8
|
// 32/8
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8P(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle8), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle8), this, r, dst, dstpitch, 8, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock8(src, read_dst, dstpitch);
|
GSBlock::ReadBlock8(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4P(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle4), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle4), this, r, dst, dstpitch, 8, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4P(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4P(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture8HP(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock8HP(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HLP(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4HLP(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, u8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](uint8* read_dst, const uint8* src)
|
foreachBlock(off.assertSizesMatch(swizzle32), this, r, dst, dstpitch, 8, [&](u8* read_dst, const u8* src)
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
|
GSBlock::ReadBlock4HHP(src, read_dst, dstpitch);
|
||||||
});
|
});
|
||||||
|
@ -1670,33 +1670,33 @@ void GSLocalMemory::ReadTexture4HHP(const GSOffset& off, const GSVector4i& r, ui
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock8P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock8P(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
GSBlock::ReadBlock8(BlockPtr(bp), dst, dstpitch);
|
GSBlock::ReadBlock8(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4P(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4P(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
GSBlock::ReadBlock4P(BlockPtr(bp), dst, dstpitch);
|
GSBlock::ReadBlock4P(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock8HP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock8HP(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
GSBlock::ReadBlock8HP(BlockPtr(bp), dst, dstpitch);
|
GSBlock::ReadBlock8HP(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4HLP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4HLP(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
GSBlock::ReadBlock4HLP(BlockPtr(bp), dst, dstpitch);
|
GSBlock::ReadBlock4HLP(BlockPtr(bp), dst, dstpitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSLocalMemory::ReadTextureBlock4HHP(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
void GSLocalMemory::ReadTextureBlock4HHP(u32 bp, u8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
|
@ -1707,7 +1707,7 @@ void GSLocalMemory::ReadTextureBlock4HHP(uint32 bp, uint8* dst, int dstpitch, co
|
||||||
|
|
||||||
#include "Renderers/SW/GSTextureSW.h"
|
#include "Renderers/SW/GSTextureSW.h"
|
||||||
|
|
||||||
void GSLocalMemory::SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32 psm, int w, int h)
|
void GSLocalMemory::SaveBMP(const std::string& fn, u32 bp, u32 bw, u32 psm, int w, int h)
|
||||||
{
|
{
|
||||||
int pitch = w * 4;
|
int pitch = w * 4;
|
||||||
int size = pitch * h;
|
int size = pitch * h;
|
||||||
|
@ -1721,13 +1721,13 @@ void GSLocalMemory::SaveBMP(const std::string& fn, uint32 bp, uint32 bw, uint32
|
||||||
|
|
||||||
readPixel rp = m_psm[psm].rp;
|
readPixel rp = m_psm[psm].rp;
|
||||||
|
|
||||||
uint8* p = (uint8*)bits;
|
u8* p = (u8*)bits;
|
||||||
|
|
||||||
for (int j = 0; j < h; j++, p += pitch)
|
for (int j = 0; j < h; j++, p += pitch)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < w; i++)
|
for (int i = 0; i < w; i++)
|
||||||
{
|
{
|
||||||
((uint32*)p)[i] = (this->*rp)(i, j, TEX0.TBP0, TEX0.TBW);
|
((u32*)p)[i] = (this->*rp)(i, j, TEX0.TBP0, TEX0.TBW);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -40,7 +40,7 @@ void GSPerfMon::Put(counter_t c, double val)
|
||||||
# else
|
# else
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
# endif
|
# endif
|
||||||
uint64 now = (uint64)ts.tv_sec * (uint64)1e6 + (uint64)ts.tv_nsec / (uint64)1e3;
|
u64 now = (u64)ts.tv_sec * (u64)1e6 + (u64)ts.tv_nsec / (u64)1e3;
|
||||||
#else
|
#else
|
||||||
clock_t now = clock();
|
clock_t now = clock();
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -45,8 +45,8 @@ public:
|
||||||
protected:
|
protected:
|
||||||
double m_counters[CounterLast];
|
double m_counters[CounterLast];
|
||||||
double m_stats[CounterLast];
|
double m_stats[CounterLast];
|
||||||
uint64 m_begin[TimerLast], m_total[TimerLast], m_start[TimerLast];
|
u64 m_begin[TimerLast], m_total[TimerLast], m_start[TimerLast];
|
||||||
uint64 m_frame;
|
u64 m_frame;
|
||||||
clock_t m_lastframe;
|
clock_t m_lastframe;
|
||||||
int m_count;
|
int m_count;
|
||||||
|
|
||||||
|
@ -55,8 +55,8 @@ protected:
|
||||||
public:
|
public:
|
||||||
GSPerfMon();
|
GSPerfMon();
|
||||||
|
|
||||||
void SetFrame(uint64 frame) { m_frame = frame; }
|
void SetFrame(u64 frame) { m_frame = frame; }
|
||||||
uint64 GetFrame() { return m_frame; }
|
u64 GetFrame() { return m_frame; }
|
||||||
|
|
||||||
void Put(counter_t c, double val = 0);
|
void Put(counter_t c, double val = 0);
|
||||||
double Get(counter_t c) { return m_stats[c]; }
|
double Get(counter_t c) { return m_stats[c]; }
|
||||||
|
|
|
@ -38,8 +38,8 @@ struct
|
||||||
namespace GSPng
|
namespace GSPng
|
||||||
{
|
{
|
||||||
|
|
||||||
bool SaveFile(const std::string& file, const Format fmt, const uint8* const image,
|
bool SaveFile(const std::string& file, const Format fmt, const u8* const image,
|
||||||
uint8* const row, const int width, const int height, const int pitch,
|
u8* const row, const int width, const int height, const int pitch,
|
||||||
const int compression, const bool rb_swapped = false, const bool first_image = false)
|
const int compression, const bool rb_swapped = false, const bool first_image = false)
|
||||||
{
|
{
|
||||||
const int channel_bit_depth = pixel[fmt].channel_bit_depth;
|
const int channel_bit_depth = pixel[fmt].channel_bit_depth;
|
||||||
|
@ -105,7 +105,7 @@ namespace GSPng
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Save(GSPng::Format fmt, const std::string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped)
|
bool Save(GSPng::Format fmt, const std::string& file, u8* image, int w, int h, int pitch, int compression, bool rb_swapped)
|
||||||
{
|
{
|
||||||
std::string root = file;
|
std::string root = file;
|
||||||
root.replace(file.length() - 4, 4, "");
|
root.replace(file.length() - 4, 4, "");
|
||||||
|
@ -115,7 +115,7 @@ namespace GSPng
|
||||||
if (compression < 0 || compression > Z_BEST_COMPRESSION)
|
if (compression < 0 || compression > Z_BEST_COMPRESSION)
|
||||||
compression = Z_BEST_SPEED;
|
compression = Z_BEST_SPEED;
|
||||||
|
|
||||||
std::unique_ptr<uint8[]> row(new uint8[pixel[fmt].bytes_per_pixel_out * w]);
|
std::unique_ptr<u8[]> row(new u8[pixel[fmt].bytes_per_pixel_out * w]);
|
||||||
|
|
||||||
std::string filename = root + pixel[fmt].extension[0];
|
std::string filename = root + pixel[fmt].extension[0];
|
||||||
if (!SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression, rb_swapped, true))
|
if (!SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression, rb_swapped, true))
|
||||||
|
@ -129,11 +129,11 @@ namespace GSPng
|
||||||
return SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression);
|
return SaveFile(filename, fmt, image, row.get(), w, h, pitch, compression);
|
||||||
}
|
}
|
||||||
|
|
||||||
Transaction::Transaction(GSPng::Format fmt, const std::string& file, const uint8* image, int w, int h, int pitch, int compression)
|
Transaction::Transaction(GSPng::Format fmt, const std::string& file, const u8* image, int w, int h, int pitch, int compression)
|
||||||
: m_fmt(fmt), m_file(file), m_w(w), m_h(h), m_pitch(pitch), m_compression(compression)
|
: m_fmt(fmt), m_file(file), m_w(w), m_h(h), m_pitch(pitch), m_compression(compression)
|
||||||
{
|
{
|
||||||
// Note: yes it would be better to use shared pointer
|
// Note: yes it would be better to use shared pointer
|
||||||
m_image = (uint8*)_aligned_malloc(pitch * h, 32);
|
m_image = (u8*)_aligned_malloc(pitch * h, 32);
|
||||||
if (m_image)
|
if (m_image)
|
||||||
memcpy(m_image, image, pitch * h);
|
memcpy(m_image, image, pitch * h);
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,17 +37,17 @@ namespace GSPng
|
||||||
public:
|
public:
|
||||||
Format m_fmt;
|
Format m_fmt;
|
||||||
const std::string m_file;
|
const std::string m_file;
|
||||||
uint8* m_image;
|
u8* m_image;
|
||||||
int m_w;
|
int m_w;
|
||||||
int m_h;
|
int m_h;
|
||||||
int m_pitch;
|
int m_pitch;
|
||||||
int m_compression;
|
int m_compression;
|
||||||
|
|
||||||
Transaction(GSPng::Format fmt, const std::string& file, const uint8* image, int w, int h, int pitch, int compression);
|
Transaction(GSPng::Format fmt, const std::string& file, const u8* image, int w, int h, int pitch, int compression);
|
||||||
~Transaction();
|
~Transaction();
|
||||||
};
|
};
|
||||||
|
|
||||||
bool Save(GSPng::Format fmt, const std::string& file, uint8* image, int w, int h, int pitch, int compression, bool rb_swapped = false);
|
bool Save(GSPng::Format fmt, const std::string& file, u8* image, int w, int h, int pitch, int compression, bool rb_swapped = false);
|
||||||
|
|
||||||
void Process(std::shared_ptr<Transaction>& item);
|
void Process(std::shared_ptr<Transaction>& item);
|
||||||
|
|
||||||
|
|
|
@ -148,7 +148,7 @@ GSState::~GSState()
|
||||||
_aligned_free(m_index.buff);
|
_aligned_free(m_index.buff);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::SetRegsMem(uint8* basemem)
|
void GSState::SetRegsMem(u8* basemem)
|
||||||
{
|
{
|
||||||
ASSERT(basemem);
|
ASSERT(basemem);
|
||||||
|
|
||||||
|
@ -338,8 +338,8 @@ GSVideoMode GSState::GetVideoMode()
|
||||||
// Other videomodes can't be detected on our side without the help of the data from core
|
// Other videomodes can't be detected on our side without the help of the data from core
|
||||||
// You can only identify a limited number of video modes based on the info from CRTC registers.
|
// You can only identify a limited number of video modes based on the info from CRTC registers.
|
||||||
|
|
||||||
const uint8 Colorburst = m_regs->SMODE1.CMOD; // Subcarrier frequency
|
const u8 Colorburst = m_regs->SMODE1.CMOD; // Subcarrier frequency
|
||||||
const uint8 PLL_Divider = m_regs->SMODE1.LC; // Phased lock loop divider
|
const u8 PLL_Divider = m_regs->SMODE1.LC; // Phased lock loop divider
|
||||||
|
|
||||||
switch (Colorburst)
|
switch (Colorburst)
|
||||||
{
|
{
|
||||||
|
@ -438,13 +438,13 @@ GSVector4i GSState::GetDisplayRect(int i)
|
||||||
|
|
||||||
const auto& DISP = m_regs->DISP[i].DISPLAY;
|
const auto& DISP = m_regs->DISP[i].DISPLAY;
|
||||||
|
|
||||||
const uint32 DW = DISP.DW + 1;
|
const u32 DW = DISP.DW + 1;
|
||||||
const uint32 DH = DISP.DH + 1;
|
const u32 DH = DISP.DH + 1;
|
||||||
const uint32 DX = DISP.DX;
|
const u32 DX = DISP.DX;
|
||||||
const uint32 DY = DISP.DY;
|
const u32 DY = DISP.DY;
|
||||||
|
|
||||||
const uint32 MAGH = DISP.MAGH + 1;
|
const u32 MAGH = DISP.MAGH + 1;
|
||||||
const uint32 MAGV = DISP.MAGV + 1;
|
const u32 MAGV = DISP.MAGV + 1;
|
||||||
|
|
||||||
const GSVector2i magnification(MAGH, MAGV);
|
const GSVector2i magnification(MAGH, MAGV);
|
||||||
|
|
||||||
|
@ -478,8 +478,8 @@ GSVector4i GSState::GetFrameRect(int i)
|
||||||
if (isinterlaced() && m_regs->SMODE2.FFMD && h > 1)
|
if (isinterlaced() && m_regs->SMODE2.FFMD && h > 1)
|
||||||
h >>= 1;
|
h >>= 1;
|
||||||
|
|
||||||
const uint32 DBX = m_regs->DISP[i].DISPFB.DBX;
|
const u32 DBX = m_regs->DISP[i].DISPFB.DBX;
|
||||||
const uint32 DBY = m_regs->DISP[i].DISPFB.DBY;
|
const u32 DBY = m_regs->DISP[i].DISPFB.DBY;
|
||||||
|
|
||||||
rectangle.left = DBX;
|
rectangle.left = DBX;
|
||||||
rectangle.top = DBY;
|
rectangle.top = DBY;
|
||||||
|
@ -560,16 +560,16 @@ void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
|
||||||
const GSVector4i mask = GSVector4i::load(0x0c080400);
|
const GSVector4i mask = GSVector4i::load(0x0c080400);
|
||||||
const GSVector4i v = GSVector4i::load<false>(r).shuffle8(mask);
|
const GSVector4i v = GSVector4i::load<false>(r).shuffle8(mask);
|
||||||
|
|
||||||
m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v);
|
m_v.RGBAQ.U32[0] = (u32)GSVector4i::store(v);
|
||||||
|
|
||||||
m_v.RGBAQ.Q = m_q;
|
m_v.RGBAQ.Q = m_q;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
|
void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
const GSVector4i st = GSVector4i::loadl(&r->u64[0]);
|
const GSVector4i st = GSVector4i::loadl(&r->U64[0]);
|
||||||
|
|
||||||
GSVector4i q = GSVector4i::loadl(&r->u64[1]);
|
GSVector4i q = GSVector4i::loadl(&r->U64[1]);
|
||||||
GSVector4i::storel(&m_v.ST, st);
|
GSVector4i::storel(&m_v.ST, st);
|
||||||
|
|
||||||
// Vexx (character shadow)
|
// Vexx (character shadow)
|
||||||
|
@ -593,23 +593,23 @@ void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
const GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
|
const GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
|
||||||
|
|
||||||
m_v.UV = (uint32)GSVector4i::store(v.ps32(v));
|
m_v.UV = (u32)GSVector4i::store(v.ps32(v));
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r)
|
void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
const GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
|
const GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff();
|
||||||
|
|
||||||
m_v.UV = (uint32)GSVector4i::store(v.ps32(v));
|
m_v.UV = (u32)GSVector4i::store(v.ps32(v));
|
||||||
|
|
||||||
m_isPackedUV_HackFlag = true;
|
m_isPackedUV_HackFlag = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <uint32 prim, uint32 adc, bool auto_flush>
|
template <u32 prim, u32 adc, bool auto_flush>
|
||||||
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
|
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GSVector4i xy = GSVector4i::loadl(&r->u64[0]);
|
GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
|
||||||
GSVector4i zf = GSVector4i::loadl(&r->u64[1]);
|
GSVector4i zf = GSVector4i::loadl(&r->U64[1]);
|
||||||
|
|
||||||
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
|
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
|
||||||
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
|
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
|
||||||
|
@ -619,11 +619,11 @@ void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
|
||||||
VertexKick<prim, auto_flush>(adc ? 1 : r->XYZF2.Skip());
|
VertexKick<prim, auto_flush>(adc ? 1 : r->XYZF2.Skip());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <uint32 prim, uint32 adc, bool auto_flush>
|
template <u32 prim, u32 adc, bool auto_flush>
|
||||||
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
|
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
const GSVector4i xy = GSVector4i::loadl(&r->u64[0]);
|
const GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
|
||||||
const GSVector4i z = GSVector4i::loadl(&r->u64[1]);
|
const GSVector4i z = GSVector4i::loadl(&r->U64[1]);
|
||||||
const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
|
const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
|
||||||
|
|
||||||
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV));
|
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV));
|
||||||
|
@ -645,8 +645,8 @@ void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
template <uint32 prim, bool auto_flush>
|
template <u32 prim, bool auto_flush>
|
||||||
void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size)
|
void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size)
|
||||||
{
|
{
|
||||||
ASSERT(size > 0 && size % 3 == 0);
|
ASSERT(size > 0 && size % 3 == 0);
|
||||||
|
|
||||||
|
@ -654,16 +654,16 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, ui
|
||||||
|
|
||||||
while (r < r_end)
|
while (r < r_end)
|
||||||
{
|
{
|
||||||
GSVector4i st = GSVector4i::loadl(&r[0].u64[0]);
|
GSVector4i st = GSVector4i::loadl(&r[0].U64[0]);
|
||||||
GSVector4i q = GSVector4i::loadl(&r[0].u64[1]);
|
GSVector4i q = GSVector4i::loadl(&r[0].U64[1]);
|
||||||
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
|
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
|
||||||
|
|
||||||
q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
|
q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
|
||||||
|
|
||||||
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
|
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
|
||||||
|
|
||||||
GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]);
|
GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]);
|
||||||
GSVector4i zf = GSVector4i::loadl(&r[2].u64[1]);
|
GSVector4i zf = GSVector4i::loadl(&r[2].U64[1]);
|
||||||
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
|
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
|
||||||
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
|
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
|
||||||
|
|
||||||
|
@ -677,8 +677,8 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, ui
|
||||||
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
|
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
|
||||||
}
|
}
|
||||||
|
|
||||||
template <uint32 prim, bool auto_flush>
|
template <u32 prim, bool auto_flush>
|
||||||
void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, uint32 size)
|
void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size)
|
||||||
{
|
{
|
||||||
ASSERT(size > 0 && size % 3 == 0);
|
ASSERT(size > 0 && size % 3 == 0);
|
||||||
|
|
||||||
|
@ -686,16 +686,16 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, uin
|
||||||
|
|
||||||
while (r < r_end)
|
while (r < r_end)
|
||||||
{
|
{
|
||||||
GSVector4i st = GSVector4i::loadl(&r[0].u64[0]);
|
GSVector4i st = GSVector4i::loadl(&r[0].U64[0]);
|
||||||
GSVector4i q = GSVector4i::loadl(&r[0].u64[1]);
|
GSVector4i q = GSVector4i::loadl(&r[0].U64[1]);
|
||||||
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
|
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
|
||||||
|
|
||||||
q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
|
q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
|
||||||
|
|
||||||
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
|
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
|
||||||
|
|
||||||
GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]);
|
GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]);
|
||||||
GSVector4i z = GSVector4i::loadl(&r[2].u64[1]);
|
GSVector4i z = GSVector4i::loadl(&r[2].U64[1]);
|
||||||
GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
|
GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
|
||||||
|
|
||||||
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // TODO: only store the last one
|
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // TODO: only store the last one
|
||||||
|
@ -708,7 +708,7 @@ void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, uin
|
||||||
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
|
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size)
|
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, u32 size)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -716,11 +716,11 @@ void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void GSState::ApplyPRIM(uint32 prim)
|
__forceinline void GSState::ApplyPRIM(u32 prim)
|
||||||
{
|
{
|
||||||
if (GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim & 7)) // NOTE: assume strips/fans are converted to lists
|
if (GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim & 7)) // NOTE: assume strips/fans are converted to lists
|
||||||
{
|
{
|
||||||
if (m_env.PRMODECONT.AC == 1 && (m_env.PRIM.u32[0] ^ prim) & 0x7f8) // all fields except PRIM
|
if (m_env.PRMODECONT.AC == 1 && (m_env.PRIM.U32[0] ^ prim) & 0x7f8) // all fields except PRIM
|
||||||
Flush();
|
Flush();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -730,7 +730,7 @@ __forceinline void GSState::ApplyPRIM(uint32 prim)
|
||||||
|
|
||||||
if (m_env.PRMODECONT.AC == 1)
|
if (m_env.PRMODECONT.AC == 1)
|
||||||
{
|
{
|
||||||
m_env.PRIM.u32[0] = prim;
|
m_env.PRIM.U32[0] = prim;
|
||||||
|
|
||||||
UpdateContext();
|
UpdateContext();
|
||||||
}
|
}
|
||||||
|
@ -753,7 +753,7 @@ void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
ALIGN_STACK(32);
|
ALIGN_STACK(32);
|
||||||
|
|
||||||
ApplyPRIM(r->PRIM.u32[0]);
|
ApplyPRIM(r->PRIM.U32[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
|
||||||
|
@ -782,17 +782,17 @@ void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
m_v.UV = r->UV.u32[0] & 0x3fff3fff;
|
m_v.UV = r->UV.U32[0] & 0x3fff3fff;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
m_v.UV = r->UV.u32[0] & 0x3fff3fff;
|
m_v.UV = r->UV.U32[0] & 0x3fff3fff;
|
||||||
|
|
||||||
m_isPackedUV_HackFlag = false;
|
m_isPackedUV_HackFlag = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <uint32 prim, uint32 adc, bool auto_flush>
|
template <u32 prim, u32 adc, bool auto_flush>
|
||||||
void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GSVector4i xyzf = GSVector4i::loadl(&r->XYZF);
|
GSVector4i xyzf = GSVector4i::loadl(&r->XYZF);
|
||||||
|
@ -804,7 +804,7 @@ void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
|
||||||
VertexKick<prim, auto_flush>(adc);
|
VertexKick<prim, auto_flush>(adc);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <uint32 prim, uint32 adc, bool auto_flush>
|
template <u32 prim, u32 adc, bool auto_flush>
|
||||||
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV);
|
m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV);
|
||||||
|
@ -828,21 +828,21 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
|
||||||
// extremely broken for the same reasons as MLB Power Pros in that it spams TEX0 with
|
// extremely broken for the same reasons as MLB Power Pros in that it spams TEX0 with
|
||||||
// complete garbage making for a nice 1G heap of GSOffset.
|
// complete garbage making for a nice 1G heap of GSOffset.
|
||||||
|
|
||||||
GL_REG("Apply TEX0_%d = 0x%x_%x", i, TEX0.u32[1], TEX0.u32[0]);
|
GL_REG("Apply TEX0_%d = 0x%x_%x", i, TEX0.U32[1], TEX0.U32[0]);
|
||||||
|
|
||||||
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
|
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
|
||||||
const bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT);
|
const bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT);
|
||||||
|
|
||||||
// clut loading already covered with WriteTest, for drawing only have to check CPSM and CSA (MGS3 intro skybox would be drawn piece by piece without this)
|
// clut loading already covered with WriteTest, for drawing only have to check CPSM and CSA (MGS3 intro skybox would be drawn piece by piece without this)
|
||||||
|
|
||||||
constexpr uint64 mask = 0x1f78001fffffffffull; // TBP0 TBW PSM TW TH TCC TFX CPSM CSA
|
constexpr u64 mask = 0x1f78001fffffffffull; // TBP0 TBW PSM TW TH TCC TFX CPSM CSA
|
||||||
|
|
||||||
if (wt || PRIM->CTXT == i && ((TEX0.u64 ^ m_env.CTXT[i].TEX0.u64) & mask))
|
if (wt || PRIM->CTXT == i && ((TEX0.U64 ^ m_env.CTXT[i].TEX0.U64) & mask))
|
||||||
Flush();
|
Flush();
|
||||||
|
|
||||||
TEX0.CPSM &= 0xa; // 1010b
|
TEX0.CPSM &= 0xa; // 1010b
|
||||||
|
|
||||||
if ((TEX0.u32[0] ^ m_env.CTXT[i].TEX0.u32[0]) & 0x3ffffff) // TBP0 TBW PSM
|
if ((TEX0.U32[0] ^ m_env.CTXT[i].TEX0.U32[0]) & 0x3ffffff) // TBP0 TBW PSM
|
||||||
m_env.CTXT[i].offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
m_env.CTXT[i].offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||||
|
|
||||||
m_env.CTXT[i].TEX0 = (GSVector4i)TEX0;
|
m_env.CTXT[i].TEX0 = (GSVector4i)TEX0;
|
||||||
|
@ -895,7 +895,7 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TEX0_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("TEX0_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
GIFRegTEX0 TEX0 = r->TEX0;
|
GIFRegTEX0 TEX0 = r->TEX0;
|
||||||
|
|
||||||
|
@ -910,8 +910,8 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
|
||||||
// Sets TW/TH to 0
|
// Sets TW/TH to 0
|
||||||
// there used to be a case to force this to 10
|
// there used to be a case to force this to 10
|
||||||
// but GetSizeFixedTEX0 sorts this now
|
// but GetSizeFixedTEX0 sorts this now
|
||||||
TEX0.TW = std::clamp<uint32>(TEX0.TW, 0, 10);
|
TEX0.TW = std::clamp<u32>(TEX0.TW, 0, 10);
|
||||||
TEX0.TH = std::clamp<uint32>(TEX0.TH, 0, 10);
|
TEX0.TH = std::clamp<u32>(TEX0.TH, 0, 10);
|
||||||
|
|
||||||
ApplyTEX0<i>(TEX0);
|
ApplyTEX0<i>(TEX0);
|
||||||
|
|
||||||
|
@ -921,36 +921,36 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
|
||||||
// NOTE 2: Mipmap levels are tightly packed, if (tbw << 6) > (1 << tw) then the left-over space to the right is used. (common for PSM_PSMT4)
|
// NOTE 2: Mipmap levels are tightly packed, if (tbw << 6) > (1 << tw) then the left-over space to the right is used. (common for PSM_PSMT4)
|
||||||
// NOTE 3: Non-rectangular textures are treated as rectangular when calculating the occupied space (height is extended, not sure about width)
|
// NOTE 3: Non-rectangular textures are treated as rectangular when calculating the occupied space (height is extended, not sure about width)
|
||||||
|
|
||||||
uint32 bp = TEX0.TBP0;
|
u32 bp = TEX0.TBP0;
|
||||||
uint32 bw = TEX0.TBW;
|
u32 bw = TEX0.TBW;
|
||||||
uint32 w = 1u << TEX0.TW;
|
u32 w = 1u << TEX0.TW;
|
||||||
uint32 h = 1u << TEX0.TH;
|
u32 h = 1u << TEX0.TH;
|
||||||
|
|
||||||
const uint32 bpp = GSLocalMemory::m_psm[TEX0.PSM].bpp;
|
const u32 bpp = GSLocalMemory::m_psm[TEX0.PSM].bpp;
|
||||||
|
|
||||||
if (h < w)
|
if (h < w)
|
||||||
h = w;
|
h = w;
|
||||||
|
|
||||||
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||||
bw = std::max<uint32>(bw >> 1, 1);
|
bw = std::max<u32>(bw >> 1, 1);
|
||||||
w = std::max<uint32>(w >> 1, 1);
|
w = std::max<u32>(w >> 1, 1);
|
||||||
h = std::max<uint32>(h >> 1, 1);
|
h = std::max<u32>(h >> 1, 1);
|
||||||
|
|
||||||
m_env.CTXT[i].MIPTBP1.TBP1 = bp;
|
m_env.CTXT[i].MIPTBP1.TBP1 = bp;
|
||||||
m_env.CTXT[i].MIPTBP1.TBW1 = bw;
|
m_env.CTXT[i].MIPTBP1.TBW1 = bw;
|
||||||
|
|
||||||
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||||
bw = std::max<uint32>(bw >> 1, 1);
|
bw = std::max<u32>(bw >> 1, 1);
|
||||||
w = std::max<uint32>(w >> 1, 1);
|
w = std::max<u32>(w >> 1, 1);
|
||||||
h = std::max<uint32>(h >> 1, 1);
|
h = std::max<u32>(h >> 1, 1);
|
||||||
|
|
||||||
m_env.CTXT[i].MIPTBP1.TBP2 = bp;
|
m_env.CTXT[i].MIPTBP1.TBP2 = bp;
|
||||||
m_env.CTXT[i].MIPTBP1.TBW2 = bw;
|
m_env.CTXT[i].MIPTBP1.TBW2 = bw;
|
||||||
|
|
||||||
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||||
bw = std::max<uint32>(bw >> 1, 1);
|
bw = std::max<u32>(bw >> 1, 1);
|
||||||
w = std::max<uint32>(w >> 1, 1);
|
w = std::max<u32>(w >> 1, 1);
|
||||||
h = std::max<uint32>(h >> 1, 1);
|
h = std::max<u32>(h >> 1, 1);
|
||||||
|
|
||||||
m_env.CTXT[i].MIPTBP1.TBP3 = bp;
|
m_env.CTXT[i].MIPTBP1.TBP3 = bp;
|
||||||
m_env.CTXT[i].MIPTBP1.TBW3 = bw;
|
m_env.CTXT[i].MIPTBP1.TBW3 = bw;
|
||||||
|
@ -960,7 +960,7 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("CLAMP_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("CLAMP_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP)
|
if (PRIM->CTXT == i && r->CLAMP != m_env.CTXT[i].CLAMP)
|
||||||
Flush();
|
Flush();
|
||||||
|
@ -980,7 +980,7 @@ void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerTEX1(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTEX1(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TEX1_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("TEX1_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
|
if (PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
|
||||||
Flush();
|
Flush();
|
||||||
|
@ -991,7 +991,7 @@ void GSState::GIFRegHandlerTEX1(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TEX2_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("TEX2_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
// TEX2 is a masked write to TEX0, for performing CLUT swaps (palette swaps).
|
// TEX2 is a masked write to TEX0, for performing CLUT swaps (palette swaps).
|
||||||
// It only applies the following fields:
|
// It only applies the following fields:
|
||||||
|
@ -999,11 +999,11 @@ void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r)
|
||||||
// It ignores these fields (uses existing values in the context):
|
// It ignores these fields (uses existing values in the context):
|
||||||
// TFX, TCC, TH, TW, TBW, and TBP0
|
// TFX, TCC, TH, TW, TBW, and TBP0
|
||||||
|
|
||||||
constexpr uint64 mask = 0xFFFFFFE003F00000ull; // TEX2 bits
|
constexpr u64 mask = 0xFFFFFFE003F00000ull; // TEX2 bits
|
||||||
|
|
||||||
GIFRegTEX0 TEX0;
|
GIFRegTEX0 TEX0;
|
||||||
|
|
||||||
TEX0.u64 = (m_env.CTXT[i].TEX0.u64 & ~mask) | (r->u64 & mask);
|
TEX0.U64 = (m_env.CTXT[i].TEX0.U64 & ~mask) | (r->U64 & mask);
|
||||||
|
|
||||||
ApplyTEX0<i>(TEX0);
|
ApplyTEX0<i>(TEX0);
|
||||||
}
|
}
|
||||||
|
@ -1011,7 +1011,7 @@ void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("XYOFFSET_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("XYOFFSET_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
const GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff();
|
const GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff();
|
||||||
|
|
||||||
|
@ -1027,18 +1027,18 @@ void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("PRMODECONT = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("PRMODECONT = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
m_env.PRMODECONT.AC = r->PRMODECONT.AC;
|
m_env.PRMODECONT.AC = r->PRMODECONT.AC;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("PRMODE = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("PRMODE = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (!m_env.PRMODECONT.AC)
|
if (!m_env.PRMODECONT.AC)
|
||||||
{
|
{
|
||||||
if ((m_env.PRIM.u32[0] ^ r->PRMODE.u32[0]) & 0x7f8)
|
if ((m_env.PRIM.U32[0] ^ r->PRMODE.U32[0]) & 0x7f8)
|
||||||
Flush();
|
Flush();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -1046,7 +1046,7 @@ void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint32 _PRIM = m_env.PRIM.PRIM;
|
const u32 _PRIM = m_env.PRIM.PRIM;
|
||||||
m_env.PRIM = (GSVector4i)r->PRMODE;
|
m_env.PRIM = (GSVector4i)r->PRMODE;
|
||||||
m_env.PRIM.PRIM = _PRIM;
|
m_env.PRIM.PRIM = _PRIM;
|
||||||
|
|
||||||
|
@ -1055,7 +1055,7 @@ void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TEXCLUT = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("TEXCLUT = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (r->TEXCLUT != m_env.TEXCLUT)
|
if (r->TEXCLUT != m_env.TEXCLUT)
|
||||||
Flush();
|
Flush();
|
||||||
|
@ -1074,7 +1074,7 @@ void GSState::GIFRegHandlerSCANMSK(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("MIPTBP1_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("MIPTBP1_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1)
|
if (PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1)
|
||||||
Flush();
|
Flush();
|
||||||
|
@ -1085,7 +1085,7 @@ void GSState::GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("MIPTBP2_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("MIPTBP2_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
|
if (PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
|
||||||
Flush();
|
Flush();
|
||||||
|
@ -1095,7 +1095,7 @@ void GSState::GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerTEXA(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTEXA(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TEXA = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("TEXA = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
if (r->TEXA != m_env.TEXA)
|
if (r->TEXA != m_env.TEXA)
|
||||||
Flush();
|
Flush();
|
||||||
|
|
||||||
|
@ -1104,7 +1104,7 @@ void GSState::GIFRegHandlerTEXA(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("FOGCOL = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("FOGCOL = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (r->FOGCOL != m_env.FOGCOL)
|
if (r->FOGCOL != m_env.FOGCOL)
|
||||||
Flush();
|
Flush();
|
||||||
|
@ -1114,7 +1114,7 @@ void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TEXFLUSH = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("TEXFLUSH = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i>
|
template <int i>
|
||||||
|
@ -1133,8 +1133,7 @@ void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("ALPHA = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("ALPHA = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (PRIM->CTXT == i && r->ALPHA != m_env.CTXT[i].ALPHA)
|
if (PRIM->CTXT == i && r->ALPHA != m_env.CTXT[i].ALPHA)
|
||||||
Flush();
|
Flush();
|
||||||
|
|
||||||
|
@ -1142,10 +1141,10 @@ void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
// value of 4 is not allowed by the spec
|
// value of 4 is not allowed by the spec
|
||||||
// acts has 3 on real hw, so just clamp it
|
// acts has 3 on real hw, so just clamp it
|
||||||
m_env.CTXT[i].ALPHA.A = std::clamp<uint32>(r->ALPHA.A, 0, 3);
|
m_env.CTXT[i].ALPHA.A = std::clamp<u32>(r->ALPHA.A, 0, 3);
|
||||||
m_env.CTXT[i].ALPHA.B = std::clamp<uint32>(r->ALPHA.B, 0, 3);
|
m_env.CTXT[i].ALPHA.B = std::clamp<u32>(r->ALPHA.B, 0, 3);
|
||||||
m_env.CTXT[i].ALPHA.C = std::clamp<uint32>(r->ALPHA.C, 0, 3);
|
m_env.CTXT[i].ALPHA.C = std::clamp<u32>(r->ALPHA.C, 0, 3);
|
||||||
m_env.CTXT[i].ALPHA.D = std::clamp<uint32>(r->ALPHA.D, 0, 3);
|
m_env.CTXT[i].ALPHA.D = std::clamp<u32>(r->ALPHA.D, 0, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::GIFRegHandlerDIMX(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerDIMX(const GIFReg* RESTRICT r)
|
||||||
|
@ -1210,12 +1209,12 @@ void GSState::GIFRegHandlerFBA(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("FRAME_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("FRAME_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (PRIM->CTXT == i && r->FRAME != m_env.CTXT[i].FRAME)
|
if (PRIM->CTXT == i && r->FRAME != m_env.CTXT[i].FRAME)
|
||||||
Flush();
|
Flush();
|
||||||
|
|
||||||
if ((m_env.CTXT[i].FRAME.u32[0] ^ r->FRAME.u32[0]) & 0x3f3f01ff) // FBP FBW PSM
|
if ((m_env.CTXT[i].FRAME.U32[0] ^ r->FRAME.U32[0]) & 0x3f3f01ff) // FBP FBW PSM
|
||||||
{
|
{
|
||||||
m_env.CTXT[i].offset.fb = m_mem.GetOffset(r->FRAME.Block(), r->FRAME.FBW, r->FRAME.PSM);
|
m_env.CTXT[i].offset.fb = m_mem.GetOffset(r->FRAME.Block(), r->FRAME.FBW, r->FRAME.PSM);
|
||||||
m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), r->FRAME.FBW, m_env.CTXT[i].ZBUF.PSM);
|
m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), r->FRAME.FBW, m_env.CTXT[i].ZBUF.PSM);
|
||||||
|
@ -1251,7 +1250,7 @@ void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r)
|
||||||
template <int i>
|
template <int i>
|
||||||
void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("ZBUF_%d = 0x%x_%x", i, r->u32[1], r->u32[0]);
|
GL_REG("ZBUF_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
GIFRegZBUF ZBUF = r->ZBUF;
|
GIFRegZBUF ZBUF = r->ZBUF;
|
||||||
|
|
||||||
|
@ -1264,7 +1263,7 @@ void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r)
|
||||||
if (PRIM->CTXT == i && ZBUF != m_env.CTXT[i].ZBUF)
|
if (PRIM->CTXT == i && ZBUF != m_env.CTXT[i].ZBUF)
|
||||||
Flush();
|
Flush();
|
||||||
|
|
||||||
if ((m_env.CTXT[i].ZBUF.u32[0] ^ ZBUF.u32[0]) & 0x3f0001ff) // ZBP PSM
|
if ((m_env.CTXT[i].ZBUF.U32[0] ^ ZBUF.U32[0]) & 0x3f0001ff) // ZBP PSM
|
||||||
{
|
{
|
||||||
m_env.CTXT[i].offset.zb = m_mem.GetOffset(ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, ZBUF.PSM);
|
m_env.CTXT[i].offset.zb = m_mem.GetOffset(ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, ZBUF.PSM);
|
||||||
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, ZBUF);
|
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, ZBUF);
|
||||||
|
@ -1286,7 +1285,7 @@ void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r)
|
||||||
// documentation on this problem, nothing in the game to suggest
|
// documentation on this problem, nothing in the game to suggest
|
||||||
// it is broken and the code here for it was likely incorrect to begin with.
|
// it is broken and the code here for it was likely incorrect to begin with.
|
||||||
|
|
||||||
GL_REG("BITBLTBUF = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("BITBLTBUF = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (r->BITBLTBUF != m_env.BITBLTBUF)
|
if (r->BITBLTBUF != m_env.BITBLTBUF)
|
||||||
FlushWrite();
|
FlushWrite();
|
||||||
|
@ -1296,7 +1295,7 @@ void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TRXPOS = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("TRXPOS = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (r->TRXPOS != m_env.TRXPOS)
|
if (r->TRXPOS != m_env.TRXPOS)
|
||||||
FlushWrite();
|
FlushWrite();
|
||||||
|
@ -1306,8 +1305,7 @@ void GSState::GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerTRXREG(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTRXREG(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TRXREG = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("TRXREG = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
if (r->TRXREG != m_env.TRXREG)
|
if (r->TRXREG != m_env.TRXREG)
|
||||||
FlushWrite();
|
FlushWrite();
|
||||||
|
|
||||||
|
@ -1316,7 +1314,7 @@ void GSState::GIFRegHandlerTRXREG(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("TRXDIR = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("TRXDIR = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
Flush();
|
Flush();
|
||||||
|
|
||||||
|
@ -1341,14 +1339,14 @@ void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r)
|
||||||
|
|
||||||
void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r)
|
void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r)
|
||||||
{
|
{
|
||||||
GL_REG("HWREG = 0x%x_%x", r->u32[1], r->u32[0]);
|
GL_REG("HWREG = 0x%x_%x", r->U32[1], r->U32[0]);
|
||||||
|
|
||||||
// don't bother if not host -> local
|
// don't bother if not host -> local
|
||||||
// real hw ignores
|
// real hw ignores
|
||||||
if (m_env.TRXDIR.XDIR != 0)
|
if (m_env.TRXDIR.XDIR != 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Write(reinterpret_cast<const uint8*>(r), 8); // haunting ground
|
Write(reinterpret_cast<const u8*>(r), 8); // haunting ground
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::Flush()
|
void GSState::Flush()
|
||||||
|
@ -1482,7 +1480,7 @@ void GSState::FlushPrim()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::Write(const uint8* mem, int len)
|
void GSState::Write(const u8* mem, int len)
|
||||||
{
|
{
|
||||||
int w = m_env.TRXREG.RRW;
|
int w = m_env.TRXREG.RRW;
|
||||||
int h = m_env.TRXREG.RRH;
|
int h = m_env.TRXREG.RRH;
|
||||||
|
@ -1555,7 +1553,7 @@ void GSState::Write(const uint8* mem, int len)
|
||||||
m_mem.m_clut.Invalidate();
|
m_mem.m_clut.Invalidate();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::InitReadFIFO(uint8* mem, int len)
|
void GSState::InitReadFIFO(u8* mem, int len)
|
||||||
{
|
{
|
||||||
if (len <= 0)
|
if (len <= 0)
|
||||||
return;
|
return;
|
||||||
|
@ -1565,7 +1563,7 @@ void GSState::InitReadFIFO(uint8* mem, int len)
|
||||||
const int w = m_env.TRXREG.RRW;
|
const int w = m_env.TRXREG.RRW;
|
||||||
const int h = m_env.TRXREG.RRH;
|
const int h = m_env.TRXREG.RRH;
|
||||||
|
|
||||||
const uint16 bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp;
|
const u16 bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp;
|
||||||
|
|
||||||
if (!m_tr.Update(w, h, bpp, len))
|
if (!m_tr.Update(w, h, bpp, len))
|
||||||
return;
|
return;
|
||||||
|
@ -1575,7 +1573,7 @@ void GSState::InitReadFIFO(uint8* mem, int len)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: called from outside MTGS
|
// NOTE: called from outside MTGS
|
||||||
void GSState::Read(uint8* mem, int len)
|
void GSState::Read(u8* mem, int len)
|
||||||
{
|
{
|
||||||
if (len <= 0)
|
if (len <= 0)
|
||||||
return;
|
return;
|
||||||
|
@ -1587,7 +1585,7 @@ void GSState::Read(uint8* mem, int len)
|
||||||
|
|
||||||
const GSVector4i r(sx, sy, sx + w, sy + h);
|
const GSVector4i r(sx, sy, sx + w, sy + h);
|
||||||
|
|
||||||
const uint16 bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp;
|
const u16 bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp;
|
||||||
|
|
||||||
if (!m_tr.Update(w, h, bpp, len))
|
if (!m_tr.Update(w, h, bpp, len))
|
||||||
return;
|
return;
|
||||||
|
@ -1710,21 +1708,21 @@ void GSState::Move()
|
||||||
{
|
{
|
||||||
if (spsm.trbpp == 32)
|
if (spsm.trbpp == 32)
|
||||||
{
|
{
|
||||||
copyFast(m_mem.m_vm32, dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](uint32* d, uint32* s)
|
copyFast(m_mem.m_vm32, dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](u32* d, u32* s)
|
||||||
{
|
{
|
||||||
*d = *s;
|
*d = *s;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else if (spsm.trbpp == 24)
|
else if (spsm.trbpp == 24)
|
||||||
{
|
{
|
||||||
copyFast(m_mem.m_vm32, dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](uint32* d, uint32* s)
|
copyFast(m_mem.m_vm32, dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](u32* d, u32* s)
|
||||||
{
|
{
|
||||||
*d = (*d & 0xff000000) | (*s & 0x00ffffff);
|
*d = (*d & 0xff000000) | (*s & 0x00ffffff);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else // if(spsm.trbpp == 16)
|
else // if(spsm.trbpp == 16)
|
||||||
{
|
{
|
||||||
copyFast(m_mem.m_vm16, dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [](uint16* d, uint16* s)
|
copyFast(m_mem.m_vm16, dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [](u16* d, u16* s)
|
||||||
{
|
{
|
||||||
*d = *s;
|
*d = *s;
|
||||||
});
|
});
|
||||||
|
@ -1732,28 +1730,28 @@ void GSState::Move()
|
||||||
}
|
}
|
||||||
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
|
||||||
{
|
{
|
||||||
copyFast(m_mem.m_vm8, GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT8), [](uint8* d, uint8* s)
|
copyFast(m_mem.m_vm8, GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT8), [](u8* d, u8* s)
|
||||||
{
|
{
|
||||||
*d = *s;
|
*d = *s;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
|
||||||
{
|
{
|
||||||
copy(GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT4), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT4), [&](uint32 doff, uint32 soff)
|
copy(GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT4), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT4), [&](u32 doff, u32 soff)
|
||||||
{
|
{
|
||||||
m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
|
m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
copy(dpo, spo, [&](uint32 doff, uint32 soff)
|
copy(dpo, spo, [&](u32 doff, u32 soff)
|
||||||
{
|
{
|
||||||
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
|
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::SoftReset(uint32 mask)
|
void GSState::SoftReset(u32 mask)
|
||||||
{
|
{
|
||||||
if (mask & 1)
|
if (mask & 1)
|
||||||
{
|
{
|
||||||
|
@ -1772,7 +1770,7 @@ void GSState::SoftReset(uint32 mask)
|
||||||
m_q = 1.0f;
|
m_q = 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::ReadFIFO(uint8* mem, int size)
|
void GSState::ReadFIFO(u8* mem, int size)
|
||||||
{
|
{
|
||||||
GSPerfMonAutoTimer pmat(&m_perfmon);
|
GSPerfMonAutoTimer pmat(&m_perfmon);
|
||||||
|
|
||||||
|
@ -1786,17 +1784,17 @@ void GSState::ReadFIFO(uint8* mem, int size)
|
||||||
m_dump->ReadFIFO(size);
|
m_dump->ReadFIFO(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void GSState::Transfer<0>(const uint8* mem, uint32 size);
|
template void GSState::Transfer<0>(const u8* mem, u32 size);
|
||||||
template void GSState::Transfer<1>(const uint8* mem, uint32 size);
|
template void GSState::Transfer<1>(const u8* mem, u32 size);
|
||||||
template void GSState::Transfer<2>(const uint8* mem, uint32 size);
|
template void GSState::Transfer<2>(const u8* mem, u32 size);
|
||||||
template void GSState::Transfer<3>(const uint8* mem, uint32 size);
|
template void GSState::Transfer<3>(const u8* mem, u32 size);
|
||||||
|
|
||||||
template <int index>
|
template <int index>
|
||||||
void GSState::Transfer(const uint8* mem, uint32 size)
|
void GSState::Transfer(const u8* mem, u32 size)
|
||||||
{
|
{
|
||||||
GSPerfMonAutoTimer pmat(&m_perfmon);
|
GSPerfMonAutoTimer pmat(&m_perfmon);
|
||||||
|
|
||||||
const uint8* start = mem;
|
const u8* start = mem;
|
||||||
|
|
||||||
GIFPath& path = m_path[index];
|
GIFPath& path = m_path[index];
|
||||||
|
|
||||||
|
@ -1823,7 +1821,7 @@ void GSState::Transfer(const uint8* mem, uint32 size)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint32 total;
|
u32 total;
|
||||||
|
|
||||||
switch (path.tag.FLG)
|
switch (path.tag.FLG)
|
||||||
{
|
{
|
||||||
|
@ -1852,7 +1850,7 @@ void GSState::Transfer(const uint8* mem, uint32 size)
|
||||||
{
|
{
|
||||||
case GIFPath::TYPE_UNKNOWN:
|
case GIFPath::TYPE_UNKNOWN:
|
||||||
{
|
{
|
||||||
uint32 reg = 0;
|
u32 reg = 0;
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -2058,9 +2056,9 @@ int GSState::Freeze(freezeData* fd, bool sizeonly)
|
||||||
path.tag.NLOOP = path.nloop;
|
path.tag.NLOOP = path.nloop;
|
||||||
path.tag.REGS = 0;
|
path.tag.REGS = 0;
|
||||||
|
|
||||||
for (size_t j = 0; j < std::size(path.regs.u8); j++)
|
for (size_t j = 0; j < std::size(path.regs.U8); j++)
|
||||||
{
|
{
|
||||||
path.tag.u32[2 + (j >> 3)] |= path.regs.u8[j] << ((j & 7) << 2);
|
path.tag.U32[2 + (j >> 3)] |= path.regs.U8[j] << ((j & 7) << 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
WriteState(data, &path.tag);
|
WriteState(data, &path.tag);
|
||||||
|
@ -2145,7 +2143,7 @@ int GSState::Defrost(const freezeData* fd)
|
||||||
m_env.CTXT[i].XYOFFSET.OFY &= 0xffff;
|
m_env.CTXT[i].XYOFFSET.OFY &= 0xffff;
|
||||||
|
|
||||||
if (version <= 4)
|
if (version <= 4)
|
||||||
data += sizeof(uint32) * 7; // skip
|
data += sizeof(u32) * 7; // skip
|
||||||
}
|
}
|
||||||
|
|
||||||
ReadState(&m_v.RGBAQ, data);
|
ReadState(&m_v.RGBAQ, data);
|
||||||
|
@ -2196,7 +2194,7 @@ int GSState::Defrost(const freezeData* fd)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSState::SetGameCRC(uint32 crc, int options)
|
void GSState::SetGameCRC(u32 crc, int options)
|
||||||
{
|
{
|
||||||
m_crc = crc;
|
m_crc = crc;
|
||||||
m_options = options;
|
m_options = options;
|
||||||
|
@ -2229,7 +2227,7 @@ void GSState::UpdateVertexKick()
|
||||||
if (m_frameskip)
|
if (m_frameskip)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
const uint32 prim = PRIM->PRIM;
|
const u32 prim = PRIM->PRIM;
|
||||||
|
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = m_fpGIFPackedRegHandlerXYZ[prim][0];
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = m_fpGIFPackedRegHandlerXYZ[prim][0];
|
||||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = m_fpGIFPackedRegHandlerXYZ[prim][1];
|
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = m_fpGIFPackedRegHandlerXYZ[prim][1];
|
||||||
|
@ -2250,12 +2248,12 @@ void GSState::GrowVertexBuffer()
|
||||||
const size_t maxcount = std::max<size_t>(m_vertex.maxcount * 3 / 2, 10000);
|
const size_t maxcount = std::max<size_t>(m_vertex.maxcount * 3 / 2, 10000);
|
||||||
|
|
||||||
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 32);
|
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 32);
|
||||||
uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 32); // worst case is slightly less than vertex number * 3
|
u32* index = (u32*)_aligned_malloc(sizeof(u32) * maxcount * 3, 32); // worst case is slightly less than vertex number * 3
|
||||||
|
|
||||||
if (vertex == NULL || index == NULL)
|
if (vertex == NULL || index == NULL)
|
||||||
{
|
{
|
||||||
const size_t vert_byte_count = sizeof(GSVertex) * maxcount;
|
const size_t vert_byte_count = sizeof(GSVertex) * maxcount;
|
||||||
const size_t idx_byte_count = sizeof(uint32) * maxcount * 3;
|
const size_t idx_byte_count = sizeof(u32) * maxcount * 3;
|
||||||
|
|
||||||
Console.Error("GS: failed to allocate %zu bytes for verticles and %zu for indices.",
|
Console.Error("GS: failed to allocate %zu bytes for verticles and %zu for indices.",
|
||||||
vert_byte_count, idx_byte_count);
|
vert_byte_count, idx_byte_count);
|
||||||
|
@ -2272,7 +2270,7 @@ void GSState::GrowVertexBuffer()
|
||||||
|
|
||||||
if (m_index.buff != NULL)
|
if (m_index.buff != NULL)
|
||||||
{
|
{
|
||||||
memcpy(index, m_index.buff, sizeof(uint32) * m_index.tail);
|
memcpy(index, m_index.buff, sizeof(u32) * m_index.tail);
|
||||||
|
|
||||||
_aligned_free(m_index.buff);
|
_aligned_free(m_index.buff);
|
||||||
}
|
}
|
||||||
|
@ -2282,8 +2280,8 @@ void GSState::GrowVertexBuffer()
|
||||||
m_index.buff = index;
|
m_index.buff = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <uint32 prim, bool auto_flush>
|
template <u32 prim, bool auto_flush>
|
||||||
__forceinline void GSState::VertexKick(uint32 skip)
|
__forceinline void GSState::VertexKick(u32 skip)
|
||||||
{
|
{
|
||||||
ASSERT(m_vertex.tail < m_vertex.maxcount + 3);
|
ASSERT(m_vertex.tail < m_vertex.maxcount + 3);
|
||||||
|
|
||||||
|
@ -2441,7 +2439,7 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
||||||
if (tail >= m_vertex.maxcount)
|
if (tail >= m_vertex.maxcount)
|
||||||
GrowVertexBuffer();
|
GrowVertexBuffer();
|
||||||
|
|
||||||
uint32* RESTRICT buff = &m_index.buff[m_index.tail];
|
u32* RESTRICT buff = &m_index.buff[m_index.tail];
|
||||||
|
|
||||||
switch (prim)
|
switch (prim)
|
||||||
{
|
{
|
||||||
|
@ -2726,7 +2724,7 @@ void GSState::GetAlphaMinMax()
|
||||||
m_vt.m_alpha.valid = true;
|
m_vt.m_alpha.valid = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
|
bool GSState::TryAlphaTest(u32& fm, u32& zm)
|
||||||
{
|
{
|
||||||
// Shortcut for the easy case
|
// Shortcut for the easy case
|
||||||
if (m_context->TEST.ATST == ATST_ALWAYS)
|
if (m_context->TEST.ATST == ATST_ALWAYS)
|
||||||
|
@ -2901,7 +2899,7 @@ bool GSState::IsMipMapActive()
|
||||||
return m_mipmap && IsMipMapDraw();
|
return m_mipmap && IsMipMapDraw();
|
||||||
}
|
}
|
||||||
|
|
||||||
GIFRegTEX0 GSState::GetTex0Layer(uint32 lod)
|
GIFRegTEX0 GSState::GetTex0Layer(u32 lod)
|
||||||
{
|
{
|
||||||
// Shortcut
|
// Shortcut
|
||||||
if (lod == 0)
|
if (lod == 0)
|
||||||
|
@ -2962,7 +2960,7 @@ GSState::GSTransferBuffer::GSTransferBuffer()
|
||||||
start = end = total = 0;
|
start = end = total = 0;
|
||||||
|
|
||||||
constexpr size_t alloc_size = 1024 * 1024 * 4;
|
constexpr size_t alloc_size = 1024 * 1024 * 4;
|
||||||
buff = reinterpret_cast<uint8*>(_aligned_malloc(alloc_size, 32));
|
buff = reinterpret_cast<u8*>(_aligned_malloc(alloc_size, 32));
|
||||||
}
|
}
|
||||||
|
|
||||||
GSState::GSTransferBuffer::~GSTransferBuffer()
|
GSState::GSTransferBuffer::~GSTransferBuffer()
|
||||||
|
|
|
@ -31,12 +31,12 @@
|
||||||
|
|
||||||
struct GSFrameInfo
|
struct GSFrameInfo
|
||||||
{
|
{
|
||||||
uint32 FBP;
|
u32 FBP;
|
||||||
uint32 FPSM;
|
u32 FPSM;
|
||||||
uint32 FBMSK;
|
u32 FBMSK;
|
||||||
uint32 TBP0;
|
u32 TBP0;
|
||||||
uint32 TPSM;
|
u32 TPSM;
|
||||||
uint32 TZTST;
|
u32 TZTST;
|
||||||
bool TME;
|
bool TME;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -56,8 +56,8 @@ class GSState : public GSAlignedClass<32>
|
||||||
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
|
void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r);
|
||||||
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
|
void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r);
|
||||||
void GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r);
|
void GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r);
|
||||||
template<uint32 prim, uint32 adc, bool auto_flush> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
|
template<u32 prim, u32 adc, bool auto_flush> void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r);
|
||||||
template<uint32 prim, uint32 adc, bool auto_flush> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
|
template<u32 prim, u32 adc, bool auto_flush> void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r);
|
||||||
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
|
void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r);
|
||||||
void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r);
|
void GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r);
|
||||||
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r);
|
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r);
|
||||||
|
@ -67,18 +67,18 @@ class GSState : public GSAlignedClass<32>
|
||||||
GIFRegHandler m_fpGIFRegHandlers[256];
|
GIFRegHandler m_fpGIFRegHandlers[256];
|
||||||
GIFRegHandler m_fpGIFRegHandlerXYZ[8][4];
|
GIFRegHandler m_fpGIFRegHandlerXYZ[8][4];
|
||||||
|
|
||||||
typedef void (GSState::*GIFPackedRegHandlerC)(const GIFPackedReg* RESTRICT r, uint32 size);
|
typedef void (GSState::*GIFPackedRegHandlerC)(const GIFPackedReg* RESTRICT r, u32 size);
|
||||||
|
|
||||||
GIFPackedRegHandlerC m_fpGIFPackedRegHandlersC[2];
|
GIFPackedRegHandlerC m_fpGIFPackedRegHandlersC[2];
|
||||||
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8];
|
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8];
|
||||||
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZ2[8];
|
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZ2[8];
|
||||||
|
|
||||||
template<uint32 prim, bool auto_flush> void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size);
|
template<u32 prim, bool auto_flush> void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size);
|
||||||
template<uint32 prim, bool auto_flush> void GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, uint32 size);
|
template<u32 prim, bool auto_flush> void GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size);
|
||||||
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size);
|
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, u32 size);
|
||||||
|
|
||||||
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
|
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
|
||||||
void ApplyPRIM(uint32 prim);
|
void ApplyPRIM(u32 prim);
|
||||||
|
|
||||||
void GIFRegHandlerNull(const GIFReg* RESTRICT r);
|
void GIFRegHandlerNull(const GIFReg* RESTRICT r);
|
||||||
void GIFRegHandlerPRIM(const GIFReg* RESTRICT r);
|
void GIFRegHandlerPRIM(const GIFReg* RESTRICT r);
|
||||||
|
@ -86,8 +86,8 @@ class GSState : public GSAlignedClass<32>
|
||||||
void GIFRegHandlerST(const GIFReg* RESTRICT r);
|
void GIFRegHandlerST(const GIFReg* RESTRICT r);
|
||||||
void GIFRegHandlerUV(const GIFReg* RESTRICT r);
|
void GIFRegHandlerUV(const GIFReg* RESTRICT r);
|
||||||
void GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r);
|
void GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r);
|
||||||
template<uint32 prim, uint32 adc, bool auto_flush> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
|
template<u32 prim, u32 adc, bool auto_flush> void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r);
|
||||||
template<uint32 prim, uint32 adc, bool auto_flush> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
|
template<u32 prim, u32 adc, bool auto_flush> void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r);
|
||||||
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
|
template<int i> void GIFRegHandlerTEX0(const GIFReg* RESTRICT r);
|
||||||
template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r);
|
template<int i> void GIFRegHandlerCLAMP(const GIFReg* RESTRICT r);
|
||||||
void GIFRegHandlerFOG(const GIFReg* RESTRICT r);
|
void GIFRegHandlerFOG(const GIFReg* RESTRICT r);
|
||||||
|
@ -128,7 +128,7 @@ class GSState : public GSAlignedClass<32>
|
||||||
int x, y;
|
int x, y;
|
||||||
int start, end, total;
|
int start, end, total;
|
||||||
bool overflow;
|
bool overflow;
|
||||||
uint8* buff;
|
u8* buff;
|
||||||
GIFRegBITBLTBUF m_blit;
|
GIFRegBITBLTBUF m_blit;
|
||||||
|
|
||||||
GSTransferBuffer();
|
GSTransferBuffer();
|
||||||
|
@ -163,12 +163,12 @@ protected:
|
||||||
GSVertex* buff;
|
GSVertex* buff;
|
||||||
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
|
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
|
||||||
size_t xy_tail;
|
size_t xy_tail;
|
||||||
uint64 xy[4];
|
u64 xy[4];
|
||||||
} m_vertex;
|
} m_vertex;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32* buff;
|
u32* buff;
|
||||||
size_t tail;
|
size_t tail;
|
||||||
} m_index;
|
} m_index;
|
||||||
|
|
||||||
|
@ -179,8 +179,8 @@ protected:
|
||||||
|
|
||||||
void GrowVertexBuffer();
|
void GrowVertexBuffer();
|
||||||
|
|
||||||
template <uint32 prim, bool auto_flush>
|
template <u32 prim, bool auto_flush>
|
||||||
void VertexKick(uint32 skip);
|
void VertexKick(u32 skip);
|
||||||
|
|
||||||
// following functions need m_vt to be initialized
|
// following functions need m_vt to be initialized
|
||||||
|
|
||||||
|
@ -188,11 +188,11 @@ protected:
|
||||||
|
|
||||||
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
|
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
|
||||||
void GetAlphaMinMax();
|
void GetAlphaMinMax();
|
||||||
bool TryAlphaTest(uint32& fm, uint32& zm);
|
bool TryAlphaTest(u32& fm, u32& zm);
|
||||||
bool IsOpaque();
|
bool IsOpaque();
|
||||||
bool IsMipMapDraw();
|
bool IsMipMapDraw();
|
||||||
bool IsMipMapActive();
|
bool IsMipMapActive();
|
||||||
GIFRegTEX0 GetTex0Layer(uint32 lod);
|
GIFRegTEX0 GetTex0Layer(u32 lod);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GIFPath m_path[4];
|
GIFPath m_path[4];
|
||||||
|
@ -202,7 +202,7 @@ public:
|
||||||
GSDrawingEnvironment m_env;
|
GSDrawingEnvironment m_env;
|
||||||
GSDrawingContext* m_context;
|
GSDrawingContext* m_context;
|
||||||
GSPerfMon m_perfmon;
|
GSPerfMon m_perfmon;
|
||||||
uint32 m_crc;
|
u32 m_crc;
|
||||||
CRC::Game m_game;
|
CRC::Game m_game;
|
||||||
std::unique_ptr<GSDumpBase> m_dump;
|
std::unique_ptr<GSDumpBase> m_dump;
|
||||||
int m_options;
|
int m_options;
|
||||||
|
@ -248,17 +248,17 @@ public:
|
||||||
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {}
|
virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false) {}
|
||||||
|
|
||||||
void Move();
|
void Move();
|
||||||
void Write(const uint8* mem, int len);
|
void Write(const u8* mem, int len);
|
||||||
void Read(uint8* mem, int len);
|
void Read(u8* mem, int len);
|
||||||
void InitReadFIFO(uint8* mem, int len);
|
void InitReadFIFO(u8* mem, int len);
|
||||||
|
|
||||||
void SoftReset(uint32 mask);
|
void SoftReset(u32 mask);
|
||||||
void WriteCSR(uint32 csr) { m_regs->CSR.u32[1] = csr; }
|
void WriteCSR(u32 csr) { m_regs->CSR.U32[1] = csr; }
|
||||||
void ReadFIFO(uint8* mem, int size);
|
void ReadFIFO(u8* mem, int size);
|
||||||
template<int index> void Transfer(const uint8* mem, uint32 size);
|
template<int index> void Transfer(const u8* mem, u32 size);
|
||||||
int Freeze(freezeData* fd, bool sizeonly);
|
int Freeze(freezeData* fd, bool sizeonly);
|
||||||
int Defrost(const freezeData* fd);
|
int Defrost(const freezeData* fd);
|
||||||
virtual void SetGameCRC(uint32 crc, int options);
|
virtual void SetGameCRC(u32 crc, int options);
|
||||||
void SetFrameSkip(int skip);
|
void SetFrameSkip(int skip);
|
||||||
void SetRegsMem(uint8* basemem);
|
void SetRegsMem(u8* basemem);
|
||||||
};
|
};
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "GS_types.h"
|
#include "GS_types.h"
|
||||||
|
|
||||||
template <int Width, int Height>
|
template <int Width, int Height>
|
||||||
static constexpr GSSizedBlockSwizzleTable<Height, Width> makeSwizzleTable(const uint8 (&arr)[Height][Width]) {
|
static constexpr GSSizedBlockSwizzleTable<Height, Width> makeSwizzleTable(const u8 (&arr)[Height][Width]) {
|
||||||
GSSizedBlockSwizzleTable<Height, Width> table = {};
|
GSSizedBlockSwizzleTable<Height, Width> table = {};
|
||||||
for (int y = 0; y < 8; y++) {
|
for (int y = 0; y < 8; y++) {
|
||||||
for (int x = 0; x < 8; x++) {
|
for (int x = 0; x < 8; x++) {
|
||||||
|
@ -30,7 +30,7 @@ static constexpr GSSizedBlockSwizzleTable<Height, Width> makeSwizzleTable(const
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr uint8 _blockTable32[4][8] =
|
static constexpr u8 _blockTable32[4][8] =
|
||||||
{
|
{
|
||||||
{ 0, 1, 4, 5, 16, 17, 20, 21},
|
{ 0, 1, 4, 5, 16, 17, 20, 21},
|
||||||
{ 2, 3, 6, 7, 18, 19, 22, 23},
|
{ 2, 3, 6, 7, 18, 19, 22, 23},
|
||||||
|
@ -38,7 +38,7 @@ static constexpr uint8 _blockTable32[4][8] =
|
||||||
{ 10, 11, 14, 15, 26, 27, 30, 31}
|
{ 10, 11, 14, 15, 26, 27, 30, 31}
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint8 _blockTable32Z[4][8] =
|
static constexpr u8 _blockTable32Z[4][8] =
|
||||||
{
|
{
|
||||||
{ 24, 25, 28, 29, 8, 9, 12, 13},
|
{ 24, 25, 28, 29, 8, 9, 12, 13},
|
||||||
{ 26, 27, 30, 31, 10, 11, 14, 15},
|
{ 26, 27, 30, 31, 10, 11, 14, 15},
|
||||||
|
@ -46,7 +46,7 @@ static constexpr uint8 _blockTable32Z[4][8] =
|
||||||
{ 18, 19, 22, 23, 2, 3, 6, 7}
|
{ 18, 19, 22, 23, 2, 3, 6, 7}
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint8 _blockTable16[8][4] =
|
static constexpr u8 _blockTable16[8][4] =
|
||||||
{
|
{
|
||||||
{ 0, 2, 8, 10 },
|
{ 0, 2, 8, 10 },
|
||||||
{ 1, 3, 9, 11 },
|
{ 1, 3, 9, 11 },
|
||||||
|
@ -58,7 +58,7 @@ static constexpr uint8 _blockTable16[8][4] =
|
||||||
{ 21, 23, 29, 31 }
|
{ 21, 23, 29, 31 }
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint8 _blockTable16S[8][4] =
|
static constexpr u8 _blockTable16S[8][4] =
|
||||||
{
|
{
|
||||||
{ 0, 2, 16, 18 },
|
{ 0, 2, 16, 18 },
|
||||||
{ 1, 3, 17, 19 },
|
{ 1, 3, 17, 19 },
|
||||||
|
@ -70,7 +70,7 @@ static constexpr uint8 _blockTable16S[8][4] =
|
||||||
{ 13, 15, 29, 31 }
|
{ 13, 15, 29, 31 }
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint8 _blockTable16Z[8][4] =
|
static constexpr u8 _blockTable16Z[8][4] =
|
||||||
{
|
{
|
||||||
{ 24, 26, 16, 18 },
|
{ 24, 26, 16, 18 },
|
||||||
{ 25, 27, 17, 19 },
|
{ 25, 27, 17, 19 },
|
||||||
|
@ -82,7 +82,7 @@ static constexpr uint8 _blockTable16Z[8][4] =
|
||||||
{ 13, 15, 5, 7 }
|
{ 13, 15, 5, 7 }
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint8 _blockTable16SZ[8][4] =
|
static constexpr u8 _blockTable16SZ[8][4] =
|
||||||
{
|
{
|
||||||
{ 24, 26, 8, 10 },
|
{ 24, 26, 8, 10 },
|
||||||
{ 25, 27, 9, 11 },
|
{ 25, 27, 9, 11 },
|
||||||
|
@ -94,7 +94,7 @@ static constexpr uint8 _blockTable16SZ[8][4] =
|
||||||
{ 21, 23, 5, 7 }
|
{ 21, 23, 5, 7 }
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint8 _blockTable8[4][8] =
|
static constexpr u8 _blockTable8[4][8] =
|
||||||
{
|
{
|
||||||
{ 0, 1, 4, 5, 16, 17, 20, 21},
|
{ 0, 1, 4, 5, 16, 17, 20, 21},
|
||||||
{ 2, 3, 6, 7, 18, 19, 22, 23},
|
{ 2, 3, 6, 7, 18, 19, 22, 23},
|
||||||
|
@ -102,7 +102,7 @@ static constexpr uint8 _blockTable8[4][8] =
|
||||||
{ 10, 11, 14, 15, 26, 27, 30, 31}
|
{ 10, 11, 14, 15, 26, 27, 30, 31}
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint8 _blockTable4[8][4] =
|
static constexpr u8 _blockTable4[8][4] =
|
||||||
{
|
{
|
||||||
{ 0, 2, 8, 10 },
|
{ 0, 2, 8, 10 },
|
||||||
{ 1, 3, 9, 11 },
|
{ 1, 3, 9, 11 },
|
||||||
|
@ -123,7 +123,7 @@ constexpr GSSizedBlockSwizzleTable<8, 4> blockTable16SZ = makeSwizzleTable(_bloc
|
||||||
constexpr GSSizedBlockSwizzleTable<4, 8> blockTable8 = makeSwizzleTable(_blockTable8);
|
constexpr GSSizedBlockSwizzleTable<4, 8> blockTable8 = makeSwizzleTable(_blockTable8);
|
||||||
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable4 = makeSwizzleTable(_blockTable4);
|
constexpr GSSizedBlockSwizzleTable<8, 4> blockTable4 = makeSwizzleTable(_blockTable4);
|
||||||
|
|
||||||
constexpr uint8 columnTable32[8][8] =
|
constexpr u8 columnTable32[8][8] =
|
||||||
{
|
{
|
||||||
{ 0, 1, 4, 5, 8, 9, 12, 13 },
|
{ 0, 1, 4, 5, 8, 9, 12, 13 },
|
||||||
{ 2, 3, 6, 7, 10, 11, 14, 15 },
|
{ 2, 3, 6, 7, 10, 11, 14, 15 },
|
||||||
|
@ -135,7 +135,7 @@ constexpr uint8 columnTable32[8][8] =
|
||||||
{ 50, 51, 54, 55, 58, 59, 62, 63 },
|
{ 50, 51, 54, 55, 58, 59, 62, 63 },
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr uint8 columnTable16[8][16] =
|
constexpr u8 columnTable16[8][16] =
|
||||||
{
|
{
|
||||||
{ 0, 2, 8, 10, 16, 18, 24, 26,
|
{ 0, 2, 8, 10, 16, 18, 24, 26,
|
||||||
1, 3, 9, 11, 17, 19, 25, 27 },
|
1, 3, 9, 11, 17, 19, 25, 27 },
|
||||||
|
@ -155,7 +155,7 @@ constexpr uint8 columnTable16[8][16] =
|
||||||
101, 103, 109, 111, 117, 119, 125, 127 },
|
101, 103, 109, 111, 117, 119, 125, 127 },
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr uint8 columnTable8[16][16] =
|
constexpr u8 columnTable8[16][16] =
|
||||||
{
|
{
|
||||||
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
|
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
|
||||||
2, 6, 18, 22, 34, 38, 50, 54 },
|
2, 6, 18, 22, 34, 38, 50, 54 },
|
||||||
|
@ -191,7 +191,7 @@ constexpr uint8 columnTable8[16][16] =
|
||||||
203, 207, 219, 223, 235, 239, 251, 255 },
|
203, 207, 219, 223, 235, 239, 251, 255 },
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr uint16 columnTable4[16][32] =
|
constexpr u16 columnTable4[16][32] =
|
||||||
{
|
{
|
||||||
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
|
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
|
||||||
2, 10, 34, 42, 66, 74, 98, 106,
|
2, 10, 34, 42, 66, 74, 98, 106,
|
||||||
|
@ -259,7 +259,7 @@ constexpr uint16 columnTable4[16][32] =
|
||||||
407, 415, 439, 447, 471, 479, 503, 511 },
|
407, 415, 439, 447, 471, 479, 503, 511 },
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr uint8 clutTableT32I8[128] =
|
constexpr u8 clutTableT32I8[128] =
|
||||||
{
|
{
|
||||||
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
|
0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
|
||||||
64, 65, 68, 69, 72, 73, 76, 77, 66, 67, 70, 71, 74, 75, 78, 79,
|
64, 65, 68, 69, 72, 73, 76, 77, 66, 67, 70, 71, 74, 75, 78, 79,
|
||||||
|
@ -271,13 +271,13 @@ constexpr uint8 clutTableT32I8[128] =
|
||||||
112, 113, 116, 117, 120, 121, 124, 125, 114, 115, 118, 119, 122, 123, 126, 127
|
112, 113, 116, 117, 120, 121, 124, 125, 114, 115, 118, 119, 122, 123, 126, 127
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr uint8 clutTableT32I4[16] =
|
constexpr u8 clutTableT32I4[16] =
|
||||||
{
|
{
|
||||||
0, 1, 4, 5, 8, 9, 12, 13,
|
0, 1, 4, 5, 8, 9, 12, 13,
|
||||||
2, 3, 6, 7, 10, 11, 14, 15
|
2, 3, 6, 7, 10, 11, 14, 15
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr uint8 clutTableT16I8[32] =
|
constexpr u8 clutTableT16I8[32] =
|
||||||
{
|
{
|
||||||
0, 2, 8, 10, 16, 18, 24, 26,
|
0, 2, 8, 10, 16, 18, 24, 26,
|
||||||
4, 6, 12, 14, 20, 22, 28, 30,
|
4, 6, 12, 14, 20, 22, 28, 30,
|
||||||
|
@ -285,14 +285,14 @@ constexpr uint8 clutTableT16I8[32] =
|
||||||
5, 7, 13, 15, 21, 23, 29, 31
|
5, 7, 13, 15, 21, 23, 29, 31
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr uint8 clutTableT16I4[16] =
|
constexpr u8 clutTableT16I4[16] =
|
||||||
{
|
{
|
||||||
0, 2, 8, 10, 16, 18, 24, 26,
|
0, 2, 8, 10, 16, 18, 24, 26,
|
||||||
4, 6, 12, 14, 20, 22, 28, 30
|
4, 6, 12, 14, 20, 22, 28, 30
|
||||||
};
|
};
|
||||||
|
|
||||||
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
||||||
constexpr int pxOffset(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int x, int y)
|
constexpr int pxOffset(const u8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int x, int y)
|
||||||
{
|
{
|
||||||
int blockSize = ColHeight * ColWidth;
|
int blockSize = ColHeight * ColWidth;
|
||||||
int pageSize = blockSize * BlocksHigh * BlocksWide;
|
int pageSize = blockSize * BlocksHigh * BlocksWide;
|
||||||
|
@ -305,7 +305,7 @@ constexpr int pxOffset(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
||||||
constexpr GSPixelColOffsetTable<BlocksHigh * ColHeight> makeColOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth])
|
constexpr GSPixelColOffsetTable<BlocksHigh * ColHeight> makeColOffsetTable(const u8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth])
|
||||||
{
|
{
|
||||||
constexpr int size = BlocksHigh * ColHeight;
|
constexpr int size = BlocksHigh * ColHeight;
|
||||||
GSPixelColOffsetTable<size> table = {};
|
GSPixelColOffsetTable<size> table = {};
|
||||||
|
@ -317,7 +317,7 @@ constexpr GSPixelColOffsetTable<BlocksHigh * ColHeight> makeColOffsetTable(const
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
template <int BlocksHigh, int BlocksWide, int ColHeight, int ColWidth, typename Col>
|
||||||
constexpr GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> makeRowOffsetTable(const uint8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int y)
|
constexpr GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> makeRowOffsetTable(const u8 (&blockTable)[BlocksHigh][BlocksWide], Col (&colTable)[ColHeight][ColWidth], int y)
|
||||||
{
|
{
|
||||||
int base = pxOffset(blockTable, colTable, 0, y);
|
int base = pxOffset(blockTable, colTable, 0, y);
|
||||||
GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> table = {};
|
GSSizedPixelRowOffsetTable<BlocksWide * ColWidth> table = {};
|
||||||
|
|
|
@ -21,9 +21,9 @@
|
||||||
struct alignas(64) GSBlockSwizzleTable
|
struct alignas(64) GSBlockSwizzleTable
|
||||||
{
|
{
|
||||||
// Some swizzles are 4x8 and others are 8x4. An 8x8 table can store either at the cost of 2x size
|
// Some swizzles are 4x8 and others are 8x4. An 8x8 table can store either at the cost of 2x size
|
||||||
uint8 value[8][8];
|
u8 value[8][8];
|
||||||
|
|
||||||
constexpr uint8 lookup(int x, int y) const
|
constexpr u8 lookup(int x, int y) const
|
||||||
{
|
{
|
||||||
return value[y & 7][x & 7];
|
return value[y & 7][x & 7];
|
||||||
}
|
}
|
||||||
|
@ -111,14 +111,14 @@ extern const GSSizedBlockSwizzleTable<8, 4> blockTable16Z;
|
||||||
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16SZ;
|
extern const GSSizedBlockSwizzleTable<8, 4> blockTable16SZ;
|
||||||
extern const GSSizedBlockSwizzleTable<4, 8> blockTable8;
|
extern const GSSizedBlockSwizzleTable<4, 8> blockTable8;
|
||||||
extern const GSSizedBlockSwizzleTable<8, 4> blockTable4;
|
extern const GSSizedBlockSwizzleTable<8, 4> blockTable4;
|
||||||
extern const uint8 columnTable32[8][8];
|
extern const u8 columnTable32[8][8];
|
||||||
extern const uint8 columnTable16[8][16];
|
extern const u8 columnTable16[8][16];
|
||||||
extern const uint8 columnTable8[16][16];
|
extern const u8 columnTable8[16][16];
|
||||||
extern const uint16 columnTable4[16][32];
|
extern const u16 columnTable4[16][32];
|
||||||
extern const uint8 clutTableT32I8[128];
|
extern const u8 clutTableT32I8[128];
|
||||||
extern const uint8 clutTableT32I4[16];
|
extern const u8 clutTableT32I4[16];
|
||||||
extern const uint8 clutTableT16I8[32];
|
extern const u8 clutTableT16I8[32];
|
||||||
extern const uint8 clutTableT16I4[16];
|
extern const u8 clutTableT16I4[16];
|
||||||
extern const GSPixelColOffsetTable< 32> pixelColOffset32;
|
extern const GSPixelColOffsetTable< 32> pixelColOffset32;
|
||||||
extern const GSPixelColOffsetTable< 32> pixelColOffset32Z;
|
extern const GSPixelColOffsetTable< 32> pixelColOffset32Z;
|
||||||
extern const GSPixelColOffsetTable< 64> pixelColOffset16;
|
extern const GSPixelColOffsetTable< 64> pixelColOffset16;
|
||||||
|
|
|
@ -50,7 +50,7 @@ private:
|
||||||
|
|
||||||
l.unlock();
|
l.unlock();
|
||||||
|
|
||||||
uint32 waited = 0;
|
u32 waited = 0;
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
while (m_queue.consume_one(*this))
|
while (m_queue.consume_one(*this))
|
||||||
|
@ -110,7 +110,7 @@ public:
|
||||||
|
|
||||||
void Wait()
|
void Wait()
|
||||||
{
|
{
|
||||||
uint32 waited = 0;
|
u32 waited = 0;
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
if (IsEmpty())
|
if (IsEmpty())
|
||||||
|
|
|
@ -32,11 +32,11 @@ Xbyak::util::Cpu g_cpu;
|
||||||
static class GSUtilMaps
|
static class GSUtilMaps
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
uint8 PrimClassField[8];
|
u8 PrimClassField[8];
|
||||||
uint8 VertexCountField[8];
|
u8 VertexCountField[8];
|
||||||
uint8 ClassVertexCountField[4];
|
u8 ClassVertexCountField[4];
|
||||||
uint32 CompatibleBitsField[64][2];
|
u32 CompatibleBitsField[64][2];
|
||||||
uint32 SharedBitsField[64][2];
|
u32 SharedBitsField[64][2];
|
||||||
|
|
||||||
// Defer init to avoid AVX2 illegal instructions
|
// Defer init to avoid AVX2 illegal instructions
|
||||||
void Init()
|
void Init()
|
||||||
|
@ -105,42 +105,42 @@ void GSUtil::Init()
|
||||||
s_maps.Init();
|
s_maps.Init();
|
||||||
}
|
}
|
||||||
|
|
||||||
GS_PRIM_CLASS GSUtil::GetPrimClass(uint32 prim)
|
GS_PRIM_CLASS GSUtil::GetPrimClass(u32 prim)
|
||||||
{
|
{
|
||||||
return (GS_PRIM_CLASS)s_maps.PrimClassField[prim];
|
return (GS_PRIM_CLASS)s_maps.PrimClassField[prim];
|
||||||
}
|
}
|
||||||
|
|
||||||
int GSUtil::GetVertexCount(uint32 prim)
|
int GSUtil::GetVertexCount(u32 prim)
|
||||||
{
|
{
|
||||||
return s_maps.VertexCountField[prim];
|
return s_maps.VertexCountField[prim];
|
||||||
}
|
}
|
||||||
|
|
||||||
int GSUtil::GetClassVertexCount(uint32 primclass)
|
int GSUtil::GetClassVertexCount(u32 primclass)
|
||||||
{
|
{
|
||||||
return s_maps.ClassVertexCountField[primclass];
|
return s_maps.ClassVertexCountField[primclass];
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint32* GSUtil::HasSharedBitsPtr(uint32 dpsm)
|
const u32* GSUtil::HasSharedBitsPtr(u32 dpsm)
|
||||||
{
|
{
|
||||||
return s_maps.SharedBitsField[dpsm];
|
return s_maps.SharedBitsField[dpsm];
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSUtil::HasSharedBits(uint32 spsm, const uint32* RESTRICT ptr)
|
bool GSUtil::HasSharedBits(u32 spsm, const u32* RESTRICT ptr)
|
||||||
{
|
{
|
||||||
return (ptr[spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
|
return (ptr[spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm)
|
bool GSUtil::HasSharedBits(u32 spsm, u32 dpsm)
|
||||||
{
|
{
|
||||||
return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
|
return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSUtil::HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm)
|
bool GSUtil::HasSharedBits(u32 sbp, u32 spsm, u32 dbp, u32 dpsm)
|
||||||
{
|
{
|
||||||
return ((sbp ^ dbp) | (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f)))) == 0;
|
return ((sbp ^ dbp) | (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f)))) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSUtil::HasCompatibleBits(uint32 spsm, uint32 dpsm)
|
bool GSUtil::HasCompatibleBits(u32 spsm, u32 dpsm)
|
||||||
{
|
{
|
||||||
return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0;
|
return (s_maps.CompatibleBitsField[spsm][dpsm >> 5] & (1 << (dpsm & 0x1f))) != 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,15 +24,15 @@ class GSUtil
|
||||||
public:
|
public:
|
||||||
static void Init();
|
static void Init();
|
||||||
|
|
||||||
static GS_PRIM_CLASS GetPrimClass(uint32 prim);
|
static GS_PRIM_CLASS GetPrimClass(u32 prim);
|
||||||
static int GetVertexCount(uint32 prim);
|
static int GetVertexCount(u32 prim);
|
||||||
static int GetClassVertexCount(uint32 primclass);
|
static int GetClassVertexCount(u32 primclass);
|
||||||
|
|
||||||
static const uint32* HasSharedBitsPtr(uint32 dpsm);
|
static const u32* HasSharedBitsPtr(u32 dpsm);
|
||||||
static bool HasSharedBits(uint32 spsm, const uint32* ptr);
|
static bool HasSharedBits(u32 spsm, const u32* ptr);
|
||||||
static bool HasSharedBits(uint32 spsm, uint32 dpsm);
|
static bool HasSharedBits(u32 spsm, u32 dpsm);
|
||||||
static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm);
|
static bool HasSharedBits(u32 sbp, u32 spsm, u32 dbp, u32 dpsm);
|
||||||
static bool HasCompatibleBits(uint32 spsm, uint32 dpsm);
|
static bool HasCompatibleBits(u32 spsm, u32 dpsm);
|
||||||
|
|
||||||
static bool CheckSSE();
|
static bool CheckSSE();
|
||||||
static CRCHackLevel GetRecommendedCRCHackLevel(GSRendererType type);
|
static CRCHackLevel GetRecommendedCRCHackLevel(GSRendererType type);
|
||||||
|
|
|
@ -50,15 +50,15 @@ public:
|
||||||
struct { float r, g, b, a; };
|
struct { float r, g, b, a; };
|
||||||
struct { float left, top, right, bottom; };
|
struct { float left, top, right, bottom; };
|
||||||
float v[4];
|
float v[4];
|
||||||
float f32[4];
|
float F32[4];
|
||||||
int8 i8[16];
|
s8 I8[16];
|
||||||
int16 i16[8];
|
s16 I16[8];
|
||||||
int32 i32[4];
|
s32 I32[4];
|
||||||
int64 i64[2];
|
s64 I64[2];
|
||||||
uint8 u8[16];
|
u8 U8[16];
|
||||||
uint16 u16[8];
|
u16 U16[8];
|
||||||
uint32 u32[4];
|
u32 U32[4];
|
||||||
uint64 u64[2];
|
u64 U64[2];
|
||||||
__m128 m;
|
__m128 m;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -154,7 +154,7 @@ public:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline explicit GSVector4(uint32 u)
|
__forceinline explicit GSVector4(u32 u)
|
||||||
{
|
{
|
||||||
GSVector4i v((int)u);
|
GSVector4i v((int)u);
|
||||||
|
|
||||||
|
@ -216,17 +216,17 @@ public:
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline uint32 rgba32() const
|
__forceinline u32 rgba32() const
|
||||||
{
|
{
|
||||||
return GSVector4i(*this).rgba32();
|
return GSVector4i(*this).rgba32();
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static GSVector4 rgba32(uint32 rgba)
|
__forceinline static GSVector4 rgba32(u32 rgba)
|
||||||
{
|
{
|
||||||
return GSVector4(GSVector4i::load((int)rgba).u8to32());
|
return GSVector4(GSVector4i::load((int)rgba).u8to32());
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static GSVector4 rgba32(uint32 rgba, int shift)
|
__forceinline static GSVector4 rgba32(u32 rgba, int shift)
|
||||||
{
|
{
|
||||||
return GSVector4(GSVector4i::load((int)rgba).u8to32() << shift);
|
return GSVector4(GSVector4i::load((int)rgba).u8to32() << shift);
|
||||||
}
|
}
|
||||||
|
@ -634,7 +634,7 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
|
||||||
return GSVector4(_mm_load_ss(&f));
|
return GSVector4(_mm_load_ss(&f));
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline static GSVector4 load(uint32 u)
|
__forceinline static GSVector4 load(u32 u)
|
||||||
{
|
{
|
||||||
GSVector4i v = GSVector4i::load((int)u);
|
GSVector4i v = GSVector4i::load((int)u);
|
||||||
|
|
||||||
|
|
|
@ -54,15 +54,15 @@ public:
|
||||||
struct { int r, g, b, a; };
|
struct { int r, g, b, a; };
|
||||||
struct { int left, top, right, bottom; };
|
struct { int left, top, right, bottom; };
|
||||||
int v[4];
|
int v[4];
|
||||||
float f32[4];
|
float F32[4];
|
||||||
int8 i8[16];
|
s8 I8[16];
|
||||||
int16 i16[8];
|
s16 I16[8];
|
||||||
int32 i32[4];
|
s32 I32[4];
|
||||||
int64 i64[2];
|
s64 I64[2];
|
||||||
uint8 u8[16];
|
u8 U8[16];
|
||||||
uint16 u16[8];
|
u16 U16[8];
|
||||||
uint32 u32[4];
|
u32 U32[4];
|
||||||
uint64 u64[2];
|
u64 U64[2];
|
||||||
__m128i m;
|
__m128i m;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -286,14 +286,14 @@ public:
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
__forceinline uint32 rgba32() const
|
__forceinline u32 rgba32() const
|
||||||
{
|
{
|
||||||
GSVector4i v = *this;
|
GSVector4i v = *this;
|
||||||
|
|
||||||
v = v.ps32(v);
|
v = v.ps32(v);
|
||||||
v = v.pu16(v);
|
v = v.pu16(v);
|
||||||
|
|
||||||
return (uint32)store(v);
|
return (u32)store(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector4i sat_i8(const GSVector4i& a, const GSVector4i& b) const
|
__forceinline GSVector4i sat_i8(const GSVector4i& a, const GSVector4i& b) const
|
||||||
|
@ -1073,13 +1073,13 @@ public:
|
||||||
#ifdef _M_AMD64
|
#ifdef _M_AMD64
|
||||||
|
|
||||||
template <int i>
|
template <int i>
|
||||||
__forceinline GSVector4i insert64(int64 a) const
|
__forceinline GSVector4i insert64(s64 a) const
|
||||||
{
|
{
|
||||||
return GSVector4i(_mm_insert_epi64(m, a, i));
|
return GSVector4i(_mm_insert_epi64(m, a, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int i>
|
template <int i>
|
||||||
__forceinline int64 extract64() const
|
__forceinline s64 extract64() const
|
||||||
{
|
{
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
return GSVector4i::storeq(*this);
|
return GSVector4i::storeq(*this);
|
||||||
|
@ -1321,8 +1321,8 @@ public:
|
||||||
{
|
{
|
||||||
GSVector4i v;
|
GSVector4i v;
|
||||||
|
|
||||||
v = loadq((int64)ptr[extract8<src + 0>() & 0xf]);
|
v = loadq((s64)ptr[extract8<src + 0>() & 0xf]);
|
||||||
v = v.insert64<1>((int64)ptr[extract8<src + 0>() >> 4]);
|
v = v.insert64<1>((s64)ptr[extract8<src + 0>() >> 4]);
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
@ -1332,8 +1332,8 @@ public:
|
||||||
{
|
{
|
||||||
GSVector4i v;
|
GSVector4i v;
|
||||||
|
|
||||||
v = loadq((int64)ptr[extract8<src + 0>()]);
|
v = loadq((s64)ptr[extract8<src + 0>()]);
|
||||||
v = v.insert64<1>((int64)ptr[extract8<src + 1>()]);
|
v = v.insert64<1>((s64)ptr[extract8<src + 1>()]);
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
@ -1343,8 +1343,8 @@ public:
|
||||||
{
|
{
|
||||||
GSVector4i v;
|
GSVector4i v;
|
||||||
|
|
||||||
v = loadq((int64)ptr[extract16<src + 0>()]);
|
v = loadq((s64)ptr[extract16<src + 0>()]);
|
||||||
v = v.insert64<1>((int64)ptr[extract16<src + 1>()]);
|
v = v.insert64<1>((s64)ptr[extract16<src + 1>()]);
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
@ -1354,8 +1354,8 @@ public:
|
||||||
{
|
{
|
||||||
GSVector4i v;
|
GSVector4i v;
|
||||||
|
|
||||||
v = loadq((int64)ptr[extract32<src + 0>()]);
|
v = loadq((s64)ptr[extract32<src + 0>()]);
|
||||||
v = v.insert64<1>((int64)ptr[extract32<src + 1>()]);
|
v = v.insert64<1>((s64)ptr[extract32<src + 1>()]);
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
@ -1365,8 +1365,8 @@ public:
|
||||||
{
|
{
|
||||||
GSVector4i v;
|
GSVector4i v;
|
||||||
|
|
||||||
v = loadq((int64)ptr[extract64<0>()]);
|
v = loadq((s64)ptr[extract64<0>()]);
|
||||||
v = v.insert64<1>((int64)ptr[extract64<1>()]);
|
v = v.insert64<1>((s64)ptr[extract64<1>()]);
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
@ -1422,7 +1422,7 @@ public:
|
||||||
dst[1] = gather8_4<8>(ptr);
|
dst[1] = gather8_4<8>(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void gather8_8(const uint8* RESTRICT ptr, GSVector4i* RESTRICT dst) const
|
__forceinline void gather8_8(const u8* RESTRICT ptr, GSVector4i* RESTRICT dst) const
|
||||||
{
|
{
|
||||||
dst[0] = gather8_8<>(ptr);
|
dst[0] = gather8_8<>(ptr);
|
||||||
}
|
}
|
||||||
|
@ -1590,7 +1590,7 @@ public:
|
||||||
|
|
||||||
#ifdef _M_AMD64
|
#ifdef _M_AMD64
|
||||||
|
|
||||||
__forceinline static GSVector4i loadq(int64 i)
|
__forceinline static GSVector4i loadq(s64 i)
|
||||||
{
|
{
|
||||||
return GSVector4i(_mm_cvtsi64_si128(i));
|
return GSVector4i(_mm_cvtsi64_si128(i));
|
||||||
}
|
}
|
||||||
|
@ -1634,7 +1634,7 @@ public:
|
||||||
|
|
||||||
#ifdef _M_AMD64
|
#ifdef _M_AMD64
|
||||||
|
|
||||||
__forceinline static int64 storeq(const GSVector4i& v)
|
__forceinline static s64 storeq(const GSVector4i& v)
|
||||||
{
|
{
|
||||||
return _mm_cvtsi128_si64(v.m);
|
return _mm_cvtsi128_si64(v.m);
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,15 +59,15 @@ public:
|
||||||
struct { float x0, y0, z0, w0, x1, y1, z1, w1; };
|
struct { float x0, y0, z0, w0, x1, y1, z1, w1; };
|
||||||
struct { float r0, g0, b0, a0, r1, g1, b1, a1; };
|
struct { float r0, g0, b0, a0, r1, g1, b1, a1; };
|
||||||
float v[8];
|
float v[8];
|
||||||
float f32[8];
|
float F32[8];
|
||||||
int8 i8[32];
|
s8 I8[32];
|
||||||
int16 i16[16];
|
s16 I16[16];
|
||||||
int32 i32[8];
|
s32 I32[8];
|
||||||
int64 i64[4];
|
s64 I64[4];
|
||||||
uint8 u8[32];
|
u8 U8[32];
|
||||||
uint16 u16[16];
|
u16 U16[16];
|
||||||
uint32 u32[8];
|
u32 U32[8];
|
||||||
uint64 u64[4];
|
u64 U64[4];
|
||||||
__m256 m;
|
__m256 m;
|
||||||
__m128 m0, m1;
|
__m128 m0, m1;
|
||||||
};
|
};
|
||||||
|
@ -103,7 +103,7 @@ public:
|
||||||
return GSVector8(cxpr_setr_epi32(x, x, x, x, x, x, x, x));
|
return GSVector8(cxpr_setr_epi32(x, x, x, x, x, x, x, x));
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr GSVector8 cxpr(uint32 x)
|
static constexpr GSVector8 cxpr(u32 x)
|
||||||
{
|
{
|
||||||
return cxpr(static_cast<int>(x));
|
return cxpr(static_cast<int>(x));
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,15 +69,15 @@ public:
|
||||||
struct { int x0, y0, z0, w0, x1, y1, z1, w1; };
|
struct { int x0, y0, z0, w0, x1, y1, z1, w1; };
|
||||||
struct { int r0, g0, b0, a0, r1, g1, b1, a1; };
|
struct { int r0, g0, b0, a0, r1, g1, b1, a1; };
|
||||||
int v[8];
|
int v[8];
|
||||||
float f32[8];
|
float F32[8];
|
||||||
int8 i8[32];
|
s8 I8[32];
|
||||||
int16 i16[16];
|
s16 I16[16];
|
||||||
int32 i32[8];
|
s32 I32[8];
|
||||||
int64 i64[4];
|
s64 I64[4];
|
||||||
uint8 u8[32];
|
u8 U8[32];
|
||||||
uint16 u16[16];
|
u16 U16[16];
|
||||||
uint32 u32[8];
|
u32 U32[8];
|
||||||
uint64 u64[4];
|
u64 U64[4];
|
||||||
__m256i m;
|
__m256i m;
|
||||||
__m128i m0, m1;
|
__m128i m0, m1;
|
||||||
};
|
};
|
||||||
|
@ -1055,17 +1055,17 @@ public:
|
||||||
return cast(v0).insert<1>(v1);
|
return cast(v0).insert<1>(v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector8i gather32_32(const uint8* ptr) const
|
__forceinline GSVector8i gather32_32(const u8* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff();
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 1)) & GSVector8i::x000000ff();
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector8i gather32_32(const uint16* ptr) const
|
__forceinline GSVector8i gather32_32(const u16* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff();
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 2)) & GSVector8i::x0000ffff();
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector8i gather32_32(const uint32* ptr) const
|
__forceinline GSVector8i gather32_32(const u32* ptr) const
|
||||||
{
|
{
|
||||||
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
|
return GSVector8i(_mm256_i32gather_epi32((const int*)ptr, m, 4));
|
||||||
}
|
}
|
||||||
|
@ -1092,14 +1092,14 @@ public:
|
||||||
return cast(v0).insert<1>(v1);
|
return cast(v0).insert<1>(v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector8i gather32_32(const uint8* ptr1, const uint32* ptr2) const
|
__forceinline GSVector8i gather32_32(const u8* ptr1, const u32* ptr2) const
|
||||||
{
|
{
|
||||||
return gather32_32<uint8>(ptr1).gather32_32<uint32>(ptr2);
|
return gather32_32<u8>(ptr1).gather32_32<u32>(ptr2);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline GSVector8i gather32_32(const uint32* ptr1, const uint32* ptr2) const
|
__forceinline GSVector8i gather32_32(const u32* ptr1, const u32* ptr2) const
|
||||||
{
|
{
|
||||||
return gather32_32<uint32>(ptr1).gather32_32<uint32>(ptr2);
|
return gather32_32<u32>(ptr1).gather32_32<u32>(ptr2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
@ -1169,7 +1169,7 @@ public:
|
||||||
|
|
||||||
#ifdef _M_AMD64
|
#ifdef _M_AMD64
|
||||||
|
|
||||||
__forceinline static GSVector8i loadq(int64 i)
|
__forceinline static GSVector8i loadq(s64 i)
|
||||||
{
|
{
|
||||||
return cast(GSVector4i::loadq(i));
|
return cast(GSVector4i::loadq(i));
|
||||||
}
|
}
|
||||||
|
@ -1213,7 +1213,7 @@ public:
|
||||||
|
|
||||||
#ifdef _M_AMD64
|
#ifdef _M_AMD64
|
||||||
|
|
||||||
__forceinline static int64 storeq(const GSVector8i& v)
|
__forceinline static s64 storeq(const GSVector8i& v)
|
||||||
{
|
{
|
||||||
return GSVector4i::storeq(GSVector4i::cast(v));
|
return GSVector4i::storeq(GSVector4i::cast(v));
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,17 +15,6 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
typedef unsigned char uint8;
|
|
||||||
typedef signed char int8;
|
|
||||||
typedef unsigned short uint16;
|
|
||||||
typedef signed short int16;
|
|
||||||
typedef unsigned int uint32;
|
|
||||||
typedef signed int int32;
|
|
||||||
typedef unsigned long long uint64;
|
|
||||||
typedef signed long long int64;
|
|
||||||
typedef signed long long sint64;
|
|
||||||
|
|
||||||
|
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
|
|
@ -126,7 +126,7 @@ GSTexture* GSDevice::FetchSurface(int type, int w, int h, int format)
|
||||||
void GSDevice::PrintMemoryUsage()
|
void GSDevice::PrintMemoryUsage()
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_OGL_DEBUG
|
#ifdef ENABLE_OGL_DEBUG
|
||||||
uint32 pool = 0;
|
u32 pool = 0;
|
||||||
for (auto t : m_pool)
|
for (auto t : m_pool)
|
||||||
{
|
{
|
||||||
if (t)
|
if (t)
|
||||||
|
@ -422,7 +422,7 @@ HWBlend GSDevice::GetBlend(size_t index)
|
||||||
return blend;
|
return blend;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16 GSDevice::GetBlendFlags(size_t index) { return m_blendMap[index].flags; }
|
u16 GSDevice::GetBlendFlags(size_t index) { return m_blendMap[index].flags; }
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
|
|
||||||
|
|
|
@ -120,7 +120,7 @@ enum HWBlendFlags
|
||||||
// Determines the HW blend function for DX11/OGL
|
// Determines the HW blend function for DX11/OGL
|
||||||
struct HWBlend
|
struct HWBlend
|
||||||
{
|
{
|
||||||
uint16 flags, op, src, dst;
|
u16 flags, op, src, dst;
|
||||||
};
|
};
|
||||||
|
|
||||||
class GSDevice : public GSAlignedClass<32>
|
class GSDevice : public GSAlignedClass<32>
|
||||||
|
@ -130,7 +130,7 @@ private:
|
||||||
static std::array<HWBlend, 3*3*3*3 + 1> m_blendMap;
|
static std::array<HWBlend, 3*3*3*3 + 1> m_blendMap;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
enum : uint16
|
enum : u16
|
||||||
{
|
{
|
||||||
// HW blend factors
|
// HW blend factors
|
||||||
SRC_COLOR, INV_SRC_COLOR, DST_COLOR, INV_DST_COLOR,
|
SRC_COLOR, INV_SRC_COLOR, DST_COLOR, INV_DST_COLOR,
|
||||||
|
@ -172,7 +172,7 @@ protected:
|
||||||
virtual void DoFXAA(GSTexture* sTex, GSTexture* dTex) {}
|
virtual void DoFXAA(GSTexture* sTex, GSTexture* dTex) {}
|
||||||
virtual void DoShadeBoost(GSTexture* sTex, GSTexture* dTex) {}
|
virtual void DoShadeBoost(GSTexture* sTex, GSTexture* dTex) {}
|
||||||
virtual void DoExternalFX(GSTexture* sTex, GSTexture* dTex) {}
|
virtual void DoExternalFX(GSTexture* sTex, GSTexture* dTex) {}
|
||||||
virtual uint16 ConvertBlendEnum(uint16 generic) = 0; // Convert blend factors/ops from the generic enum to DX11/OGl specific.
|
virtual u16 ConvertBlendEnum(u16 generic) = 0; // Convert blend factors/ops from the generic enum to DX11/OGl specific.
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSOsdManager m_osd;
|
GSOsdManager m_osd;
|
||||||
|
@ -208,9 +208,9 @@ public:
|
||||||
virtual bool HasColorSparse() { return false; }
|
virtual bool HasColorSparse() { return false; }
|
||||||
|
|
||||||
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}
|
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}
|
||||||
virtual void ClearRenderTarget(GSTexture* t, uint32 c) {}
|
virtual void ClearRenderTarget(GSTexture* t, u32 c) {}
|
||||||
virtual void ClearDepth(GSTexture* t) {}
|
virtual void ClearDepth(GSTexture* t) {}
|
||||||
virtual void ClearStencil(GSTexture* t, uint8 c) {}
|
virtual void ClearStencil(GSTexture* t, u8 c) {}
|
||||||
|
|
||||||
GSTexture* CreateSparseRenderTarget(int w, int h, int format = 0);
|
GSTexture* CreateSparseRenderTarget(int w, int h, int format = 0);
|
||||||
GSTexture* CreateSparseDepthStencil(int w, int h, int format = 0);
|
GSTexture* CreateSparseDepthStencil(int w, int h, int format = 0);
|
||||||
|
@ -257,15 +257,15 @@ public:
|
||||||
// Convert the GS blend equations to HW specific blend factors/ops
|
// Convert the GS blend equations to HW specific blend factors/ops
|
||||||
// Index is computed as ((((A * 3 + B) * 3) + C) * 3) + D. A, B, C, D taken from ALPHA register.
|
// Index is computed as ((((A * 3 + B) * 3) + C) * 3) + D. A, B, C, D taken from ALPHA register.
|
||||||
HWBlend GetBlend(size_t index);
|
HWBlend GetBlend(size_t index);
|
||||||
uint16 GetBlendFlags(size_t index);
|
u16 GetBlendFlags(size_t index);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GSAdapter
|
struct GSAdapter
|
||||||
{
|
{
|
||||||
uint32 vendor;
|
u32 vendor;
|
||||||
uint32 device;
|
u32 device;
|
||||||
uint32 subsys;
|
u32 subsys;
|
||||||
uint32 rev;
|
u32 rev;
|
||||||
|
|
||||||
operator std::string() const;
|
operator std::string() const;
|
||||||
bool operator==(const GSAdapter&) const;
|
bool operator==(const GSAdapter&) const;
|
||||||
|
|
|
@ -22,7 +22,7 @@ GSDirtyRect::GSDirtyRect()
|
||||||
left = top = right = bottom = 0;
|
left = top = right = bottom = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSDirtyRect::GSDirtyRect(const GSVector4i& r, uint32 psm)
|
GSDirtyRect::GSDirtyRect(const GSVector4i& r, u32 psm)
|
||||||
: psm(psm)
|
: psm(psm)
|
||||||
{
|
{
|
||||||
left = r.left;
|
left = r.left;
|
||||||
|
|
|
@ -24,11 +24,11 @@ class GSDirtyRect
|
||||||
int right;
|
int right;
|
||||||
int bottom;
|
int bottom;
|
||||||
|
|
||||||
uint32 psm;
|
u32 psm;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDirtyRect();
|
GSDirtyRect();
|
||||||
GSDirtyRect(const GSVector4i& r, uint32 psm);
|
GSDirtyRect(const GSVector4i& r, u32 psm);
|
||||||
const GSVector4i GetDirtyRect(const GIFRegTEX0& TEX0) const;
|
const GSVector4i GetDirtyRect(const GIFRegTEX0& TEX0) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,8 @@ template <class T>
|
||||||
struct Element
|
struct Element
|
||||||
{
|
{
|
||||||
T data;
|
T data;
|
||||||
uint16 next_index;
|
u16 next_index;
|
||||||
uint16 prev_index;
|
u16 prev_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
@ -47,11 +47,11 @@ private:
|
||||||
// the relevant iterator (or the index alone) are erased from the list.
|
// the relevant iterator (or the index alone) are erased from the list.
|
||||||
// m_buffer[0] is always present as auxiliary Element<T> of the list
|
// m_buffer[0] is always present as auxiliary Element<T> of the list
|
||||||
Element<T>* m_buffer;
|
Element<T>* m_buffer;
|
||||||
uint16 m_capacity;
|
u16 m_capacity;
|
||||||
uint16 m_free_indexes_stack_top;
|
u16 m_free_indexes_stack_top;
|
||||||
// m_free_indexes_stack has dynamic size (m_capacity - 1)
|
// m_free_indexes_stack has dynamic size (m_capacity - 1)
|
||||||
// m_buffer indexes that are free to be used are stacked here
|
// m_buffer indexes that are free to be used are stacked here
|
||||||
uint16* m_free_indexes_stack;
|
u16* m_free_indexes_stack;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
__forceinline FastList()
|
__forceinline FastList()
|
||||||
|
@ -74,8 +74,8 @@ public:
|
||||||
// Initialize m_buffer and m_free_indexes_stack as a contiguous block of memory starting at m_buffer
|
// Initialize m_buffer and m_free_indexes_stack as a contiguous block of memory starting at m_buffer
|
||||||
// This should increase cache locality and reduce memory fragmentation
|
// This should increase cache locality and reduce memory fragmentation
|
||||||
_aligned_free(m_buffer);
|
_aligned_free(m_buffer);
|
||||||
m_buffer = (Element<T>*)_aligned_malloc(m_capacity * sizeof(Element<T>) + (m_capacity - 1) * sizeof(uint16), 64);
|
m_buffer = (Element<T>*)_aligned_malloc(m_capacity * sizeof(Element<T>) + (m_capacity - 1) * sizeof(u16), 64);
|
||||||
m_free_indexes_stack = (uint16*)&m_buffer[m_capacity];
|
m_free_indexes_stack = (u16*)&m_buffer[m_capacity];
|
||||||
|
|
||||||
// Initialize m_buffer[0], data field is unused but initialized using default T constructor
|
// Initialize m_buffer[0], data field is unused but initialized using default T constructor
|
||||||
m_buffer[0] = {T(), 0, 0};
|
m_buffer[0] = {T(), 0, 0};
|
||||||
|
@ -84,14 +84,14 @@ public:
|
||||||
m_free_indexes_stack_top = 0;
|
m_free_indexes_stack_top = 0;
|
||||||
|
|
||||||
// m_buffer index 0 is reserved for auxiliary element
|
// m_buffer index 0 is reserved for auxiliary element
|
||||||
for (uint16 i = 0; i < m_capacity - 1; i++)
|
for (u16 i = 0; i < m_capacity - 1; i++)
|
||||||
{
|
{
|
||||||
m_free_indexes_stack[i] = i + 1;
|
m_free_indexes_stack[i] = i + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert the element in front of the list and return its position in m_buffer
|
// Insert the element in front of the list and return its position in m_buffer
|
||||||
__forceinline uint16 InsertFront(const T& data)
|
__forceinline u16 InsertFront(const T& data)
|
||||||
{
|
{
|
||||||
if (Full())
|
if (Full())
|
||||||
{
|
{
|
||||||
|
@ -99,7 +99,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pop a free index from the stack
|
// Pop a free index from the stack
|
||||||
const uint16 free_index = m_free_indexes_stack[m_free_indexes_stack_top++];
|
const u16 free_index = m_free_indexes_stack[m_free_indexes_stack_top++];
|
||||||
m_buffer[free_index].data = data;
|
m_buffer[free_index].data = data;
|
||||||
ListInsertFront(free_index);
|
ListInsertFront(free_index);
|
||||||
return free_index;
|
return free_index;
|
||||||
|
@ -120,7 +120,7 @@ public:
|
||||||
EraseIndex(LastIndex());
|
EraseIndex(LastIndex());
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline uint16 size() const
|
__forceinline u16 size() const
|
||||||
{
|
{
|
||||||
return m_free_indexes_stack_top;
|
return m_free_indexes_stack_top;
|
||||||
}
|
}
|
||||||
|
@ -130,13 +130,13 @@ public:
|
||||||
return size() == 0;
|
return size() == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void EraseIndex(const uint16 index)
|
__forceinline void EraseIndex(const u16 index)
|
||||||
{
|
{
|
||||||
ListRemove(index);
|
ListRemove(index);
|
||||||
m_free_indexes_stack[--m_free_indexes_stack_top] = index;
|
m_free_indexes_stack[--m_free_indexes_stack_top] = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void MoveFront(const uint16 index)
|
__forceinline void MoveFront(const u16 index)
|
||||||
{
|
{
|
||||||
if (FirstIndex() != index)
|
if (FirstIndex() != index)
|
||||||
{
|
{
|
||||||
|
@ -163,29 +163,29 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Accessed by FastListIterator<T> using class friendship
|
// Accessed by FastListIterator<T> using class friendship
|
||||||
__forceinline const T& Data(const uint16 index) const
|
__forceinline const T& Data(const u16 index) const
|
||||||
{
|
{
|
||||||
return m_buffer[index].data;
|
return m_buffer[index].data;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Accessed by FastListIterator<T> using class friendship
|
// Accessed by FastListIterator<T> using class friendship
|
||||||
__forceinline uint16 NextIndex(const uint16 index) const
|
__forceinline u16 NextIndex(const u16 index) const
|
||||||
{
|
{
|
||||||
return m_buffer[index].next_index;
|
return m_buffer[index].next_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Accessed by FastListIterator<T> using class friendship
|
// Accessed by FastListIterator<T> using class friendship
|
||||||
__forceinline uint16 PrevIndex(const uint16 index) const
|
__forceinline u16 PrevIndex(const u16 index) const
|
||||||
{
|
{
|
||||||
return m_buffer[index].prev_index;
|
return m_buffer[index].prev_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline uint16 FirstIndex() const
|
__forceinline u16 FirstIndex() const
|
||||||
{
|
{
|
||||||
return m_buffer[0].next_index;
|
return m_buffer[0].next_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline uint16 LastIndex() const
|
__forceinline u16 LastIndex() const
|
||||||
{
|
{
|
||||||
return m_buffer[0].prev_index;
|
return m_buffer[0].prev_index;
|
||||||
}
|
}
|
||||||
|
@ -196,7 +196,7 @@ private:
|
||||||
return size() == m_capacity - 1;
|
return size() == m_capacity - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void ListInsertFront(const uint16 index)
|
__forceinline void ListInsertFront(const u16 index)
|
||||||
{
|
{
|
||||||
// Update prev / next indexes to add m_buffer[index] to the chain
|
// Update prev / next indexes to add m_buffer[index] to the chain
|
||||||
Element<T>& head = m_buffer[0];
|
Element<T>& head = m_buffer[0];
|
||||||
|
@ -206,7 +206,7 @@ private:
|
||||||
head.next_index = index;
|
head.next_index = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void ListRemove(const uint16 index)
|
__forceinline void ListRemove(const u16 index)
|
||||||
{
|
{
|
||||||
// Update prev / next indexes to remove m_buffer[index] from the chain
|
// Update prev / next indexes to remove m_buffer[index] from the chain
|
||||||
const Element<T>& to_remove = m_buffer[index];
|
const Element<T>& to_remove = m_buffer[index];
|
||||||
|
@ -221,13 +221,13 @@ private:
|
||||||
throw std::runtime_error("FastList size maxed out at USHRT_MAX (65535) elements, cannot grow futhermore.");
|
throw std::runtime_error("FastList size maxed out at USHRT_MAX (65535) elements, cannot grow futhermore.");
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint16 new_capacity = m_capacity <= (USHRT_MAX / 2) ? (m_capacity * 2) : USHRT_MAX;
|
const u16 new_capacity = m_capacity <= (USHRT_MAX / 2) ? (m_capacity * 2) : USHRT_MAX;
|
||||||
|
|
||||||
Element<T>* new_buffer = (Element<T>*)_aligned_malloc(new_capacity * sizeof(Element<T>) + (new_capacity - 1) * sizeof(uint16), 64);
|
Element<T>* new_buffer = (Element<T>*)_aligned_malloc(new_capacity * sizeof(Element<T>) + (new_capacity - 1) * sizeof(u16), 64);
|
||||||
uint16* new_free_indexes_stack = (uint16*)&new_buffer[new_capacity];
|
u16* new_free_indexes_stack = (u16*)&new_buffer[new_capacity];
|
||||||
|
|
||||||
memcpy(new_buffer, m_buffer, m_capacity * sizeof(Element<T>));
|
memcpy(new_buffer, m_buffer, m_capacity * sizeof(Element<T>));
|
||||||
memcpy(new_free_indexes_stack, m_free_indexes_stack, (m_capacity - 1) * sizeof(uint16));
|
memcpy(new_free_indexes_stack, m_free_indexes_stack, (m_capacity - 1) * sizeof(u16));
|
||||||
|
|
||||||
_aligned_free(m_buffer);
|
_aligned_free(m_buffer);
|
||||||
|
|
||||||
|
@ -235,7 +235,7 @@ private:
|
||||||
m_free_indexes_stack = new_free_indexes_stack;
|
m_free_indexes_stack = new_free_indexes_stack;
|
||||||
|
|
||||||
// Initialize the additional space in the stack
|
// Initialize the additional space in the stack
|
||||||
for (uint16 i = m_capacity - 1; i < new_capacity - 1; i++)
|
for (u16 i = m_capacity - 1; i < new_capacity - 1; i++)
|
||||||
{
|
{
|
||||||
m_free_indexes_stack[i] = i + 1;
|
m_free_indexes_stack[i] = i + 1;
|
||||||
}
|
}
|
||||||
|
@ -251,10 +251,10 @@ class FastListIterator
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
const FastList<T>* m_fastlist;
|
const FastList<T>* m_fastlist;
|
||||||
uint16 m_index;
|
u16 m_index;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
__forceinline FastListIterator(const FastList<T>* fastlist, const uint16 index)
|
__forceinline FastListIterator(const FastList<T>* fastlist, const u16 index)
|
||||||
{
|
{
|
||||||
m_fastlist = fastlist;
|
m_fastlist = fastlist;
|
||||||
m_index = index;
|
m_index = index;
|
||||||
|
@ -305,7 +305,7 @@ public:
|
||||||
return m_fastlist->Data(m_index);
|
return m_fastlist->Data(m_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline uint16 Index() const
|
__forceinline u16 Index() const
|
||||||
{
|
{
|
||||||
return m_index;
|
return m_index;
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,8 +29,8 @@ class GSFunctionMap
|
||||||
protected:
|
protected:
|
||||||
struct ActivePtr
|
struct ActivePtr
|
||||||
{
|
{
|
||||||
uint64 frame, frames, prims;
|
u64 frame, frames, prims;
|
||||||
uint64 ticks, actual, total;
|
u64 ticks, actual, total;
|
||||||
VALUE f;
|
VALUE f;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ public:
|
||||||
|
|
||||||
memset(p, 0, sizeof(*p));
|
memset(p, 0, sizeof(*p));
|
||||||
|
|
||||||
p->frame = (uint64)-1;
|
p->frame = (u64)-1;
|
||||||
|
|
||||||
p->f = GetDefaultFunction(key);
|
p->f = GetDefaultFunction(key);
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ public:
|
||||||
return m_active->f;
|
return m_active->f;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateStats(uint64 frame, uint64 ticks, int actual, int total, int prims)
|
void UpdateStats(u64 frame, u64 ticks, int actual, int total, int prims)
|
||||||
{
|
{
|
||||||
if (m_active)
|
if (m_active)
|
||||||
{
|
{
|
||||||
|
@ -101,7 +101,7 @@ public:
|
||||||
|
|
||||||
virtual void PrintStats()
|
virtual void PrintStats()
|
||||||
{
|
{
|
||||||
uint64 totalTicks = 0;
|
u64 totalTicks = 0;
|
||||||
|
|
||||||
for (const auto& i : m_map_active)
|
for (const auto& i : m_map_active)
|
||||||
{
|
{
|
||||||
|
@ -128,10 +128,10 @@ public:
|
||||||
|
|
||||||
if (p->frames && p->actual)
|
if (p->frames && p->actual)
|
||||||
{
|
{
|
||||||
uint64 tpf = p->ticks / p->frames;
|
u64 tpf = p->ticks / p->frames;
|
||||||
|
|
||||||
printf("%016llx | %6llu | %5llu | %5.2f%% %5.1f %6.1f | %8llu %6llu %5.2f%%\n",
|
printf("%016llx | %6llu | %5llu | %5.2f%% %5.1f %6.1f | %8llu %6llu %5.2f%%\n",
|
||||||
(uint64)key,
|
(u64)key,
|
||||||
p->frames,
|
p->frames,
|
||||||
p->prims / p->frames,
|
p->prims / p->frames,
|
||||||
(double)(p->ticks * 100) / totalTicks,
|
(double)(p->ticks * 100) / totalTicks,
|
||||||
|
@ -162,7 +162,7 @@ class GSCodeGeneratorFunctionMap : public GSFunctionMap<KEY, VALUE>
|
||||||
{
|
{
|
||||||
std::string m_name;
|
std::string m_name;
|
||||||
void* m_param;
|
void* m_param;
|
||||||
std::unordered_map<uint64, VALUE> m_cgmap;
|
std::unordered_map<u64, VALUE> m_cgmap;
|
||||||
GSCodeBuffer m_cb;
|
GSCodeBuffer m_cb;
|
||||||
size_t m_total_code_size;
|
size_t m_total_code_size;
|
||||||
|
|
||||||
|
@ -201,7 +201,7 @@ public:
|
||||||
ASSERT(cg->getSize() < MAX_SIZE);
|
ASSERT(cg->getSize() < MAX_SIZE);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
fprintf(stderr, "%s Location:%p Size:%zu Key:%llx\n", m_name.c_str(), code_ptr, cg->getSize(), (uint64)key);
|
fprintf(stderr, "%s Location:%p Size:%zu Key:%llx\n", m_name.c_str(), code_ptr, cg->getSize(), (u64)key);
|
||||||
GSScanlineSelector sel(key);
|
GSScanlineSelector sel(key);
|
||||||
sel.Print();
|
sel.Print();
|
||||||
#endif
|
#endif
|
||||||
|
@ -220,7 +220,7 @@ public:
|
||||||
|
|
||||||
// if(iJIT_IsProfilingActive()) // always > 0
|
// if(iJIT_IsProfilingActive()) // always > 0
|
||||||
{
|
{
|
||||||
std::string name = format("%s<%016llx>()", m_name.c_str(), (uint64)key);
|
std::string name = format("%s<%016llx>()", m_name.c_str(), (u64)key);
|
||||||
|
|
||||||
iJIT_Method_Load ml;
|
iJIT_Method_Load ml;
|
||||||
|
|
||||||
|
@ -233,7 +233,7 @@ public:
|
||||||
|
|
||||||
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
|
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, &ml);
|
||||||
/*
|
/*
|
||||||
name = format("c:/temp1/%s_%016llx.bin", m_name.c_str(), (uint64)key);
|
name = format("c:/temp1/%s_%016llx.bin", m_name.c_str(), (u64)key);
|
||||||
|
|
||||||
if(FILE* fp = fopen(name.c_str(), "wb"))
|
if(FILE* fp = fopen(name.c_str(), "wb"))
|
||||||
{
|
{
|
||||||
|
|
|
@ -276,7 +276,7 @@ void GSOsdManager::Monitor(const char* key, const char* value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, uint32 color)
|
void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, u32 color)
|
||||||
{
|
{
|
||||||
float x2 = x + g.bl * (2.0f / m_real_size.x);
|
float x2 = x + g.bl * (2.0f / m_real_size.x);
|
||||||
float y2 = -y - g.bt * (2.0f / m_real_size.y);
|
float y2 = -y - g.bt * (2.0f / m_real_size.y);
|
||||||
|
@ -309,7 +309,7 @@ void GSOsdManager::RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, fl
|
||||||
++dst;
|
++dst;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, uint32 color)
|
void GSOsdManager::RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, u32 color)
|
||||||
{
|
{
|
||||||
char32_t p = 0;
|
char32_t p = 0;
|
||||||
for (const auto& c : msg)
|
for (const auto& c : msg)
|
||||||
|
@ -435,8 +435,8 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count)
|
||||||
ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio;
|
ratio = ratio > 1.0f ? 1.0f : ratio < 0.0f ? 0.0f : ratio;
|
||||||
|
|
||||||
y += offset += ((m_size + 2) * (2.0f / m_real_size.y)) * ratio;
|
y += offset += ((m_size + 2) * (2.0f / m_real_size.y)) * ratio;
|
||||||
uint32 color = m_color;
|
u32 color = m_color;
|
||||||
((uint8*)&color)[3] = (uint8)(((uint8*)&color)[3] * (1.0f - ratio) * opacity);
|
((u8*)&color)[3] = (u8)(((u8*)&color)[3] * (1.0f - ratio) * opacity);
|
||||||
RenderString(dst, it->msg, x, y, color);
|
RenderString(dst, it->msg, x, y, color);
|
||||||
dst += it->msg.size() * 6;
|
dst += it->msg.size() * 6;
|
||||||
drawn += it->msg.size() * 6;
|
drawn += it->msg.size() * 6;
|
||||||
|
@ -473,8 +473,8 @@ size_t GSOsdManager::GeneratePrimitives(GSVertexPT1* dst, size_t count)
|
||||||
float x = 1.0f - 8 * (2.0f / m_real_size.x) - first_max - m_char_info[' '].ax * (2.0f / m_real_size.x) - second_max;
|
float x = 1.0f - 8 * (2.0f / m_real_size.x) - first_max - m_char_info[' '].ax * (2.0f / m_real_size.x) - second_max;
|
||||||
float y = -1.0f + ((m_size + 2) * (2.0f / m_real_size.y)) * line++;
|
float y = -1.0f + ((m_size + 2) * (2.0f / m_real_size.y)) * line++;
|
||||||
|
|
||||||
uint32 color = m_color;
|
u32 color = m_color;
|
||||||
((uint8*)&color)[3] = (uint8)(((uint8*)&color)[3] * opacity);
|
((u8*)&color)[3] = (u8)(((u8*)&color)[3] * opacity);
|
||||||
|
|
||||||
// Render the key
|
// Render the key
|
||||||
RenderString(dst, pair.first, x, y, color);
|
RenderString(dst, pair.first, x, y, color);
|
||||||
|
|
|
@ -26,14 +26,14 @@ class GSOsdManager
|
||||||
{
|
{
|
||||||
struct glyph_info
|
struct glyph_info
|
||||||
{
|
{
|
||||||
int32 ax; // advance.x
|
s32 ax; // advance.x
|
||||||
int32 ay; // advance.y
|
s32 ay; // advance.y
|
||||||
|
|
||||||
uint32 bw; // bitmap.width;
|
u32 bw; // bitmap.width;
|
||||||
uint32 bh; // bitmap.rows;
|
u32 bh; // bitmap.rows;
|
||||||
|
|
||||||
int32 bl; // bitmap_left;
|
s32 bl; // bitmap_left;
|
||||||
int32 bt; // bitmap_top;
|
s32 bt; // bitmap_top;
|
||||||
|
|
||||||
float tx; // x offset of glyph
|
float tx; // x offset of glyph
|
||||||
float ty; // y offset of glyph
|
float ty; // y offset of glyph
|
||||||
|
@ -47,10 +47,10 @@ class GSOsdManager
|
||||||
FT_Face m_face;
|
FT_Face m_face;
|
||||||
FT_UInt m_size;
|
FT_UInt m_size;
|
||||||
|
|
||||||
uint32 m_atlas_h;
|
u32 m_atlas_h;
|
||||||
uint32 m_atlas_w;
|
u32 m_atlas_w;
|
||||||
int32 m_max_width;
|
s32 m_max_width;
|
||||||
int32 m_onscreen_messages;
|
s32 m_onscreen_messages;
|
||||||
|
|
||||||
struct log_info
|
struct log_info
|
||||||
{
|
{
|
||||||
|
@ -62,15 +62,15 @@ class GSOsdManager
|
||||||
std::map<std::u32string, std::u32string> m_monitor;
|
std::map<std::u32string, std::u32string> m_monitor;
|
||||||
|
|
||||||
void AddGlyph(char32_t codepoint);
|
void AddGlyph(char32_t codepoint);
|
||||||
void RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, uint32 color);
|
void RenderGlyph(GSVertexPT1* dst, const glyph_info g, float x, float y, u32 color);
|
||||||
void RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, uint32 color);
|
void RenderString(GSVertexPT1* dst, const std::u32string msg, float x, float y, u32 color);
|
||||||
float StringSize(const std::u32string msg);
|
float StringSize(const std::u32string msg);
|
||||||
|
|
||||||
bool m_log_enabled;
|
bool m_log_enabled;
|
||||||
int m_log_timeout;
|
int m_log_timeout;
|
||||||
bool m_monitor_enabled;
|
bool m_monitor_enabled;
|
||||||
int m_opacity;
|
int m_opacity;
|
||||||
uint32 m_color;
|
u32 m_color;
|
||||||
int m_max_onscreen_messages;
|
int m_max_onscreen_messages;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -31,7 +31,7 @@ protected:
|
||||||
public:
|
public:
|
||||||
struct GSMap
|
struct GSMap
|
||||||
{
|
{
|
||||||
uint8* bits;
|
u8* bits;
|
||||||
int pitch;
|
int pitch;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ public:
|
||||||
virtual void Unmap() = 0;
|
virtual void Unmap() = 0;
|
||||||
virtual void GenerateMipmap() {}
|
virtual void GenerateMipmap() {}
|
||||||
virtual bool Save(const std::string& fn) = 0;
|
virtual bool Save(const std::string& fn) = 0;
|
||||||
virtual uint32 GetID() { return 0; }
|
virtual u32 GetID() { return 0; }
|
||||||
|
|
||||||
GSVector2 GetScale() const { return m_scale; }
|
GSVector2 GetScale() const { return m_scale; }
|
||||||
void SetScale(const GSVector2& scale) { m_scale = scale; }
|
void SetScale(const GSVector2& scale) { m_scale = scale; }
|
||||||
|
@ -90,5 +90,5 @@ public:
|
||||||
float OffsetHack_mody;
|
float OffsetHack_mody;
|
||||||
|
|
||||||
// Typical size of a RGBA texture
|
// Typical size of a RGBA texture
|
||||||
virtual uint32 GetMemUsage() { return m_size.x * m_size.y * 4; }
|
virtual u32 GetMemUsage() { return m_size.x * m_size.y * 4; }
|
||||||
};
|
};
|
||||||
|
|
|
@ -31,8 +31,8 @@ struct alignas(32) GSVertex
|
||||||
GIFRegST ST; // S:0, T:4
|
GIFRegST ST; // S:0, T:4
|
||||||
GIFRegRGBAQ RGBAQ; // RGBA:8, Q:12
|
GIFRegRGBAQ RGBAQ; // RGBA:8, Q:12
|
||||||
GIFRegXYZ XYZ; // XY:16, Z:20
|
GIFRegXYZ XYZ; // XY:16, Z:20
|
||||||
union { uint32 UV; struct { uint16 U, V; }; }; // UV:24
|
union { u32 UV; struct { u16 U, V; }; }; // UV:24
|
||||||
uint32 FOG; // FOG:28
|
u32 FOG; // FOG:28
|
||||||
};
|
};
|
||||||
|
|
||||||
#if _M_SSE >= 0x500
|
#if _M_SSE >= 0x500
|
||||||
|
@ -73,7 +73,7 @@ struct alignas(32) GSVertexPT1
|
||||||
GSVector4 p;
|
GSVector4 p;
|
||||||
GSVector2 t;
|
GSVector2 t;
|
||||||
char pad[4];
|
char pad[4];
|
||||||
union { uint32 c; struct { uint8 r, g, b, a; }; };
|
union { u32 c; struct { u8 r, g, b, a; }; };
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GSVertexPT2
|
struct GSVertexPT2
|
||||||
|
|
|
@ -47,14 +47,14 @@ GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||||
InitUpdate(GS_SPRITE_CLASS);
|
InitUpdate(GS_SPRITE_CLASS);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count, int i_count, GS_PRIM_CLASS primclass)
|
void GSVertexTrace::Update(const void* vertex, const u32* index, int v_count, int i_count, GS_PRIM_CLASS primclass)
|
||||||
{
|
{
|
||||||
m_primclass = primclass;
|
m_primclass = primclass;
|
||||||
|
|
||||||
uint32 iip = m_state->PRIM->IIP;
|
u32 iip = m_state->PRIM->IIP;
|
||||||
uint32 tme = m_state->PRIM->TME;
|
u32 tme = m_state->PRIM->TME;
|
||||||
uint32 fst = m_state->PRIM->FST;
|
u32 fst = m_state->PRIM->FST;
|
||||||
uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
|
u32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
|
||||||
|
|
||||||
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, i_count);
|
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, i_count);
|
||||||
|
|
||||||
|
@ -148,8 +148,8 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int v_count,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color>
|
||||||
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
|
void GSVertexTrace::FindMinMax(const void* vertex, const u32* index, int count)
|
||||||
{
|
{
|
||||||
const GSDrawingContext* context = m_state->m_context;
|
const GSDrawingContext* context = m_state->m_context;
|
||||||
|
|
||||||
|
@ -184,8 +184,8 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
||||||
{
|
{
|
||||||
if (color)
|
if (color)
|
||||||
{
|
{
|
||||||
GSVector4i c0 = GSVector4i::load(v0.RGBAQ.u32[0]);
|
GSVector4i c0 = GSVector4i::load(v0.RGBAQ.U32[0]);
|
||||||
GSVector4i c1 = GSVector4i::load(v1.RGBAQ.u32[0]);
|
GSVector4i c1 = GSVector4i::load(v1.RGBAQ.U32[0]);
|
||||||
if (iip || finalVertex)
|
if (iip || finalVertex)
|
||||||
{
|
{
|
||||||
cmin = cmin.min_u8(c0.min_u8(c1));
|
cmin = cmin.min_u8(c0.min_u8(c1));
|
||||||
|
@ -304,8 +304,8 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
||||||
m_max.p = (GSVector4(pmax) - o) * s;
|
m_max.p = (GSVector4(pmax) - o) * s;
|
||||||
|
|
||||||
// Fix signed int conversion
|
// Fix signed int conversion
|
||||||
m_min.p = m_min.p.insert32<0, 2>(GSVector4::load((float)(uint32)pmin.extract32<2>()));
|
m_min.p = m_min.p.insert32<0, 2>(GSVector4::load((float)(u32)pmin.extract32<2>()));
|
||||||
m_max.p = m_max.p.insert32<0, 2>(GSVector4::load((float)(uint32)pmax.extract32<2>()));
|
m_max.p = m_max.p.insert32<0, 2>(GSVector4::load((float)(u32)pmax.extract32<2>()));
|
||||||
|
|
||||||
if (tme)
|
if (tme)
|
||||||
{
|
{
|
||||||
|
@ -354,7 +354,7 @@ void GSVertexTrace::CorrectDepthTrace(const void* vertex, int count)
|
||||||
|
|
||||||
|
|
||||||
const GSVertex* RESTRICT v = (GSVertex*)vertex;
|
const GSVertex* RESTRICT v = (GSVertex*)vertex;
|
||||||
uint32 z = v[0].XYZ.Z;
|
u32 z = v[0].XYZ.Z;
|
||||||
|
|
||||||
// ought to check only 1/2 for sprite
|
// ought to check only 1/2 for sprite
|
||||||
if (z & 1)
|
if (z & 1)
|
||||||
|
|
|
@ -45,12 +45,12 @@ protected:
|
||||||
|
|
||||||
static const GSVector4 s_minmax;
|
static const GSVector4 s_minmax;
|
||||||
|
|
||||||
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
|
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const u32* index, int count);
|
||||||
|
|
||||||
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
||||||
|
|
||||||
template <GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
template <GS_PRIM_CLASS primclass, u32 iip, u32 tme, u32 fst, u32 color>
|
||||||
void FindMinMax(const void* vertex, const uint32* index, int count);
|
void FindMinMax(const void* vertex, const u32* index, int count);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GS_PRIM_CLASS m_primclass;
|
GS_PRIM_CLASS m_primclass;
|
||||||
|
@ -61,14 +61,14 @@ public:
|
||||||
|
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
uint32 value;
|
u32 value;
|
||||||
struct { uint32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1; };
|
struct { u32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1; };
|
||||||
struct { uint32 rgba:16, xyzf:4, stq:4; };
|
struct { u32 rgba:16, xyzf:4, stq:4; };
|
||||||
} m_eq;
|
} m_eq;
|
||||||
|
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
struct { uint32 mmag:1, mmin:1, linear:1, opt_linear:1; };
|
struct { u32 mmag:1, mmin:1, linear:1, opt_linear:1; };
|
||||||
} m_filter;
|
} m_filter;
|
||||||
|
|
||||||
GSVector2 m_lod; // x = min, y = max
|
GSVector2 m_lod; // x = min, y = max
|
||||||
|
@ -77,7 +77,7 @@ public:
|
||||||
GSVertexTrace(const GSState* state);
|
GSVertexTrace(const GSState* state);
|
||||||
virtual ~GSVertexTrace() {}
|
virtual ~GSVertexTrace() {}
|
||||||
|
|
||||||
void Update(const void* vertex, const uint32* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
|
void Update(const void* vertex, const u32* index, int v_count, int i_count, GS_PRIM_CLASS primclass);
|
||||||
|
|
||||||
bool IsLinear() const { return m_filter.opt_linear; }
|
bool IsLinear() const { return m_filter.opt_linear; }
|
||||||
bool IsRealLinear() const { return m_filter.linear; }
|
bool IsRealLinear() const { return m_filter.linear; }
|
||||||
|
|
|
@ -113,7 +113,7 @@ bool GSDevice11::Create(const WindowInfo& wi)
|
||||||
|
|
||||||
// device creation
|
// device creation
|
||||||
{
|
{
|
||||||
uint32 flags = D3D11_CREATE_DEVICE_SINGLETHREADED;
|
u32 flags = D3D11_CREATE_DEVICE_SINGLETHREADED;
|
||||||
|
|
||||||
if(enable_debugging)
|
if(enable_debugging)
|
||||||
flags |= D3D11_CREATE_DEVICE_DEBUG;
|
flags |= D3D11_CREATE_DEVICE_DEBUG;
|
||||||
|
@ -536,7 +536,7 @@ void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
|
||||||
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v);
|
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
|
void GSDevice11::ClearRenderTarget(GSTexture* t, u32 c)
|
||||||
{
|
{
|
||||||
if (!t)
|
if (!t)
|
||||||
return;
|
return;
|
||||||
|
@ -552,7 +552,7 @@ void GSDevice11::ClearDepth(GSTexture* t)
|
||||||
m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_DEPTH, 0.0f, 0);
|
m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_DEPTH, 0.0f, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice11::ClearStencil(GSTexture* t, uint8 c)
|
void GSDevice11::ClearStencil(GSTexture* t, u8 c)
|
||||||
{
|
{
|
||||||
if (!t)
|
if (!t)
|
||||||
return;
|
return;
|
||||||
|
@ -714,7 +714,7 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
|
||||||
{
|
{
|
||||||
D3D11_BLEND_DESC bd = {};
|
D3D11_BLEND_DESC bd = {};
|
||||||
|
|
||||||
uint8 write_mask = 0;
|
u8 write_mask = 0;
|
||||||
|
|
||||||
if (red) write_mask |= D3D11_COLOR_WRITE_ENABLE_RED;
|
if (red) write_mask |= D3D11_COLOR_WRITE_ENABLE_RED;
|
||||||
if (green) write_mask |= D3D11_COLOR_WRITE_ENABLE_GREEN;
|
if (green) write_mask |= D3D11_COLOR_WRITE_ENABLE_GREEN;
|
||||||
|
@ -1087,7 +1087,7 @@ bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
*vertex = (uint8*)m.pData + m_vertex.start * stride;
|
*vertex = (u8*)m.pData + m_vertex.start * stride;
|
||||||
|
|
||||||
m_vertex.count = count;
|
m_vertex.count = count;
|
||||||
m_vertex.stride = stride;
|
m_vertex.stride = stride;
|
||||||
|
@ -1109,8 +1109,8 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
|
||||||
m_state.vb = vb;
|
m_state.vb = vb;
|
||||||
m_state.vb_stride = stride;
|
m_state.vb_stride = stride;
|
||||||
|
|
||||||
const uint32 stride2 = stride;
|
const u32 stride2 = stride;
|
||||||
const uint32 offset = 0;
|
const u32 offset = 0;
|
||||||
|
|
||||||
m_ctx->IASetVertexBuffers(0, 1, &vb, &stride2, &offset);
|
m_ctx->IASetVertexBuffers(0, 1, &vb, &stride2, &offset);
|
||||||
}
|
}
|
||||||
|
@ -1135,7 +1135,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
|
||||||
memset(&bd, 0, sizeof(bd));
|
memset(&bd, 0, sizeof(bd));
|
||||||
|
|
||||||
bd.Usage = D3D11_USAGE_DYNAMIC;
|
bd.Usage = D3D11_USAGE_DYNAMIC;
|
||||||
bd.ByteWidth = m_index.limit * sizeof(uint32);
|
bd.ByteWidth = m_index.limit * sizeof(u32);
|
||||||
bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
|
bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
|
||||||
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||||
|
|
||||||
|
@ -1158,7 +1158,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
|
||||||
|
|
||||||
if (SUCCEEDED(m_ctx->Map(m_ib.get(), 0, type, 0, &m)))
|
if (SUCCEEDED(m_ctx->Map(m_ib.get(), 0, type, 0, &m)))
|
||||||
{
|
{
|
||||||
memcpy((uint8*)m.pData + m_index.start * sizeof(uint32), index, count * sizeof(uint32));
|
memcpy((u8*)m.pData + m_index.start * sizeof(u32), index, count * sizeof(u32));
|
||||||
|
|
||||||
m_ctx->Unmap(m_ib.get(), 0);
|
m_ctx->Unmap(m_ib.get(), 0);
|
||||||
}
|
}
|
||||||
|
@ -1297,7 +1297,7 @@ void GSDevice11::PSUpdateShaderState()
|
||||||
m_ctx->PSSetSamplers(0, std::size(m_state.ps_ss), m_state.ps_ss);
|
m_ctx->PSSetSamplers(0, std::size(m_state.ps_ss), m_state.ps_ss);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref)
|
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, u8 sref)
|
||||||
{
|
{
|
||||||
if (m_state.dss != dss || m_state.sref != sref)
|
if (m_state.dss != dss || m_state.sref != sref)
|
||||||
{
|
{
|
||||||
|
@ -1464,7 +1464,7 @@ void GSDevice11::CompileShader(const std::vector<char>& source, const char* fn,
|
||||||
throw GSRecoverableError();
|
throw GSRecoverableError();
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16 GSDevice11::ConvertBlendEnum(uint16 generic)
|
u16 GSDevice11::ConvertBlendEnum(u16 generic)
|
||||||
{
|
{
|
||||||
switch (generic)
|
switch (generic)
|
||||||
{
|
{
|
||||||
|
|
|
@ -73,22 +73,22 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 tme : 1;
|
u32 tme : 1;
|
||||||
uint32 fst : 1;
|
u32 fst : 1;
|
||||||
|
|
||||||
uint32 _free : 30;
|
u32 _free : 30;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint32() const { return key; }
|
operator u32() const { return key; }
|
||||||
|
|
||||||
VSSelector()
|
VSSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
VSSelector(uint32 k)
|
VSSelector(u32 k)
|
||||||
: key(k)
|
: key(k)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -179,25 +179,25 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 iip : 1;
|
u32 iip : 1;
|
||||||
uint32 prim : 2;
|
u32 prim : 2;
|
||||||
uint32 point : 1;
|
u32 point : 1;
|
||||||
uint32 line : 1;
|
u32 line : 1;
|
||||||
uint32 cpu_sprite : 1;
|
u32 cpu_sprite : 1;
|
||||||
|
|
||||||
uint32 _free : 26;
|
u32 _free : 26;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint32() { return key; }
|
operator u32() { return key; }
|
||||||
|
|
||||||
GSSelector()
|
GSSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
GSSelector(uint32 k)
|
GSSelector(u32 k)
|
||||||
: key(k)
|
: key(k)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -211,61 +211,61 @@ public:
|
||||||
{
|
{
|
||||||
// *** Word 1
|
// *** Word 1
|
||||||
// Format
|
// Format
|
||||||
uint32 fmt : 4;
|
u32 fmt : 4;
|
||||||
uint32 dfmt : 2;
|
u32 dfmt : 2;
|
||||||
uint32 depth_fmt : 2;
|
u32 depth_fmt : 2;
|
||||||
// Alpha extension/Correction
|
// Alpha extension/Correction
|
||||||
uint32 aem : 1;
|
u32 aem : 1;
|
||||||
uint32 fba : 1;
|
u32 fba : 1;
|
||||||
// Fog
|
// Fog
|
||||||
uint32 fog : 1;
|
u32 fog : 1;
|
||||||
// Pixel test
|
// Pixel test
|
||||||
uint32 atst : 3;
|
u32 atst : 3;
|
||||||
// Color sampling
|
// Color sampling
|
||||||
uint32 fst : 1;
|
u32 fst : 1;
|
||||||
uint32 tfx : 3;
|
u32 tfx : 3;
|
||||||
uint32 tcc : 1;
|
u32 tcc : 1;
|
||||||
uint32 wms : 2;
|
u32 wms : 2;
|
||||||
uint32 wmt : 2;
|
u32 wmt : 2;
|
||||||
uint32 ltf : 1;
|
u32 ltf : 1;
|
||||||
// Shuffle and fbmask effect
|
// Shuffle and fbmask effect
|
||||||
uint32 shuffle : 1;
|
u32 shuffle : 1;
|
||||||
uint32 read_ba : 1;
|
u32 read_ba : 1;
|
||||||
uint32 fbmask : 1;
|
u32 fbmask : 1;
|
||||||
|
|
||||||
// Blend and Colclip
|
// Blend and Colclip
|
||||||
uint32 hdr : 1;
|
u32 hdr : 1;
|
||||||
uint32 blend_a : 2;
|
u32 blend_a : 2;
|
||||||
uint32 blend_b : 2; // bit30/31
|
u32 blend_b : 2; // bit30/31
|
||||||
uint32 blend_c : 2; // bit0
|
u32 blend_c : 2; // bit0
|
||||||
uint32 blend_d : 2;
|
u32 blend_d : 2;
|
||||||
uint32 clr1 : 1;
|
u32 clr1 : 1;
|
||||||
uint32 colclip : 1;
|
u32 colclip : 1;
|
||||||
uint32 pabe : 1;
|
u32 pabe : 1;
|
||||||
|
|
||||||
// Others ways to fetch the texture
|
// Others ways to fetch the texture
|
||||||
uint32 channel : 3;
|
u32 channel : 3;
|
||||||
|
|
||||||
// Dithering
|
// Dithering
|
||||||
uint32 dither : 2;
|
u32 dither : 2;
|
||||||
|
|
||||||
// Depth clamp
|
// Depth clamp
|
||||||
uint32 zclamp : 1;
|
u32 zclamp : 1;
|
||||||
|
|
||||||
// Hack
|
// Hack
|
||||||
uint32 tcoffsethack : 1;
|
u32 tcoffsethack : 1;
|
||||||
uint32 urban_chaos_hle : 1;
|
u32 urban_chaos_hle : 1;
|
||||||
uint32 tales_of_abyss_hle : 1;
|
u32 tales_of_abyss_hle : 1;
|
||||||
uint32 point_sampler : 1;
|
u32 point_sampler : 1;
|
||||||
uint32 invalid_tex0 : 1; // Lupin the 3rd
|
u32 invalid_tex0 : 1; // Lupin the 3rd
|
||||||
|
|
||||||
uint32 _free : 14;
|
u32 _free : 14;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint64 key;
|
u64 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint64() { return key; }
|
operator u64() { return key; }
|
||||||
|
|
||||||
PSSelector()
|
PSSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
|
@ -279,15 +279,15 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 tau : 1;
|
u32 tau : 1;
|
||||||
uint32 tav : 1;
|
u32 tav : 1;
|
||||||
uint32 ltf : 1;
|
u32 ltf : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint32() { return key & 0x7; }
|
operator u32() { return key & 0x7; }
|
||||||
|
|
||||||
PSSamplerSelector()
|
PSSamplerSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
|
@ -301,17 +301,17 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 ztst : 2;
|
u32 ztst : 2;
|
||||||
uint32 zwe : 1;
|
u32 zwe : 1;
|
||||||
uint32 date : 1;
|
u32 date : 1;
|
||||||
uint32 fba : 1;
|
u32 fba : 1;
|
||||||
uint32 date_one : 1;
|
u32 date_one : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint32() { return key & 0x3f; }
|
operator u32() { return key & 0x3f; }
|
||||||
|
|
||||||
OMDepthStencilSelector()
|
OMDepthStencilSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
|
@ -326,26 +326,26 @@ public:
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
// Color mask
|
// Color mask
|
||||||
uint32 wr : 1;
|
u32 wr : 1;
|
||||||
uint32 wg : 1;
|
u32 wg : 1;
|
||||||
uint32 wb : 1;
|
u32 wb : 1;
|
||||||
uint32 wa : 1;
|
u32 wa : 1;
|
||||||
// Alpha blending
|
// Alpha blending
|
||||||
uint32 blend_index : 7;
|
u32 blend_index : 7;
|
||||||
uint32 abe : 1;
|
u32 abe : 1;
|
||||||
uint32 accu_blend : 1;
|
u32 accu_blend : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
// Color mask
|
// Color mask
|
||||||
uint32 wrgba : 4;
|
u32 wrgba : 4;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint32() { return key & 0x1fff; }
|
operator u32() { return key & 0x1fff; }
|
||||||
|
|
||||||
OMBlendSelector()
|
OMBlendSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
|
@ -405,7 +405,7 @@ private:
|
||||||
void BeforeDraw();
|
void BeforeDraw();
|
||||||
void AfterDraw();
|
void AfterDraw();
|
||||||
|
|
||||||
uint16 ConvertBlendEnum(uint16 generic) final;
|
u16 ConvertBlendEnum(u16 generic) final;
|
||||||
|
|
||||||
wil::com_ptr_nothrow<ID3D11Device> m_dev;
|
wil::com_ptr_nothrow<ID3D11Device> m_dev;
|
||||||
wil::com_ptr_nothrow<ID3D11DeviceContext> m_ctx;
|
wil::com_ptr_nothrow<ID3D11DeviceContext> m_ctx;
|
||||||
|
@ -432,7 +432,7 @@ private:
|
||||||
GSVector2i viewport;
|
GSVector2i viewport;
|
||||||
GSVector4i scissor;
|
GSVector4i scissor;
|
||||||
ID3D11DepthStencilState* dss;
|
ID3D11DepthStencilState* dss;
|
||||||
uint8 sref;
|
u8 sref;
|
||||||
ID3D11BlendState* bs;
|
ID3D11BlendState* bs;
|
||||||
float bf;
|
float bf;
|
||||||
ID3D11RenderTargetView* rt_view;
|
ID3D11RenderTargetView* rt_view;
|
||||||
|
@ -494,16 +494,16 @@ private:
|
||||||
|
|
||||||
// Shaders...
|
// Shaders...
|
||||||
|
|
||||||
std::unordered_map<uint32, GSVertexShader11> m_vs;
|
std::unordered_map<u32, GSVertexShader11> m_vs;
|
||||||
wil::com_ptr_nothrow<ID3D11Buffer> m_vs_cb;
|
wil::com_ptr_nothrow<ID3D11Buffer> m_vs_cb;
|
||||||
std::unordered_map<uint32, wil::com_ptr_nothrow<ID3D11GeometryShader>> m_gs;
|
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11GeometryShader>> m_gs;
|
||||||
wil::com_ptr_nothrow<ID3D11Buffer> m_gs_cb;
|
wil::com_ptr_nothrow<ID3D11Buffer> m_gs_cb;
|
||||||
std::unordered_map<uint64, wil::com_ptr_nothrow<ID3D11PixelShader>> m_ps;
|
std::unordered_map<u64, wil::com_ptr_nothrow<ID3D11PixelShader>> m_ps;
|
||||||
wil::com_ptr_nothrow<ID3D11Buffer> m_ps_cb;
|
wil::com_ptr_nothrow<ID3D11Buffer> m_ps_cb;
|
||||||
std::unordered_map<uint32, wil::com_ptr_nothrow<ID3D11SamplerState>> m_ps_ss;
|
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11SamplerState>> m_ps_ss;
|
||||||
wil::com_ptr_nothrow<ID3D11SamplerState> m_palette_ss;
|
wil::com_ptr_nothrow<ID3D11SamplerState> m_palette_ss;
|
||||||
std::unordered_map<uint32, wil::com_ptr_nothrow<ID3D11DepthStencilState>> m_om_dss;
|
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11DepthStencilState>> m_om_dss;
|
||||||
std::unordered_map<uint32, wil::com_ptr_nothrow<ID3D11BlendState>> m_om_bs;
|
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11BlendState>> m_om_bs;
|
||||||
|
|
||||||
VSConstantBuffer m_vs_cb_cache;
|
VSConstantBuffer m_vs_cb_cache;
|
||||||
GSConstantBuffer m_gs_cb_cache;
|
GSConstantBuffer m_gs_cb_cache;
|
||||||
|
@ -535,9 +535,9 @@ public:
|
||||||
void DrawIndexedPrimitive(int offset, int count) final;
|
void DrawIndexedPrimitive(int offset, int count) final;
|
||||||
|
|
||||||
void ClearRenderTarget(GSTexture* t, const GSVector4& c) final;
|
void ClearRenderTarget(GSTexture* t, const GSVector4& c) final;
|
||||||
void ClearRenderTarget(GSTexture* t, uint32 c) final;
|
void ClearRenderTarget(GSTexture* t, u32 c) final;
|
||||||
void ClearDepth(GSTexture* t) final;
|
void ClearDepth(GSTexture* t) final;
|
||||||
void ClearStencil(GSTexture* t, uint8 c) final;
|
void ClearStencil(GSTexture* t, u8 c) final;
|
||||||
|
|
||||||
GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0) final;
|
GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format = 0, int ps_shader = 0) final;
|
||||||
|
|
||||||
|
@ -571,7 +571,7 @@ public:
|
||||||
void PSUpdateShaderState();
|
void PSUpdateShaderState();
|
||||||
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1);
|
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1);
|
||||||
|
|
||||||
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref);
|
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, u8 sref);
|
||||||
void OMSetBlendState(ID3D11BlendState* bs, float bf);
|
void OMSetBlendState(ID3D11BlendState* bs, float bf);
|
||||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) final;
|
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) final;
|
||||||
|
|
||||||
|
@ -579,7 +579,7 @@ public:
|
||||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||||
void SetupGS(GSSelector sel, const GSConstantBuffer* cb);
|
void SetupGS(GSSelector sel, const GSConstantBuffer* cb);
|
||||||
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
|
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
|
||||||
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix);
|
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
|
||||||
|
|
||||||
ID3D11Device* operator->() { return m_dev.get(); }
|
ID3D11Device* operator->() { return m_dev.get(); }
|
||||||
operator ID3D11Device*() { return m_dev.get(); }
|
operator ID3D11Device*() { return m_dev.get(); }
|
||||||
|
|
|
@ -117,8 +117,8 @@ void GSRendererDX11::EmulateZbuffer()
|
||||||
|
|
||||||
// On the real GS we appear to do clamping on the max z value the format allows.
|
// On the real GS we appear to do clamping on the max z value the format allows.
|
||||||
// Clamping is done after rasterization.
|
// Clamping is done after rasterization.
|
||||||
const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
|
const u32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
|
||||||
const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z;
|
const bool clamp_z = (u32)(GSVector4i(m_vt.m_max.p).z) > max_z;
|
||||||
|
|
||||||
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
|
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
|
||||||
//ps_cb.Af_MaxDepth.y = 1.0f;
|
//ps_cb.Af_MaxDepth.y = 1.0f;
|
||||||
|
@ -197,11 +197,11 @@ void GSRendererDX11::EmulateTextureShuffleAndFbmask()
|
||||||
|
|
||||||
// Please bang my head against the wall!
|
// Please bang my head against the wall!
|
||||||
// 1/ Reduce the frame mask to a 16 bit format
|
// 1/ Reduce the frame mask to a 16 bit format
|
||||||
const uint32& m = m_context->FRAME.FBMSK;
|
const u32& m = m_context->FRAME.FBMSK;
|
||||||
const uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 16) & 0x8000);
|
const u32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 16) & 0x8000);
|
||||||
// FIXME GSVector will be nice here
|
// FIXME GSVector will be nice here
|
||||||
const uint8 rg_mask = fbmask & 0xFF;
|
const u8 rg_mask = fbmask & 0xFF;
|
||||||
const uint8 ba_mask = (fbmask >> 8) & 0xFF;
|
const u8 ba_mask = (fbmask >> 8) & 0xFF;
|
||||||
m_om_bsel.wrgba = 0;
|
m_om_bsel.wrgba = 0;
|
||||||
|
|
||||||
// 2 Select the new mask (Please someone put SSE here)
|
// 2 Select the new mask (Please someone put SSE here)
|
||||||
|
@ -421,10 +421,10 @@ void GSRendererDX11::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache:
|
||||||
// the rendered size of the framebuffer
|
// the rendered size of the framebuffer
|
||||||
|
|
||||||
GSVertex* s = &m_vertex.buff[0];
|
GSVertex* s = &m_vertex.buff[0];
|
||||||
s[0].XYZ.X = (uint16)(m_context->XYOFFSET.OFX + 0);
|
s[0].XYZ.X = (u16)(m_context->XYOFFSET.OFX + 0);
|
||||||
s[1].XYZ.X = (uint16)(m_context->XYOFFSET.OFX + 16384);
|
s[1].XYZ.X = (u16)(m_context->XYOFFSET.OFX + 16384);
|
||||||
s[0].XYZ.Y = (uint16)(m_context->XYOFFSET.OFY + 0);
|
s[0].XYZ.Y = (u16)(m_context->XYOFFSET.OFY + 0);
|
||||||
s[1].XYZ.Y = (uint16)(m_context->XYOFFSET.OFY + 16384);
|
s[1].XYZ.Y = (u16)(m_context->XYOFFSET.OFY + 16384);
|
||||||
|
|
||||||
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
|
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
|
||||||
m_index.tail = 2;
|
m_index.tail = 2;
|
||||||
|
@ -448,7 +448,7 @@ void GSRendererDX11::EmulateBlending()
|
||||||
return;
|
return;
|
||||||
|
|
||||||
m_om_bsel.abe = 1;
|
m_om_bsel.abe = 1;
|
||||||
m_om_bsel.blend_index = uint8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D);
|
m_om_bsel.blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D);
|
||||||
const int blend_flag = m_dev->GetBlendFlags(m_om_bsel.blend_index);
|
const int blend_flag = m_dev->GetBlendFlags(m_om_bsel.blend_index);
|
||||||
|
|
||||||
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
|
// Do the multiplication in shader for blending accumulation: Cs*As + Cd or Cs*Af + Cd
|
||||||
|
@ -567,8 +567,8 @@ void GSRendererDX11::EmulateTextureSampler(const GSTextureCache::Source* tex)
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
|
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
|
||||||
const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
|
const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
|
||||||
|
|
||||||
const uint8 wms = m_context->CLAMP.WMS;
|
const u8 wms = m_context->CLAMP.WMS;
|
||||||
const uint8 wmt = m_context->CLAMP.WMT;
|
const u8 wmt = m_context->CLAMP.WMT;
|
||||||
const bool complex_wms_wmt = !!((wms | wmt) & 2);
|
const bool complex_wms_wmt = !!((wms | wmt) & 2);
|
||||||
|
|
||||||
bool bilinear = m_vt.IsLinear();
|
bool bilinear = m_vt.IsLinear();
|
||||||
|
@ -937,7 +937,7 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou
|
||||||
{
|
{
|
||||||
m_ps_sel.fog = 1;
|
m_ps_sel.fog = 1;
|
||||||
|
|
||||||
const GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.u32[0]);
|
const GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.U32[0]);
|
||||||
// Blend AREF to avoid to load a random value for alpha (dirty cache)
|
// Blend AREF to avoid to load a random value for alpha (dirty cache)
|
||||||
ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF);
|
ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF);
|
||||||
}
|
}
|
||||||
|
@ -948,7 +948,7 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou
|
||||||
// pass to handle the depth based on the alpha test.
|
// pass to handle the depth based on the alpha test.
|
||||||
bool ate_RGBA_then_Z = false;
|
bool ate_RGBA_then_Z = false;
|
||||||
bool ate_RGB_then_ZA = false;
|
bool ate_RGB_then_ZA = false;
|
||||||
uint8 ps_atst = 0;
|
u8 ps_atst = 0;
|
||||||
if (ate_first_pass & ate_second_pass)
|
if (ate_first_pass & ate_second_pass)
|
||||||
{
|
{
|
||||||
// fprintf(stdout, "%d: Complex Alpha Test\n", s_n);
|
// fprintf(stdout, "%d: Complex Alpha Test\n", s_n);
|
||||||
|
@ -1025,7 +1025,7 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou
|
||||||
|
|
||||||
if (!tex->m_palette)
|
if (!tex->m_palette)
|
||||||
{
|
{
|
||||||
const uint16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal;
|
const u16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal;
|
||||||
m_tc->AttachPaletteToSource(tex, pal, true);
|
m_tc->AttachPaletteToSource(tex, pal, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1045,7 +1045,7 @@ void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sou
|
||||||
|
|
||||||
SetupIA(sx, sy);
|
SetupIA(sx, sy);
|
||||||
|
|
||||||
const uint8 afix = m_context->ALPHA.FIX;
|
const u8 afix = m_context->ALPHA.FIX;
|
||||||
dev->SetupOM(m_om_dssel, m_om_bsel, afix);
|
dev->SetupOM(m_om_dssel, m_om_bsel, afix);
|
||||||
dev->SetupVS(m_vs_sel, &vs_cb);
|
dev->SetupVS(m_vs_sel, &vs_cb);
|
||||||
dev->SetupGS(m_gs_sel, &gs_cb);
|
dev->SetupGS(m_gs_sel, &gs_cb);
|
||||||
|
|
|
@ -80,7 +80,7 @@ bool GSTexture11::Map(GSMap& m, const GSVector4i* r, int layer)
|
||||||
|
|
||||||
if (SUCCEEDED(m_ctx->Map(m_texture.get(), subresource, D3D11_MAP_READ_WRITE, 0, &map)))
|
if (SUCCEEDED(m_ctx->Map(m_texture.get(), subresource, D3D11_MAP_READ_WRITE, 0, &map)))
|
||||||
{
|
{
|
||||||
m.bits = (uint8*)map.pData;
|
m.bits = (u8*)map.pData;
|
||||||
m.pitch = (int)map.RowPitch;
|
m.pitch = (int)map.RowPitch;
|
||||||
|
|
||||||
m_layer = layer;
|
m_layer = layer;
|
||||||
|
@ -152,14 +152,14 @@ bool GSTexture11::Save(const std::string& fn)
|
||||||
m_ctx->Unmap(dst.get(), 0);
|
m_ctx->Unmap(dst.get(), 0);
|
||||||
});
|
});
|
||||||
|
|
||||||
const uint8* s = static_cast<const uint8*>(sm.pData);
|
const u8* s = static_cast<const u8*>(sm.pData);
|
||||||
uint8* d = static_cast<uint8*>(dm.pData);
|
u8* d = static_cast<u8*>(dm.pData);
|
||||||
|
|
||||||
for (uint32 y = 0; y < desc.Height; y++, s += sm.RowPitch, d += dm.RowPitch)
|
for (u32 y = 0; y < desc.Height; y++, s += sm.RowPitch, d += dm.RowPitch)
|
||||||
{
|
{
|
||||||
for (uint32 x = 0; x < desc.Width; x++)
|
for (u32 x = 0; x < desc.Width; x++)
|
||||||
{
|
{
|
||||||
reinterpret_cast<uint32*>(d)[x] = static_cast<uint32>(ldexpf(reinterpret_cast<const float*>(s)[x * 2], 32));
|
reinterpret_cast<u32*>(d)[x] = static_cast<u32>(ldexpf(reinterpret_cast<const float*>(s)[x * 2], 32));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -193,7 +193,7 @@ bool GSTexture11::Save(const std::string& fn)
|
||||||
}
|
}
|
||||||
|
|
||||||
int compression = theApp.GetConfigI("png_compression_level");
|
int compression = theApp.GetConfigI("png_compression_level");
|
||||||
bool success = GSPng::Save(format, fn, static_cast<uint8*>(sm.pData), desc.Width, desc.Height, sm.RowPitch, compression);
|
bool success = GSPng::Save(format, fn, static_cast<u8*>(sm.pData), desc.Width, desc.Height, sm.RowPitch, compression);
|
||||||
|
|
||||||
m_ctx->Unmap(res.get(), 0);
|
m_ctx->Unmap(res.get(), 0);
|
||||||
|
|
||||||
|
|
|
@ -279,7 +279,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
|
||||||
PSSetShader(i->second.get(), m_ps_cb.get());
|
PSSetShader(i->second.get(), m_ps_cb.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 afix)
|
void GSDevice11::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix)
|
||||||
{
|
{
|
||||||
auto i = std::as_const(m_om_dss).find(dssel);
|
auto i = std::as_const(m_om_dss).find(dssel);
|
||||||
|
|
||||||
|
|
|
@ -188,7 +188,7 @@ GSRendererHW::~GSRendererHW()
|
||||||
delete m_tc;
|
delete m_tc;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererHW::SetGameCRC(uint32 crc, int options)
|
void GSRendererHW::SetGameCRC(u32 crc, int options)
|
||||||
{
|
{
|
||||||
GSRenderer::SetGameCRC(crc, options);
|
GSRenderer::SetGameCRC(crc, options);
|
||||||
|
|
||||||
|
@ -389,7 +389,7 @@ void GSRendererHW::Lines2Sprites()
|
||||||
int i = (int)count * 2 - 4;
|
int i = (int)count * 2 - 4;
|
||||||
GSVertex* s = &m_vertex.buff[count - 2];
|
GSVertex* s = &m_vertex.buff[count - 2];
|
||||||
GSVertex* q = &m_vertex.buff[count * 2 - 4];
|
GSVertex* q = &m_vertex.buff[count * 2 - 4];
|
||||||
uint32* RESTRICT index = &m_index.buff[count * 3 - 6];
|
u32* RESTRICT index = &m_index.buff[count * 3 - 6];
|
||||||
|
|
||||||
for (; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
|
for (; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
|
||||||
{
|
{
|
||||||
|
@ -402,13 +402,13 @@ void GSRendererHW::Lines2Sprites()
|
||||||
|
|
||||||
if (PRIM->TME && !PRIM->FST)
|
if (PRIM->TME && !PRIM->FST)
|
||||||
{
|
{
|
||||||
GSVector4 st0 = GSVector4::loadl(&v0.ST.u64);
|
GSVector4 st0 = GSVector4::loadl(&v0.ST.U64);
|
||||||
GSVector4 st1 = GSVector4::loadl(&v1.ST.u64);
|
GSVector4 st1 = GSVector4::loadl(&v1.ST.U64);
|
||||||
GSVector4 Q = GSVector4(v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q);
|
GSVector4 Q = GSVector4(v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q, v1.RGBAQ.Q);
|
||||||
GSVector4 st = st0.upld(st1) / Q;
|
GSVector4 st = st0.upld(st1) / Q;
|
||||||
|
|
||||||
GSVector4::storel(&v0.ST.u64, st);
|
GSVector4::storel(&v0.ST.U64, st);
|
||||||
GSVector4::storeh(&v1.ST.u64, st);
|
GSVector4::storeh(&v1.ST.U64, st);
|
||||||
|
|
||||||
v0.RGBAQ.Q = 1.0f;
|
v0.RGBAQ.Q = 1.0f;
|
||||||
v1.RGBAQ.Q = 1.0f;
|
v1.RGBAQ.Q = 1.0f;
|
||||||
|
@ -419,7 +419,7 @@ void GSRendererHW::Lines2Sprites()
|
||||||
|
|
||||||
// swap x, s, u
|
// swap x, s, u
|
||||||
|
|
||||||
const uint16 x = v0.XYZ.X;
|
const u16 x = v0.XYZ.X;
|
||||||
v0.XYZ.X = v1.XYZ.X;
|
v0.XYZ.X = v1.XYZ.X;
|
||||||
v1.XYZ.X = x;
|
v1.XYZ.X = x;
|
||||||
|
|
||||||
|
@ -427,7 +427,7 @@ void GSRendererHW::Lines2Sprites()
|
||||||
v0.ST.S = v1.ST.S;
|
v0.ST.S = v1.ST.S;
|
||||||
v1.ST.S = s;
|
v1.ST.S = s;
|
||||||
|
|
||||||
const uint16 u = v0.U;
|
const u16 u = v0.U;
|
||||||
v0.U = v1.U;
|
v0.U = v1.U;
|
||||||
v1.U = u;
|
v1.U = u;
|
||||||
|
|
||||||
|
@ -447,9 +447,9 @@ void GSRendererHW::Lines2Sprites()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRendererHW::EmulateAtst(GSVector4& FogColor_AREF, uint8& ps_atst, const bool pass_2)
|
void GSRendererHW::EmulateAtst(GSVector4& FogColor_AREF, u8& ps_atst, const bool pass_2)
|
||||||
{
|
{
|
||||||
static const uint32 inverted_atst[] = {ATST_ALWAYS, ATST_NEVER, ATST_GEQUAL, ATST_GREATER, ATST_NOTEQUAL, ATST_LESS, ATST_LEQUAL, ATST_EQUAL};
|
static const u32 inverted_atst[] = {ATST_ALWAYS, ATST_NEVER, ATST_GEQUAL, ATST_GREATER, ATST_NOTEQUAL, ATST_LESS, ATST_LEQUAL, ATST_EQUAL};
|
||||||
|
|
||||||
if (!m_context->TEST.ATE)
|
if (!m_context->TEST.ATE)
|
||||||
return;
|
return;
|
||||||
|
@ -570,10 +570,10 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
||||||
GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V);
|
GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V);
|
||||||
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
|
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
|
||||||
|
|
||||||
v[i].XYZ.Y = (uint16)tmp.x;
|
v[i].XYZ.Y = (u16)tmp.x;
|
||||||
v[i].V = (uint16)tmp.y;
|
v[i].V = (u16)tmp.y;
|
||||||
v[i + 1].XYZ.Y = (uint16)tmp.z;
|
v[i + 1].XYZ.Y = (u16)tmp.z;
|
||||||
v[i + 1].V = (uint16)tmp.w;
|
v[i + 1].V = (u16)tmp.w;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -603,9 +603,9 @@ void GSRendererHW::ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba)
|
||||||
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
|
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
|
||||||
|
|
||||||
//fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y);
|
//fprintf(stderr, "Before %d, After %d\n", v[i + 1].XYZ.Y, tmp.y);
|
||||||
v[i].XYZ.Y = (uint16)tmp.x;
|
v[i].XYZ.Y = (u16)tmp.x;
|
||||||
v[i].ST.T /= 2.0f;
|
v[i].ST.T /= 2.0f;
|
||||||
v[i + 1].XYZ.Y = (uint16)tmp.y;
|
v[i + 1].XYZ.Y = (u16)tmp.y;
|
||||||
v[i + 1].ST.T /= 2.0f;
|
v[i + 1].ST.T /= 2.0f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -757,15 +757,15 @@ void GSRendererHW::MergeSprite(GSTextureCache::Source* tex)
|
||||||
// Replace all sprite with a single fullscreen sprite.
|
// Replace all sprite with a single fullscreen sprite.
|
||||||
GSVertex* s = &m_vertex.buff[0];
|
GSVertex* s = &m_vertex.buff[0];
|
||||||
|
|
||||||
s[0].XYZ.X = static_cast<uint16>((16.0f * m_vt.m_min.p.x) + m_context->XYOFFSET.OFX);
|
s[0].XYZ.X = static_cast<u16>((16.0f * m_vt.m_min.p.x) + m_context->XYOFFSET.OFX);
|
||||||
s[1].XYZ.X = static_cast<uint16>((16.0f * m_vt.m_max.p.x) + m_context->XYOFFSET.OFX);
|
s[1].XYZ.X = static_cast<u16>((16.0f * m_vt.m_max.p.x) + m_context->XYOFFSET.OFX);
|
||||||
s[0].XYZ.Y = static_cast<uint16>((16.0f * m_vt.m_min.p.y) + m_context->XYOFFSET.OFY);
|
s[0].XYZ.Y = static_cast<u16>((16.0f * m_vt.m_min.p.y) + m_context->XYOFFSET.OFY);
|
||||||
s[1].XYZ.Y = static_cast<uint16>((16.0f * m_vt.m_max.p.y) + m_context->XYOFFSET.OFY);
|
s[1].XYZ.Y = static_cast<u16>((16.0f * m_vt.m_max.p.y) + m_context->XYOFFSET.OFY);
|
||||||
|
|
||||||
s[0].U = static_cast<uint16>(16.0f * m_vt.m_min.t.x);
|
s[0].U = static_cast<u16>(16.0f * m_vt.m_min.t.x);
|
||||||
s[0].V = static_cast<uint16>(16.0f * m_vt.m_min.t.y);
|
s[0].V = static_cast<u16>(16.0f * m_vt.m_min.t.y);
|
||||||
s[1].U = static_cast<uint16>(16.0f * m_vt.m_max.t.x);
|
s[1].U = static_cast<u16>(16.0f * m_vt.m_max.t.x);
|
||||||
s[1].V = static_cast<uint16>(16.0f * m_vt.m_max.t.y);
|
s[1].V = static_cast<u16>(16.0f * m_vt.m_max.t.y);
|
||||||
|
|
||||||
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
|
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
|
||||||
m_index.tail = 2;
|
m_index.tail = 2;
|
||||||
|
@ -791,10 +791,10 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
||||||
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
|
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16 GSRendererHW::Interpolate_UV(float alpha, int t0, int t1)
|
u16 GSRendererHW::Interpolate_UV(float alpha, int t0, int t1)
|
||||||
{
|
{
|
||||||
const float t = (1.0f - alpha) * t0 + alpha * t1;
|
const float t = (1.0f - alpha) * t0 + alpha * t1;
|
||||||
return (uint16)t & ~0xF; // cheap rounding
|
return (u16)t & ~0xF; // cheap rounding
|
||||||
}
|
}
|
||||||
|
|
||||||
float GSRendererHW::alpha0(int L, int X0, int X1)
|
float GSRendererHW::alpha0(int L, int X0, int X1)
|
||||||
|
@ -899,28 +899,28 @@ void GSRendererHW::SwSpriteRender()
|
||||||
const bool fb_mask_enabled = m_context->FRAME.FBMSK != 0x0;
|
const bool fb_mask_enabled = m_context->FRAME.FBMSK != 0x0;
|
||||||
const GSVector4i fb_mask = GSVector4i(m_context->FRAME.FBMSK).u8to16(); // 0x00AA00BB00GG00RR00AA00BB00GG00RR
|
const GSVector4i fb_mask = GSVector4i(m_context->FRAME.FBMSK).u8to16(); // 0x00AA00BB00GG00RR00AA00BB00GG00RR
|
||||||
|
|
||||||
const uint8 tex0_tfx = m_context->TEX0.TFX;
|
const u8 tex0_tfx = m_context->TEX0.TFX;
|
||||||
const uint8 tex0_tcc = m_context->TEX0.TCC;
|
const u8 tex0_tcc = m_context->TEX0.TCC;
|
||||||
const uint8 alpha_b = m_context->ALPHA.B;
|
const u8 alpha_b = m_context->ALPHA.B;
|
||||||
const uint8 alpha_c = m_context->ALPHA.C;
|
const u8 alpha_c = m_context->ALPHA.C;
|
||||||
const uint8 alpha_fix = m_context->ALPHA.FIX;
|
const u8 alpha_fix = m_context->ALPHA.FIX;
|
||||||
|
|
||||||
for (int y = 0; y < h; y++, ++sy, ++dy)
|
for (int y = 0; y < h; y++, ++sy, ++dy)
|
||||||
{
|
{
|
||||||
auto spa = texture_mapping_enabled ? spo.paMulti(m_mem.m_vm32, sx, sy) : GSOffset::PAPtrHelper<uint32>();
|
auto spa = texture_mapping_enabled ? spo.paMulti(m_mem.m_vm32, sx, sy) : GSOffset::PAPtrHelper<u32>();
|
||||||
auto dpa = dpo.paMulti(m_mem.m_vm32, dx, dy);
|
auto dpa = dpo.paMulti(m_mem.m_vm32, dx, dy);
|
||||||
|
|
||||||
ASSERT(w % 2 == 0);
|
ASSERT(w % 2 == 0);
|
||||||
|
|
||||||
for (int x = 0; x < w; x += 2)
|
for (int x = 0; x < w; x += 2)
|
||||||
{
|
{
|
||||||
uint32* di = dpa.value(x);
|
u32* di = dpa.value(x);
|
||||||
ASSERT(*di + 1 == *dpa.value(x + 1)); // Destination pixel pair is adjacent in memory
|
ASSERT(*di + 1 == *dpa.value(x + 1)); // Destination pixel pair is adjacent in memory
|
||||||
|
|
||||||
GSVector4i sc;
|
GSVector4i sc;
|
||||||
if (texture_mapping_enabled)
|
if (texture_mapping_enabled)
|
||||||
{
|
{
|
||||||
uint32* si = spa.value(x);
|
u32* si = spa.value(x);
|
||||||
// Read 2 source pixel colors
|
// Read 2 source pixel colors
|
||||||
ASSERT((*si + 1) == *spa.value(x + 1)); // Source pixel pair is adjacent in memory
|
ASSERT((*si + 1) == *spa.value(x + 1)); // Source pixel pair is adjacent in memory
|
||||||
sc = GSVector4i::loadl(si).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
sc = GSVector4i::loadl(si).u8to16(); // 0x00AA00BB00GG00RR00aa00bb00gg00rr
|
||||||
|
@ -1079,8 +1079,8 @@ void GSRendererHW::RoundSpriteOffset()
|
||||||
const int Lx = (v[i + 1].XYZ.X - v[i].XYZ.X);
|
const int Lx = (v[i + 1].XYZ.X - v[i].XYZ.X);
|
||||||
const float ax0 = alpha0(Lx, X0, X1);
|
const float ax0 = alpha0(Lx, X0, X1);
|
||||||
const float ax1 = alpha1(Lx, X0, X1);
|
const float ax1 = alpha1(Lx, X0, X1);
|
||||||
const uint16 tx0 = Interpolate_UV(ax0, v[i].U, v[i + 1].U);
|
const u16 tx0 = Interpolate_UV(ax0, v[i].U, v[i + 1].U);
|
||||||
const uint16 tx1 = Interpolate_UV(ax1, v[i].U, v[i + 1].U);
|
const u16 tx1 = Interpolate_UV(ax1, v[i].U, v[i + 1].U);
|
||||||
#ifdef DEBUG_U
|
#ifdef DEBUG_U
|
||||||
if (debug)
|
if (debug)
|
||||||
{
|
{
|
||||||
|
@ -1096,8 +1096,8 @@ void GSRendererHW::RoundSpriteOffset()
|
||||||
const int Ly = (v[i + 1].XYZ.Y - v[i].XYZ.Y);
|
const int Ly = (v[i + 1].XYZ.Y - v[i].XYZ.Y);
|
||||||
const float ay0 = alpha0(Ly, Y0, Y1);
|
const float ay0 = alpha0(Ly, Y0, Y1);
|
||||||
const float ay1 = alpha1(Ly, Y0, Y1);
|
const float ay1 = alpha1(Ly, Y0, Y1);
|
||||||
const uint16 ty0 = Interpolate_UV(ay0, v[i].V, v[i + 1].V);
|
const u16 ty0 = Interpolate_UV(ay0, v[i].V, v[i + 1].V);
|
||||||
const uint16 ty1 = Interpolate_UV(ay1, v[i].V, v[i + 1].V);
|
const u16 ty1 = Interpolate_UV(ay1, v[i].V, v[i + 1].V);
|
||||||
#ifdef DEBUG_V
|
#ifdef DEBUG_V
|
||||||
if (debug)
|
if (debug)
|
||||||
{
|
{
|
||||||
|
@ -1206,8 +1206,8 @@ void GSRendererHW::Draw()
|
||||||
const GIFRegFRAME FRAME = context->FRAME;
|
const GIFRegFRAME FRAME = context->FRAME;
|
||||||
const GIFRegZBUF ZBUF = context->ZBUF;
|
const GIFRegZBUF ZBUF = context->ZBUF;
|
||||||
|
|
||||||
uint32 fm = context->FRAME.FBMSK;
|
u32 fm = context->FRAME.FBMSK;
|
||||||
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
u32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||||
|
|
||||||
// Note required to compute TryAlphaTest below. So do it now.
|
// Note required to compute TryAlphaTest below. So do it now.
|
||||||
if (PRIM->TME && tex_psm.pal > 0)
|
if (PRIM->TME && tex_psm.pal > 0)
|
||||||
|
@ -1469,7 +1469,7 @@ void GSRendererHW::Draw()
|
||||||
|
|
||||||
if (s_dump)
|
if (s_dump)
|
||||||
{
|
{
|
||||||
const uint64 frame = m_perfmon.GetFrame();
|
const u64 frame = m_perfmon.GetFrame();
|
||||||
|
|
||||||
std::string s;
|
std::string s;
|
||||||
|
|
||||||
|
@ -1651,7 +1651,7 @@ void GSRendererHW::Draw()
|
||||||
|
|
||||||
if (s_dump)
|
if (s_dump)
|
||||||
{
|
{
|
||||||
const uint64 frame = m_perfmon.GetFrame();
|
const u64 frame = m_perfmon.GetFrame();
|
||||||
|
|
||||||
std::string s;
|
std::string s;
|
||||||
|
|
||||||
|
@ -1715,7 +1715,7 @@ GSRendererHW::Hacks::Hacks()
|
||||||
|
|
||||||
void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game)
|
void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game)
|
||||||
{
|
{
|
||||||
const uint32 hash = (uint32)((game.region << 24) | game.title);
|
const u32 hash = (u32)((game.region << 24) | game.title);
|
||||||
|
|
||||||
m_oi = m_oi_map[hash];
|
m_oi = m_oi_map[hash];
|
||||||
m_oo = m_oo_map[hash];
|
m_oo = m_oo_map[hash];
|
||||||
|
@ -1744,7 +1744,7 @@ void GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds)
|
||||||
//const GSLocalMemory::psm_t& depth_psm = GSLocalMemory::m_psm[m_context->ZBUF.PSM];
|
//const GSLocalMemory::psm_t& depth_psm = GSLocalMemory::m_psm[m_context->ZBUF.PSM];
|
||||||
|
|
||||||
// Z and color must be constant and the same
|
// Z and color must be constant and the same
|
||||||
if (m_vt.m_eq.rgba != 0xFFFF || !m_vt.m_eq.z || v[1].XYZ.Z != v[1].RGBAQ.u32[0])
|
if (m_vt.m_eq.rgba != 0xFFFF || !m_vt.m_eq.z || v[1].XYZ.Z != v[1].RGBAQ.U32[0])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Format doesn't have the same size. It smells fishy (xmen...)
|
// Format doesn't have the same size. It smells fishy (xmen...)
|
||||||
|
@ -1752,12 +1752,12 @@ void GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds)
|
||||||
// return;
|
// return;
|
||||||
|
|
||||||
// Size of the current draw
|
// Size of the current draw
|
||||||
const uint32 w_pages = static_cast<uint32>(roundf(m_vt.m_max.p.x / frame_psm.pgs.x));
|
const u32 w_pages = static_cast<u32>(roundf(m_vt.m_max.p.x / frame_psm.pgs.x));
|
||||||
const uint32 h_pages = static_cast<uint32>(roundf(m_vt.m_max.p.y / frame_psm.pgs.y));
|
const u32 h_pages = static_cast<u32>(roundf(m_vt.m_max.p.y / frame_psm.pgs.y));
|
||||||
const uint32 written_pages = w_pages * h_pages;
|
const u32 written_pages = w_pages * h_pages;
|
||||||
|
|
||||||
// Frame and depth pointer can be inverted
|
// Frame and depth pointer can be inverted
|
||||||
uint32 base = 0, half = 0;
|
u32 base = 0, half = 0;
|
||||||
if (m_context->FRAME.FBP > m_context->ZBUF.ZBP)
|
if (m_context->FRAME.FBP > m_context->ZBUF.ZBP)
|
||||||
{
|
{
|
||||||
base = m_context->ZBUF.ZBP;
|
base = m_context->ZBUF.ZBP;
|
||||||
|
@ -1772,7 +1772,7 @@ void GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds)
|
||||||
// If both buffers are side by side we can expect a fast clear in on-going
|
// If both buffers are side by side we can expect a fast clear in on-going
|
||||||
if (half <= (base + written_pages))
|
if (half <= (base + written_pages))
|
||||||
{
|
{
|
||||||
const uint32 color = v[1].RGBAQ.u32[0];
|
const u32 color = v[1].RGBAQ.U32[0];
|
||||||
const bool clear_depth = (m_context->FRAME.FBP > m_context->ZBUF.ZBP);
|
const bool clear_depth = (m_context->FRAME.FBP > m_context->ZBUF.ZBP);
|
||||||
|
|
||||||
GL_INS("OI_DoubleHalfClear:%s: base %x half %x. w_pages %d h_pages %d fbw %d. Color %x",
|
GL_INS("OI_DoubleHalfClear:%s: base %x half %x. w_pages %d h_pages %d fbw %d. Color %x",
|
||||||
|
@ -1953,7 +1953,7 @@ bool GSRendererHW::OI_BigMuthaTruckers(GSTexture* rt, GSTexture* ds, GSTextureCa
|
||||||
|
|
||||||
const size_t count = m_vertex.next;
|
const size_t count = m_vertex.next;
|
||||||
GSVertex* v = &m_vertex.buff[0];
|
GSVertex* v = &m_vertex.buff[0];
|
||||||
const uint16 offset = (uint16)m_r.y * 16;
|
const u16 offset = (u16)m_r.y * 16;
|
||||||
|
|
||||||
for (size_t i = 0; i < count; i++)
|
for (size_t i = 0; i < count; i++)
|
||||||
v[i].V += offset;
|
v[i].V += offset;
|
||||||
|
@ -1978,7 +1978,7 @@ bool GSRendererHW::OI_DBZBTGames(GSTexture* rt, GSTexture* ds, GSTextureCache::S
|
||||||
|
|
||||||
bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
|
bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
|
||||||
{
|
{
|
||||||
static uint32* video = NULL;
|
static u32* video = NULL;
|
||||||
static size_t lines = 0;
|
static size_t lines = 0;
|
||||||
|
|
||||||
if (lines == 0)
|
if (lines == 0)
|
||||||
|
@ -1997,7 +1997,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
||||||
// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454
|
// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454
|
||||||
|
|
||||||
if (!video)
|
if (!video)
|
||||||
video = new uint32[512 * 512];
|
video = new u32[512 * 512];
|
||||||
|
|
||||||
const int ox = m_context->XYOFFSET.OFX - 8;
|
const int ox = m_context->XYOFFSET.OFX - 8;
|
||||||
const int oy = m_context->XYOFFSET.OFY - 8;
|
const int oy = m_context->XYOFFSET.OFY - 8;
|
||||||
|
@ -2012,7 +2012,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
||||||
if (x < 0 || x >= 448 || y < 0 || y >= (int)lines)
|
if (x < 0 || x >= 448 || y < 0 || y >= (int)lines)
|
||||||
return false; // le sigh
|
return false; // le sigh
|
||||||
|
|
||||||
video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0];
|
video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.U32[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -2062,9 +2062,9 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
||||||
|
|
||||||
bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
|
bool GSRendererHW::OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
|
||||||
{
|
{
|
||||||
const uint32 FBP = m_context->FRAME.Block();
|
const u32 FBP = m_context->FRAME.Block();
|
||||||
const uint32 ZBP = m_context->ZBUF.Block();
|
const u32 ZBP = m_context->ZBUF.Block();
|
||||||
const uint32 TBP = m_context->TEX0.TBP0;
|
const u32 TBP = m_context->TEX0.TBP0;
|
||||||
|
|
||||||
if ((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S)
|
if ((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && PRIM->TME && TBP == 0x01a00 && m_context->TEX0.PSM == PSM_PSMCT16S)
|
||||||
{
|
{
|
||||||
|
@ -2086,15 +2086,15 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
|
||||||
|
|
||||||
for (int i = (int)m_vertex.next; i > 0; i--, v++)
|
for (int i = (int)m_vertex.next; i > 0; i--, v++)
|
||||||
{
|
{
|
||||||
const uint32 c = v->RGBAQ.u32[0];
|
const u32 c = v->RGBAQ.U32[0];
|
||||||
|
|
||||||
const uint32 r = (c >> 0) & 0xff;
|
const u32 r = (c >> 0) & 0xff;
|
||||||
const uint32 g = (c >> 8) & 0xff;
|
const u32 g = (c >> 8) & 0xff;
|
||||||
const uint32 b = (c >> 16) & 0xff;
|
const u32 b = (c >> 16) & 0xff;
|
||||||
|
|
||||||
if (r == 0 && g != 0 && b != 0)
|
if (r == 0 && g != 0 && b != 0)
|
||||||
{
|
{
|
||||||
v->RGBAQ.u32[0] = (c & 0xffffff00) | ((g + b + 1) >> 1);
|
v->RGBAQ.U32[0] = (c & 0xffffff00) | ((g + b + 1) >> 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2107,8 +2107,8 @@ bool GSRendererHW::OI_RozenMaidenGebetGarden(GSTexture* rt, GSTexture* ds, GSTex
|
||||||
{
|
{
|
||||||
if (!PRIM->TME)
|
if (!PRIM->TME)
|
||||||
{
|
{
|
||||||
const uint32 FBP = m_context->FRAME.Block();
|
const u32 FBP = m_context->FRAME.Block();
|
||||||
const uint32 ZBP = m_context->ZBUF.Block();
|
const u32 ZBP = m_context->ZBUF.Block();
|
||||||
|
|
||||||
if (FBP == 0x008c0 && ZBP == 0x01a40)
|
if (FBP == 0x008c0 && ZBP == 0x01a40)
|
||||||
{
|
{
|
||||||
|
@ -2193,8 +2193,8 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
|
||||||
{
|
{
|
||||||
if (m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME)
|
if (m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME)
|
||||||
{
|
{
|
||||||
const uint32 FBP = m_context->FRAME.Block();
|
const u32 FBP = m_context->FRAME.Block();
|
||||||
const uint32 FBW = m_context->FRAME.FBW;
|
const u32 FBW = m_context->FRAME.FBW;
|
||||||
|
|
||||||
if (FBP >= 0x03f40 && (FBP & 0x1f) == 0)
|
if (FBP >= 0x03f40 && (FBP & 0x1f) == 0)
|
||||||
{
|
{
|
||||||
|
@ -2204,12 +2204,12 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
|
||||||
|
|
||||||
for (int i = 0; i < 16; i++, v++)
|
for (int i = 0; i < 16; i++, v++)
|
||||||
{
|
{
|
||||||
uint32 c = v->RGBAQ.u32[0];
|
u32 c = v->RGBAQ.U32[0];
|
||||||
const uint32 a = c >> 24;
|
const u32 a = c >> 24;
|
||||||
|
|
||||||
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
|
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
|
||||||
|
|
||||||
v->RGBAQ.u32[0] = c;
|
v->RGBAQ.U32[0] = c;
|
||||||
|
|
||||||
m_mem.WritePixel32(i & 7, i >> 3, c, FBP, FBW);
|
m_mem.WritePixel32(i & 7, i >> 3, c, FBP, FBW);
|
||||||
}
|
}
|
||||||
|
@ -2224,12 +2224,12 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
|
||||||
|
|
||||||
for (int i = 0; i < 256; i++, v++)
|
for (int i = 0; i < 256; i++, v++)
|
||||||
{
|
{
|
||||||
uint32 c = v->RGBAQ.u32[0];
|
u32 c = v->RGBAQ.U32[0];
|
||||||
const uint32 a = c >> 24;
|
const u32 a = c >> 24;
|
||||||
|
|
||||||
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
|
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
|
||||||
|
|
||||||
v->RGBAQ.u32[0] = c;
|
v->RGBAQ.U32[0] = c;
|
||||||
|
|
||||||
m_mem.WritePixel32(i & 15, i >> 4, c, FBP, FBW);
|
m_mem.WritePixel32(i & 15, i >> 4, c, FBP, FBW);
|
||||||
}
|
}
|
||||||
|
@ -2329,7 +2329,7 @@ void GSRendererHW::OO_MajokkoALaMode2()
|
||||||
{
|
{
|
||||||
// palette readback
|
// palette readback
|
||||||
|
|
||||||
const uint32 FBP = m_context->FRAME.Block();
|
const u32 FBP = m_context->FRAME.Block();
|
||||||
|
|
||||||
if (!PRIM->TME && FBP == 0x03f40)
|
if (!PRIM->TME && FBP == 0x03f40)
|
||||||
{
|
{
|
||||||
|
@ -2349,7 +2349,7 @@ bool GSRendererHW::CU_MajokkoALaMode2()
|
||||||
{
|
{
|
||||||
// palette should stay 16 x 16
|
// palette should stay 16 x 16
|
||||||
|
|
||||||
const uint32 FBP = m_context->FRAME.Block();
|
const u32 FBP = m_context->FRAME.Block();
|
||||||
|
|
||||||
return FBP != 0x03f40;
|
return FBP != 0x03f40;
|
||||||
}
|
}
|
||||||
|
@ -2358,7 +2358,7 @@ bool GSRendererHW::CU_TalesOfAbyss()
|
||||||
{
|
{
|
||||||
// full image blur and brightening
|
// full image blur and brightening
|
||||||
|
|
||||||
const uint32 FBP = m_context->FRAME.Block();
|
const u32 FBP = m_context->FRAME.Block();
|
||||||
|
|
||||||
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
|
return FBP != 0x036e0 && FBP != 0x03560 && FBP != 0x038e0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,11 +84,11 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
class FunctionMap : public GSFunctionMap<uint32, T>
|
class FunctionMap : public GSFunctionMap<u32, T>
|
||||||
{
|
{
|
||||||
std::list<HackEntry<T>>& m_tbl;
|
std::list<HackEntry<T>>& m_tbl;
|
||||||
|
|
||||||
T GetDefaultFunction(uint32 key)
|
T GetDefaultFunction(u32 key)
|
||||||
{
|
{
|
||||||
CRC::Title title = (CRC::Title)(key & 0xffffff);
|
CRC::Title title = (CRC::Title)(key & 0xffffff);
|
||||||
CRC::Region region = (CRC::Region)(key >> 24);
|
CRC::Region region = (CRC::Region)(key >> 24);
|
||||||
|
@ -132,7 +132,7 @@ private:
|
||||||
|
|
||||||
#pragma endregion
|
#pragma endregion
|
||||||
|
|
||||||
uint16 Interpolate_UV(float alpha, int t0, int t1);
|
u16 Interpolate_UV(float alpha, int t0, int t1);
|
||||||
float alpha0(int L, int X0, int X1);
|
float alpha0(int L, int X0, int X1);
|
||||||
float alpha1(int L, int X0, int X1);
|
float alpha1(int L, int X0, int X1);
|
||||||
void SwSpriteRender();
|
void SwSpriteRender();
|
||||||
|
@ -168,13 +168,13 @@ public:
|
||||||
GSRendererHW(GSTextureCache* tc);
|
GSRendererHW(GSTextureCache* tc);
|
||||||
virtual ~GSRendererHW();
|
virtual ~GSRendererHW();
|
||||||
|
|
||||||
void SetGameCRC(uint32 crc, int options);
|
void SetGameCRC(u32 crc, int options);
|
||||||
bool CanUpscale();
|
bool CanUpscale();
|
||||||
int GetUpscaleMultiplier();
|
int GetUpscaleMultiplier();
|
||||||
GSVector2i GetCustomResolution();
|
GSVector2i GetCustomResolution();
|
||||||
void SetScaling();
|
void SetScaling();
|
||||||
void Lines2Sprites();
|
void Lines2Sprites();
|
||||||
void EmulateAtst(GSVector4& FogColor_AREF, uint8& atst, const bool pass_2);
|
void EmulateAtst(GSVector4& FogColor_AREF, u8& atst, const bool pass_2);
|
||||||
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba);
|
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba);
|
||||||
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
|
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
|
||||||
GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize);
|
GSVector4i ComputeBoundingBox(const GSVector2& rtscale, const GSVector2i& rtsize);
|
||||||
|
|
|
@ -54,7 +54,7 @@ GSTextureCache::GSTextureCache(GSRenderer* r)
|
||||||
// In theory 4MB is enough but 9MB is safer for overflow (8MB
|
// In theory 4MB is enough but 9MB is safer for overflow (8MB
|
||||||
// isn't enough in custom resolution)
|
// isn't enough in custom resolution)
|
||||||
// Test: onimusha 3 PAL 60Hz
|
// Test: onimusha 3 PAL 60Hz
|
||||||
m_temp = (uint8*)_aligned_malloc(9 * 1024 * 1024, 32);
|
m_temp = (u8*)_aligned_malloc(9 * 1024 * 1024, 32);
|
||||||
|
|
||||||
m_texture_inside_rt_cache.reserve(m_texture_inside_rt_cache_size);
|
m_texture_inside_rt_cache.reserve(m_texture_inside_rt_cache_size);
|
||||||
}
|
}
|
||||||
|
@ -119,8 +119,8 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
|
||||||
Target* dst = NULL;
|
Target* dst = NULL;
|
||||||
|
|
||||||
// Check only current frame, I guess it is only used as a postprocessing effect
|
// Check only current frame, I guess it is only used as a postprocessing effect
|
||||||
uint32 bp = TEX0.TBP0;
|
u32 bp = TEX0.TBP0;
|
||||||
uint32 psm = TEX0.PSM;
|
u32 psm = TEX0.PSM;
|
||||||
|
|
||||||
for (auto t : m_dst[DepthStencil])
|
for (auto t : m_dst[DepthStencil])
|
||||||
{
|
{
|
||||||
|
@ -224,7 +224,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
if (psm_s.pal > 0)
|
if (psm_s.pal > 0)
|
||||||
m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
|
m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
|
||||||
|
|
||||||
const uint32* clut = m_renderer->m_mem.m_clut;
|
const u32* clut = m_renderer->m_mem.m_clut;
|
||||||
|
|
||||||
Source* src = NULL;
|
Source* src = NULL;
|
||||||
|
|
||||||
|
@ -234,7 +234,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
{
|
{
|
||||||
Source* s = *i;
|
Source* s = *i;
|
||||||
|
|
||||||
if (((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
if (((TEX0.U32[0] ^ s->m_TEX0.U32[0]) | ((TEX0.U32[1] ^ s->m_TEX0.U32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check
|
// Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check
|
||||||
|
@ -248,7 +248,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
|
|
||||||
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
|
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
|
||||||
// the CPU. We need to check that TEXA is identical
|
// the CPU. We need to check that TEXA is identical
|
||||||
if (psm_s.pal == 0 && psm_s.fmt > 0 && s->m_TEXA.u64 != TEXA.u64)
|
if (psm_s.pal == 0 && psm_s.fmt > 0 && s->m_TEXA.U64 != TEXA.U64)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,13 +270,13 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
if (src == NULL)
|
if (src == NULL)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
uint32 bp = TEX0.TBP0;
|
u32 bp = TEX0.TBP0;
|
||||||
uint32 psm = TEX0.PSM;
|
u32 psm = TEX0.PSM;
|
||||||
|
|
||||||
uint32 bw = TEX0.TBW;
|
u32 bw = TEX0.TBW;
|
||||||
int tw = 1 << TEX0.TW;
|
int tw = 1 << TEX0.TW;
|
||||||
int th = 1 << TEX0.TH;
|
int th = 1 << TEX0.TH;
|
||||||
uint32 bp_end = psm_s.info.bn(tw - 1, th - 1, bp, bw); // Valid only for color formats
|
u32 bp_end = psm_s.info.bn(tw - 1, th - 1, bp, bw); // Valid only for color formats
|
||||||
|
|
||||||
// Arc the Lad finds the wrong surface here when looking for a depth stencil.
|
// Arc the Lad finds the wrong surface here when looking for a depth stencil.
|
||||||
// Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here.
|
// Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here.
|
||||||
|
@ -297,7 +297,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
// because of the previous draw call format
|
// because of the previous draw call format
|
||||||
//
|
//
|
||||||
// Solution: consider the RT as 32 bits if the alpha was used in the past
|
// Solution: consider the RT as 32 bits if the alpha was used in the past
|
||||||
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
|
u32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
|
||||||
|
|
||||||
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm))
|
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm))
|
||||||
{
|
{
|
||||||
|
@ -376,7 +376,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
{
|
{
|
||||||
if (candidate_x_offset == 0 && candidate_y_offset == 0)
|
if (candidate_x_offset == 0 && candidate_y_offset == 0)
|
||||||
continue;
|
continue;
|
||||||
uint32 candidate_bp = psm_s.info.bn(candidate_x_offset, candidate_y_offset, t->m_TEX0.TBP0, bw);
|
u32 candidate_bp = psm_s.info.bn(candidate_x_offset, candidate_y_offset, t->m_TEX0.TBP0, bw);
|
||||||
if (bp == candidate_bp && bp_end <= t->m_end_block)
|
if (bp == candidate_bp && bp_end <= t->m_end_block)
|
||||||
{
|
{
|
||||||
// SWEEP HIT: <x,y> offset found
|
// SWEEP HIT: <x,y> offset found
|
||||||
|
@ -442,8 +442,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
if (psm_s.bpp > 8)
|
if (psm_s.bpp > 8)
|
||||||
{
|
{
|
||||||
GIFRegTEX0 depth_TEX0;
|
GIFRegTEX0 depth_TEX0;
|
||||||
depth_TEX0.u32[0] = TEX0.u32[0] | (0x30u << 20u);
|
depth_TEX0.U32[0] = TEX0.U32[0] | (0x30u << 20u);
|
||||||
depth_TEX0.u32[1] = TEX0.u32[1];
|
depth_TEX0.U32[1] = TEX0.U32[1];
|
||||||
return LookupDepthSource(depth_TEX0, TEXA, r);
|
return LookupDepthSource(depth_TEX0, TEXA, r);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -521,10 +521,10 @@ bool GSTextureCache::ShallSearchTextureInsideRt()
|
||||||
return m_texture_inside_rt || (m_renderer->m_game.flags & CRC::Flags::TextureInsideRt);
|
return m_texture_inside_rt || (m_renderer->m_game.flags & CRC::Flags::TextureInsideRt);
|
||||||
}
|
}
|
||||||
|
|
||||||
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, uint32 fbmask)
|
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, u32 fbmask)
|
||||||
{
|
{
|
||||||
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
|
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
|
||||||
uint32 bp = TEX0.TBP0;
|
u32 bp = TEX0.TBP0;
|
||||||
|
|
||||||
Target* dst = NULL;
|
Target* dst = NULL;
|
||||||
|
|
||||||
|
@ -655,7 +655,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
|
||||||
|
|
||||||
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h)
|
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h)
|
||||||
{
|
{
|
||||||
uint32 bp = TEX0.TBP0;
|
u32 bp = TEX0.TBP0;
|
||||||
|
|
||||||
Target* dst = NULL;
|
Target* dst = NULL;
|
||||||
|
|
||||||
|
@ -771,7 +771,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
|
||||||
// Goal: Depth And Target at the same address is not possible. On GS it is
|
// Goal: Depth And Target at the same address is not possible. On GS it is
|
||||||
// the same memory but not on the Dx/GL. Therefore a write to the Depth/Target
|
// the same memory but not on the Dx/GL. Therefore a write to the Depth/Target
|
||||||
// must invalidate the Target/Depth respectively
|
// must invalidate the Target/Depth respectively
|
||||||
void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
|
void GSTextureCache::InvalidateVideoMemType(int type, u32 bp)
|
||||||
{
|
{
|
||||||
if (!m_can_convert_depth)
|
if (!m_can_convert_depth)
|
||||||
return;
|
return;
|
||||||
|
@ -799,9 +799,9 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp)
|
||||||
// Called each time you want to write to the GS memory
|
// Called each time you want to write to the GS memory
|
||||||
void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& rect, bool target)
|
void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& rect, bool target)
|
||||||
{
|
{
|
||||||
uint32 bp = off.bp();
|
u32 bp = off.bp();
|
||||||
uint32 bw = off.bw();
|
u32 bw = off.bw();
|
||||||
uint32 psm = off.psm();
|
u32 psm = off.psm();
|
||||||
|
|
||||||
if (!target)
|
if (!target)
|
||||||
{
|
{
|
||||||
|
@ -820,7 +820,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 bbp = bp + bw * 0x10;
|
u32 bbp = bp + bw * 0x10;
|
||||||
if (bw >= 16 && bbp < 16384)
|
if (bw >= 16 && bbp < 16384)
|
||||||
{
|
{
|
||||||
// Detect half of the render target (fix snow engine game)
|
// Detect half of the render target (fix snow engine game)
|
||||||
|
@ -844,7 +844,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
|
||||||
// we are screwed.
|
// we are screwed.
|
||||||
if (m_renderer->m_game.title == CRC::HauntingGround)
|
if (m_renderer->m_game.title == CRC::HauntingGround)
|
||||||
{
|
{
|
||||||
uint32 end_block = GSLocalMemory::m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats
|
u32 end_block = GSLocalMemory::m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw); // Valid only for color formats
|
||||||
auto type = RenderTarget;
|
auto type = RenderTarget;
|
||||||
|
|
||||||
for (auto t : m_dst[type])
|
for (auto t : m_dst[type])
|
||||||
|
@ -872,7 +872,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
|
||||||
|
|
||||||
GSVector4i r = rect.ralign<Align_Outside>((bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs);
|
GSVector4i r = rect.ralign<Align_Outside>((bp & 31) == 0 ? GSLocalMemory::m_psm[psm].pgs : GSLocalMemory::m_psm[psm].bs);
|
||||||
|
|
||||||
off.loopPages(rect, [&](uint32 page)
|
off.loopPages(rect, [&](u32 page)
|
||||||
{
|
{
|
||||||
auto& list = m_src.m_map[page];
|
auto& list = m_src.m_map[page];
|
||||||
for (auto i = list.begin(); i != list.end();)
|
for (auto i = list.begin(); i != list.end();)
|
||||||
|
@ -892,7 +892,7 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint32* RESTRICT valid = s->m_valid;
|
u32* RESTRICT valid = s->m_valid;
|
||||||
|
|
||||||
// Invalidate data of input texture
|
// Invalidate data of input texture
|
||||||
if (s->m_repeating)
|
if (s->m_repeating)
|
||||||
|
@ -982,8 +982,8 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
|
||||||
{
|
{
|
||||||
if (bp < t->m_TEX0.TBP0)
|
if (bp < t->m_TEX0.TBP0)
|
||||||
{
|
{
|
||||||
uint32 rowsize = bw * 8192;
|
u32 rowsize = bw * 8192;
|
||||||
uint32 offset = (uint32)((t->m_TEX0.TBP0 - bp) * 256);
|
u32 offset = (u32)((t->m_TEX0.TBP0 - bp) * 256);
|
||||||
|
|
||||||
if (rowsize > 0 && offset % rowsize == 0)
|
if (rowsize > 0 && offset % rowsize == 0)
|
||||||
{
|
{
|
||||||
|
@ -1011,8 +1011,8 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
|
||||||
// (128 pixels) target
|
// (128 pixels) target
|
||||||
if (bw > 2 && t->m_TEX0.TBW == bw && t->Inside(bp, bw, psm, rect) && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
|
if (bw > 2 && t->m_TEX0.TBW == bw && t->Inside(bp, bw, psm, rect) && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM))
|
||||||
{
|
{
|
||||||
uint32 rowsize = bw * 8192u;
|
u32 rowsize = bw * 8192u;
|
||||||
uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256);
|
u32 offset = (u32)((bp - t->m_TEX0.TBP0) * 256);
|
||||||
|
|
||||||
if (rowsize > 0 && offset % rowsize == 0)
|
if (rowsize > 0 && offset % rowsize == 0)
|
||||||
{
|
{
|
||||||
|
@ -1038,9 +1038,9 @@ void GSTextureCache::InvalidateVideoMem(const GSOffset& off, const GSVector4i& r
|
||||||
// Called each time you want to read from the GS memory
|
// Called each time you want to read from the GS memory
|
||||||
void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r)
|
void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
uint32 bp = off.bp();
|
u32 bp = off.bp();
|
||||||
uint32 psm = off.psm();
|
u32 psm = off.psm();
|
||||||
//uint32 bw = off->bw;
|
//u32 bw = off->bw;
|
||||||
|
|
||||||
// No depth handling please.
|
// No depth handling please.
|
||||||
if (psm == PSM_PSMZ32 || psm == PSM_PSMZ24 || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S)
|
if (psm == PSM_PSMZ32 || psm == PSM_PSMZ24 || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S)
|
||||||
|
@ -1155,8 +1155,8 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
|
||||||
// && ((t->m_TEX0.TBP0 == 0) || (t->m_TEX0.TBP0==3328) || (t->m_TEX0.TBP0==3584)))
|
// && ((t->m_TEX0.TBP0 == 0) || (t->m_TEX0.TBP0==3328) || (t->m_TEX0.TBP0==3584)))
|
||||||
// {
|
// {
|
||||||
// //printf("first : %d-%d child : %d-%d\n", psm, bp, t->m_TEX0.PSM, t->m_TEX0.TBP0);
|
// //printf("first : %d-%d child : %d-%d\n", psm, bp, t->m_TEX0.PSM, t->m_TEX0.TBP0);
|
||||||
// uint32 rowsize = bw * 8192;
|
// u32 rowsize = bw * 8192;
|
||||||
// uint32 offset = (uint32)((bp - t->m_TEX0.TBP0) * 256);
|
// u32 offset = (u32)((bp - t->m_TEX0.TBP0) * 256);
|
||||||
|
|
||||||
// if (rowsize > 0 && offset % rowsize == 0)
|
// if (rowsize > 0 && offset % rowsize == 0)
|
||||||
// {
|
// {
|
||||||
|
@ -1656,10 +1656,10 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int
|
||||||
void GSTextureCache::PrintMemoryUsage()
|
void GSTextureCache::PrintMemoryUsage()
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_OGL_DEBUG
|
#ifdef ENABLE_OGL_DEBUG
|
||||||
uint32 tex = 0;
|
u32 tex = 0;
|
||||||
uint32 tex_rt = 0;
|
u32 tex_rt = 0;
|
||||||
uint32 rt = 0;
|
u32 rt = 0;
|
||||||
uint32 dss = 0;
|
u32 dss = 0;
|
||||||
for (auto s : m_src.m_surfaces)
|
for (auto s : m_src.m_surfaces)
|
||||||
{
|
{
|
||||||
if (s && !s->m_shared_texture)
|
if (s && !s->m_shared_texture)
|
||||||
|
@ -1687,7 +1687,7 @@ void GSTextureCache::PrintMemoryUsage()
|
||||||
|
|
||||||
// GSTextureCache::Surface
|
// GSTextureCache::Surface
|
||||||
|
|
||||||
GSTextureCache::Surface::Surface(GSRenderer* r, uint8* temp)
|
GSTextureCache::Surface::Surface(GSRenderer* r, u8* temp)
|
||||||
: m_renderer(r)
|
: m_renderer(r)
|
||||||
, m_texture(NULL)
|
, m_texture(NULL)
|
||||||
, m_age(0)
|
, m_age(0)
|
||||||
|
@ -1712,24 +1712,24 @@ void GSTextureCache::Surface::UpdateAge()
|
||||||
m_age = 0;
|
m_age = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSTextureCache::Surface::Inside(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect)
|
bool GSTextureCache::Surface::Inside(u32 bp, u32 bw, u32 psm, const GSVector4i& rect)
|
||||||
{
|
{
|
||||||
// Valid only for color formats.
|
// Valid only for color formats.
|
||||||
uint32 const end_block = GSLocalMemory::m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw);
|
u32 const end_block = GSLocalMemory::m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw);
|
||||||
return bp >= m_TEX0.TBP0 && end_block <= m_end_block;
|
return bp >= m_TEX0.TBP0 && end_block <= m_end_block;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSTextureCache::Surface::Overlaps(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect)
|
bool GSTextureCache::Surface::Overlaps(u32 bp, u32 bw, u32 psm, const GSVector4i& rect)
|
||||||
{
|
{
|
||||||
// Valid only for color formats.
|
// Valid only for color formats.
|
||||||
uint32 const end_block = GSLocalMemory::m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw);
|
u32 const end_block = GSLocalMemory::m_psm[psm].info.bn(rect.z - 1, rect.w - 1, bp, bw);
|
||||||
return (m_TEX0.TBP0 <= bp && bp <= m_end_block)
|
return (m_TEX0.TBP0 <= bp && bp <= m_end_block)
|
||||||
|| (m_TEX0.TBP0 <= end_block && end_block <= m_end_block);
|
|| (m_TEX0.TBP0 <= end_block && end_block <= m_end_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
// GSTextureCache::Source
|
// GSTextureCache::Source
|
||||||
|
|
||||||
GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container)
|
GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, u8* temp, bool dummy_container)
|
||||||
: Surface(r, temp)
|
: Surface(r, temp)
|
||||||
, m_palette_obj(nullptr)
|
, m_palette_obj(nullptr)
|
||||||
, m_palette(nullptr)
|
, m_palette(nullptr)
|
||||||
|
@ -1801,7 +1801,7 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
|
||||||
const GSOffset& off = m_renderer->m_context->offset.tex;
|
const GSOffset& off = m_renderer->m_context->offset.tex;
|
||||||
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
|
GSOffset::BNHelper bn = off.bnMulti(r.left, r.top);
|
||||||
|
|
||||||
uint32 blocks = 0;
|
u32 blocks = 0;
|
||||||
|
|
||||||
if (m_repeating)
|
if (m_repeating)
|
||||||
{
|
{
|
||||||
|
@ -1810,14 +1810,14 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
|
||||||
for (int x = r.left; x < r.right; bn.nextBlockX(), x += bs.x)
|
for (int x = r.left; x < r.right; bn.nextBlockX(), x += bs.x)
|
||||||
{
|
{
|
||||||
int i = (bn.blkY() << 7) + bn.blkX();
|
int i = (bn.blkY() << 7) + bn.blkX();
|
||||||
uint32 block = bn.valueNoWrap();
|
u32 block = bn.valueNoWrap();
|
||||||
|
|
||||||
if (block < MAX_BLOCKS || m_wrap_gs_mem)
|
if (block < MAX_BLOCKS || m_wrap_gs_mem)
|
||||||
{
|
{
|
||||||
uint32 addr = i % MAX_BLOCKS;
|
u32 addr = i % MAX_BLOCKS;
|
||||||
|
|
||||||
uint32 row = addr >> 5u;
|
u32 row = addr >> 5u;
|
||||||
uint32 col = 1 << (addr & 31u);
|
u32 col = 1 << (addr & 31u);
|
||||||
|
|
||||||
if ((m_valid[row] & col) == 0)
|
if ((m_valid[row] & col) == 0)
|
||||||
{
|
{
|
||||||
|
@ -1837,14 +1837,14 @@ void GSTextureCache::Source::Update(const GSVector4i& rect, int layer)
|
||||||
{
|
{
|
||||||
for (int x = r.left; x < r.right; x += bs.x, bn.nextBlockX())
|
for (int x = r.left; x < r.right; x += bs.x, bn.nextBlockX())
|
||||||
{
|
{
|
||||||
uint32 block = bn.valueNoWrap();
|
u32 block = bn.valueNoWrap();
|
||||||
|
|
||||||
if (block < MAX_BLOCKS || m_wrap_gs_mem)
|
if (block < MAX_BLOCKS || m_wrap_gs_mem)
|
||||||
{
|
{
|
||||||
block %= MAX_BLOCKS;
|
block %= MAX_BLOCKS;
|
||||||
|
|
||||||
uint32 row = block >> 5u;
|
u32 row = block >> 5u;
|
||||||
uint32 col = 1 << (block & 31u);
|
u32 col = 1 << (block & 31u);
|
||||||
|
|
||||||
if ((m_valid[row] & col) == 0)
|
if ((m_valid[row] & col) == 0)
|
||||||
{
|
{
|
||||||
|
@ -1921,7 +1921,7 @@ void GSTextureCache::Source::Write(const GSVector4i& r, int layer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSTextureCache::Source::Flush(uint32 count, int layer)
|
void GSTextureCache::Source::Flush(u32 count, int layer)
|
||||||
{
|
{
|
||||||
// This function as written will not work for paletted formats copied from framebuffers
|
// This function as written will not work for paletted formats copied from framebuffers
|
||||||
// because they are 8 or 4 bit formats on the GS and the GS local memory module reads
|
// because they are 8 or 4 bit formats on the GS and the GS local memory module reads
|
||||||
|
@ -1935,7 +1935,7 @@ void GSTextureCache::Source::Flush(uint32 count, int layer)
|
||||||
|
|
||||||
GSVector4i tr(0, 0, tw, th);
|
GSVector4i tr(0, 0, tw, th);
|
||||||
|
|
||||||
int pitch = std::max(tw, psm.bs.x) * sizeof(uint32);
|
int pitch = std::max(tw, psm.bs.x) * sizeof(u32);
|
||||||
|
|
||||||
GSLocalMemory& mem = m_renderer->m_mem;
|
GSLocalMemory& mem = m_renderer->m_mem;
|
||||||
|
|
||||||
|
@ -1949,9 +1949,9 @@ void GSTextureCache::Source::Flush(uint32 count, int layer)
|
||||||
rtx = psm.rtxP;
|
rtx = psm.rtxP;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8* buff = m_temp;
|
u8* buff = m_temp;
|
||||||
|
|
||||||
for (uint32 i = 0; i < count; i++)
|
for (u32 i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
GSVector4i r = m_write.rect[i];
|
GSVector4i r = m_write.rect[i];
|
||||||
|
|
||||||
|
@ -1996,7 +1996,7 @@ bool GSTextureCache::Source::ClutMatch(PaletteKey palette_key)
|
||||||
|
|
||||||
// GSTextureCache::Target
|
// GSTextureCache::Target
|
||||||
|
|
||||||
GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported)
|
GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, u8* temp, bool depth_supported)
|
||||||
: Surface(r, temp)
|
: Surface(r, temp)
|
||||||
, m_type(-1)
|
, m_type(-1)
|
||||||
, m_used(false)
|
, m_used(false)
|
||||||
|
@ -2135,7 +2135,7 @@ void GSTextureCache::SourceMap::Add(Source* s, const GIFRegTEX0& TEX0, const GSO
|
||||||
}
|
}
|
||||||
|
|
||||||
// The source pointer will be stored/duplicated in all m_map[array of pages]
|
// The source pointer will be stored/duplicated in all m_map[array of pages]
|
||||||
s->m_pages.loopPages([this, s](uint32 page)
|
s->m_pages.loopPages([this, s](u32 page)
|
||||||
{
|
{
|
||||||
s->m_erase_it[page] = m_map[page].InsertFront(s);
|
s->m_erase_it[page] = m_map[page].InsertFront(s);
|
||||||
});
|
});
|
||||||
|
@ -2169,7 +2169,7 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
s->m_pages.loopPages([this, s](uint32 page)
|
s->m_pages.loopPages([this, s](u32 page)
|
||||||
{
|
{
|
||||||
m_map[page].EraseIndex(s->m_erase_it[page]);
|
m_map[page].EraseIndex(s->m_erase_it[page]);
|
||||||
});
|
});
|
||||||
|
@ -2178,7 +2178,7 @@ void GSTextureCache::SourceMap::RemoveAt(Source* s)
|
||||||
delete s;
|
delete s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSTextureCache::AttachPaletteToSource(Source* s, uint16 pal, bool need_gs_texture)
|
void GSTextureCache::AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture)
|
||||||
{
|
{
|
||||||
s->m_palette_obj = m_palette_map.LookupPalette(pal, need_gs_texture);
|
s->m_palette_obj = m_palette_map.LookupPalette(pal, need_gs_texture);
|
||||||
s->m_palette = need_gs_texture ? s->m_palette_obj->GetPaletteGSTexture() : nullptr;
|
s->m_palette = need_gs_texture ? s->m_palette_obj->GetPaletteGSTexture() : nullptr;
|
||||||
|
@ -2186,14 +2186,14 @@ void GSTextureCache::AttachPaletteToSource(Source* s, uint16 pal, bool need_gs_t
|
||||||
|
|
||||||
// GSTextureCache::Palette
|
// GSTextureCache::Palette
|
||||||
|
|
||||||
GSTextureCache::Palette::Palette(const GSRenderer* renderer, uint16 pal, bool need_gs_texture)
|
GSTextureCache::Palette::Palette(const GSRenderer* renderer, u16 pal, bool need_gs_texture)
|
||||||
: m_pal(pal)
|
: m_pal(pal)
|
||||||
, m_tex_palette(nullptr)
|
, m_tex_palette(nullptr)
|
||||||
, m_renderer(renderer)
|
, m_renderer(renderer)
|
||||||
{
|
{
|
||||||
uint16 palette_size = pal * sizeof(uint32);
|
u16 palette_size = pal * sizeof(u32);
|
||||||
m_clut = (uint32*)_aligned_malloc(palette_size, 64);
|
m_clut = (u32*)_aligned_malloc(palette_size, 64);
|
||||||
memcpy(m_clut, (const uint32*)m_renderer->m_mem.m_clut, palette_size);
|
memcpy(m_clut, (const u32*)m_renderer->m_mem.m_clut, palette_size);
|
||||||
if (need_gs_texture)
|
if (need_gs_texture)
|
||||||
{
|
{
|
||||||
InitializeTexture();
|
InitializeTexture();
|
||||||
|
@ -2234,20 +2234,20 @@ void GSTextureCache::Palette::InitializeTexture()
|
||||||
|
|
||||||
// Hashes the content of the clut.
|
// Hashes the content of the clut.
|
||||||
// The hashing function is implemented by taking two things into account:
|
// The hashing function is implemented by taking two things into account:
|
||||||
// 1) The clut can be an array of 16 or 256 uint32 (depending on the pal parameter) and in order to speed up the computation of the hash
|
// 1) The clut can be an array of 16 or 256 u32 (depending on the pal parameter) and in order to speed up the computation of the hash
|
||||||
// the array is hashed in blocks of 16 uint32, so for clut of size 16 uint32 the hashing is computed in one pass and for clut of 256 uint32
|
// the array is hashed in blocks of 16 u32, so for clut of size 16 u32 the hashing is computed in one pass and for clut of 256 u32
|
||||||
// it is computed in 16 passes,
|
// it is computed in 16 passes,
|
||||||
// 2) The clut can contain many 0s, so as a way to increase the spread of hashing values for small changes in the input clut the hashing function
|
// 2) The clut can contain many 0s, so as a way to increase the spread of hashing values for small changes in the input clut the hashing function
|
||||||
// is using addition in combination with logical XOR operator; The addition constants are large prime numbers, which may help in achieving what intended.
|
// is using addition in combination with logical XOR operator; The addition constants are large prime numbers, which may help in achieving what intended.
|
||||||
std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey& key) const
|
std::size_t GSTextureCache::PaletteKeyHash::operator()(const PaletteKey& key) const
|
||||||
{
|
{
|
||||||
uint16 pal = key.pal;
|
u16 pal = key.pal;
|
||||||
const uint32* clut = key.clut;
|
const u32* clut = key.clut;
|
||||||
|
|
||||||
ASSERT((pal & 15) == 0);
|
ASSERT((pal & 15) == 0);
|
||||||
|
|
||||||
size_t clut_hash = 3831179159;
|
size_t clut_hash = 3831179159;
|
||||||
for (uint16 i = 0; i < pal; i += 16)
|
for (u16 i = 0; i < pal; i += 16)
|
||||||
{
|
{
|
||||||
clut_hash = (clut_hash + 1488000301) ^ (clut[i ] + 33644011);
|
clut_hash = (clut_hash + 1488000301) ^ (clut[i ] + 33644011);
|
||||||
clut_hash = (clut_hash + 3831179159) ^ (clut[i + 1] + 47627467);
|
clut_hash = (clut_hash + 3831179159) ^ (clut[i + 1] + 47627467);
|
||||||
|
@ -2295,7 +2295,7 @@ GSTextureCache::PaletteMap::PaletteMap(const GSRenderer* renderer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalette(uint16 pal, bool need_gs_texture)
|
std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalette(u16 pal, bool need_gs_texture)
|
||||||
{
|
{
|
||||||
ASSERT(pal == 16 || pal == 256);
|
ASSERT(pal == 16 || pal == 256);
|
||||||
|
|
||||||
|
@ -2304,7 +2304,7 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalet
|
||||||
// pal == 256 : index 1
|
// pal == 256 : index 1
|
||||||
auto& map = m_maps[pal == 16 ? 0 : 1];
|
auto& map = m_maps[pal == 16 ? 0 : 1];
|
||||||
|
|
||||||
const uint32* clut = (const uint32*)m_renderer->m_mem.m_clut;
|
const u32* clut = (const u32*)m_renderer->m_mem.m_clut;
|
||||||
|
|
||||||
// Create PaletteKey for searching into map (clut is actually not copied, so do not store this key into the map)
|
// Create PaletteKey for searching into map (clut is actually not copied, so do not store this key into the map)
|
||||||
PaletteKey palette_key = {clut, pal};
|
PaletteKey palette_key = {clut, pal};
|
||||||
|
@ -2327,9 +2327,9 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalet
|
||||||
if (map.size() > MAX_SIZE)
|
if (map.size() > MAX_SIZE)
|
||||||
{
|
{
|
||||||
// If the map is too big, try to clean it by disposing and removing unused palettes, before adding the new one
|
// If the map is too big, try to clean it by disposing and removing unused palettes, before adding the new one
|
||||||
GL_INS("WARNING, %u-bit PaletteMap (Size %u): Max size %u exceeded, clearing unused palettes.", pal * sizeof(uint32), map.size(), MAX_SIZE);
|
GL_INS("WARNING, %u-bit PaletteMap (Size %u): Max size %u exceeded, clearing unused palettes.", pal * sizeof(u32), map.size(), MAX_SIZE);
|
||||||
|
|
||||||
uint32 current_size = map.size();
|
u32 current_size = map.size();
|
||||||
|
|
||||||
for (auto it = map.begin(); it != map.end();)
|
for (auto it = map.begin(); it != map.end();)
|
||||||
{
|
{
|
||||||
|
@ -2347,16 +2347,16 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalet
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 cleared_palette_count = current_size - (uint32)map.size();
|
u32 cleared_palette_count = current_size - (u32)map.size();
|
||||||
|
|
||||||
if (cleared_palette_count == 0)
|
if (cleared_palette_count == 0)
|
||||||
{
|
{
|
||||||
GL_INS("ERROR, %u-bit PaletteMap (Size %u): Max size %u exceeded, could not clear any palette, negative performance impact.", pal * sizeof(uint32), map.size(), MAX_SIZE);
|
GL_INS("ERROR, %u-bit PaletteMap (Size %u): Max size %u exceeded, could not clear any palette, negative performance impact.", pal * sizeof(u32), map.size(), MAX_SIZE);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing
|
map.reserve(MAX_SIZE); // Ensure map capacity is not modified by the clearing
|
||||||
GL_INS("INFO, %u-bit PaletteMap (Size %u): Cleared %u palettes.", pal * sizeof(uint32), map.size(), cleared_palette_count);
|
GL_INS("INFO, %u-bit PaletteMap (Size %u): Cleared %u palettes.", pal * sizeof(u32), map.size(), cleared_palette_count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2364,7 +2364,7 @@ std::shared_ptr<GSTextureCache::Palette> GSTextureCache::PaletteMap::LookupPalet
|
||||||
|
|
||||||
map.emplace(palette->GetPaletteKey(), palette);
|
map.emplace(palette->GetPaletteKey(), palette);
|
||||||
|
|
||||||
GL_CACHE("TC, %u-bit PaletteMap (Size %u): Added new palette.", pal * sizeof(uint32), map.size());
|
GL_CACHE("TC, %u-bit PaletteMap (Size %u): Added new palette.", pal * sizeof(u32), map.size());
|
||||||
|
|
||||||
return palette;
|
return palette;
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,36 +38,36 @@ public:
|
||||||
GIFRegTEX0 m_TEX0;
|
GIFRegTEX0 m_TEX0;
|
||||||
GIFRegTEXA m_TEXA;
|
GIFRegTEXA m_TEXA;
|
||||||
int m_age;
|
int m_age;
|
||||||
uint8* m_temp;
|
u8* m_temp;
|
||||||
bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture
|
bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture
|
||||||
bool m_shared_texture;
|
bool m_shared_texture;
|
||||||
uint32 m_end_block; // Hint of the surface area.
|
u32 m_end_block; // Hint of the surface area.
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Surface(GSRenderer* r, uint8* temp);
|
Surface(GSRenderer* r, u8* temp);
|
||||||
virtual ~Surface();
|
virtual ~Surface();
|
||||||
|
|
||||||
void UpdateAge();
|
void UpdateAge();
|
||||||
bool Inside(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect);
|
bool Inside(u32 bp, u32 bw, u32 psm, const GSVector4i& rect);
|
||||||
bool Overlaps(uint32 bp, uint32 bw, uint32 psm, const GSVector4i& rect);
|
bool Overlaps(u32 bp, u32 bw, u32 psm, const GSVector4i& rect);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PaletteKey
|
struct PaletteKey
|
||||||
{
|
{
|
||||||
const uint32* clut;
|
const u32* clut;
|
||||||
uint16 pal;
|
u16 pal;
|
||||||
};
|
};
|
||||||
|
|
||||||
class Palette
|
class Palette
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
uint32* m_clut;
|
u32* m_clut;
|
||||||
uint16 m_pal;
|
u16 m_pal;
|
||||||
GSTexture* m_tex_palette;
|
GSTexture* m_tex_palette;
|
||||||
const GSRenderer* m_renderer;
|
const GSRenderer* m_renderer;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Palette(const GSRenderer* renderer, uint16 pal, bool need_gs_texture);
|
Palette(const GSRenderer* renderer, u16 pal, bool need_gs_texture);
|
||||||
~Palette();
|
~Palette();
|
||||||
|
|
||||||
// Disable copy constructor and copy operator
|
// Disable copy constructor and copy operator
|
||||||
|
@ -102,16 +102,16 @@ public:
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
GSVector4i* rect;
|
GSVector4i* rect;
|
||||||
uint32 count;
|
u32 count;
|
||||||
} m_write;
|
} m_write;
|
||||||
|
|
||||||
void Write(const GSVector4i& r, int layer);
|
void Write(const GSVector4i& r, int layer);
|
||||||
void Flush(uint32 count, int layer);
|
void Flush(u32 count, int layer);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
std::shared_ptr<Palette> m_palette_obj;
|
std::shared_ptr<Palette> m_palette_obj;
|
||||||
GSTexture* m_palette;
|
GSTexture* m_palette;
|
||||||
uint32 m_valid[MAX_PAGES]; // each uint32 bits map to the 32 blocks of that page
|
u32 m_valid[MAX_PAGES]; // each u32 bits map to the 32 blocks of that page
|
||||||
GSVector4i m_valid_rect;
|
GSVector4i m_valid_rect;
|
||||||
bool m_target;
|
bool m_target;
|
||||||
bool m_complete;
|
bool m_complete;
|
||||||
|
@ -124,11 +124,11 @@ public:
|
||||||
GIFRegTEX0 m_from_target_TEX0; // TEX0 of the target texture, if any, else equal to texture TEX0
|
GIFRegTEX0 m_from_target_TEX0; // TEX0 of the target texture, if any, else equal to texture TEX0
|
||||||
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
|
GIFRegTEX0 m_layer_TEX0[7]; // Detect already loaded value
|
||||||
// Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase
|
// Keep a GSTextureCache::SourceMap::m_map iterator to allow fast erase
|
||||||
std::array<uint16, MAX_PAGES> m_erase_it;
|
std::array<u16, MAX_PAGES> m_erase_it;
|
||||||
GSOffset::PageLooper m_pages;
|
GSOffset::PageLooper m_pages;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false);
|
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, u8* temp, bool dummy_container = false);
|
||||||
virtual ~Source();
|
virtual ~Source();
|
||||||
|
|
||||||
void Update(const GSVector4i& rect, int layer = 0);
|
void Update(const GSVector4i& rect, int layer = 0);
|
||||||
|
@ -148,7 +148,7 @@ public:
|
||||||
bool m_dirty_alpha;
|
bool m_dirty_alpha;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported);
|
Target(GSRenderer* r, const GIFRegTEX0& TEX0, u8* temp, bool depth_supported);
|
||||||
|
|
||||||
void UpdateValidity(const GSVector4i& rect);
|
void UpdateValidity(const GSVector4i& rect);
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ public:
|
||||||
class PaletteMap
|
class PaletteMap
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
static const uint16 MAX_SIZE = 65535; // Max size of each map.
|
static const u16 MAX_SIZE = 65535; // Max size of each map.
|
||||||
const GSRenderer* m_renderer;
|
const GSRenderer* m_renderer;
|
||||||
|
|
||||||
// Array of 2 maps, the first for 64B palettes and the second for 1024B palettes.
|
// Array of 2 maps, the first for 64B palettes and the second for 1024B palettes.
|
||||||
|
@ -170,7 +170,7 @@ public:
|
||||||
PaletteMap(const GSRenderer* renderer);
|
PaletteMap(const GSRenderer* renderer);
|
||||||
|
|
||||||
// Retrieves a shared pointer to a valid Palette from m_maps or creates a new one adding it to the data structure
|
// Retrieves a shared pointer to a valid Palette from m_maps or creates a new one adding it to the data structure
|
||||||
std::shared_ptr<Palette> LookupPalette(uint16 pal, bool need_gs_texture);
|
std::shared_ptr<Palette> LookupPalette(u16 pal, bool need_gs_texture);
|
||||||
|
|
||||||
void Clear(); // Clears m_maps, thus deletes Palette objects
|
void Clear(); // Clears m_maps, thus deletes Palette objects
|
||||||
};
|
};
|
||||||
|
@ -180,7 +180,7 @@ public:
|
||||||
public:
|
public:
|
||||||
std::unordered_set<Source*> m_surfaces;
|
std::unordered_set<Source*> m_surfaces;
|
||||||
std::array<FastList<Source*>, MAX_PAGES> m_map;
|
std::array<FastList<Source*>, MAX_PAGES> m_map;
|
||||||
uint32 m_pages[16]; // bitmap of all pages
|
u32 m_pages[16]; // bitmap of all pages
|
||||||
bool m_used;
|
bool m_used;
|
||||||
|
|
||||||
SourceMap()
|
SourceMap()
|
||||||
|
@ -197,12 +197,12 @@ public:
|
||||||
|
|
||||||
struct TexInsideRtCacheEntry
|
struct TexInsideRtCacheEntry
|
||||||
{
|
{
|
||||||
uint32 psm;
|
u32 psm;
|
||||||
uint32 bp;
|
u32 bp;
|
||||||
uint32 bp_end;
|
u32 bp_end;
|
||||||
uint32 bw;
|
u32 bw;
|
||||||
uint32 t_tex0_tbp0;
|
u32 t_tex0_tbp0;
|
||||||
uint32 m_end_block;
|
u32 m_end_block;
|
||||||
bool has_valid_offset;
|
bool has_valid_offset;
|
||||||
int x_offset;
|
int x_offset;
|
||||||
int y_offset;
|
int y_offset;
|
||||||
|
@ -215,14 +215,14 @@ protected:
|
||||||
FastList<Target*> m_dst[2];
|
FastList<Target*> m_dst[2];
|
||||||
bool m_paltex;
|
bool m_paltex;
|
||||||
bool m_preload_frame;
|
bool m_preload_frame;
|
||||||
uint8* m_temp;
|
u8* m_temp;
|
||||||
bool m_can_convert_depth;
|
bool m_can_convert_depth;
|
||||||
bool m_cpu_fb_conversion;
|
bool m_cpu_fb_conversion;
|
||||||
CRCHackLevel m_crc_hack_level;
|
CRCHackLevel m_crc_hack_level;
|
||||||
static bool m_disable_partial_invalidation;
|
static bool m_disable_partial_invalidation;
|
||||||
bool m_texture_inside_rt;
|
bool m_texture_inside_rt;
|
||||||
static bool m_wrap_gs_mem;
|
static bool m_wrap_gs_mem;
|
||||||
uint8 m_texture_inside_rt_cache_size = 255;
|
u8 m_texture_inside_rt_cache_size = 255;
|
||||||
std::vector<TexInsideRtCacheEntry> m_texture_inside_rt_cache;
|
std::vector<TexInsideRtCacheEntry> m_texture_inside_rt_cache;
|
||||||
|
|
||||||
virtual Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0);
|
virtual Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0);
|
||||||
|
@ -244,10 +244,10 @@ public:
|
||||||
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
|
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
|
||||||
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette = false);
|
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette = false);
|
||||||
|
|
||||||
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, uint32 fbmask = 0);
|
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, u32 fbmask = 0);
|
||||||
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h);
|
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h);
|
||||||
|
|
||||||
void InvalidateVideoMemType(int type, uint32 bp);
|
void InvalidateVideoMemType(int type, u32 bp);
|
||||||
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
|
void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt);
|
||||||
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);
|
void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true);
|
||||||
void InvalidateLocalMem(const GSOffset& off, const GSVector4i& r);
|
void InvalidateLocalMem(const GSOffset& off, const GSVector4i& r);
|
||||||
|
@ -265,5 +265,5 @@ public:
|
||||||
|
|
||||||
void PrintMemoryUsage();
|
void PrintMemoryUsage();
|
||||||
|
|
||||||
void AttachPaletteToSource(Source* s, uint16 pal, bool need_gs_texture);
|
void AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture);
|
||||||
};
|
};
|
||||||
|
|
|
@ -25,8 +25,8 @@ struct alignas(32) GSVertexHW9
|
||||||
GSVector4 t;
|
GSVector4 t;
|
||||||
GSVector4 p;
|
GSVector4 p;
|
||||||
|
|
||||||
// t.z = union {struct {uint8 r, g, b, a;}; uint32 c0;};
|
// t.z = union {struct {u8 r, g, b, a;}; u32 c0;};
|
||||||
// t.w = union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;}
|
// t.w = union {struct {u8 ta0, ta1, res, f;}; u32 c1;}
|
||||||
|
|
||||||
GSVertexHW9& operator=(GSVertexHW9& v)
|
GSVertexHW9& operator=(GSVertexHW9& v)
|
||||||
{
|
{
|
||||||
|
|
|
@ -25,7 +25,7 @@ private:
|
||||||
|
|
||||||
void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) {}
|
void DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) {}
|
||||||
void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0) {}
|
void DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset = 0) {}
|
||||||
uint16 ConvertBlendEnum(uint16 generic) { return 0xFFFF; }
|
u16 ConvertBlendEnum(u16 generic) { return 0xFFFF; }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDeviceNull() {}
|
GSDeviceNull() {}
|
||||||
|
|
|
@ -23,11 +23,11 @@ namespace GLState
|
||||||
GSVector4i scissor;
|
GSVector4i scissor;
|
||||||
|
|
||||||
bool blend;
|
bool blend;
|
||||||
uint16 eq_RGB;
|
u16 eq_RGB;
|
||||||
uint16 f_sRGB;
|
u16 f_sRGB;
|
||||||
uint16 f_dRGB;
|
u16 f_dRGB;
|
||||||
uint8 bf;
|
u8 bf;
|
||||||
uint32 wrgba;
|
u32 wrgba;
|
||||||
|
|
||||||
bool depth;
|
bool depth;
|
||||||
GLenum depth_func;
|
GLenum depth_func;
|
||||||
|
@ -50,7 +50,7 @@ namespace GLState
|
||||||
GLuint program;
|
GLuint program;
|
||||||
GLuint pipeline;
|
GLuint pipeline;
|
||||||
|
|
||||||
int64 available_vram;
|
s64 available_vram;
|
||||||
|
|
||||||
void Clear()
|
void Clear()
|
||||||
{
|
{
|
||||||
|
|
|
@ -25,11 +25,11 @@ namespace GLState
|
||||||
extern GSVector4i scissor;
|
extern GSVector4i scissor;
|
||||||
|
|
||||||
extern bool blend;
|
extern bool blend;
|
||||||
extern uint16 eq_RGB;
|
extern u16 eq_RGB;
|
||||||
extern uint16 f_sRGB;
|
extern u16 f_sRGB;
|
||||||
extern uint16 f_dRGB;
|
extern u16 f_dRGB;
|
||||||
extern uint8 bf;
|
extern u8 bf;
|
||||||
extern uint32 wrgba;
|
extern u32 wrgba;
|
||||||
|
|
||||||
extern bool depth;
|
extern bool depth;
|
||||||
extern GLenum depth_func;
|
extern GLenum depth_func;
|
||||||
|
@ -52,7 +52,7 @@ namespace GLState
|
||||||
extern GLuint program;
|
extern GLuint program;
|
||||||
extern GLuint pipeline;
|
extern GLuint pipeline;
|
||||||
|
|
||||||
extern int64 available_vram;
|
extern s64 available_vram;
|
||||||
|
|
||||||
extern void Clear();
|
extern void Clear();
|
||||||
} // namespace GLState
|
} // namespace GLState
|
||||||
|
|
|
@ -30,17 +30,17 @@
|
||||||
|
|
||||||
// TODO port those value into PerfMon API
|
// TODO port those value into PerfMon API
|
||||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||||
uint64 g_real_texture_upload_byte = 0;
|
u64 g_real_texture_upload_byte = 0;
|
||||||
uint64 g_vertex_upload_byte = 0;
|
u64 g_vertex_upload_byte = 0;
|
||||||
uint64 g_uniform_upload_byte = 0;
|
u64 g_uniform_upload_byte = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static constexpr uint32 g_merge_cb_index = 10;
|
static constexpr u32 g_merge_cb_index = 10;
|
||||||
static constexpr uint32 g_interlace_cb_index = 11;
|
static constexpr u32 g_interlace_cb_index = 11;
|
||||||
static constexpr uint32 g_fx_cb_index = 14;
|
static constexpr u32 g_fx_cb_index = 14;
|
||||||
static constexpr uint32 g_convert_index = 15;
|
static constexpr u32 g_convert_index = 15;
|
||||||
static constexpr uint32 g_vs_cb_index = 20;
|
static constexpr u32 g_vs_cb_index = 20;
|
||||||
static constexpr uint32 g_ps_cb_index = 21;
|
static constexpr u32 g_ps_cb_index = 21;
|
||||||
|
|
||||||
static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
|
static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||||
static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||||
|
@ -179,10 +179,10 @@ void GSDeviceOGL::GenerateProfilerData()
|
||||||
const int first_query = replay > 1 ? m_profiler.last_query / replay : 0;
|
const int first_query = replay > 1 ? m_profiler.last_query / replay : 0;
|
||||||
|
|
||||||
glGetQueryObjectui64v(m_profiler.timer_query[first_query], GL_QUERY_RESULT, &time_start);
|
glGetQueryObjectui64v(m_profiler.timer_query[first_query], GL_QUERY_RESULT, &time_start);
|
||||||
for (uint32 q = first_query + 1; q < m_profiler.last_query; q++)
|
for (u32 q = first_query + 1; q < m_profiler.last_query; q++)
|
||||||
{
|
{
|
||||||
glGetQueryObjectui64v(m_profiler.timer_query[q], GL_QUERY_RESULT, &time_end);
|
glGetQueryObjectui64v(m_profiler.timer_query[q], GL_QUERY_RESULT, &time_end);
|
||||||
uint64 t = time_end - time_start;
|
u64 t = time_end - time_start;
|
||||||
times.push_back((double)t * ms);
|
times.push_back((double)t * ms);
|
||||||
|
|
||||||
time_start = time_end;
|
time_start = time_end;
|
||||||
|
@ -207,7 +207,7 @@ void GSDeviceOGL::GenerateProfilerData()
|
||||||
sd += pow(t - mean, 2);
|
sd += pow(t - mean, 2);
|
||||||
sd = sqrt(sd / frames);
|
sd = sqrt(sd / frames);
|
||||||
|
|
||||||
uint32 time_repartition[16] = {0};
|
u32 time_repartition[16] = {0};
|
||||||
for (auto t : times)
|
for (auto t : times)
|
||||||
{
|
{
|
||||||
size_t slot = std::min<size_t>(t / 2.0, std::size(time_repartition) - 1);
|
size_t slot = std::min<size_t>(t / 2.0, std::size(time_repartition) - 1);
|
||||||
|
@ -221,7 +221,7 @@ void GSDeviceOGL::GenerateProfilerData()
|
||||||
fprintf(stderr, "SD %4.2f ms\n", sd);
|
fprintf(stderr, "SD %4.2f ms\n", sd);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "Frame Repartition\n");
|
fprintf(stderr, "Frame Repartition\n");
|
||||||
for (uint32 i = 0; i < std::size(time_repartition); i++)
|
for (u32 i = 0; i < std::size(time_repartition); i++)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i + 1), time_repartition[i]);
|
fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i + 1), time_repartition[i]);
|
||||||
}
|
}
|
||||||
|
@ -420,7 +420,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
|
||||||
{
|
{
|
||||||
GL_PUSH("GSDeviceOGL::Sampler");
|
GL_PUSH("GSDeviceOGL::Sampler");
|
||||||
|
|
||||||
for (uint32 key = 0; key < std::size(m_ps_ss); key++)
|
for (u32 key = 0; key < std::size(m_ps_ss); key++)
|
||||||
{
|
{
|
||||||
m_ps_ss[key] = CreateSampler(PSSamplerSelector(key));
|
m_ps_ss[key] = CreateSampler(PSSamplerSelector(key));
|
||||||
}
|
}
|
||||||
|
@ -606,7 +606,7 @@ bool GSDeviceOGL::Create(const WindowInfo& wi)
|
||||||
// When VRAM is below 2GB, we add a factor 2 because RAM can be used. Potentially
|
// When VRAM is below 2GB, we add a factor 2 because RAM can be used. Potentially
|
||||||
// low VRAM gpu can go higher but perf will be bad anyway.
|
// low VRAM gpu can go higher but perf will be bad anyway.
|
||||||
if (vram[0] > 0 && vram[0] < 1800000)
|
if (vram[0] > 0 && vram[0] < 1800000)
|
||||||
GLState::available_vram = (int64)(vram[0]) * 1024ul * 2ul;
|
GLState::available_vram = (s64)(vram[0]) * 1024ul * 2ul;
|
||||||
|
|
||||||
fprintf(stdout, "Available VRAM/RAM:%lldMB for textures\n", GLState::available_vram >> 20u);
|
fprintf(stdout, "Available VRAM/RAM:%lldMB for textures\n", GLState::available_vram >> 20u);
|
||||||
|
|
||||||
|
@ -654,14 +654,14 @@ void GSDeviceOGL::CreateTextureFX()
|
||||||
m_gs[2] = CompileGS(GSSelector(2));
|
m_gs[2] = CompileGS(GSSelector(2));
|
||||||
m_gs[4] = CompileGS(GSSelector(4));
|
m_gs[4] = CompileGS(GSSelector(4));
|
||||||
|
|
||||||
for (uint32 key = 0; key < std::size(m_vs); key++)
|
for (u32 key = 0; key < std::size(m_vs); key++)
|
||||||
m_vs[key] = CompileVS(VSSelector(key));
|
m_vs[key] = CompileVS(VSSelector(key));
|
||||||
|
|
||||||
// Enable all bits for stencil operations. Technically 1 bit is
|
// Enable all bits for stencil operations. Technically 1 bit is
|
||||||
// enough but buffer is polluted with noise. Clear will be limited
|
// enough but buffer is polluted with noise. Clear will be limited
|
||||||
// to the mask.
|
// to the mask.
|
||||||
glStencilMask(0xFF);
|
glStencilMask(0xFF);
|
||||||
for (uint32 key = 0; key < std::size(m_om_dss); key++)
|
for (u32 key = 0; key < std::size(m_om_dss); key++)
|
||||||
{
|
{
|
||||||
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
|
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
|
||||||
}
|
}
|
||||||
|
@ -747,7 +747,7 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
|
||||||
// TODO: check size of scissor before toggling it
|
// TODO: check size of scissor before toggling it
|
||||||
glDisable(GL_SCISSOR_TEST);
|
glDisable(GL_SCISSOR_TEST);
|
||||||
|
|
||||||
const uint32 old_color_mask = GLState::wrgba;
|
const u32 old_color_mask = GLState::wrgba;
|
||||||
OMSetColorMaskState();
|
OMSetColorMaskState();
|
||||||
|
|
||||||
if (T->IsBackbuffer())
|
if (T->IsBackbuffer())
|
||||||
|
@ -773,7 +773,7 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
|
||||||
T->WasCleaned();
|
T->WasCleaned();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c)
|
void GSDeviceOGL::ClearRenderTarget(GSTexture* t, u32 c)
|
||||||
{
|
{
|
||||||
if (!t)
|
if (!t)
|
||||||
return;
|
return;
|
||||||
|
@ -828,7 +828,7 @@ void GSDeviceOGL::ClearDepth(GSTexture* t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c)
|
void GSDeviceOGL::ClearStencil(GSTexture* t, u8 c)
|
||||||
{
|
{
|
||||||
if (!t)
|
if (!t)
|
||||||
return;
|
return;
|
||||||
|
@ -1404,7 +1404,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
|
||||||
else
|
else
|
||||||
OMSetRenderTargets(dTex, NULL);
|
OMSetRenderTargets(dTex, NULL);
|
||||||
|
|
||||||
OMSetBlendState((uint8)bs);
|
OMSetBlendState((u8)bs);
|
||||||
OMSetColorMaskState(cms);
|
OMSetColorMaskState(cms);
|
||||||
|
|
||||||
// ************************************
|
// ************************************
|
||||||
|
@ -1475,7 +1475,7 @@ void GSDeviceOGL::RenderOsd(GSTexture* dt)
|
||||||
m_shader->BindPipeline(m_convert.ps[ShaderConvert_OSD]);
|
m_shader->BindPipeline(m_convert.ps[ShaderConvert_OSD]);
|
||||||
|
|
||||||
OMSetDepthStencilState(m_convert.dss);
|
OMSetDepthStencilState(m_convert.dss);
|
||||||
OMSetBlendState((uint8)GSDeviceOGL::m_MERGE_BLEND);
|
OMSetBlendState((u8)GSDeviceOGL::m_MERGE_BLEND);
|
||||||
OMSetRenderTargets(dt, NULL);
|
OMSetRenderTargets(dt, NULL);
|
||||||
|
|
||||||
if (m_osd.m_texture_dirty)
|
if (m_osd.m_texture_dirty)
|
||||||
|
@ -1839,7 +1839,7 @@ void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDeviceOGL::OMSetBlendState(uint8 blend_index, uint8 blend_factor, bool is_blend_constant, bool accumulation_blend)
|
void GSDeviceOGL::OMSetBlendState(u8 blend_index, u8 blend_factor, bool is_blend_constant, bool accumulation_blend)
|
||||||
{
|
{
|
||||||
if (blend_index)
|
if (blend_index)
|
||||||
{
|
{
|
||||||
|
@ -2137,7 +2137,7 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16 GSDeviceOGL::ConvertBlendEnum(uint16 generic)
|
u16 GSDeviceOGL::ConvertBlendEnum(u16 generic)
|
||||||
{
|
{
|
||||||
switch (generic)
|
switch (generic)
|
||||||
{
|
{
|
||||||
|
|
|
@ -25,8 +25,8 @@
|
||||||
#include "GLState.h"
|
#include "GLState.h"
|
||||||
|
|
||||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||||
extern uint64 g_real_texture_upload_byte;
|
extern u64 g_real_texture_upload_byte;
|
||||||
extern uint64 g_vertex_upload_byte;
|
extern u64 g_vertex_upload_byte;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
class GSDepthStencilOGL
|
class GSDepthStencilOGL
|
||||||
|
@ -164,20 +164,20 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 int_fst : 1;
|
u32 int_fst : 1;
|
||||||
uint32 _free : 31;
|
u32 _free : 31;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint32() const { return key; }
|
operator u32() const { return key; }
|
||||||
|
|
||||||
VSSelector()
|
VSSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
VSSelector(uint32 k)
|
VSSelector(u32 k)
|
||||||
: key(k)
|
: key(k)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -189,23 +189,23 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 sprite : 1;
|
u32 sprite : 1;
|
||||||
uint32 point : 1;
|
u32 point : 1;
|
||||||
uint32 line : 1;
|
u32 line : 1;
|
||||||
|
|
||||||
uint32 _free : 29;
|
u32 _free : 29;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint32() const { return key; }
|
operator u32() const { return key; }
|
||||||
|
|
||||||
GSSelector()
|
GSSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
GSSelector(uint32 k)
|
GSSelector(u32 k)
|
||||||
: key(k)
|
: key(k)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -287,72 +287,72 @@ public:
|
||||||
{
|
{
|
||||||
// *** Word 1
|
// *** Word 1
|
||||||
// Format
|
// Format
|
||||||
uint32 tex_fmt : 4;
|
u32 tex_fmt : 4;
|
||||||
uint32 dfmt : 2;
|
u32 dfmt : 2;
|
||||||
uint32 depth_fmt : 2;
|
u32 depth_fmt : 2;
|
||||||
// Alpha extension/Correction
|
// Alpha extension/Correction
|
||||||
uint32 aem : 1;
|
u32 aem : 1;
|
||||||
uint32 fba : 1;
|
u32 fba : 1;
|
||||||
// Fog
|
// Fog
|
||||||
uint32 fog : 1;
|
u32 fog : 1;
|
||||||
// Flat/goround shading
|
// Flat/goround shading
|
||||||
uint32 iip : 1;
|
u32 iip : 1;
|
||||||
// Pixel test
|
// Pixel test
|
||||||
uint32 date : 3;
|
u32 date : 3;
|
||||||
uint32 atst : 3;
|
u32 atst : 3;
|
||||||
// Color sampling
|
// Color sampling
|
||||||
uint32 fst : 1; // Investigate to do it on the VS
|
u32 fst : 1; // Investigate to do it on the VS
|
||||||
uint32 tfx : 3;
|
u32 tfx : 3;
|
||||||
uint32 tcc : 1;
|
u32 tcc : 1;
|
||||||
uint32 wms : 2;
|
u32 wms : 2;
|
||||||
uint32 wmt : 2;
|
u32 wmt : 2;
|
||||||
uint32 ltf : 1;
|
u32 ltf : 1;
|
||||||
// Shuffle and fbmask effect
|
// Shuffle and fbmask effect
|
||||||
uint32 shuffle : 1;
|
u32 shuffle : 1;
|
||||||
uint32 read_ba : 1;
|
u32 read_ba : 1;
|
||||||
uint32 write_rg : 1;
|
u32 write_rg : 1;
|
||||||
uint32 fbmask : 1;
|
u32 fbmask : 1;
|
||||||
|
|
||||||
//uint32 _free1:0;
|
//u32 _free1:0;
|
||||||
|
|
||||||
// *** Word 2
|
// *** Word 2
|
||||||
// Blend and Colclip
|
// Blend and Colclip
|
||||||
uint32 blend_a : 2;
|
u32 blend_a : 2;
|
||||||
uint32 blend_b : 2;
|
u32 blend_b : 2;
|
||||||
uint32 blend_c : 2;
|
u32 blend_c : 2;
|
||||||
uint32 blend_d : 2;
|
u32 blend_d : 2;
|
||||||
uint32 clr1 : 1; // useful?
|
u32 clr1 : 1; // useful?
|
||||||
uint32 hdr : 1;
|
u32 hdr : 1;
|
||||||
uint32 colclip : 1;
|
u32 colclip : 1;
|
||||||
uint32 pabe : 1;
|
u32 pabe : 1;
|
||||||
|
|
||||||
// Others ways to fetch the texture
|
// Others ways to fetch the texture
|
||||||
uint32 channel : 3;
|
u32 channel : 3;
|
||||||
|
|
||||||
// Dithering
|
// Dithering
|
||||||
uint32 dither : 2;
|
u32 dither : 2;
|
||||||
|
|
||||||
// Depth clamp
|
// Depth clamp
|
||||||
uint32 zclamp : 1;
|
u32 zclamp : 1;
|
||||||
|
|
||||||
// Hack
|
// Hack
|
||||||
uint32 tcoffsethack : 1;
|
u32 tcoffsethack : 1;
|
||||||
uint32 urban_chaos_hle : 1;
|
u32 urban_chaos_hle : 1;
|
||||||
uint32 tales_of_abyss_hle : 1;
|
u32 tales_of_abyss_hle : 1;
|
||||||
uint32 tex_is_fb : 1; // Jak Shadows
|
u32 tex_is_fb : 1; // Jak Shadows
|
||||||
uint32 automatic_lod : 1;
|
u32 automatic_lod : 1;
|
||||||
uint32 manual_lod : 1;
|
u32 manual_lod : 1;
|
||||||
uint32 point_sampler : 1;
|
u32 point_sampler : 1;
|
||||||
uint32 invalid_tex0 : 1; // Lupin the 3rd
|
u32 invalid_tex0 : 1; // Lupin the 3rd
|
||||||
|
|
||||||
uint32 _free2 : 6;
|
u32 _free2 : 6;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint64 key;
|
u64 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
// FIXME is the & useful ?
|
// FIXME is the & useful ?
|
||||||
operator uint64() const { return key; }
|
operator u64() const { return key; }
|
||||||
|
|
||||||
PSSelector()
|
PSSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
|
@ -366,25 +366,25 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 tau : 1;
|
u32 tau : 1;
|
||||||
uint32 tav : 1;
|
u32 tav : 1;
|
||||||
uint32 biln : 1;
|
u32 biln : 1;
|
||||||
uint32 triln : 3;
|
u32 triln : 3;
|
||||||
uint32 aniso : 1;
|
u32 aniso : 1;
|
||||||
|
|
||||||
uint32 _free : 25;
|
u32 _free : 25;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
operator uint32() { return key; }
|
operator u32() { return key; }
|
||||||
|
|
||||||
PSSamplerSelector()
|
PSSamplerSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
PSSamplerSelector(uint32 k)
|
PSSamplerSelector(u32 k)
|
||||||
: key(k)
|
: key(k)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -396,25 +396,25 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 ztst : 2;
|
u32 ztst : 2;
|
||||||
uint32 zwe : 1;
|
u32 zwe : 1;
|
||||||
uint32 date : 1;
|
u32 date : 1;
|
||||||
uint32 date_one : 1;
|
u32 date_one : 1;
|
||||||
|
|
||||||
uint32 _free : 27;
|
u32 _free : 27;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
// FIXME is the & useful ?
|
// FIXME is the & useful ?
|
||||||
operator uint32() { return key; }
|
operator u32() { return key; }
|
||||||
|
|
||||||
OMDepthStencilSelector()
|
OMDepthStencilSelector()
|
||||||
: key(0)
|
: key(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
OMDepthStencilSelector(uint32 k)
|
OMDepthStencilSelector(u32 k)
|
||||||
: key(k)
|
: key(k)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -426,30 +426,30 @@ public:
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 wr : 1;
|
u32 wr : 1;
|
||||||
uint32 wg : 1;
|
u32 wg : 1;
|
||||||
uint32 wb : 1;
|
u32 wb : 1;
|
||||||
uint32 wa : 1;
|
u32 wa : 1;
|
||||||
|
|
||||||
uint32 _free : 28;
|
u32 _free : 28;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 wrgba : 4;
|
u32 wrgba : 4;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 key;
|
u32 key;
|
||||||
};
|
};
|
||||||
|
|
||||||
// FIXME is the & useful ?
|
// FIXME is the & useful ?
|
||||||
operator uint32() { return key & 0xf; }
|
operator u32() { return key & 0xf; }
|
||||||
|
|
||||||
OMColorMaskSelector()
|
OMColorMaskSelector()
|
||||||
: key(0xF)
|
: key(0xF)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
OMColorMaskSelector(uint32 c) { wrgba = c; }
|
OMColorMaskSelector(u32 c) { wrgba = c; }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct alignas(32) MiscConstantBuffer
|
struct alignas(32) MiscConstantBuffer
|
||||||
|
@ -539,7 +539,7 @@ private:
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint16 last_query;
|
u16 last_query;
|
||||||
GLuint timer_query[1 << 16];
|
GLuint timer_query[1 << 16];
|
||||||
|
|
||||||
GLuint timer() { return timer_query[last_query]; }
|
GLuint timer() { return timer_query[last_query]; }
|
||||||
|
@ -549,7 +549,7 @@ private:
|
||||||
GLuint m_gs[1 << 3];
|
GLuint m_gs[1 << 3];
|
||||||
GLuint m_ps_ss[1 << 7];
|
GLuint m_ps_ss[1 << 7];
|
||||||
GSDepthStencilOGL* m_om_dss[1 << 5];
|
GSDepthStencilOGL* m_om_dss[1 << 5];
|
||||||
std::unordered_map<uint64, GLuint> m_ps;
|
std::unordered_map<u64, GLuint> m_ps;
|
||||||
GLuint m_apitrace;
|
GLuint m_apitrace;
|
||||||
|
|
||||||
GLuint m_palette_ss;
|
GLuint m_palette_ss;
|
||||||
|
@ -574,7 +574,7 @@ private:
|
||||||
void OMAttachDs(GSTextureOGL* ds = NULL);
|
void OMAttachDs(GSTextureOGL* ds = NULL);
|
||||||
void OMSetFBO(GLuint fbo);
|
void OMSetFBO(GLuint fbo);
|
||||||
|
|
||||||
uint16 ConvertBlendEnum(uint16 generic) final;
|
u16 ConvertBlendEnum(u16 generic) final;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSShaderOGL* m_shader;
|
GSShaderOGL* m_shader;
|
||||||
|
@ -597,9 +597,9 @@ public:
|
||||||
void DrawIndexedPrimitive(int offset, int count) final;
|
void DrawIndexedPrimitive(int offset, int count) final;
|
||||||
|
|
||||||
void ClearRenderTarget(GSTexture* t, const GSVector4& c) final;
|
void ClearRenderTarget(GSTexture* t, const GSVector4& c) final;
|
||||||
void ClearRenderTarget(GSTexture* t, uint32 c) final;
|
void ClearRenderTarget(GSTexture* t, u32 c) final;
|
||||||
void ClearDepth(GSTexture* t) final;
|
void ClearDepth(GSTexture* t) final;
|
||||||
void ClearStencil(GSTexture* t, uint8 c) final;
|
void ClearStencil(GSTexture* t, u8 c) final;
|
||||||
|
|
||||||
void InitPrimDateTexture(GSTexture* rt, const GSVector4i& area);
|
void InitPrimDateTexture(GSTexture* rt, const GSVector4i& area);
|
||||||
void RecycleDateTexture();
|
void RecycleDateTexture();
|
||||||
|
@ -624,7 +624,7 @@ public:
|
||||||
void PSSetSamplerState(GLuint ss);
|
void PSSetSamplerState(GLuint ss);
|
||||||
|
|
||||||
void OMSetDepthStencilState(GSDepthStencilOGL* dss);
|
void OMSetDepthStencilState(GSDepthStencilOGL* dss);
|
||||||
void OMSetBlendState(uint8 blend_index = 0, uint8 blend_factor = 0, bool is_blend_constant = false, bool accumulation_blend = false);
|
void OMSetBlendState(u8 blend_index = 0, u8 blend_factor = 0, bool is_blend_constant = false, bool accumulation_blend = false);
|
||||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) final;
|
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL) final;
|
||||||
void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector());
|
void OMSetColorMaskState(OMColorMaskSelector sel = OMColorMaskSelector());
|
||||||
|
|
||||||
|
|
|
@ -131,8 +131,8 @@ void GSRendererOGL::EmulateZbuffer()
|
||||||
|
|
||||||
// On the real GS we appear to do clamping on the max z value the format allows.
|
// On the real GS we appear to do clamping on the max z value the format allows.
|
||||||
// Clamping is done after rasterization.
|
// Clamping is done after rasterization.
|
||||||
const uint32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
|
const u32 max_z = 0xFFFFFFFF >> (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt * 8);
|
||||||
const bool clamp_z = (uint32)(GSVector4i(m_vt.m_max.p).z) > max_z;
|
const bool clamp_z = (u32)(GSVector4i(m_vt.m_max.p).z) > max_z;
|
||||||
|
|
||||||
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
|
vs_cb.MaxDepth = GSVector2i(0xFFFFFFFF);
|
||||||
//ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, 1.0f);
|
//ps_cb.MaxDepth = GSVector4(0.0f, 0.0f, 0.0f, 1.0f);
|
||||||
|
@ -184,11 +184,11 @@ void GSRendererOGL::EmulateTextureShuffleAndFbmask()
|
||||||
|
|
||||||
// Please bang my head against the wall!
|
// Please bang my head against the wall!
|
||||||
// 1/ Reduce the frame mask to a 16 bit format
|
// 1/ Reduce the frame mask to a 16 bit format
|
||||||
const uint32& m = m_context->FRAME.FBMSK;
|
const u32& m = m_context->FRAME.FBMSK;
|
||||||
const uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 16) & 0x8000);
|
const u32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 16) & 0x8000);
|
||||||
// FIXME GSVector will be nice here
|
// FIXME GSVector will be nice here
|
||||||
const uint8 rg_mask = fbmask & 0xFF;
|
const u8 rg_mask = fbmask & 0xFF;
|
||||||
const uint8 ba_mask = (fbmask >> 8) & 0xFF;
|
const u8 ba_mask = (fbmask >> 8) & 0xFF;
|
||||||
m_om_csel.wrgba = 0;
|
m_om_csel.wrgba = 0;
|
||||||
|
|
||||||
// 2 Select the new mask (Please someone put SSE here)
|
// 2 Select the new mask (Please someone put SSE here)
|
||||||
|
@ -443,10 +443,10 @@ void GSRendererOGL::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
|
||||||
// the rendered size of the framebuffer
|
// the rendered size of the framebuffer
|
||||||
|
|
||||||
GSVertex* s = &m_vertex.buff[0];
|
GSVertex* s = &m_vertex.buff[0];
|
||||||
s[0].XYZ.X = (uint16)(m_context->XYOFFSET.OFX + 0);
|
s[0].XYZ.X = (u16)(m_context->XYOFFSET.OFX + 0);
|
||||||
s[1].XYZ.X = (uint16)(m_context->XYOFFSET.OFX + 16384);
|
s[1].XYZ.X = (u16)(m_context->XYOFFSET.OFX + 16384);
|
||||||
s[0].XYZ.Y = (uint16)(m_context->XYOFFSET.OFY + 0);
|
s[0].XYZ.Y = (u16)(m_context->XYOFFSET.OFY + 0);
|
||||||
s[1].XYZ.Y = (uint16)(m_context->XYOFFSET.OFY + 16384);
|
s[1].XYZ.Y = (u16)(m_context->XYOFFSET.OFY + 16384);
|
||||||
|
|
||||||
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
|
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
|
||||||
m_index.tail = 2;
|
m_index.tail = 2;
|
||||||
|
@ -473,7 +473,7 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute the blending equation to detect special case
|
// Compute the blending equation to detect special case
|
||||||
const uint8 blend_index = uint8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D);
|
const u8 blend_index = u8(((ALPHA.A * 3 + ALPHA.B) * 3 + ALPHA.C) * 3 + ALPHA.D);
|
||||||
const int blend_flag = m_dev->GetBlendFlags(blend_index);
|
const int blend_flag = m_dev->GetBlendFlags(blend_index);
|
||||||
|
|
||||||
// SW Blend is (nearly) free. Let's use it.
|
// SW Blend is (nearly) free. Let's use it.
|
||||||
|
@ -627,7 +627,7 @@ void GSRendererOGL::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
|
||||||
if (m_ps_sel.dfmt == 1 && ALPHA.C == 1)
|
if (m_ps_sel.dfmt == 1 && ALPHA.C == 1)
|
||||||
{
|
{
|
||||||
// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
|
// 24 bits doesn't have an alpha channel so use 1.0f fix factor as equivalent
|
||||||
const uint8 hacked_blend_index = blend_index + 3; // +3 <=> +1 on C
|
const u8 hacked_blend_index = blend_index + 3; // +3 <=> +1 on C
|
||||||
dev->OMSetBlendState(hacked_blend_index, 128, true);
|
dev->OMSetBlendState(hacked_blend_index, 128, true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -646,8 +646,8 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
|
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
|
||||||
const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
|
const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
|
||||||
|
|
||||||
const uint8 wms = m_context->CLAMP.WMS;
|
const u8 wms = m_context->CLAMP.WMS;
|
||||||
const uint8 wmt = m_context->CLAMP.WMT;
|
const u8 wmt = m_context->CLAMP.WMT;
|
||||||
const bool complex_wms_wmt = !!((wms | wmt) & 2);
|
const bool complex_wms_wmt = !!((wms | wmt) & 2);
|
||||||
|
|
||||||
const bool need_mipmap = IsMipMapDraw();
|
const bool need_mipmap = IsMipMapDraw();
|
||||||
|
@ -660,7 +660,7 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
|
||||||
switch (UserHacks_tri_filter)
|
switch (UserHacks_tri_filter)
|
||||||
{
|
{
|
||||||
case TriFiltering::Forced:
|
case TriFiltering::Forced:
|
||||||
trilinear = static_cast<uint8>(GS_MIN_FILTER::Linear_Mipmap_Linear);
|
trilinear = static_cast<u8>(GS_MIN_FILTER::Linear_Mipmap_Linear);
|
||||||
trilinear_auto = m_mipmap != 2;
|
trilinear_auto = m_mipmap != 2;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1323,7 +1323,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
{
|
{
|
||||||
m_ps_sel.fog = 1;
|
m_ps_sel.fog = 1;
|
||||||
|
|
||||||
const GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.u32[0]);
|
const GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.U32[0]);
|
||||||
// Blend AREF to avoid to load a random value for alpha (dirty cache)
|
// Blend AREF to avoid to load a random value for alpha (dirty cache)
|
||||||
ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF);
|
ps_cb.FogColor_AREF = fc.blend32<8>(ps_cb.FogColor_AREF);
|
||||||
}
|
}
|
||||||
|
@ -1334,7 +1334,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
// pass to handle the depth based on the alpha test.
|
// pass to handle the depth based on the alpha test.
|
||||||
bool ate_RGBA_then_Z = false;
|
bool ate_RGBA_then_Z = false;
|
||||||
bool ate_RGB_then_ZA = false;
|
bool ate_RGB_then_ZA = false;
|
||||||
uint8 ps_atst = 0;
|
u8 ps_atst = 0;
|
||||||
if (ate_first_pass & ate_second_pass)
|
if (ate_first_pass & ate_second_pass)
|
||||||
{
|
{
|
||||||
GL_DBG("Complex Alpha Test");
|
GL_DBG("Complex Alpha Test");
|
||||||
|
@ -1405,7 +1405,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
// We need the palette to convert the depth to the correct alpha value.
|
// We need the palette to convert the depth to the correct alpha value.
|
||||||
if (!tex->m_palette)
|
if (!tex->m_palette)
|
||||||
{
|
{
|
||||||
const uint16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal;
|
const u16 pal = GSLocalMemory::m_psm[tex->m_TEX0.PSM].pal;
|
||||||
m_tc->AttachPaletteToSource(tex, pal, true);
|
m_tc->AttachPaletteToSource(tex, pal, true);
|
||||||
dev->PSSetShaderResource(1, tex->m_palette);
|
dev->PSSetShaderResource(1, tex->m_palette);
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,7 +63,7 @@ GLuint GSShaderOGL::LinkPipeline(const std::string& pretty_print, GLuint vs, GLu
|
||||||
|
|
||||||
GLuint GSShaderOGL::LinkProgram(GLuint vs, GLuint gs, GLuint ps)
|
GLuint GSShaderOGL::LinkProgram(GLuint vs, GLuint gs, GLuint ps)
|
||||||
{
|
{
|
||||||
uint32 hash = ((vs ^ gs) << 24) ^ ps;
|
u32 hash = ((vs ^ gs) << 24) ^ ps;
|
||||||
auto it = m_program.find(hash);
|
auto it = m_program.find(hash);
|
||||||
if (it != m_program.end())
|
if (it != m_program.end())
|
||||||
return it->second;
|
return it->second;
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
class GSShaderOGL
|
class GSShaderOGL
|
||||||
{
|
{
|
||||||
GLuint m_pipeline;
|
GLuint m_pipeline;
|
||||||
std::unordered_map<uint32, GLuint> m_program;
|
std::unordered_map<u32, GLuint> m_program;
|
||||||
const bool m_debug_shader;
|
const bool m_debug_shader;
|
||||||
|
|
||||||
std::vector<GLuint> m_shad_to_delete;
|
std::vector<GLuint> m_shad_to_delete;
|
||||||
|
|
|
@ -20,20 +20,20 @@
|
||||||
#include "GS/GSPng.h"
|
#include "GS/GSPng.h"
|
||||||
|
|
||||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||||
extern uint64 g_real_texture_upload_byte;
|
extern u64 g_real_texture_upload_byte;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// FIXME OGL4: investigate, only 1 unpack buffer always bound
|
// FIXME OGL4: investigate, only 1 unpack buffer always bound
|
||||||
namespace PboPool
|
namespace PboPool
|
||||||
{
|
{
|
||||||
|
|
||||||
const uint32 m_pbo_size = 64 * 1024 * 1024;
|
const u32 m_pbo_size = 64 * 1024 * 1024;
|
||||||
const uint32 m_seg_size = 16 * 1024 * 1024;
|
const u32 m_seg_size = 16 * 1024 * 1024;
|
||||||
|
|
||||||
GLuint m_buffer;
|
GLuint m_buffer;
|
||||||
uptr m_offset;
|
uptr m_offset;
|
||||||
char* m_map;
|
char* m_map;
|
||||||
uint32 m_size;
|
u32 m_size;
|
||||||
GLsync m_fence[m_pbo_size / m_seg_size];
|
GLsync m_fence[m_pbo_size / m_seg_size];
|
||||||
|
|
||||||
// Option for buffer storage
|
// Option for buffer storage
|
||||||
|
@ -61,7 +61,7 @@ namespace PboPool
|
||||||
UnbindPbo();
|
UnbindPbo();
|
||||||
}
|
}
|
||||||
|
|
||||||
char* Map(uint32 size)
|
char* Map(u32 size)
|
||||||
{
|
{
|
||||||
char* map;
|
char* map;
|
||||||
// Note: keep offset aligned for SSE/AVX
|
// Note: keep offset aligned for SSE/AVX
|
||||||
|
@ -113,8 +113,8 @@ namespace PboPool
|
||||||
|
|
||||||
void Sync()
|
void Sync()
|
||||||
{
|
{
|
||||||
uint32 segment_current = m_offset / m_seg_size;
|
u32 segment_current = m_offset / m_seg_size;
|
||||||
uint32 segment_next = (m_offset + m_size) / m_seg_size;
|
u32 segment_next = (m_offset + m_size) / m_seg_size;
|
||||||
|
|
||||||
if (segment_current != segment_next)
|
if (segment_current != segment_next)
|
||||||
{
|
{
|
||||||
|
@ -256,7 +256,7 @@ GSTextureOGL::GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read,
|
||||||
return; // backbuffer isn't a real texture
|
return; // backbuffer isn't a real texture
|
||||||
case GSTexture::Offscreen:
|
case GSTexture::Offscreen:
|
||||||
// Offscreen is only used to read color. So it only requires 4B by pixel
|
// Offscreen is only used to read color. So it only requires 4B by pixel
|
||||||
m_local_buffer = (uint8*)_aligned_malloc(m_size.x * m_size.y * 4, 32);
|
m_local_buffer = (u8*)_aligned_malloc(m_size.x * m_size.y * 4, 32);
|
||||||
break;
|
break;
|
||||||
case GSTexture::Texture:
|
case GSTexture::Texture:
|
||||||
// Only 32 bits input texture will be supported for mipmap
|
// Only 32 bits input texture will be supported for mipmap
|
||||||
|
@ -391,8 +391,8 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
|
||||||
|
|
||||||
m_clean = false;
|
m_clean = false;
|
||||||
|
|
||||||
uint32 row_byte = r.width() << m_int_shift;
|
u32 row_byte = r.width() << m_int_shift;
|
||||||
uint32 map_size = r.height() * row_byte;
|
u32 map_size = r.height() * row_byte;
|
||||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||||
g_real_texture_upload_byte += map_size;
|
g_real_texture_upload_byte += map_size;
|
||||||
#endif
|
#endif
|
||||||
|
@ -457,7 +457,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
|
||||||
ASSERT(r.width() != 0);
|
ASSERT(r.width() != 0);
|
||||||
ASSERT(r.height() != 0);
|
ASSERT(r.height() != 0);
|
||||||
|
|
||||||
uint32 row_byte = r.width() << m_int_shift;
|
u32 row_byte = r.width() << m_int_shift;
|
||||||
m.pitch = row_byte;
|
m.pitch = row_byte;
|
||||||
|
|
||||||
if (m_type == GSTexture::Offscreen)
|
if (m_type == GSTexture::Offscreen)
|
||||||
|
@ -495,9 +495,9 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
|
||||||
|
|
||||||
m_clean = false;
|
m_clean = false;
|
||||||
|
|
||||||
uint32 map_size = r.height() * row_byte;
|
u32 map_size = r.height() * row_byte;
|
||||||
|
|
||||||
m.bits = (uint8*)PboPool::Map(map_size);
|
m.bits = (u8*)PboPool::Map(map_size);
|
||||||
|
|
||||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||||
g_real_texture_upload_byte += map_size;
|
g_real_texture_upload_byte += map_size;
|
||||||
|
@ -586,9 +586,9 @@ void GSTextureOGL::CommitPages(const GSVector2i& region, bool commit)
|
||||||
bool GSTextureOGL::Save(const std::string& fn)
|
bool GSTextureOGL::Save(const std::string& fn)
|
||||||
{
|
{
|
||||||
// Collect the texture data
|
// Collect the texture data
|
||||||
uint32 pitch = 4 * m_committed_size.x;
|
u32 pitch = 4 * m_committed_size.x;
|
||||||
uint32 buf_size = pitch * m_committed_size.y * 2; // Note *2 for security (depth/stencil)
|
u32 buf_size = pitch * m_committed_size.y * 2; // Note *2 for security (depth/stencil)
|
||||||
std::unique_ptr<uint8[]> image(new uint8[buf_size]);
|
std::unique_ptr<u8[]> image(new u8[buf_size]);
|
||||||
#ifdef ENABLE_OGL_DEBUG
|
#ifdef ENABLE_OGL_DEBUG
|
||||||
GSPng::Format fmt = GSPng::RGB_A_PNG;
|
GSPng::Format fmt = GSPng::RGB_A_PNG;
|
||||||
#else
|
#else
|
||||||
|
@ -646,7 +646,7 @@ bool GSTextureOGL::Save(const std::string& fn)
|
||||||
return GSPng::Save(fmt, fn, image.get(), m_committed_size.x, m_committed_size.y, pitch, compression);
|
return GSPng::Save(fmt, fn, image.get(), m_committed_size.x, m_committed_size.y, pitch, compression);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 GSTextureOGL::GetMemUsage()
|
u32 GSTextureOGL::GetMemUsage()
|
||||||
{
|
{
|
||||||
return m_mem_usage;
|
return m_mem_usage;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ namespace PboPool
|
||||||
inline void UnbindPbo();
|
inline void UnbindPbo();
|
||||||
inline void Sync();
|
inline void Sync();
|
||||||
|
|
||||||
inline char* Map(uint32 size);
|
inline char* Map(u32 size);
|
||||||
inline void Unmap();
|
inline void Unmap();
|
||||||
inline uptr Offset();
|
inline uptr Offset();
|
||||||
inline void EndTransfer();
|
inline void EndTransfer();
|
||||||
|
@ -41,7 +41,7 @@ private:
|
||||||
bool m_clean;
|
bool m_clean;
|
||||||
bool m_generate_mipmap;
|
bool m_generate_mipmap;
|
||||||
|
|
||||||
uint8* m_local_buffer;
|
u8* m_local_buffer;
|
||||||
// Avoid alignment constrain
|
// Avoid alignment constrain
|
||||||
//GSVector4i m_r;
|
//GSVector4i m_r;
|
||||||
int m_r_x;
|
int m_r_x;
|
||||||
|
@ -54,10 +54,10 @@ private:
|
||||||
// internal opengl format/type/alignment
|
// internal opengl format/type/alignment
|
||||||
GLenum m_int_format;
|
GLenum m_int_format;
|
||||||
GLenum m_int_type;
|
GLenum m_int_type;
|
||||||
uint32 m_int_shift;
|
u32 m_int_shift;
|
||||||
|
|
||||||
// Allow to track size of allocated memory
|
// Allow to track size of allocated memory
|
||||||
uint32 m_mem_usage;
|
u32 m_mem_usage;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap);
|
explicit GSTextureOGL(int type, int w, int h, int format, GLuint fbo_read, bool mipmap);
|
||||||
|
@ -72,7 +72,7 @@ public:
|
||||||
bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); }
|
bool IsBackbuffer() { return (m_type == GSTexture::Backbuffer); }
|
||||||
bool IsDss() { return (m_type == GSTexture::DepthStencil || m_type == GSTexture::SparseDepthStencil); }
|
bool IsDss() { return (m_type == GSTexture::DepthStencil || m_type == GSTexture::SparseDepthStencil); }
|
||||||
|
|
||||||
uint32 GetID() final { return m_texture_id; }
|
u32 GetID() final { return m_texture_id; }
|
||||||
bool HasBeenCleaned() { return m_clean; }
|
bool HasBeenCleaned() { return m_clean; }
|
||||||
void WasAttached() { m_clean = false; }
|
void WasAttached() { m_clean = false; }
|
||||||
void WasCleaned() { m_clean = true; }
|
void WasCleaned() { m_clean = true; }
|
||||||
|
@ -82,5 +82,5 @@ public:
|
||||||
|
|
||||||
void CommitPages(const GSVector2i& region, bool commit) final;
|
void CommitPages(const GSVector2i& region, bool commit) final;
|
||||||
|
|
||||||
uint32 GetMemUsage();
|
u32 GetMemUsage();
|
||||||
};
|
};
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
#include "GLState.h"
|
#include "GLState.h"
|
||||||
|
|
||||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||||
extern uint64 g_uniform_upload_byte;
|
extern u64 g_uniform_upload_byte;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,11 +26,11 @@ class GSUniformBufferOGL
|
||||||
{
|
{
|
||||||
GLuint m_buffer; // data object
|
GLuint m_buffer; // data object
|
||||||
GLuint m_index; // GLSL slot
|
GLuint m_index; // GLSL slot
|
||||||
uint32 m_size; // size of the data
|
u32 m_size; // size of the data
|
||||||
uint8* m_cache; // content of the previous upload
|
u8* m_cache; // content of the previous upload
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSUniformBufferOGL(const std::string& pretty_name, GLuint index, uint32 size)
|
GSUniformBufferOGL(const std::string& pretty_name, GLuint index, u32 size)
|
||||||
: m_index(index), m_size(size)
|
: m_index(index), m_size(size)
|
||||||
{
|
{
|
||||||
glGenBuffers(1, &m_buffer);
|
glGenBuffers(1, &m_buffer);
|
||||||
|
@ -38,7 +38,7 @@ public:
|
||||||
glObjectLabel(GL_BUFFER, m_buffer, pretty_name.size(), pretty_name.c_str());
|
glObjectLabel(GL_BUFFER, m_buffer, pretty_name.size(), pretty_name.c_str());
|
||||||
allocate();
|
allocate();
|
||||||
attach();
|
attach();
|
||||||
m_cache = (uint8*)_aligned_malloc(m_size, 32);
|
m_cache = (u8*)_aligned_malloc(m_size, 32);
|
||||||
memset(m_cache, 0, m_size);
|
memset(m_cache, 0, m_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -103,14 +103,14 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
||||||
m_sp = m_sp_map[sel];
|
m_sp = m_sp_map[sel];
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanline::EndDraw(uint64 frame, uint64 ticks, int actual, int total, int prims)
|
void GSDrawScanline::EndDraw(u64 frame, u64 ticks, int actual, int total, int prims)
|
||||||
{
|
{
|
||||||
m_ds_map.UpdateStats(frame, ticks, actual, total, prims);
|
m_ds_map.UpdateStats(frame, ticks, actual, total, prims);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef ENABLE_JIT_RASTERIZER
|
#ifndef ENABLE_JIT_RASTERIZER
|
||||||
|
|
||||||
void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan)
|
void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan)
|
||||||
{
|
{
|
||||||
GSScanlineSelector sel = m_global.sel;
|
GSScanlineSelector sel = m_global.sel;
|
||||||
|
|
||||||
|
@ -162,7 +162,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co
|
||||||
|
|
||||||
if (has_z)
|
if (has_z)
|
||||||
{
|
{
|
||||||
m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
|
m_local.p.z = vertex[index[1]].t.U32[3]; // u32 z is bypassed in t.w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -315,7 +315,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co
|
||||||
|
|
||||||
if (has_z)
|
if (has_z)
|
||||||
{
|
{
|
||||||
m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
|
m_local.p.z = vertex[index[1]].t.U32[3]; // u32 z is bypassed in t.w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -564,8 +564,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
if (sel.ztest)
|
if (sel.ztest)
|
||||||
{
|
{
|
||||||
zd = GSVector8i::load(
|
zd = GSVector8i::load(
|
||||||
(uint8*)m_global.vm + za * 2, (uint8*)m_global.vm + za * 2 + 16,
|
(u8*)m_global.vm + za * 2, (u8*)m_global.vm + za * 2 + 16,
|
||||||
(uint8*)m_global.vm + za * 2 + 32, (uint8*)m_global.vm + za * 2 + 48);
|
(u8*)m_global.vm + za * 2 + 32, (u8*)m_global.vm + za * 2 + 48);
|
||||||
|
|
||||||
switch (sel.zpsm)
|
switch (sel.zpsm)
|
||||||
{
|
{
|
||||||
|
@ -717,24 +717,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]];
|
const u8* tex = (const u8*)m_global.tex[lodi.U32[i]];
|
||||||
|
|
||||||
c00.u32[i] = m_global.clut[tex[addr00.u32[i]]];
|
c00.U32[i] = m_global.clut[tex[addr00.U32[i]]];
|
||||||
c01.u32[i] = m_global.clut[tex[addr01.u32[i]]];
|
c01.U32[i] = m_global.clut[tex[addr01.U32[i]]];
|
||||||
c10.u32[i] = m_global.clut[tex[addr10.u32[i]]];
|
c10.U32[i] = m_global.clut[tex[addr10.U32[i]]];
|
||||||
c11.u32[i] = m_global.clut[tex[addr11.u32[i]]];
|
c11.U32[i] = m_global.clut[tex[addr11.U32[i]]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]];
|
const u32* tex = (const u32*)m_global.tex[lodi.U32[i]];
|
||||||
|
|
||||||
c00.u32[i] = tex[addr00.u32[i]];
|
c00.U32[i] = tex[addr00.U32[i]];
|
||||||
c01.u32[i] = tex[addr01.u32[i]];
|
c01.U32[i] = tex[addr01.U32[i]];
|
||||||
c10.u32[i] = tex[addr10.u32[i]];
|
c10.U32[i] = tex[addr10.U32[i]];
|
||||||
c11.u32[i] = tex[addr11.u32[i]];
|
c11.U32[i] = tex[addr11.U32[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -765,14 +765,14 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]];
|
c00.U32[i] = m_global.clut[((const u8*)m_global.tex[lodi.U32[i]])[addr00.U32[i]]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]];
|
c00.U32[i] = ((const u32*)m_global.tex[lodi.U32[i]])[addr00.U32[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -838,24 +838,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]];
|
const u8* tex = (const u8*)m_global.tex[lodi.U32[i]];
|
||||||
|
|
||||||
c00.u32[i] = m_global.clut[tex[addr00.u32[i]]];
|
c00.U32[i] = m_global.clut[tex[addr00.U32[i]]];
|
||||||
c01.u32[i] = m_global.clut[tex[addr01.u32[i]]];
|
c01.U32[i] = m_global.clut[tex[addr01.U32[i]]];
|
||||||
c10.u32[i] = m_global.clut[tex[addr10.u32[i]]];
|
c10.U32[i] = m_global.clut[tex[addr10.U32[i]]];
|
||||||
c11.u32[i] = m_global.clut[tex[addr11.u32[i]]];
|
c11.U32[i] = m_global.clut[tex[addr11.U32[i]]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]];
|
const u32* tex = (const u32*)m_global.tex[lodi.U32[i]];
|
||||||
|
|
||||||
c00.u32[i] = tex[addr00.u32[i]];
|
c00.U32[i] = tex[addr00.U32[i]];
|
||||||
c01.u32[i] = tex[addr01.u32[i]];
|
c01.U32[i] = tex[addr01.U32[i]];
|
||||||
c10.u32[i] = tex[addr10.u32[i]];
|
c10.U32[i] = tex[addr10.U32[i]];
|
||||||
c11.u32[i] = tex[addr11.u32[i]];
|
c11.U32[i] = tex[addr11.U32[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -886,14 +886,14 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]];
|
c00.U32[i] = m_global.clut[((const u8*)m_global.tex[lodi.U32[i]])[addr00.U32[i]]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]];
|
c00.U32[i] = ((const u32*)m_global.tex[lodi.U32[i]])[addr00.U32[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -977,7 +977,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
|
|
||||||
if (sel.tlu)
|
if (sel.tlu)
|
||||||
{
|
{
|
||||||
const uint8* tex = (const uint8*)m_global.tex[0];
|
const u8* tex = (const u8*)m_global.tex[0];
|
||||||
|
|
||||||
c00 = addr00.gather32_32(tex, m_global.clut);
|
c00 = addr00.gather32_32(tex, m_global.clut);
|
||||||
c01 = addr01.gather32_32(tex, m_global.clut);
|
c01 = addr01.gather32_32(tex, m_global.clut);
|
||||||
|
@ -986,7 +986,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const uint32* tex = (const uint32*)m_global.tex[0];
|
const u32* tex = (const u32*)m_global.tex[0];
|
||||||
|
|
||||||
c00 = addr00.gather32_32(tex);
|
c00 = addr00.gather32_32(tex);
|
||||||
c01 = addr01.gather32_32(tex);
|
c01 = addr01.gather32_32(tex);
|
||||||
|
@ -1019,11 +1019,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
|
|
||||||
if (sel.tlu)
|
if (sel.tlu)
|
||||||
{
|
{
|
||||||
c00 = addr00.gather32_32((const uint8*)m_global.tex[0], m_global.clut);
|
c00 = addr00.gather32_32((const u8*)m_global.tex[0], m_global.clut);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
c00 = addr00.gather32_32((const uint32*)m_global.tex[0]);
|
c00 = addr00.gather32_32((const u32*)m_global.tex[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
rb = c00.sll16(8).srl16(8);
|
rb = c00.sll16(8).srl16(8);
|
||||||
|
@ -1149,8 +1149,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
if (sel.rfb)
|
if (sel.rfb)
|
||||||
{
|
{
|
||||||
fd = GSVector8i::load(
|
fd = GSVector8i::load(
|
||||||
(uint8*)m_global.vm + fa * 2, (uint8*)m_global.vm + fa * 2 + 16,
|
(u8*)m_global.vm + fa * 2, (u8*)m_global.vm + fa * 2 + 16,
|
||||||
(uint8*)m_global.vm + fa * 2 + 32, (uint8*)m_global.vm + fa * 2 + 48);
|
(u8*)m_global.vm + fa * 2 + 32, (u8*)m_global.vm + fa * 2 + 48);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1234,10 +1234,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
if (fast)
|
if (fast)
|
||||||
{
|
{
|
||||||
GSVector4i::storel((uint8*)m_global.vm + za * 2, zs.extract<0>());
|
GSVector4i::storel((u8*)m_global.vm + za * 2, zs.extract<0>());
|
||||||
GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs.extract<0>());
|
GSVector4i::storeh((u8*)m_global.vm + za * 2 + 16, zs.extract<0>());
|
||||||
GSVector4i::storel((uint8*)m_global.vm + za * 2 + 32, zs.extract<1>());
|
GSVector4i::storel((u8*)m_global.vm + za * 2 + 32, zs.extract<1>());
|
||||||
GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 48, zs.extract<1>());
|
GSVector4i::storeh((u8*)m_global.vm + za * 2 + 48, zs.extract<1>());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1255,10 +1255,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
if (fast)
|
if (fast)
|
||||||
{
|
{
|
||||||
if (fzm & 0x00000f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs.extract<0>());
|
if (fzm & 0x00000f00) GSVector4i::storel((u8*)m_global.vm + za * 2, zs.extract<0>());
|
||||||
if (fzm & 0x0000f000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs.extract<0>());
|
if (fzm & 0x0000f000) GSVector4i::storeh((u8*)m_global.vm + za * 2 + 16, zs.extract<0>());
|
||||||
if (fzm & 0x0f000000) GSVector4i::storel((uint8*)m_global.vm + za * 2 + 32, zs.extract<1>());
|
if (fzm & 0x0f000000) GSVector4i::storel((u8*)m_global.vm + za * 2 + 32, zs.extract<1>());
|
||||||
if (fzm & 0xf0000000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 48, zs.extract<1>());
|
if (fzm & 0xf0000000) GSVector4i::storeh((u8*)m_global.vm + za * 2 + 48, zs.extract<1>());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1443,10 +1443,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
if (fast)
|
if (fast)
|
||||||
{
|
{
|
||||||
GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs.extract<0>());
|
GSVector4i::storel((u8*)m_global.vm + fa * 2, fs.extract<0>());
|
||||||
GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs.extract<0>());
|
GSVector4i::storeh((u8*)m_global.vm + fa * 2 + 16, fs.extract<0>());
|
||||||
GSVector4i::storel((uint8*)m_global.vm + fa * 2 + 32, fs.extract<1>());
|
GSVector4i::storel((u8*)m_global.vm + fa * 2 + 32, fs.extract<1>());
|
||||||
GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 48, fs.extract<1>());
|
GSVector4i::storeh((u8*)m_global.vm + fa * 2 + 48, fs.extract<1>());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1464,10 +1464,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
if (fast)
|
if (fast)
|
||||||
{
|
{
|
||||||
if (fzm & 0x0000000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs.extract<0>());
|
if (fzm & 0x0000000f) GSVector4i::storel((u8*)m_global.vm + fa * 2, fs.extract<0>());
|
||||||
if (fzm & 0x000000f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs.extract<0>());
|
if (fzm & 0x000000f0) GSVector4i::storeh((u8*)m_global.vm + fa * 2 + 16, fs.extract<0>());
|
||||||
if (fzm & 0x000f0000) GSVector4i::storel((uint8*)m_global.vm + fa * 2 + 32, fs.extract<1>());
|
if (fzm & 0x000f0000) GSVector4i::storel((u8*)m_global.vm + fa * 2 + 32, fs.extract<1>());
|
||||||
if (fzm & 0x00f00000) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 48, fs.extract<1>());
|
if (fzm & 0x00f00000) GSVector4i::storeh((u8*)m_global.vm + fa * 2 + 48, fs.extract<1>());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1688,7 +1688,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
|
|
||||||
if (sel.ztest)
|
if (sel.ztest)
|
||||||
{
|
{
|
||||||
zd = GSVector4i::load((uint8*)m_global.vm + za * 2, (uint8*)m_global.vm + za * 2 + 16);
|
zd = GSVector4i::load((u8*)m_global.vm + za * 2, (u8*)m_global.vm + za * 2 + 16);
|
||||||
|
|
||||||
switch (sel.zpsm)
|
switch (sel.zpsm)
|
||||||
{
|
{
|
||||||
|
@ -1710,10 +1710,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
const unsigned int z_max = 0xffffffff >> (sel.zpsm * 8);
|
const unsigned int z_max = 0xffffffff >> (sel.zpsm * 8);
|
||||||
|
|
||||||
zso.u32[0] = std::min(z_max, zso.u32[0]);
|
zso.U32[0] = std::min(z_max, zso.U32[0]);
|
||||||
zso.u32[1] = std::min(z_max, zso.u32[1]);
|
zso.U32[1] = std::min(z_max, zso.U32[1]);
|
||||||
zso.u32[2] = std::min(z_max, zso.u32[2]);
|
zso.U32[2] = std::min(z_max, zso.U32[2]);
|
||||||
zso.u32[3] = std::min(z_max, zso.u32[3]);
|
zso.U32[3] = std::min(z_max, zso.U32[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (sel.ztst)
|
switch (sel.ztst)
|
||||||
|
@ -1860,24 +1860,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]];
|
const u8* tex = (const u8*)m_global.tex[lodi.U32[i]];
|
||||||
|
|
||||||
c00.u32[i] = m_global.clut[tex[addr00.u32[i]]];
|
c00.U32[i] = m_global.clut[tex[addr00.U32[i]]];
|
||||||
c01.u32[i] = m_global.clut[tex[addr01.u32[i]]];
|
c01.U32[i] = m_global.clut[tex[addr01.U32[i]]];
|
||||||
c10.u32[i] = m_global.clut[tex[addr10.u32[i]]];
|
c10.U32[i] = m_global.clut[tex[addr10.U32[i]]];
|
||||||
c11.u32[i] = m_global.clut[tex[addr11.u32[i]]];
|
c11.U32[i] = m_global.clut[tex[addr11.U32[i]]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]];
|
const u32* tex = (const u32*)m_global.tex[lodi.U32[i]];
|
||||||
|
|
||||||
c00.u32[i] = tex[addr00.u32[i]];
|
c00.U32[i] = tex[addr00.U32[i]];
|
||||||
c01.u32[i] = tex[addr01.u32[i]];
|
c01.U32[i] = tex[addr01.U32[i]];
|
||||||
c10.u32[i] = tex[addr10.u32[i]];
|
c10.U32[i] = tex[addr10.U32[i]];
|
||||||
c11.u32[i] = tex[addr11.u32[i]];
|
c11.U32[i] = tex[addr11.U32[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1908,14 +1908,14 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]];
|
c00.U32[i] = m_global.clut[((const u8*)m_global.tex[lodi.U32[i]])[addr00.U32[i]]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]];
|
c00.U32[i] = ((const u32*)m_global.tex[lodi.U32[i]])[addr00.U32[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1981,24 +1981,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
const uint8* tex = (const uint8*)m_global.tex[lodi.u32[i]];
|
const u8* tex = (const u8*)m_global.tex[lodi.U32[i]];
|
||||||
|
|
||||||
c00.u32[i] = m_global.clut[tex[addr00.u32[i]]];
|
c00.U32[i] = m_global.clut[tex[addr00.U32[i]]];
|
||||||
c01.u32[i] = m_global.clut[tex[addr01.u32[i]]];
|
c01.U32[i] = m_global.clut[tex[addr01.U32[i]]];
|
||||||
c10.u32[i] = m_global.clut[tex[addr10.u32[i]]];
|
c10.U32[i] = m_global.clut[tex[addr10.U32[i]]];
|
||||||
c11.u32[i] = m_global.clut[tex[addr11.u32[i]]];
|
c11.U32[i] = m_global.clut[tex[addr11.U32[i]]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
const uint32* tex = (const uint32*)m_global.tex[lodi.u32[i]];
|
const u32* tex = (const u32*)m_global.tex[lodi.U32[i]];
|
||||||
|
|
||||||
c00.u32[i] = tex[addr00.u32[i]];
|
c00.U32[i] = tex[addr00.U32[i]];
|
||||||
c01.u32[i] = tex[addr01.u32[i]];
|
c01.U32[i] = tex[addr01.U32[i]];
|
||||||
c10.u32[i] = tex[addr10.u32[i]];
|
c10.U32[i] = tex[addr10.U32[i]];
|
||||||
c11.u32[i] = tex[addr11.u32[i]];
|
c11.U32[i] = tex[addr11.U32[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2029,14 +2029,14 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
c00.u32[i] = m_global.clut[((const uint8*)m_global.tex[lodi.u32[i]])[addr00.u32[i]]];
|
c00.U32[i] = m_global.clut[((const u8*)m_global.tex[lodi.U32[i]])[addr00.U32[i]]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
c00.u32[i] = ((const uint32*)m_global.tex[lodi.u32[i]])[addr00.u32[i]];
|
c00.U32[i] = ((const u32*)m_global.tex[lodi.U32[i]])[addr00.U32[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2117,7 +2117,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
|
|
||||||
if (sel.tlu)
|
if (sel.tlu)
|
||||||
{
|
{
|
||||||
const uint8* tex = (const uint8*)m_global.tex[0];
|
const u8* tex = (const u8*)m_global.tex[0];
|
||||||
|
|
||||||
c00 = addr00.gather32_32(tex, m_global.clut);
|
c00 = addr00.gather32_32(tex, m_global.clut);
|
||||||
c01 = addr01.gather32_32(tex, m_global.clut);
|
c01 = addr01.gather32_32(tex, m_global.clut);
|
||||||
|
@ -2126,7 +2126,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const uint32* tex = (const uint32*)m_global.tex[0];
|
const u32* tex = (const u32*)m_global.tex[0];
|
||||||
|
|
||||||
c00 = addr00.gather32_32(tex);
|
c00 = addr00.gather32_32(tex);
|
||||||
c01 = addr01.gather32_32(tex);
|
c01 = addr01.gather32_32(tex);
|
||||||
|
@ -2159,11 +2159,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
|
|
||||||
if (sel.tlu)
|
if (sel.tlu)
|
||||||
{
|
{
|
||||||
c00 = addr00.gather32_32((const uint8*)m_global.tex[0], m_global.clut);
|
c00 = addr00.gather32_32((const u8*)m_global.tex[0], m_global.clut);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
c00 = addr00.gather32_32((const uint32*)m_global.tex[0]);
|
c00 = addr00.gather32_32((const u32*)m_global.tex[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
rb = c00.sll16(8).srl16(8);
|
rb = c00.sll16(8).srl16(8);
|
||||||
|
@ -2285,7 +2285,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
|
|
||||||
if (sel.rfb)
|
if (sel.rfb)
|
||||||
{
|
{
|
||||||
fd = GSVector4i::load((uint8*)m_global.vm + fa * 2, (uint8*)m_global.vm + fa * 2 + 16);
|
fd = GSVector4i::load((u8*)m_global.vm + fa * 2, (u8*)m_global.vm + fa * 2 + 16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2364,10 +2364,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
const unsigned int z_max = 0xffffffff >> (sel.zpsm * 8);
|
const unsigned int z_max = 0xffffffff >> (sel.zpsm * 8);
|
||||||
|
|
||||||
zs.u32[0] = std::min(z_max, zs.u32[0]);
|
zs.U32[0] = std::min(z_max, zs.U32[0]);
|
||||||
zs.u32[1] = std::min(z_max, zs.u32[1]);
|
zs.U32[1] = std::min(z_max, zs.U32[1]);
|
||||||
zs.u32[2] = std::min(z_max, zs.u32[2]);
|
zs.U32[2] = std::min(z_max, zs.U32[2]);
|
||||||
zs.u32[3] = std::min(z_max, zs.u32[3]);
|
zs.U32[3] = std::min(z_max, zs.U32[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool fast = sel.ztest ? sel.zpsm < 2 : sel.zpsm == 0 && sel.notest;
|
bool fast = sel.ztest ? sel.zpsm < 2 : sel.zpsm == 0 && sel.notest;
|
||||||
|
@ -2376,8 +2376,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
if (fast)
|
if (fast)
|
||||||
{
|
{
|
||||||
GSVector4i::storel((uint8*)m_global.vm + za * 2, zs);
|
GSVector4i::storel((u8*)m_global.vm + za * 2, zs);
|
||||||
GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs);
|
GSVector4i::storeh((u8*)m_global.vm + za * 2 + 16, zs);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -2391,8 +2391,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
if (fast)
|
if (fast)
|
||||||
{
|
{
|
||||||
if (fzm & 0x0f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs);
|
if (fzm & 0x0f00) GSVector4i::storel((u8*)m_global.vm + za * 2, zs);
|
||||||
if (fzm & 0xf000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs);
|
if (fzm & 0xf000) GSVector4i::storeh((u8*)m_global.vm + za * 2 + 16, zs);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -2573,8 +2573,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
if (fast)
|
if (fast)
|
||||||
{
|
{
|
||||||
GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs);
|
GSVector4i::storel((u8*)m_global.vm + fa * 2, fs);
|
||||||
GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs);
|
GSVector4i::storeh((u8*)m_global.vm + fa * 2 + 16, fs);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -2588,8 +2588,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
{
|
{
|
||||||
if (fast)
|
if (fast)
|
||||||
{
|
{
|
||||||
if (fzm & 0x000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs);
|
if (fzm & 0x000f) GSVector4i::storel((u8*)m_global.vm + fa * 2, fs);
|
||||||
if (fzm & 0x00f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs);
|
if (fzm & 0x00f0) GSVector4i::storeh((u8*)m_global.vm + fa * 2 + 16, fs);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -2675,8 +2675,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
||||||
|
|
||||||
void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
void GSDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
||||||
{
|
{
|
||||||
uint32 zwrite = m_global.sel.zwrite;
|
u32 zwrite = m_global.sel.zwrite;
|
||||||
uint32 edge = m_global.sel.edge;
|
u32 edge = m_global.sel.edge;
|
||||||
|
|
||||||
m_global.sel.zwrite = 0;
|
m_global.sel.zwrite = 0;
|
||||||
m_global.sel.edge = 1;
|
m_global.sel.edge = 1;
|
||||||
|
@ -2774,20 +2774,20 @@ bool GSDrawScanline::TestAlpha(T& test, T& fm, T& zm, const T& ga)
|
||||||
static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; // columnTable16[0]
|
static const int s_offsets[] = {0, 2, 8, 10, 16, 18, 24, 26}; // columnTable16[0]
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void GSDrawScanline::WritePixel(const T& src, int addr, int i, uint32 psm)
|
void GSDrawScanline::WritePixel(const T& src, int addr, int i, u32 psm)
|
||||||
{
|
{
|
||||||
uint8* dst = (uint8*)m_global.vm + addr * 2 + s_offsets[i] * 2;
|
u8* dst = (u8*)m_global.vm + addr * 2 + s_offsets[i] * 2;
|
||||||
|
|
||||||
switch (psm)
|
switch (psm)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
*(uint32*)dst = src.u32[i];
|
*(u32*)dst = src.U32[i];
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
*(uint32*)dst = (src.u32[i] & 0xffffff) | (*(uint32*)dst & 0xff000000);
|
*(u32*)dst = (src.U32[i] & 0xffffff) | (*(u32*)dst & 0xff000000);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
*(uint16*)dst = src.u16[i * 2];
|
*(u16*)dst = src.u16[i * 2];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2801,38 +2801,38 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
|
|
||||||
// FIXME: sometimes the frame and z buffer may overlap, the outcome is undefined
|
// FIXME: sometimes the frame and z buffer may overlap, the outcome is undefined
|
||||||
|
|
||||||
uint32 m;
|
u32 m;
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
m = m_global.zm;
|
m = m_global.zm;
|
||||||
#else
|
#else
|
||||||
m = m_global.zm.u32[0];
|
m = m_global.zm.U32[0];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (m != 0xffffffff)
|
if (m != 0xffffffff)
|
||||||
{
|
{
|
||||||
uint32 z = v.t.u32[3]; // (uint32)v.p.z;
|
u32 z = v.t.U32[3]; // (u32)v.p.z;
|
||||||
|
|
||||||
if (m_global.sel.zpsm != 2)
|
if (m_global.sel.zpsm != 2)
|
||||||
{
|
{
|
||||||
if (m == 0)
|
if (m == 0)
|
||||||
{
|
{
|
||||||
DrawRectT<uint32, false>(m_global.zbo, r, z, m);
|
DrawRectT<u32, false>(m_global.zbo, r, z, m);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DrawRectT<uint32, true>(m_global.zbo, r, z, m);
|
DrawRectT<u32, true>(m_global.zbo, r, z, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ((m & 0xffff) == 0)
|
if ((m & 0xffff) == 0)
|
||||||
{
|
{
|
||||||
DrawRectT<uint16, false>(m_global.zbo, r, z, m);
|
DrawRectT<u16, false>(m_global.zbo, r, z, m);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DrawRectT<uint16, true>(m_global.zbo, r, z, m);
|
DrawRectT<u16, true>(m_global.zbo, r, z, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2840,12 +2840,12 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
m = m_global.fm;
|
m = m_global.fm;
|
||||||
#else
|
#else
|
||||||
m = m_global.fm.u32[0];
|
m = m_global.fm.U32[0];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (m != 0xffffffff)
|
if (m != 0xffffffff)
|
||||||
{
|
{
|
||||||
uint32 c = (GSVector4i(v.c) >> 7).rgba32();
|
u32 c = (GSVector4i(v.c) >> 7).rgba32();
|
||||||
|
|
||||||
if (m_global.sel.fba)
|
if (m_global.sel.fba)
|
||||||
{
|
{
|
||||||
|
@ -2856,11 +2856,11 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
{
|
{
|
||||||
if (m == 0)
|
if (m == 0)
|
||||||
{
|
{
|
||||||
DrawRectT<uint32, false>(m_global.fbo, r, c, m);
|
DrawRectT<u32, false>(m_global.fbo, r, c, m);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DrawRectT<uint32, true>(m_global.fbo, r, c, m);
|
DrawRectT<u32, true>(m_global.fbo, r, c, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -2869,18 +2869,18 @@ void GSDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||||
|
|
||||||
if ((m & 0xffff) == 0)
|
if ((m & 0xffff) == 0)
|
||||||
{
|
{
|
||||||
DrawRectT<uint16, false>(m_global.fbo, r, c, m);
|
DrawRectT<u16, false>(m_global.fbo, r, c, m);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
DrawRectT<uint16, true>(m_global.fbo, r, c, m);
|
DrawRectT<u16, true>(m_global.fbo, r, c, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m)
|
void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m)
|
||||||
{
|
{
|
||||||
if (m == 0xffffffff)
|
if (m == 0xffffffff)
|
||||||
return;
|
return;
|
||||||
|
@ -2897,7 +2897,7 @@ void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, uint32
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (sizeof(T) == sizeof(uint16))
|
if (sizeof(T) == sizeof(u16))
|
||||||
{
|
{
|
||||||
color = color.xxzzlh();
|
color = color.xxzzlh();
|
||||||
mask = mask.xxzzlh();
|
mask = mask.xxzzlh();
|
||||||
|
@ -2909,7 +2909,7 @@ void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, uint32
|
||||||
c = c & (~m);
|
c = c & (~m);
|
||||||
|
|
||||||
if (masked)
|
if (masked)
|
||||||
ASSERT(mask.u32[0] != 0);
|
ASSERT(mask.U32[0] != 0);
|
||||||
|
|
||||||
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
|
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
|
||||||
|
|
||||||
|
@ -2933,7 +2933,7 @@ void GSDrawScanline::DrawRectT(const GSOffset& off, const GSVector4i& r, uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m)
|
void GSDrawScanline::FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m)
|
||||||
{
|
{
|
||||||
if (r.x >= r.z)
|
if (r.x >= r.z)
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -34,14 +34,14 @@ protected:
|
||||||
GSScanlineGlobalData m_global;
|
GSScanlineGlobalData m_global;
|
||||||
GSScanlineLocalData m_local;
|
GSScanlineLocalData m_local;
|
||||||
|
|
||||||
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
|
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, u64, SetupPrimPtr> m_sp_map;
|
||||||
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
|
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, u64, DrawScanlinePtr> m_ds_map;
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
void DrawRectT(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m);
|
void DrawRectT(const GSOffset& off, const GSVector4i& r, u32 c, u32 m);
|
||||||
|
|
||||||
template <class T, bool masked>
|
template <class T, bool masked>
|
||||||
__forceinline void FillRect(const GSOffset& off, const GSVector4i& r, uint32 c, uint32 m);
|
__forceinline void FillRect(const GSOffset& off, const GSVector4i& r, u32 c, u32 m);
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
|
@ -62,13 +62,13 @@ public:
|
||||||
// IDrawScanline
|
// IDrawScanline
|
||||||
|
|
||||||
void BeginDraw(const GSRasterizerData* data);
|
void BeginDraw(const GSRasterizerData* data);
|
||||||
void EndDraw(uint64 frame, uint64 ticks, int actual, int total, int prims);
|
void EndDraw(u64 frame, u64 ticks, int actual, int total, int prims);
|
||||||
|
|
||||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||||
|
|
||||||
#ifndef ENABLE_JIT_RASTERIZER
|
#ifndef ENABLE_JIT_RASTERIZER
|
||||||
|
|
||||||
void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
|
void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan);
|
||||||
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ public:
|
||||||
bool IsRect() const { return m_global.sel.IsSolidRect(); }
|
bool IsRect() const { return m_global.sel.IsSolidRect(); }
|
||||||
|
|
||||||
template<class T> bool TestAlpha(T& test, T& fm, T& zm, const T& ga);
|
template<class T> bool TestAlpha(T& test, T& fm, T& zm, const T& ga);
|
||||||
template<class T> void WritePixel(const T& src, int addr, int i, uint32 psm);
|
template<class T> void WritePixel(const T& src, int addr, int i, u32 psm);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -100,7 +100,7 @@ using namespace Xbyak;
|
||||||
#define _rip_local_d_p(x) _rip_local_d(x)
|
#define _rip_local_d_p(x) _rip_local_d(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key)
|
GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, u64 key)
|
||||||
: _parent(base, cpu)
|
: _parent(base, cpu)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false)
|
, m_rip(false)
|
||||||
|
@ -201,7 +201,7 @@ void GSDrawScanlineCodeGenerator2::broadcastGPRToVec(const XYm& vec, const Xbyak
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::modulate16(const XYm& a, const Operand& f, uint8 shift)
|
void GSDrawScanlineCodeGenerator2::modulate16(const XYm& a, const Operand& f, u8 shift)
|
||||||
{
|
{
|
||||||
if (shift == 0)
|
if (shift == 0)
|
||||||
{
|
{
|
||||||
|
@ -214,7 +214,7 @@ void GSDrawScanlineCodeGenerator2::modulate16(const XYm& a, const Operand& f, ui
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::lerp16(const XYm& a, const XYm& b, const XYm& f, uint8 shift)
|
void GSDrawScanlineCodeGenerator2::lerp16(const XYm& a, const XYm& b, const XYm& f, u8 shift)
|
||||||
{
|
{
|
||||||
psubw(a, b);
|
psubw(a, b);
|
||||||
modulate16(a, f, shift);
|
modulate16(a, f, shift);
|
||||||
|
@ -251,7 +251,7 @@ void GSDrawScanlineCodeGenerator2::clamp16(const XYm& a, const XYm& temp)
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::alltrue(const XYm& test)
|
void GSDrawScanlineCodeGenerator2::alltrue(const XYm& test)
|
||||||
{
|
{
|
||||||
uint32 mask = test.isYMM() ? 0xffffffff : 0xffff;
|
u32 mask = test.isYMM() ? 0xffffffff : 0xffff;
|
||||||
pmovmskb(eax, test);
|
pmovmskb(eax, test);
|
||||||
cmp(eax, mask);
|
cmp(eax, mask);
|
||||||
je("step", GSCodeGenerator::T_NEAR);
|
je("step", GSCodeGenerator::T_NEAR);
|
||||||
|
@ -1215,7 +1215,7 @@ void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2)
|
||||||
|
|
||||||
if (m_sel.zclamp)
|
if (m_sel.zclamp)
|
||||||
{
|
{
|
||||||
const uint8 amt = (uint8)((m_sel.zpsm & 0x3) * 8);
|
const u8 amt = (u8)((m_sel.zpsm & 0x3) * 8);
|
||||||
pcmpeqd(temp1, temp1);
|
pcmpeqd(temp1, temp1);
|
||||||
psrld(temp1, amt);
|
psrld(temp1, amt);
|
||||||
pminsd(xym0, temp1);
|
pminsd(xym0, temp1);
|
||||||
|
@ -1244,8 +1244,8 @@ void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2)
|
||||||
|
|
||||||
if (m_sel.zpsm)
|
if (m_sel.zpsm)
|
||||||
{
|
{
|
||||||
pslld(temp2, static_cast<uint8>(m_sel.zpsm * 8));
|
pslld(temp2, static_cast<u8>(m_sel.zpsm * 8));
|
||||||
psrld(temp2, static_cast<uint8>(m_sel.zpsm * 8));
|
psrld(temp2, static_cast<u8>(m_sel.zpsm * 8));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_sel.zoverflow || m_sel.zpsm == 0)
|
if (m_sel.zoverflow || m_sel.zpsm == 0)
|
||||||
|
@ -1415,7 +1415,7 @@ void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset)
|
||||||
|
|
||||||
THREEARG(punpcklwd, xym5, xym2, xym0);
|
THREEARG(punpcklwd, xym5, xym2, xym0);
|
||||||
punpckhwd(xym2, xym0);
|
punpckhwd(xym2, xym0);
|
||||||
pslld(xym2, static_cast<uint8>(m_sel.tw + 3));
|
pslld(xym2, static_cast<u8>(m_sel.tw + 3));
|
||||||
|
|
||||||
// xym0 = 0
|
// xym0 = 0
|
||||||
// xym2 = y0
|
// xym2 = y0
|
||||||
|
@ -1432,7 +1432,7 @@ void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset)
|
||||||
|
|
||||||
THREEARG(punpcklwd, xym1, xym3, xym0);
|
THREEARG(punpcklwd, xym1, xym3, xym0);
|
||||||
punpckhwd(xym3, xym0);
|
punpckhwd(xym3, xym0);
|
||||||
pslld(xym3, static_cast<uint8>(m_sel.tw + 3));
|
pslld(xym3, static_cast<u8>(m_sel.tw + 3));
|
||||||
|
|
||||||
// xym1 = x1
|
// xym1 = x1
|
||||||
// xym2 = y0
|
// xym2 = y0
|
||||||
|
@ -1460,10 +1460,10 @@ void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset)
|
||||||
// xym7 = used[x86] vf[x64&&!needsMoreRegs]
|
// xym7 = used[x86] vf[x64&&!needsMoreRegs]
|
||||||
// Free: xym4, xym5
|
// Free: xym4, xym5
|
||||||
|
|
||||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
// c00 = addr00.gather32_32((const u32/u8*)tex[, clut]);
|
||||||
// c01 = addr01.gather32_32((const uint32/uint8*)tex[, clut]);
|
// c01 = addr01.gather32_32((const u32/u8*)tex[, clut]);
|
||||||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
// c10 = addr10.gather32_32((const u32/u8*)tex[, clut]);
|
||||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
// c11 = addr11.gather32_32((const u32/u8*)tex[, clut]);
|
||||||
|
|
||||||
const XYm& tmp1 = is64 ? xym7 : xym4; // OK to destroy if needsMoreRegs
|
const XYm& tmp1 = is64 ? xym7 : xym4; // OK to destroy if needsMoreRegs
|
||||||
const XYm& tmp2 = is64 ? xym4 : xym7;
|
const XYm& tmp2 = is64 ? xym4 : xym7;
|
||||||
|
@ -1559,7 +1559,7 @@ void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset)
|
||||||
|
|
||||||
paddd(xym2, xym5);
|
paddd(xym2, xym5);
|
||||||
|
|
||||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
// c00 = addr00.gather32_32((const u32/u8*)tex[, clut]);
|
||||||
|
|
||||||
ReadTexel1(xym5, xym2, xym0, xym1, mip_offset);
|
ReadTexel1(xym5, xym2, xym0, xym1, mip_offset);
|
||||||
|
|
||||||
|
@ -1875,25 +1875,25 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
||||||
movdqa(xym5, xym2);
|
movdqa(xym5, xym2);
|
||||||
movdqa(xym3, xym6);
|
movdqa(xym3, xym6);
|
||||||
|
|
||||||
movd(xym0, _rip_local(temp.lod.i.u32[0]));
|
movd(xym0, _rip_local(temp.lod.i.U32[0]));
|
||||||
psrad(xym2, xym0);
|
psrad(xym2, xym0);
|
||||||
THREEARG(psrlw, xym1, xym4, xym0);
|
THREEARG(psrlw, xym1, xym4, xym0);
|
||||||
movq(_rip_local(temp.uv_minmax[0].u32[0]), xym1);
|
movq(_rip_local(temp.uv_minmax[0].U32[0]), xym1);
|
||||||
|
|
||||||
movd(xym0, _rip_local(temp.lod.i.u32[1]));
|
movd(xym0, _rip_local(temp.lod.i.U32[1]));
|
||||||
psrad(xym5, xym0);
|
psrad(xym5, xym0);
|
||||||
THREEARG(psrlw, xym1, xym4, xym0);
|
THREEARG(psrlw, xym1, xym4, xym0);
|
||||||
movq(_rip_local(temp.uv_minmax[1].u32[0]), xym1);
|
movq(_rip_local(temp.uv_minmax[1].U32[0]), xym1);
|
||||||
|
|
||||||
movd(xym0, _rip_local(temp.lod.i.u32[2]));
|
movd(xym0, _rip_local(temp.lod.i.U32[2]));
|
||||||
psrad(xym3, xym0);
|
psrad(xym3, xym0);
|
||||||
THREEARG(psrlw, xym1, xym4, xym0);
|
THREEARG(psrlw, xym1, xym4, xym0);
|
||||||
movq(_rip_local(temp.uv_minmax[0].u32[2]), xym1);
|
movq(_rip_local(temp.uv_minmax[0].U32[2]), xym1);
|
||||||
|
|
||||||
movd(xym0, _rip_local(temp.lod.i.u32[3]));
|
movd(xym0, _rip_local(temp.lod.i.U32[3]));
|
||||||
psrad(xym6, xym0);
|
psrad(xym6, xym0);
|
||||||
THREEARG(psrlw, xym1, xym4, xym0);
|
THREEARG(psrlw, xym1, xym4, xym0);
|
||||||
movq(_rip_local(temp.uv_minmax[1].u32[2]), xym1);
|
movq(_rip_local(temp.uv_minmax[1].U32[2]), xym1);
|
||||||
|
|
||||||
punpckldq(xym2, xym3);
|
punpckldq(xym2, xym3);
|
||||||
punpckhdq(xym5, xym6);
|
punpckhdq(xym5, xym6);
|
||||||
|
@ -1931,7 +1931,7 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD()
|
||||||
{
|
{
|
||||||
// lod = K
|
// lod = K
|
||||||
|
|
||||||
movd(Xmm(xym0.getIdx()), _rip_global(lod.i.u32[0]));
|
movd(Xmm(xym0.getIdx()), _rip_global(lod.i.U32[0]));
|
||||||
|
|
||||||
psrad(xym2, Xmm(xym0.getIdx()));
|
psrad(xym2, Xmm(xym0.getIdx()));
|
||||||
psrad(xym3, Xmm(xym0.getIdx()));
|
psrad(xym3, Xmm(xym0.getIdx()));
|
||||||
|
@ -2743,7 +2743,7 @@ void GSDrawScanlineCodeGenerator2::WriteZBuf()
|
||||||
// Clamp Z to ZPSM_FMT_MAX
|
// Clamp Z to ZPSM_FMT_MAX
|
||||||
if (m_sel.zclamp)
|
if (m_sel.zclamp)
|
||||||
{
|
{
|
||||||
const uint8 amt = (uint8)((m_sel.zpsm & 0x3) * 8);
|
const u8 amt = (u8)((m_sel.zpsm & 0x3) * 8);
|
||||||
pcmpeqd(xym7, xym7);
|
pcmpeqd(xym7, xym7);
|
||||||
psrld(xym7, amt);
|
psrld(xym7, amt);
|
||||||
pminsd(xym1, xym7);
|
pminsd(xym1, xym7);
|
||||||
|
@ -3273,7 +3273,7 @@ void GSDrawScanlineCodeGenerator2::WritePixel(const XYm& src_, const AddressReg&
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::WritePixel(const Xmm& src, const AddressReg& addr, uint8 i, uint8 j, int psm)
|
void GSDrawScanlineCodeGenerator2::WritePixel(const Xmm& src, const AddressReg& addr, u8 i, u8 j, int psm)
|
||||||
{
|
{
|
||||||
constexpr int s_offsets[8] = {0, 2, 8, 10, 16, 18, 24, 26};
|
constexpr int s_offsets[8] = {0, 2, 8, 10, 16, 18, 24, 26};
|
||||||
|
|
||||||
|
@ -3356,7 +3356,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImpl(
|
||||||
void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offset)
|
void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offset)
|
||||||
{
|
{
|
||||||
AddressReg texIn = is64 ? _64_m_local__gd__tex : t2;
|
AddressReg texIn = is64 ? _64_m_local__gd__tex : t2;
|
||||||
Address lod_addr = m_sel.lcm ? _rip_global(lod.i.u32[lod]) : _rip_local(temp.lod.i.u32[lod]);
|
Address lod_addr = m_sel.lcm ? _rip_global(lod.i.U32[lod]) : _rip_local(temp.lod.i.U32[lod]);
|
||||||
mov(ebx, lod_addr);
|
mov(ebx, lod_addr);
|
||||||
mov(rbx, ptr[texIn + rbx * wordsize + mip_offset]);
|
mov(rbx, ptr[texIn + rbx * wordsize + mip_offset]);
|
||||||
}
|
}
|
||||||
|
@ -3483,7 +3483,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImplSSE4(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator2::ReadTexelImpl(const Xmm& dst, const Xmm& addr, uint8 i, bool texInRBX, bool preserveDst)
|
void GSDrawScanlineCodeGenerator2::ReadTexelImpl(const Xmm& dst, const Xmm& addr, u8 i, bool texInRBX, bool preserveDst)
|
||||||
{
|
{
|
||||||
ASSERT(i < 4);
|
ASSERT(i < 4);
|
||||||
|
|
||||||
|
|
|
@ -92,7 +92,7 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key);
|
GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, u64 key);
|
||||||
void Generate();
|
void Generate();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -120,8 +120,8 @@ private:
|
||||||
void pbroadcastwLocal(const XYm& reg, const Xbyak::Address& mem);
|
void pbroadcastwLocal(const XYm& reg, const Xbyak::Address& mem);
|
||||||
/// Broadcast a 32-bit GPR to a vector register
|
/// Broadcast a 32-bit GPR to a vector register
|
||||||
void broadcastGPRToVec(const XYm& vec, const Xbyak::Reg32& gpr);
|
void broadcastGPRToVec(const XYm& vec, const Xbyak::Reg32& gpr);
|
||||||
void modulate16(const XYm& a, const Xbyak::Operand& f, uint8 shift);
|
void modulate16(const XYm& a, const Xbyak::Operand& f, u8 shift);
|
||||||
void lerp16(const XYm& a, const XYm& b, const XYm& f, uint8 shift);
|
void lerp16(const XYm& a, const XYm& b, const XYm& f, u8 shift);
|
||||||
void lerp16_4(const XYm& a, const XYm& b, const XYm& f);
|
void lerp16_4(const XYm& a, const XYm& b, const XYm& f);
|
||||||
void mix16(const XYm& a, const XYm& b, const XYm& temp);
|
void mix16(const XYm& a, const XYm& b, const XYm& temp);
|
||||||
void clamp16(const XYm& a, const XYm& temp);
|
void clamp16(const XYm& a, const XYm& temp);
|
||||||
|
@ -159,7 +159,7 @@ private:
|
||||||
#else
|
#else
|
||||||
void WritePixel(const XYm& src_, const AddressReg& addr, const Xbyak::Reg32& mask, bool fast, int psm, int fz);
|
void WritePixel(const XYm& src_, const AddressReg& addr, const Xbyak::Reg32& mask, bool fast, int psm, int fz);
|
||||||
#endif
|
#endif
|
||||||
void WritePixel(const Xmm& src, const AddressReg& addr, uint8 i, uint8 j, int psm);
|
void WritePixel(const Xmm& src, const AddressReg& addr, u8 i, u8 j, int psm);
|
||||||
void ReadTexel1(const XYm& dst, const XYm& src, const XYm& tmp1, const XYm& tmp2, int mip_offset);
|
void ReadTexel1(const XYm& dst, const XYm& src, const XYm& tmp1, const XYm& tmp2, int mip_offset);
|
||||||
void ReadTexel4(
|
void ReadTexel4(
|
||||||
const XYm& d0, const XYm& d1,
|
const XYm& d0, const XYm& d1,
|
||||||
|
@ -185,5 +185,5 @@ private:
|
||||||
const Xmm& d2s0, const Xmm& d3s1,
|
const Xmm& d2s0, const Xmm& d3s1,
|
||||||
const Xmm& s2, const Xmm& s3,
|
const Xmm& s2, const Xmm& s3,
|
||||||
int pixels, int mip_offset);
|
int pixels, int mip_offset);
|
||||||
void ReadTexelImpl(const Xmm& dst, const Xmm& addr, uint8 i, bool texInA3, bool preserveDst);
|
void ReadTexelImpl(const Xmm& dst, const Xmm& addr, u8 i, bool texInA3, bool preserveDst);
|
||||||
};
|
};
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
#include "GSDrawScanlineCodeGenerator.all.h"
|
#include "GSDrawScanlineCodeGenerator.all.h"
|
||||||
|
|
||||||
|
|
||||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
||||||
: GSCodeGenerator(code, maxsize)
|
: GSCodeGenerator(code, maxsize)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false)
|
, m_rip(false)
|
||||||
|
|
|
@ -34,5 +34,5 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
||||||
bool m_rip;
|
bool m_rip;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
GSDrawScanlineCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
|
||||||
};
|
};
|
||||||
|
|
|
@ -89,7 +89,7 @@ public:
|
||||||
"used SSE instruction in AVX code",
|
"used SSE instruction in AVX code",
|
||||||
"used AVX instruction in SSE code",
|
"used AVX instruction in SSE code",
|
||||||
};
|
};
|
||||||
if (static_cast<uint32>(value) < (sizeof(tbl) / sizeof(*tbl)))
|
if (static_cast<u32>(value) < (sizeof(tbl) / sizeof(*tbl)))
|
||||||
{
|
{
|
||||||
return tbl[value];
|
return tbl[value];
|
||||||
}
|
}
|
||||||
|
@ -320,11 +320,11 @@ public:
|
||||||
FORWARD(2, BASE, name, ARGS_OO) \
|
FORWARD(2, BASE, name, ARGS_OO) \
|
||||||
FORWARD(2, BASE, name, ARGS_OI)
|
FORWARD(2, BASE, name, ARGS_OI)
|
||||||
|
|
||||||
#define ARGS_OI const Operand&, uint32
|
#define ARGS_OI const Operand&, u32
|
||||||
#define ARGS_OO const Operand&, const Operand&
|
#define ARGS_OO const Operand&, const Operand&
|
||||||
#define ARGS_XI const Xmm&, int
|
#define ARGS_XI const Xmm&, int
|
||||||
#define ARGS_XO const Xmm&, const Operand&
|
#define ARGS_XO const Xmm&, const Operand&
|
||||||
#define ARGS_XOI const Xmm&, const Operand&, uint8
|
#define ARGS_XOI const Xmm&, const Operand&, u8
|
||||||
#define ARGS_XXO const Xmm&, const Xmm&, const Operand&
|
#define ARGS_XXO const Xmm&, const Xmm&, const Operand&
|
||||||
|
|
||||||
// For instructions that are ifdef'd out without XBYAK64
|
// For instructions that are ifdef'd out without XBYAK64
|
||||||
|
@ -334,7 +334,7 @@ public:
|
||||||
#define REQUIRE64(action) require64()
|
#define REQUIRE64(action) require64()
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const uint8 *getCurr() { return actual.getCurr(); }
|
const u8 *getCurr() { return actual.getCurr(); }
|
||||||
void align(int x = 16) { return actual.align(x); }
|
void align(int x = 16) { return actual.align(x); }
|
||||||
void db(int code) { actual.db(code); }
|
void db(int code) { actual.db(code); }
|
||||||
void L(const std::string& label) { actual.L(label); }
|
void L(const std::string& label) { actual.L(label); }
|
||||||
|
@ -374,7 +374,7 @@ public:
|
||||||
SFORWARD(2, cvtdq2ps, ARGS_XO)
|
SFORWARD(2, cvtdq2ps, ARGS_XO)
|
||||||
SFORWARD(2, cvtps2dq, ARGS_XO)
|
SFORWARD(2, cvtps2dq, ARGS_XO)
|
||||||
SFORWARD(2, cvttps2dq, ARGS_XO)
|
SFORWARD(2, cvttps2dq, ARGS_XO)
|
||||||
SFORWARD(3, extractps, const Operand&, const Xmm&, uint8)
|
SFORWARD(3, extractps, const Operand&, const Xmm&, u8)
|
||||||
AFORWARD(2, maxps, ARGS_XO)
|
AFORWARD(2, maxps, ARGS_XO)
|
||||||
AFORWARD(2, minps, ARGS_XO)
|
AFORWARD(2, minps, ARGS_XO)
|
||||||
SFORWARD(2, movaps, ARGS_XO)
|
SFORWARD(2, movaps, ARGS_XO)
|
||||||
|
@ -403,8 +403,8 @@ public:
|
||||||
AFORWARD(2, pcmpeqd, ARGS_XO)
|
AFORWARD(2, pcmpeqd, ARGS_XO)
|
||||||
AFORWARD(2, pcmpeqw, ARGS_XO)
|
AFORWARD(2, pcmpeqw, ARGS_XO)
|
||||||
AFORWARD(2, pcmpgtd, ARGS_XO)
|
AFORWARD(2, pcmpgtd, ARGS_XO)
|
||||||
SFORWARD(3, pextrd, const Operand&, const Xmm&, uint8)
|
SFORWARD(3, pextrd, const Operand&, const Xmm&, u8)
|
||||||
SFORWARD(3, pextrw, const Operand&, const Xmm&, uint8)
|
SFORWARD(3, pextrw, const Operand&, const Xmm&, u8)
|
||||||
AFORWARD(3, pinsrd, ARGS_XOI)
|
AFORWARD(3, pinsrd, ARGS_XOI)
|
||||||
AFORWARD(2, pmaxsw, ARGS_XO)
|
AFORWARD(2, pmaxsw, ARGS_XO)
|
||||||
AFORWARD(2, pminsd, ARGS_XO)
|
AFORWARD(2, pminsd, ARGS_XO)
|
||||||
|
@ -448,12 +448,12 @@ public:
|
||||||
FORWARD(2, AVX2, vbroadcasti128, const Ymm&, const Address&)
|
FORWARD(2, AVX2, vbroadcasti128, const Ymm&, const Address&)
|
||||||
FORWARD(2, AVX, vbroadcastf128, const Ymm&, const Address&)
|
FORWARD(2, AVX, vbroadcastf128, const Ymm&, const Address&)
|
||||||
FORWARD(3, FMA, vfmadd213ps, ARGS_XXO)
|
FORWARD(3, FMA, vfmadd213ps, ARGS_XXO)
|
||||||
FORWARD(3, AVX2, vextracti128, const Operand&, const Ymm&, uint8)
|
FORWARD(3, AVX2, vextracti128, const Operand&, const Ymm&, u8)
|
||||||
FORWARD(4, AVX2, vinserti128, const Ymm&, const Ymm&, const Operand&, uint8);
|
FORWARD(4, AVX2, vinserti128, const Ymm&, const Ymm&, const Operand&, u8);
|
||||||
FORWARD(2, AVX2, vpbroadcastd, ARGS_XO)
|
FORWARD(2, AVX2, vpbroadcastd, ARGS_XO)
|
||||||
FORWARD(2, AVX2, vpbroadcastq, ARGS_XO)
|
FORWARD(2, AVX2, vpbroadcastq, ARGS_XO)
|
||||||
FORWARD(2, AVX2, vpbroadcastw, ARGS_XO)
|
FORWARD(2, AVX2, vpbroadcastw, ARGS_XO)
|
||||||
FORWARD(3, AVX2, vpermq, const Ymm&, const Operand&, uint8)
|
FORWARD(3, AVX2, vpermq, const Ymm&, const Operand&, u8)
|
||||||
FORWARD(3, AVX2, vpgatherdd, const Xmm&, const Address&, const Xmm&);
|
FORWARD(3, AVX2, vpgatherdd, const Xmm&, const Address&, const Xmm&);
|
||||||
FORWARD(3, AVX2, vpsravd, ARGS_XXO)
|
FORWARD(3, AVX2, vpsravd, ARGS_XXO)
|
||||||
FORWARD(3, AVX2, vpsrlvd, ARGS_XXO)
|
FORWARD(3, AVX2, vpsrlvd, ARGS_XXO)
|
||||||
|
|
|
@ -50,7 +50,7 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
|
||||||
m_edge.count = 0;
|
m_edge.count = 0;
|
||||||
|
|
||||||
int rows = (2048 >> m_thread_height) + 16;
|
int rows = (2048 >> m_thread_height) + 16;
|
||||||
m_scanline = (uint8*)_aligned_malloc(rows, 64);
|
m_scanline = (u8*)_aligned_malloc(rows, 64);
|
||||||
|
|
||||||
int row = 0;
|
int row = 0;
|
||||||
|
|
||||||
|
@ -148,10 +148,10 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
||||||
const GSVertexSW* vertex = data->vertex;
|
const GSVertexSW* vertex = data->vertex;
|
||||||
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
|
const GSVertexSW* vertex_end = data->vertex + data->vertex_count;
|
||||||
|
|
||||||
const uint32* index = data->index;
|
const u32* index = data->index;
|
||||||
const uint32* index_end = data->index + data->index_count;
|
const u32* index_end = data->index + data->index_count;
|
||||||
|
|
||||||
uint32 tmp_index[] = {0, 1, 2};
|
u32 tmp_index[] = {0, 1, 2};
|
||||||
|
|
||||||
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
|
bool scissor_test = !data->bbox.eq(data->bbox.rintersect(data->scissor));
|
||||||
|
|
||||||
|
@ -247,7 +247,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
||||||
|
|
||||||
data->pixels = m_pixels.actual;
|
data->pixels = m_pixels.actual;
|
||||||
|
|
||||||
uint64 ticks = __rdtsc() - data->start;
|
u64 ticks = __rdtsc() - data->start;
|
||||||
|
|
||||||
m_pixels.sum += m_pixels.actual;
|
m_pixels.sum += m_pixels.actual;
|
||||||
|
|
||||||
|
@ -255,7 +255,7 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <bool scissor_test>
|
template <bool scissor_test>
|
||||||
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count)
|
void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -280,7 +280,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint32 tmp_index[1] = {0};
|
u32 tmp_index[1] = {0};
|
||||||
|
|
||||||
for (int i = 0; i < vertex_count; i++, vertex++)
|
for (int i = 0; i < vertex_count; i++, vertex++)
|
||||||
{
|
{
|
||||||
|
@ -301,7 +301,7 @@ void GSRasterizer::DrawPoint(const GSVertexSW* vertex, int vertex_count, const u
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
|
void GSRasterizer::DrawLine(const GSVertexSW* vertex, const u32* index)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -405,7 +405,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* vertex, const uint32* index)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static const uint8 s_ysort[8][4] =
|
static const u8 s_ysort[8][4] =
|
||||||
{
|
{
|
||||||
{0, 1, 2, 0}, // y0 <= y1 <= y2
|
{0, 1, 2, 0}, // y0 <= y1 <= y2
|
||||||
{1, 0, 2, 0}, // y1 < y0 <= y2
|
{1, 0, 2, 0}, // y1 < y0 <= y2
|
||||||
|
@ -419,7 +419,7 @@ static const uint8 s_ysort[8][4] =
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
|
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -612,7 +612,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, c
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
|
void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const u32* index)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -807,7 +807,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const u32* index)
|
||||||
{
|
{
|
||||||
m_primcount++;
|
m_primcount++;
|
||||||
|
|
||||||
|
@ -971,7 +971,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
{
|
{
|
||||||
AddScanline(e, 1, xi, top, edge);
|
AddScanline(e, 1, xi, top, edge);
|
||||||
|
|
||||||
e->t.u32[3] = (0x10000 - xf) & 0xffff;
|
e->t.U32[3] = (0x10000 - xf) & 0xffff;
|
||||||
|
|
||||||
e++;
|
e++;
|
||||||
}
|
}
|
||||||
|
@ -994,7 +994,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
{
|
{
|
||||||
AddScanline(e, 1, xi, top, edge);
|
AddScanline(e, 1, xi, top, edge);
|
||||||
|
|
||||||
e->t.u32[3] = xf;
|
e->t.U32[3] = xf;
|
||||||
|
|
||||||
e++;
|
e++;
|
||||||
}
|
}
|
||||||
|
@ -1061,7 +1061,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
{
|
{
|
||||||
AddScanline(e, 1, left, yi, edge);
|
AddScanline(e, 1, left, yi, edge);
|
||||||
|
|
||||||
e->t.u32[3] = (0x10000 - yf) & 0xffff;
|
e->t.U32[3] = (0x10000 - yf) & 0xffff;
|
||||||
|
|
||||||
e++;
|
e++;
|
||||||
}
|
}
|
||||||
|
@ -1084,7 +1084,7 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
|
||||||
{
|
{
|
||||||
AddScanline(e, 1, left, yi, edge);
|
AddScanline(e, 1, left, yi, edge);
|
||||||
|
|
||||||
e->t.u32[3] = yf;
|
e->t.U32[3] = yf;
|
||||||
|
|
||||||
e++;
|
e++;
|
||||||
}
|
}
|
||||||
|
@ -1105,12 +1105,12 @@ void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, con
|
||||||
{
|
{
|
||||||
*e = scan;
|
*e = scan;
|
||||||
|
|
||||||
e->_pad.i32[0] = pixels;
|
e->_pad.I32[0] = pixels;
|
||||||
e->_pad.i32[1] = left;
|
e->_pad.I32[1] = left;
|
||||||
e->_pad.i32[2] = top;
|
e->_pad.I32[2] = top;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge)
|
void GSRasterizer::Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge)
|
||||||
{
|
{
|
||||||
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
|
// TODO: on win64 this could be the place where xmm6-15 are preserved (not by each DrawScanline)
|
||||||
|
|
||||||
|
@ -1127,9 +1127,9 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
||||||
{
|
{
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
int pixels = e->_pad.i32[0];
|
int pixels = e->_pad.I32[0];
|
||||||
int left = e->_pad.i32[1];
|
int left = e->_pad.I32[1];
|
||||||
int top = e->_pad.i32[2];
|
int top = e->_pad.I32[2];
|
||||||
|
|
||||||
DrawScanline(pixels, left, top, *e++);
|
DrawScanline(pixels, left, top, *e++);
|
||||||
} while (e < ee);
|
} while (e < ee);
|
||||||
|
@ -1138,9 +1138,9 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
||||||
{
|
{
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
int pixels = e->_pad.i32[0];
|
int pixels = e->_pad.I32[0];
|
||||||
int left = e->_pad.i32[1];
|
int left = e->_pad.I32[1];
|
||||||
int top = e->_pad.i32[2];
|
int top = e->_pad.I32[2];
|
||||||
|
|
||||||
DrawEdge(pixels, left, top, *e++);
|
DrawEdge(pixels, left, top, *e++);
|
||||||
} while (e < ee);
|
} while (e < ee);
|
||||||
|
@ -1185,7 +1185,7 @@ GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
|
||||||
m_thread_height = compute_best_thread_height(threads);
|
m_thread_height = compute_best_thread_height(threads);
|
||||||
|
|
||||||
int rows = (2048 >> m_thread_height) + 16;
|
int rows = (2048 >> m_thread_height) + 16;
|
||||||
m_scanline = (uint8*)_aligned_malloc(rows, 64);
|
m_scanline = (u8*)_aligned_malloc(rows, 64);
|
||||||
|
|
||||||
int row = 0;
|
int row = 0;
|
||||||
|
|
||||||
|
@ -1193,7 +1193,7 @@ GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < threads; i++, row++)
|
for (int i = 0; i < threads; i++, row++)
|
||||||
{
|
{
|
||||||
m_scanline[row] = (uint8)i;
|
m_scanline[row] = (u8)i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,13 +30,13 @@ public:
|
||||||
GSVector4i scissor;
|
GSVector4i scissor;
|
||||||
GSVector4i bbox;
|
GSVector4i bbox;
|
||||||
GS_PRIM_CLASS primclass;
|
GS_PRIM_CLASS primclass;
|
||||||
uint8* buff;
|
u8* buff;
|
||||||
GSVertexSW* vertex;
|
GSVertexSW* vertex;
|
||||||
int vertex_count;
|
int vertex_count;
|
||||||
uint32* index;
|
u32* index;
|
||||||
int index_count;
|
int index_count;
|
||||||
uint64 frame;
|
u64 frame;
|
||||||
uint64 start;
|
u64 start;
|
||||||
int pixels;
|
int pixels;
|
||||||
int counter;
|
int counter;
|
||||||
|
|
||||||
|
@ -66,7 +66,7 @@ public:
|
||||||
class IDrawScanline : public GSAlignedClass<32>
|
class IDrawScanline : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan);
|
typedef void (*SetupPrimPtr)(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan);
|
||||||
typedef void(__fastcall* DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
|
typedef void(__fastcall* DrawScanlinePtr)(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
|
typedef void (IDrawScanline::*DrawRectPtr)(const GSVector4i& r, const GSVertexSW& v); // TODO: jit
|
||||||
|
|
||||||
|
@ -87,18 +87,18 @@ public:
|
||||||
virtual ~IDrawScanline() {}
|
virtual ~IDrawScanline() {}
|
||||||
|
|
||||||
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
virtual void BeginDraw(const GSRasterizerData* data) = 0;
|
||||||
virtual void EndDraw(uint64 frame, uint64 ticks, int actual, int total, int prims) = 0;
|
virtual void EndDraw(u64 frame, u64 ticks, int actual, int total, int prims) = 0;
|
||||||
|
|
||||||
#ifdef ENABLE_JIT_RASTERIZER
|
#ifdef ENABLE_JIT_RASTERIZER
|
||||||
|
|
||||||
__forceinline void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) { m_sp(vertex, index, dscan); }
|
__forceinline void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan) { m_sp(vertex, index, dscan); }
|
||||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) { m_ds(pixels, left, top, scan); }
|
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) { m_ds(pixels, left, top, scan); }
|
||||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) { m_de(pixels, left, top, scan); }
|
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) { m_de(pixels, left, top, scan); }
|
||||||
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) { (this->*m_dr)(r, v); }
|
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) { (this->*m_dr)(r, v); }
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
virtual void SetupPrim(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan) = 0;
|
virtual void SetupPrim(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan) = 0;
|
||||||
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
|
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
|
||||||
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
|
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
|
||||||
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
|
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
|
||||||
|
@ -131,7 +131,7 @@ protected:
|
||||||
int m_id;
|
int m_id;
|
||||||
int m_threads;
|
int m_threads;
|
||||||
int m_thread_height;
|
int m_thread_height;
|
||||||
uint8* m_scanline;
|
u8* m_scanline;
|
||||||
GSVector4i m_scissor;
|
GSVector4i m_scissor;
|
||||||
GSVector4 m_fscissor_x;
|
GSVector4 m_fscissor_x;
|
||||||
GSVector4 m_fscissor_y;
|
GSVector4 m_fscissor_y;
|
||||||
|
@ -142,10 +142,10 @@ protected:
|
||||||
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
|
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
|
||||||
|
|
||||||
template <bool scissor_test>
|
template <bool scissor_test>
|
||||||
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count);
|
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const u32* index, int index_count);
|
||||||
void DrawLine(const GSVertexSW* vertex, const uint32* index);
|
void DrawLine(const GSVertexSW* vertex, const u32* index);
|
||||||
void DrawTriangle(const GSVertexSW* vertex, const uint32* index);
|
void DrawTriangle(const GSVertexSW* vertex, const u32* index);
|
||||||
void DrawSprite(const GSVertexSW* vertex, const uint32* index);
|
void DrawSprite(const GSVertexSW* vertex, const u32* index);
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, const GSVertexSW2& dedge, const GSVertexSW2& dscan, const GSVector4& p0);
|
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW2& edge, const GSVertexSW2& dedge, const GSVertexSW2& dscan, const GSVector4& p0);
|
||||||
|
@ -156,7 +156,7 @@ protected:
|
||||||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
||||||
|
|
||||||
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
__forceinline void Flush(const GSVertexSW* vertex, const uint32* index, const GSVertexSW& dscan, bool edge = false);
|
__forceinline void Flush(const GSVertexSW* vertex, const u32* index, const GSVertexSW& dscan, bool edge = false);
|
||||||
|
|
||||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||||
|
@ -189,7 +189,7 @@ protected:
|
||||||
// Worker threads depend on the rasterizers, so don't change the order.
|
// Worker threads depend on the rasterizers, so don't change the order.
|
||||||
std::vector<std::unique_ptr<GSRasterizer>> m_r;
|
std::vector<std::unique_ptr<GSRasterizer>> m_r;
|
||||||
std::vector<std::unique_ptr<GSWorker>> m_workers;
|
std::vector<std::unique_ptr<GSWorker>> m_workers;
|
||||||
uint8* m_scanline;
|
u8* m_scanline;
|
||||||
int m_thread_height;
|
int m_thread_height;
|
||||||
|
|
||||||
GSRasterizerList(int threads, GSPerfMon* perfmon);
|
GSRasterizerList(int threads, GSPerfMon* perfmon);
|
||||||
|
|
|
@ -36,7 +36,7 @@ GSRendererSW::GSRendererSW(int threads)
|
||||||
|
|
||||||
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);
|
m_rl = GSRasterizerList::Create<GSDrawScanline>(threads, &m_perfmon);
|
||||||
|
|
||||||
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
|
m_output = (u8*)_aligned_malloc(1024 * 1024 * sizeof(u32), 32);
|
||||||
|
|
||||||
std::fill(std::begin(m_fzb_pages), std::end(m_fzb_pages), 0);
|
std::fill(std::begin(m_fzb_pages), std::end(m_fzb_pages), 0);
|
||||||
std::fill(std::begin(m_tex_pages), std::end(m_tex_pages), 0);
|
std::fill(std::begin(m_tex_pages), std::end(m_tex_pages), 0);
|
||||||
|
@ -194,7 +194,7 @@ GSTexture* GSRendererSW::GetFeedbackOutput()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <uint32 primclass, uint32 tme, uint32 fst, uint32 q_div>
|
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
|
||||||
void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
|
void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
|
||||||
{
|
{
|
||||||
// FIXME q_div wasn't added to AVX2 code path.
|
// FIXME q_div wasn't added to AVX2 code path.
|
||||||
|
@ -339,20 +339,20 @@ void GSRendererSW::Draw()
|
||||||
std::shared_ptr<GSRasterizerData> data(sd);
|
std::shared_ptr<GSRasterizerData> data(sd);
|
||||||
|
|
||||||
sd->primclass = m_vt.m_primclass;
|
sd->primclass = m_vt.m_primclass;
|
||||||
sd->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(uint32) * m_index.tail, 64);
|
sd->buff = (u8*)_aligned_malloc(sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1) + sizeof(u32) * m_index.tail, 64);
|
||||||
sd->vertex = (GSVertexSW*)sd->buff;
|
sd->vertex = (GSVertexSW*)sd->buff;
|
||||||
sd->vertex_count = m_vertex.next;
|
sd->vertex_count = m_vertex.next;
|
||||||
sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
|
sd->index = (u32*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
|
||||||
sd->index_count = m_index.tail;
|
sd->index_count = m_index.tail;
|
||||||
|
|
||||||
// skip per pixel division if q is constant.
|
// skip per pixel division if q is constant.
|
||||||
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !m_vt.m_eq.q.
|
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !m_vt.m_eq.q.
|
||||||
// If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depends on the first part of the 'OR'
|
// If you have both GS_SPRITE_CLASS && m_vt.m_eq.q, it will depends on the first part of the 'OR'
|
||||||
uint32 q_div = !IsMipMapActive() && ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS));
|
u32 q_div = !IsMipMapActive() && ((m_vt.m_eq.q && m_vt.m_min.t.z != 1.0f) || (!m_vt.m_eq.q && m_vt.m_primclass == GS_SPRITE_CLASS));
|
||||||
|
|
||||||
(this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div])(sd->vertex, m_vertex.buff, m_vertex.next);
|
(this->*m_cvb[m_vt.m_primclass][PRIM->TME][PRIM->FST][q_div])(sd->vertex, m_vertex.buff, m_vertex.next);
|
||||||
|
|
||||||
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
|
memcpy(sd->index, m_index.buff, sizeof(u32) * m_index.tail);
|
||||||
|
|
||||||
GSVector4i scissor = GSVector4i(context->scissor.in);
|
GSVector4i scissor = GSVector4i(context->scissor.in);
|
||||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||||
|
@ -382,7 +382,7 @@ void GSRendererSW::Draw()
|
||||||
{
|
{
|
||||||
int n = GSUtil::GetVertexCount(PRIM->PRIM);
|
int n = GSUtil::GetVertexCount(PRIM->PRIM);
|
||||||
|
|
||||||
for (uint32 i = 0, j = 0; i < m_index.tail; i += n, j++)
|
for (u32 i = 0, j = 0; i < m_index.tail; i += n, j++)
|
||||||
{
|
{
|
||||||
for (int k = 0; k < n; k++)
|
for (int k = 0; k < n; k++)
|
||||||
{
|
{
|
||||||
|
@ -443,7 +443,7 @@ void GSRendererSW::Draw()
|
||||||
{
|
{
|
||||||
Sync(2);
|
Sync(2);
|
||||||
|
|
||||||
uint64 frame = m_perfmon.GetFrame();
|
u64 frame = m_perfmon.GetFrame();
|
||||||
// Dump the texture in 32 bits format. It helps to debug texture shuffle effect
|
// Dump the texture in 32 bits format. It helps to debug texture shuffle effect
|
||||||
// It will breaks the few games that really uses 16 bits RT
|
// It will breaks the few games that really uses 16 bits RT
|
||||||
bool texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS));
|
bool texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS));
|
||||||
|
@ -594,7 +594,7 @@ void GSRendererSW::Sync(int reason)
|
||||||
|
|
||||||
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
|
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
|
||||||
|
|
||||||
uint64 t = __rdtsc();
|
u64 t = __rdtsc();
|
||||||
|
|
||||||
m_rl->Sync();
|
m_rl->Sync();
|
||||||
|
|
||||||
|
@ -645,7 +645,7 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
||||||
|
|
||||||
if (!m_rl->IsSynced())
|
if (!m_rl->IsSynced())
|
||||||
{
|
{
|
||||||
pages.loopPagesWithBreak([&](uint32 page)
|
pages.loopPagesWithBreak([&](u32 page)
|
||||||
{
|
{
|
||||||
if (m_fzb_pages[page] | m_tex_pages[page])
|
if (m_fzb_pages[page] | m_tex_pages[page])
|
||||||
{
|
{
|
||||||
|
@ -673,7 +673,7 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
||||||
GSOffset off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
GSOffset off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
||||||
GSOffset::PageLooper pages = off.pageLooperForRect(r);
|
GSOffset::PageLooper pages = off.pageLooperForRect(r);
|
||||||
|
|
||||||
pages.loopPagesWithBreak([&](uint32 page)
|
pages.loopPagesWithBreak([&](u32 page)
|
||||||
{
|
{
|
||||||
if (m_fzb_pages[page])
|
if (m_fzb_pages[page])
|
||||||
{
|
{
|
||||||
|
@ -688,7 +688,7 @@ void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
||||||
|
|
||||||
void GSRendererSW::UsePages(const GSOffset::PageLooper& pages, const int type)
|
void GSRendererSW::UsePages(const GSOffset::PageLooper& pages, const int type)
|
||||||
{
|
{
|
||||||
pages.loopPages([=](uint32 page)
|
pages.loopPages([=](u32 page)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
|
@ -712,7 +712,7 @@ void GSRendererSW::UsePages(const GSOffset::PageLooper& pages, const int type)
|
||||||
|
|
||||||
void GSRendererSW::ReleasePages(const GSOffset::PageLooper& pages, const int type)
|
void GSRendererSW::ReleasePages(const GSOffset::PageLooper& pages, const int type)
|
||||||
{
|
{
|
||||||
pages.loopPages([=](uint32 page)
|
pages.loopPages([=](u32 page)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
|
@ -767,14 +767,14 @@ bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const
|
||||||
|
|
||||||
memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages));
|
memset(m_fzb_cur_pages, 0, sizeof(m_fzb_cur_pages));
|
||||||
|
|
||||||
uint32 used = 0;
|
u32 used = 0;
|
||||||
|
|
||||||
requirePages();
|
requirePages();
|
||||||
|
|
||||||
fb_pages->loopPages([&](uint32 i)
|
fb_pages->loopPages([&](u32 i)
|
||||||
{
|
{
|
||||||
uint32 row = i >> 5;
|
u32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
u32 col = 1 << (i & 31);
|
||||||
|
|
||||||
m_fzb_cur_pages[row] |= col;
|
m_fzb_cur_pages[row] |= col;
|
||||||
|
|
||||||
|
@ -782,10 +782,10 @@ bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const
|
||||||
used |= m_tex_pages[i];
|
used |= m_tex_pages[i];
|
||||||
});
|
});
|
||||||
|
|
||||||
zb_pages->loopPages([&](uint32 i)
|
zb_pages->loopPages([&](u32 i)
|
||||||
{
|
{
|
||||||
uint32 row = i >> 5;
|
u32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
u32 col = 1 << (i & 31);
|
||||||
|
|
||||||
m_fzb_cur_pages[row] |= col;
|
m_fzb_cur_pages[row] |= col;
|
||||||
|
|
||||||
|
@ -825,12 +825,12 @@ bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const
|
||||||
|
|
||||||
requirePages();
|
requirePages();
|
||||||
|
|
||||||
uint32 used = 0;
|
u32 used = 0;
|
||||||
|
|
||||||
fb_pages->loopPages([&](uint32 i)
|
fb_pages->loopPages([&](u32 i)
|
||||||
{
|
{
|
||||||
uint32 row = i >> 5;
|
u32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
u32 col = 1 << (i & 31);
|
||||||
|
|
||||||
if ((m_fzb_cur_pages[row] & col) == 0)
|
if ((m_fzb_cur_pages[row] & col) == 0)
|
||||||
{
|
{
|
||||||
|
@ -840,10 +840,10 @@ bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
zb_pages->loopPages([&](uint32 i)
|
zb_pages->loopPages([&](u32 i)
|
||||||
{
|
{
|
||||||
uint32 row = i >> 5;
|
u32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
u32 col = 1 << (i & 31);
|
||||||
|
|
||||||
if ((m_fzb_cur_pages[row] & col) == 0)
|
if ((m_fzb_cur_pages[row] & col) == 0)
|
||||||
{
|
{
|
||||||
|
@ -875,7 +875,7 @@ bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const
|
||||||
|
|
||||||
if (fb && !res)
|
if (fb && !res)
|
||||||
{
|
{
|
||||||
fb_pages->loopPagesWithBreak([&](uint32 page)
|
fb_pages->loopPagesWithBreak([&](u32 page)
|
||||||
{
|
{
|
||||||
if (m_fzb_pages[page] & 0xffff0000)
|
if (m_fzb_pages[page] & 0xffff0000)
|
||||||
{
|
{
|
||||||
|
@ -895,7 +895,7 @@ bool GSRendererSW::CheckTargetPages(const GSOffset::PageLooper* fb_pages, const
|
||||||
|
|
||||||
if (zb && !res)
|
if (zb && !res)
|
||||||
{
|
{
|
||||||
zb_pages->loopPagesWithBreak([&](uint32 page)
|
zb_pages->loopPagesWithBreak([&](u32 page)
|
||||||
{
|
{
|
||||||
if (m_fzb_pages[page] & 0x0000ffff)
|
if (m_fzb_pages[page] & 0x0000ffff)
|
||||||
{
|
{
|
||||||
|
@ -927,7 +927,7 @@ bool GSRendererSW::CheckSourcePages(SharedData* sd)
|
||||||
GSOffset::PageLooper pages = sd->m_tex[i].t->m_offset.pageLooperForRect(sd->m_tex[i].r);
|
GSOffset::PageLooper pages = sd->m_tex[i].t->m_offset.pageLooperForRect(sd->m_tex[i].r);
|
||||||
|
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
pages.loopPagesWithBreak([&](uint32 pages)
|
pages.loopPagesWithBreak([&](u32 pages)
|
||||||
{
|
{
|
||||||
// TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2)
|
// TODO: 8H 4HL 4HH texture at the same place as the render target (24 bit, or 32-bit where the alpha channel is masked, Valkyrie Profile 2)
|
||||||
|
|
||||||
|
@ -972,8 +972,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
gd.sel.ababcd = 0xff;
|
gd.sel.ababcd = 0xff;
|
||||||
gd.sel.prim = primclass;
|
gd.sel.prim = primclass;
|
||||||
|
|
||||||
uint32 fm = context->FRAME.FBMSK;
|
u32 fm = context->FRAME.FBMSK;
|
||||||
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
u32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||||
|
|
||||||
if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
|
if (context->TEST.ZTE && context->TEST.ZTST == ZTST_NEVER)
|
||||||
{
|
{
|
||||||
|
@ -1049,9 +1049,9 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
{
|
{
|
||||||
gd.sel.tlu = 1;
|
gd.sel.tlu = 1;
|
||||||
|
|
||||||
gd.clut = (uint32*)_aligned_malloc(sizeof(uint32) * 256, 32); // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
|
gd.clut = (u32*)_aligned_malloc(sizeof(u32) * 256, 32); // FIXME: might address uninitialized data of the texture (0xCD) that is not in 0-15 range for 4-bpp formats
|
||||||
|
|
||||||
memcpy(gd.clut, (const uint32*)m_mem.m_clut, sizeof(uint32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
|
memcpy(gd.clut, (const u32*)m_mem.m_clut, sizeof(u32) * GSLocalMemory::m_psm[context->TEX0.PSM].pal);
|
||||||
}
|
}
|
||||||
|
|
||||||
gd.sel.wms = context->CLAMP.WMS;
|
gd.sel.wms = context->CLAMP.WMS;
|
||||||
|
@ -1216,30 +1216,30 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16 tw = 1u << TEX0.TW;
|
u16 tw = 1u << TEX0.TW;
|
||||||
uint16 th = 1u << TEX0.TH;
|
u16 th = 1u << TEX0.TH;
|
||||||
|
|
||||||
switch (context->CLAMP.WMS)
|
switch (context->CLAMP.WMS)
|
||||||
{
|
{
|
||||||
case CLAMP_REPEAT:
|
case CLAMP_REPEAT:
|
||||||
gd.t.min.u16[0] = gd.t.minmax.u16[0] = tw - 1;
|
gd.t.min.U16[0] = gd.t.minmax.U16[0] = tw - 1;
|
||||||
gd.t.max.u16[0] = gd.t.minmax.u16[2] = 0;
|
gd.t.max.U16[0] = gd.t.minmax.U16[2] = 0;
|
||||||
gd.t.mask.u32[0] = 0xffffffff;
|
gd.t.mask.U32[0] = 0xffffffff;
|
||||||
break;
|
break;
|
||||||
case CLAMP_CLAMP:
|
case CLAMP_CLAMP:
|
||||||
gd.t.min.u16[0] = gd.t.minmax.u16[0] = 0;
|
gd.t.min.U16[0] = gd.t.minmax.U16[0] = 0;
|
||||||
gd.t.max.u16[0] = gd.t.minmax.u16[2] = tw - 1;
|
gd.t.max.U16[0] = gd.t.minmax.U16[2] = tw - 1;
|
||||||
gd.t.mask.u32[0] = 0;
|
gd.t.mask.U32[0] = 0;
|
||||||
break;
|
break;
|
||||||
case CLAMP_REGION_CLAMP:
|
case CLAMP_REGION_CLAMP:
|
||||||
gd.t.min.u16[0] = gd.t.minmax.u16[0] = std::min<uint16>(context->CLAMP.MINU, tw - 1);
|
gd.t.min.U16[0] = gd.t.minmax.U16[0] = std::min<u16>(context->CLAMP.MINU, tw - 1);
|
||||||
gd.t.max.u16[0] = gd.t.minmax.u16[2] = std::min<uint16>(context->CLAMP.MAXU, tw - 1);
|
gd.t.max.U16[0] = gd.t.minmax.U16[2] = std::min<u16>(context->CLAMP.MAXU, tw - 1);
|
||||||
gd.t.mask.u32[0] = 0;
|
gd.t.mask.U32[0] = 0;
|
||||||
break;
|
break;
|
||||||
case CLAMP_REGION_REPEAT:
|
case CLAMP_REGION_REPEAT:
|
||||||
gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU & (tw - 1);
|
gd.t.min.U16[0] = gd.t.minmax.U16[0] = context->CLAMP.MINU & (tw - 1);
|
||||||
gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU & (tw - 1);
|
gd.t.max.U16[0] = gd.t.minmax.U16[2] = context->CLAMP.MAXU & (tw - 1);
|
||||||
gd.t.mask.u32[0] = 0xffffffff;
|
gd.t.mask.U32[0] = 0xffffffff;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
__assume(0);
|
__assume(0);
|
||||||
|
@ -1248,24 +1248,24 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
switch (context->CLAMP.WMT)
|
switch (context->CLAMP.WMT)
|
||||||
{
|
{
|
||||||
case CLAMP_REPEAT:
|
case CLAMP_REPEAT:
|
||||||
gd.t.min.u16[4] = gd.t.minmax.u16[1] = th - 1;
|
gd.t.min.U16[4] = gd.t.minmax.U16[1] = th - 1;
|
||||||
gd.t.max.u16[4] = gd.t.minmax.u16[3] = 0;
|
gd.t.max.U16[4] = gd.t.minmax.U16[3] = 0;
|
||||||
gd.t.mask.u32[2] = 0xffffffff;
|
gd.t.mask.U32[2] = 0xffffffff;
|
||||||
break;
|
break;
|
||||||
case CLAMP_CLAMP:
|
case CLAMP_CLAMP:
|
||||||
gd.t.min.u16[4] = gd.t.minmax.u16[1] = 0;
|
gd.t.min.U16[4] = gd.t.minmax.U16[1] = 0;
|
||||||
gd.t.max.u16[4] = gd.t.minmax.u16[3] = th - 1;
|
gd.t.max.U16[4] = gd.t.minmax.U16[3] = th - 1;
|
||||||
gd.t.mask.u32[2] = 0;
|
gd.t.mask.U32[2] = 0;
|
||||||
break;
|
break;
|
||||||
case CLAMP_REGION_CLAMP:
|
case CLAMP_REGION_CLAMP:
|
||||||
gd.t.min.u16[4] = gd.t.minmax.u16[1] = std::min<uint16>(context->CLAMP.MINV, th - 1);
|
gd.t.min.U16[4] = gd.t.minmax.U16[1] = std::min<u16>(context->CLAMP.MINV, th - 1);
|
||||||
gd.t.max.u16[4] = gd.t.minmax.u16[3] = std::min<uint16>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
gd.t.max.U16[4] = gd.t.minmax.U16[3] = std::min<u16>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||||
gd.t.mask.u32[2] = 0;
|
gd.t.mask.U32[2] = 0;
|
||||||
break;
|
break;
|
||||||
case CLAMP_REGION_REPEAT:
|
case CLAMP_REGION_REPEAT:
|
||||||
gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127
|
gd.t.min.U16[4] = gd.t.minmax.U16[1] = context->CLAMP.MINV & (th - 1); // skygunner main menu water texture 64x64, MINV = 127
|
||||||
gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV & (th - 1);
|
gd.t.max.U16[4] = gd.t.minmax.U16[3] = context->CLAMP.MAXV & (th - 1);
|
||||||
gd.t.mask.u32[2] = 0xffffffff;
|
gd.t.mask.U32[2] = 0xffffffff;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
__assume(0);
|
__assume(0);
|
||||||
|
@ -1281,8 +1281,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
{
|
{
|
||||||
gd.sel.fge = 1;
|
gd.sel.fge = 1;
|
||||||
|
|
||||||
gd.frb = env.FOGCOL.u32[0] & 0x00ff00ff;
|
gd.frb = env.FOGCOL.U32[0] & 0x00ff00ff;
|
||||||
gd.fga = (env.FOGCOL.u32[0] >> 8) & 0x00ff00ff;
|
gd.fga = (env.FOGCOL.U32[0] >> 8) & 0x00ff00ff;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (context->FRAME.PSM != PSM_PSMCT24)
|
if (context->FRAME.PSM != PSM_PSMCT24)
|
||||||
|
@ -1294,7 +1294,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
if (!IsOpaque())
|
if (!IsOpaque())
|
||||||
{
|
{
|
||||||
gd.sel.abe = PRIM->ABE;
|
gd.sel.abe = PRIM->ABE;
|
||||||
gd.sel.ababcd = context->ALPHA.u32[0];
|
gd.sel.ababcd = context->ALPHA.U32[0];
|
||||||
|
|
||||||
if (env.PABE.PABE)
|
if (env.PABE.PABE)
|
||||||
{
|
{
|
||||||
|
@ -1337,12 +1337,12 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
|
|
||||||
if (zwrite || ztest)
|
if (zwrite || ztest)
|
||||||
{
|
{
|
||||||
uint32_t z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8);
|
u32 z_max = 0xffffffff >> (GSLocalMemory::m_psm[context->ZBUF.PSM].fmt * 8);
|
||||||
|
|
||||||
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
||||||
gd.sel.ztst = ztest ? context->TEST.ZTST : (int)ZTST_ALWAYS;
|
gd.sel.ztst = ztest ? context->TEST.ZTST : (int)ZTST_ALWAYS;
|
||||||
gd.sel.zoverflow = (uint32)GSVector4i(m_vt.m_max.p).z == 0x80000000U;
|
gd.sel.zoverflow = (u32)GSVector4i(m_vt.m_max.p).z == 0x80000000U;
|
||||||
gd.sel.zclamp = (uint32)GSVector4i(m_vt.m_max.p).z > z_max;
|
gd.sel.zclamp = (u32)GSVector4i(m_vt.m_max.p).z > z_max;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
@ -1356,8 +1356,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
}
|
}
|
||||||
else if (gd.sel.fpsm == 2)
|
else if (gd.sel.fpsm == 2)
|
||||||
{
|
{
|
||||||
uint32 rb = gd.fm & 0x00f800f8;
|
u32 rb = gd.fm & 0x00f800f8;
|
||||||
uint32 ga = gd.fm & 0x8000f800;
|
u32 ga = gd.fm & 0x8000f800;
|
||||||
|
|
||||||
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000;
|
gd.fm = (ga >> 16) | (rb >> 9) | (ga >> 6) | (rb >> 3) | 0xffff0000;
|
||||||
}
|
}
|
||||||
|
@ -1403,7 +1403,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||||
{
|
{
|
||||||
gd.sel.notest = 1;
|
gd.sel.notest = 1;
|
||||||
|
|
||||||
uint32 ofx = context->XYOFFSET.OFX;
|
u32 ofx = context->XYOFFSET.OFX;
|
||||||
|
|
||||||
for (int i = 0, j = m_vertex.tail; i < j; i++)
|
for (int i = 0, j = m_vertex.tail; i < j; i++)
|
||||||
{
|
{
|
||||||
|
@ -1551,7 +1551,7 @@ void GSRendererSW::SharedData::UpdateSource()
|
||||||
|
|
||||||
if (m_parent->s_dump)
|
if (m_parent->s_dump)
|
||||||
{
|
{
|
||||||
uint64 frame = m_parent->m_perfmon.GetFrame();
|
u64 frame = m_parent->m_perfmon.GetFrame();
|
||||||
|
|
||||||
std::string s;
|
std::string s;
|
||||||
|
|
||||||
|
@ -1570,7 +1570,7 @@ void GSRendererSW::SharedData::UpdateSource()
|
||||||
{
|
{
|
||||||
GSTextureSW* t = new GSTextureSW(0, 256, 1);
|
GSTextureSW* t = new GSTextureSW(0, 256, 1);
|
||||||
|
|
||||||
t->Update(GSVector4i(0, 0, 256, 1), global.clut, sizeof(uint32) * 256);
|
t->Update(GSVector4i(0, 0, 256, 1), global.clut, sizeof(u32) * 256);
|
||||||
|
|
||||||
s = format("%05d_f%lld_itexp_%05x_%s.bmp", m_parent->s_n, frame, (int)m_parent->m_context->TEX0.CBP, psm_str(m_parent->m_context->TEX0.CPSM));
|
s = format("%05d_f%lld_itexp_%05x_%s.bmp", m_parent->s_n, frame, (int)m_parent->m_context->TEX0.CBP, psm_str(m_parent->m_context->TEX0.CPSM));
|
||||||
|
|
||||||
|
|
|
@ -63,19 +63,19 @@ class GSRendererSW : public GSRenderer
|
||||||
|
|
||||||
ConvertVertexBufferPtr m_cvb[4][2][2][2];
|
ConvertVertexBufferPtr m_cvb[4][2][2][2];
|
||||||
|
|
||||||
template <uint32 primclass, uint32 tme, uint32 fst, uint32 q_div>
|
template <u32 primclass, u32 tme, u32 fst, u32 q_div>
|
||||||
void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
IRasterizer* m_rl;
|
IRasterizer* m_rl;
|
||||||
GSTextureCacheSW* m_tc;
|
GSTextureCacheSW* m_tc;
|
||||||
GSTexture* m_texture[2];
|
GSTexture* m_texture[2];
|
||||||
uint8* m_output;
|
u8* m_output;
|
||||||
GSPixelOffset4* m_fzb;
|
GSPixelOffset4* m_fzb;
|
||||||
GSVector4i m_fzb_bbox;
|
GSVector4i m_fzb_bbox;
|
||||||
uint32 m_fzb_cur_pages[16];
|
u32 m_fzb_cur_pages[16];
|
||||||
std::atomic<uint32> m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
|
std::atomic<u32> m_fzb_pages[512]; // u16 frame/zbuf pages interleaved
|
||||||
std::atomic<uint16> m_tex_pages[512];
|
std::atomic<u16> m_tex_pages[512];
|
||||||
|
|
||||||
void Reset();
|
void Reset();
|
||||||
void VSync(int field);
|
void VSync(int field);
|
||||||
|
|
|
@ -22,79 +22,79 @@ union GSScanlineSelector
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 fpsm : 2; // 0
|
u32 fpsm : 2; // 0
|
||||||
uint32 zpsm : 2; // 2
|
u32 zpsm : 2; // 2
|
||||||
uint32 ztst : 2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g))
|
u32 ztst : 2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g))
|
||||||
uint32 atst : 3; // 6
|
u32 atst : 3; // 6
|
||||||
uint32 afail : 2; // 9
|
u32 afail : 2; // 9
|
||||||
uint32 iip : 1; // 11
|
u32 iip : 1; // 11
|
||||||
uint32 tfx : 3; // 12
|
u32 tfx : 3; // 12
|
||||||
uint32 tcc : 1; // 15
|
u32 tcc : 1; // 15
|
||||||
uint32 fst : 1; // 16
|
u32 fst : 1; // 16
|
||||||
uint32 ltf : 1; // 17
|
u32 ltf : 1; // 17
|
||||||
uint32 tlu : 1; // 18
|
u32 tlu : 1; // 18
|
||||||
uint32 fge : 1; // 19
|
u32 fge : 1; // 19
|
||||||
uint32 date : 1; // 20
|
u32 date : 1; // 20
|
||||||
uint32 abe : 1; // 21
|
u32 abe : 1; // 21
|
||||||
uint32 aba : 2; // 22
|
u32 aba : 2; // 22
|
||||||
uint32 abb : 2; // 24
|
u32 abb : 2; // 24
|
||||||
uint32 abc : 2; // 26
|
u32 abc : 2; // 26
|
||||||
uint32 abd : 2; // 28
|
u32 abd : 2; // 28
|
||||||
uint32 pabe : 1; // 30
|
u32 pabe : 1; // 30
|
||||||
uint32 aa1 : 1; // 31
|
u32 aa1 : 1; // 31
|
||||||
|
|
||||||
uint32 fwrite : 1; // 32
|
u32 fwrite : 1; // 32
|
||||||
uint32 ftest : 1; // 33
|
u32 ftest : 1; // 33
|
||||||
uint32 rfb : 1; // 34
|
u32 rfb : 1; // 34
|
||||||
uint32 zwrite : 1; // 35
|
u32 zwrite : 1; // 35
|
||||||
uint32 ztest : 1; // 36
|
u32 ztest : 1; // 36
|
||||||
uint32 zoverflow : 1; // 37 (z max >= 0x80000000)
|
u32 zoverflow : 1; // 37 (z max >= 0x80000000)
|
||||||
uint32 zclamp : 1; // 38
|
u32 zclamp : 1; // 38
|
||||||
uint32 wms : 2; // 39
|
u32 wms : 2; // 39
|
||||||
uint32 wmt : 2; // 41
|
u32 wmt : 2; // 41
|
||||||
uint32 datm : 1; // 43
|
u32 datm : 1; // 43
|
||||||
uint32 colclamp : 1; // 44
|
u32 colclamp : 1; // 44
|
||||||
uint32 fba : 1; // 45
|
u32 fba : 1; // 45
|
||||||
uint32 dthe : 1; // 46
|
u32 dthe : 1; // 46
|
||||||
uint32 prim : 2; // 47
|
u32 prim : 2; // 47
|
||||||
|
|
||||||
uint32 edge : 1; // 49
|
u32 edge : 1; // 49
|
||||||
uint32 tw : 3; // 50 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
|
u32 tw : 3; // 50 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
|
||||||
uint32 lcm : 1; // 53
|
u32 lcm : 1; // 53
|
||||||
uint32 mmin : 2; // 54
|
u32 mmin : 2; // 54
|
||||||
uint32 notest : 1; // 55 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
|
u32 notest : 1; // 55 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
|
||||||
// TODO: 1D texture flag? could save 2 texture reads and 4 lerps with bilinear, and also the texture coordinate clamp/wrap code in one direction
|
// TODO: 1D texture flag? could save 2 texture reads and 4 lerps with bilinear, and also the texture coordinate clamp/wrap code in one direction
|
||||||
|
|
||||||
uint32 breakpoint : 1; // Insert a trap to stop the program, helpful to stop debugger on a program
|
u32 breakpoint : 1; // Insert a trap to stop the program, helpful to stop debugger on a program
|
||||||
};
|
};
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 _pad1 : 22;
|
u32 _pad1 : 22;
|
||||||
uint32 ababcd : 8;
|
u32 ababcd : 8;
|
||||||
uint32 _pad2 : 2;
|
u32 _pad2 : 2;
|
||||||
|
|
||||||
uint32 fb : 2;
|
u32 fb : 2;
|
||||||
uint32 _pad3 : 1;
|
u32 _pad3 : 1;
|
||||||
uint32 zb : 2;
|
u32 zb : 2;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 lo;
|
u32 lo;
|
||||||
uint32 hi;
|
u32 hi;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint64 key;
|
u64 key;
|
||||||
|
|
||||||
GSScanlineSelector() = default;
|
GSScanlineSelector() = default;
|
||||||
GSScanlineSelector(uint64 k)
|
GSScanlineSelector(u64 k)
|
||||||
: key(k)
|
: key(k)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
operator uint32() const { return lo; }
|
operator u32() const { return lo; }
|
||||||
operator uint64() const { return key; }
|
operator u64() const { return key; }
|
||||||
|
|
||||||
bool IsSolidRect() const
|
bool IsSolidRect() const
|
||||||
{
|
{
|
||||||
|
@ -124,7 +124,7 @@ struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a p
|
||||||
|
|
||||||
void* vm;
|
void* vm;
|
||||||
const void* tex[7];
|
const void* tex[7];
|
||||||
uint32* clut;
|
u32* clut;
|
||||||
GSVector4i* dimx;
|
GSVector4i* dimx;
|
||||||
|
|
||||||
GSOffset fbo;
|
GSOffset fbo;
|
||||||
|
@ -138,8 +138,8 @@ struct alignas(32) GSScanlineGlobalData // per batch variables, this is like a p
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
uint32 fm, zm;
|
u32 fm, zm;
|
||||||
uint32 frb, fga;
|
u32 frb, fga;
|
||||||
GSVector8 mxl;
|
GSVector8 mxl;
|
||||||
GSVector8 k; // TEX1.K * 0x10000
|
GSVector8 k; // TEX1.K * 0x10000
|
||||||
GSVector8 l; // TEX1.L * -0x10000
|
GSVector8 l; // TEX1.L * -0x10000
|
||||||
|
@ -162,9 +162,9 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
|
||||||
struct skip { GSVector8 z, s, t, q; GSVector8i rb, ga, f, _pad; } d[8];
|
struct skip { GSVector8 z, s, t, q; GSVector8i rb, ga, f, _pad; } d[8];
|
||||||
struct step { GSVector4 stq; struct { uint32 rb, ga; } c; struct { uint32 z, f; } p; } d8;
|
struct step { GSVector4 stq; struct { u32 rb, ga; } c; struct { u32 z, f; } p; } d8;
|
||||||
struct { GSVector8i rb, ga; } c;
|
struct { GSVector8i rb, ga; } c;
|
||||||
struct { uint32 z, f; } p;
|
struct { u32 z, f; } p;
|
||||||
|
|
||||||
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
||||||
|
|
||||||
|
@ -231,11 +231,11 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
||||||
// absolute addressing. Otherwise we need to store a base address in a register.
|
// absolute addressing. Otherwise we need to store a base address in a register.
|
||||||
struct GSScanlineConstantData : public GSAlignedClass<32>
|
struct GSScanlineConstantData : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
alignas(32) uint8 m_test_256b[16][8];
|
alignas(32) u8 m_test_256b[16][8];
|
||||||
alignas(32) float m_shift_256b[9][8];
|
alignas(32) float m_shift_256b[9][8];
|
||||||
alignas(32) float m_log2_coef_256b[4][8];
|
alignas(32) float m_log2_coef_256b[4][8];
|
||||||
|
|
||||||
alignas(16) uint32 m_test_128b[8][4];
|
alignas(16) u32 m_test_128b[8][4];
|
||||||
alignas(16) float m_shift_128b[5][4];
|
alignas(16) float m_shift_128b[5][4];
|
||||||
alignas(16) float m_log2_coef_128b[4][4];
|
alignas(16) float m_log2_coef_128b[4][4];
|
||||||
|
|
||||||
|
@ -245,7 +245,7 @@ struct GSScanlineConstantData : public GSAlignedClass<32>
|
||||||
// So it must be defered to post global constructor
|
// So it must be defered to post global constructor
|
||||||
void Init()
|
void Init()
|
||||||
{
|
{
|
||||||
uint8 I_hate_vs2013_m_test_256b[16][8] = {
|
u8 I_hate_vs2013_m_test_256b[16][8] = {
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
|
@ -264,7 +264,7 @@ struct GSScanlineConstantData : public GSAlignedClass<32>
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32 I_hate_vs2013_m_test_128b[8][4] = {
|
u32 I_hate_vs2013_m_test_128b[8][4] = {
|
||||||
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
|
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
|
||||||
{0xffffffff, 0x00000000, 0x00000000, 0x00000000},
|
{0xffffffff, 0x00000000, 0x00000000, 0x00000000},
|
||||||
{0xffffffff, 0xffffffff, 0x00000000, 0x00000000},
|
{0xffffffff, 0xffffffff, 0x00000000, 0x00000000},
|
||||||
|
|
|
@ -48,7 +48,7 @@ using namespace Xbyak;
|
||||||
#define _rip_local_d_p(x) _rip_local_d(x)
|
#define _rip_local_d_p(x) _rip_local_d(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key)
|
GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, u64 key)
|
||||||
: _parent(base, cpu)
|
: _parent(base, cpu)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false), many_regs(false)
|
, m_rip(false), many_regs(false)
|
||||||
|
@ -212,7 +212,7 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
|
||||||
|
|
||||||
if (is32)
|
if (is32)
|
||||||
mov(_index, ptr[rsp + _32_index]);
|
mov(_index, ptr[rsp + _32_index]);
|
||||||
mov(eax, ptr[_index + sizeof(uint32) * 1]);
|
mov(eax, ptr[_index + sizeof(u32) * 1]);
|
||||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||||
if (is64)
|
if (is64)
|
||||||
add(rax, _64_vertex);
|
add(rax, _64_vertex);
|
||||||
|
@ -232,7 +232,7 @@ void GSSetupPrimCodeGenerator2::Depth_XMM()
|
||||||
|
|
||||||
if (m_en.z)
|
if (m_en.z)
|
||||||
{
|
{
|
||||||
// uint32 z is bypassed in t.w
|
// u32 z is bypassed in t.w
|
||||||
|
|
||||||
movdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
|
movdqa(xmm0, ptr[rax + offsetof(GSVertexSW, t)]);
|
||||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||||
|
@ -314,7 +314,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
||||||
|
|
||||||
if (is32)
|
if (is32)
|
||||||
mov(_index, ptr[rsp + _32_index]);
|
mov(_index, ptr[rsp + _32_index]);
|
||||||
mov(eax, ptr[_index + sizeof(uint32) * 1]);
|
mov(eax, ptr[_index + sizeof(u32) * 1]);
|
||||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||||
if (is64)
|
if (is64)
|
||||||
add(rax, _64_vertex);
|
add(rax, _64_vertex);
|
||||||
|
@ -332,7 +332,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
||||||
|
|
||||||
if (m_en.z)
|
if (m_en.z)
|
||||||
{
|
{
|
||||||
// m_local.p.z = vertex[index[1]].t.u32[3]; // uint32 z is bypassed in t.w
|
// m_local.p.z = vertex[index[1]].t.u32[3]; // u32 z is bypassed in t.w
|
||||||
|
|
||||||
mov(t1.cvt32(), ptr[rax + offsetof(GSVertexSW, t.w)]);
|
mov(t1.cvt32(), ptr[rax + offsetof(GSVertexSW, t.w)]);
|
||||||
mov(_rip_local(p.z), t1.cvt32());
|
mov(_rip_local(p.z), t1.cvt32());
|
||||||
|
@ -524,7 +524,7 @@ void GSSetupPrimCodeGenerator2::Color()
|
||||||
{
|
{
|
||||||
if (is32)
|
if (is32)
|
||||||
mov(_index, ptr[rsp + _32_index]);
|
mov(_index, ptr[rsp + _32_index]);
|
||||||
mov(eax, ptr[_index + sizeof(uint32) * last]);
|
mov(eax, ptr[_index + sizeof(u32) * last]);
|
||||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||||
if (is64)
|
if (is64)
|
||||||
add(rax, _64_vertex);
|
add(rax, _64_vertex);
|
||||||
|
|
|
@ -57,7 +57,7 @@ class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
|
||||||
bool m_rip;
|
bool m_rip;
|
||||||
bool many_regs;
|
bool many_regs;
|
||||||
|
|
||||||
struct {uint32 z:1, f:1, t:1, c:1;} m_en;
|
struct {u32 z:1, f:1, t:1, c:1;} m_en;
|
||||||
|
|
||||||
const XYm xym0{0}, xym1{1}, xym2{2}, xym3{3}, xym4{4}, xym5{5}, xym6{6}, xym7{7}, xym8{8}, xym9{9}, xym10{10}, xym11{11}, xym12{12}, xym13{13}, xym14{14}, xym15{15};
|
const XYm xym0{0}, xym1{1}, xym2{2}, xym3{3}, xym4{4}, xym5{5}, xym6{6}, xym7{7}, xym8{8}, xym9{9}, xym10{10}, xym11{11}, xym12{12}, xym13{13}, xym14{14}, xym15{15};
|
||||||
const AddressReg _64_vertex, _index, _dscan, _64_t0, t1;
|
const AddressReg _64_vertex, _index, _dscan, _64_t0, t1;
|
||||||
|
@ -69,7 +69,7 @@ class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, uint64 key);
|
GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, CPUInfo cpu, void* param, u64 key);
|
||||||
void Generate();
|
void Generate();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
using namespace Xbyak;
|
using namespace Xbyak;
|
||||||
|
|
||||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize)
|
||||||
: GSCodeGenerator(code, maxsize)
|
: GSCodeGenerator(code, maxsize)
|
||||||
, m_local(*(GSScanlineLocalData*)param)
|
, m_local(*(GSScanlineLocalData*)param)
|
||||||
, m_rip(false)
|
, m_rip(false)
|
||||||
|
|
|
@ -29,9 +29,9 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
uint32 z : 1, f : 1, t : 1, c : 1;
|
u32 z : 1, f : 1, t : 1, c : 1;
|
||||||
} m_en;
|
} m_en;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
GSSetupPrimCodeGenerator(void* param, u64 key, void* code, size_t maxsize);
|
||||||
};
|
};
|
||||||
|
|
|
@ -26,7 +26,7 @@ GSTextureCacheSW::~GSTextureCacheSW()
|
||||||
RemoveAll();
|
RemoveAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0)
|
GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, u32 tw0)
|
||||||
{
|
{
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
|
||||||
{
|
{
|
||||||
Texture* t = *i;
|
Texture* t = *i;
|
||||||
|
|
||||||
if (((TEX0.u32[0] ^ t->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
if (((TEX0.U32[0] ^ t->m_TEX0.U32[0]) | ((TEX0.U32[1] ^ t->m_TEX0.U32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -62,7 +62,7 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
|
||||||
|
|
||||||
m_textures.insert(t);
|
m_textures.insert(t);
|
||||||
|
|
||||||
t->m_pages.loopPages([&](uint32 page)
|
t->m_pages.loopPages([&](u32 page)
|
||||||
{
|
{
|
||||||
t->m_erase_it[page] = m_map[page].InsertFront(t);
|
t->m_erase_it[page] = m_map[page].InsertFront(t);
|
||||||
});
|
});
|
||||||
|
@ -70,15 +70,15 @@ GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, cons
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSTextureCacheSW::InvalidatePages(const GSOffset::PageLooper& pages, uint32 psm)
|
void GSTextureCacheSW::InvalidatePages(const GSOffset::PageLooper& pages, u32 psm)
|
||||||
{
|
{
|
||||||
pages.loopPages([&](uint32 page)
|
pages.loopPages([&](u32 page)
|
||||||
{
|
{
|
||||||
for (Texture* t : m_map[page])
|
for (Texture* t : m_map[page])
|
||||||
{
|
{
|
||||||
if (GSUtil::HasSharedBits(psm, t->m_sharedbits))
|
if (GSUtil::HasSharedBits(psm, t->m_sharedbits))
|
||||||
{
|
{
|
||||||
uint32* RESTRICT valid = t->m_valid;
|
u32* RESTRICT valid = t->m_valid;
|
||||||
|
|
||||||
if (t->m_repeating)
|
if (t->m_repeating)
|
||||||
{
|
{
|
||||||
|
@ -121,7 +121,7 @@ void GSTextureCacheSW::IncAge()
|
||||||
{
|
{
|
||||||
i = m_textures.erase(i);
|
i = m_textures.erase(i);
|
||||||
|
|
||||||
t->m_pages.loopPages([&](uint32 page)
|
t->m_pages.loopPages([&](u32 page)
|
||||||
{
|
{
|
||||||
m_map[page].EraseIndex(t->m_erase_it[page]);
|
m_map[page].EraseIndex(t->m_erase_it[page]);
|
||||||
});
|
});
|
||||||
|
@ -137,7 +137,7 @@ void GSTextureCacheSW::IncAge()
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
||||||
GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
GSTextureCacheSW::Texture::Texture(GSState* state, u32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||||
: m_state(state)
|
: m_state(state)
|
||||||
, m_buff(NULL)
|
, m_buff(NULL)
|
||||||
, m_tw(tw0)
|
, m_tw(tw0)
|
||||||
|
@ -203,7 +203,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
|
|
||||||
if (m_buff == NULL)
|
if (m_buff == NULL)
|
||||||
{
|
{
|
||||||
uint32 pitch = (1 << m_tw) << shift;
|
u32 pitch = (1 << m_tw) << shift;
|
||||||
|
|
||||||
m_buff = _aligned_malloc(pitch * th * 4, 32);
|
m_buff = _aligned_malloc(pitch * th * 4, 32);
|
||||||
|
|
||||||
|
@ -217,13 +217,13 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
|
|
||||||
GSOffset off = m_offset;
|
GSOffset off = m_offset;
|
||||||
|
|
||||||
uint32 blocks = 0;
|
u32 blocks = 0;
|
||||||
|
|
||||||
GSLocalMemory::readTextureBlock rtxbP = psm.rtxbP;
|
GSLocalMemory::readTextureBlock rtxbP = psm.rtxbP;
|
||||||
|
|
||||||
uint32 pitch = (1 << m_tw) << shift;
|
u32 pitch = (1 << m_tw) << shift;
|
||||||
|
|
||||||
uint8* dst = (uint8*)m_buff + pitch * r.top;
|
u8* dst = (u8*)m_buff + pitch * r.top;
|
||||||
|
|
||||||
int block_pitch = pitch * bs.y;
|
int block_pitch = pitch * bs.y;
|
||||||
|
|
||||||
|
@ -240,10 +240,10 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
for (; bn.blkX() < right; bn.nextBlockX())
|
for (; bn.blkX() < right; bn.nextBlockX())
|
||||||
{
|
{
|
||||||
int i = (bn.blkY() << 7) + bn.blkX();
|
int i = (bn.blkY() << 7) + bn.blkX();
|
||||||
uint32 block = bn.value();
|
u32 block = bn.value();
|
||||||
|
|
||||||
uint32 row = i >> 5;
|
u32 row = i >> 5;
|
||||||
uint32 col = 1 << (i & 31);
|
u32 col = 1 << (i & 31);
|
||||||
|
|
||||||
if ((m_valid[row] & col) == 0)
|
if ((m_valid[row] & col) == 0)
|
||||||
{
|
{
|
||||||
|
@ -262,10 +262,10 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
{
|
{
|
||||||
for (; bn.blkX() < right; bn.nextBlockX())
|
for (; bn.blkX() < right; bn.nextBlockX())
|
||||||
{
|
{
|
||||||
uint32 block = bn.value();
|
u32 block = bn.value();
|
||||||
|
|
||||||
uint32 row = block >> 5;
|
u32 row = block >> 5;
|
||||||
uint32 col = 1 << (block & 31);
|
u32 col = 1 << (block & 31);
|
||||||
|
|
||||||
if ((m_valid[row] & col) == 0)
|
if ((m_valid[row] & col) == 0)
|
||||||
{
|
{
|
||||||
|
@ -291,7 +291,7 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
||||||
|
|
||||||
bool GSTextureCacheSW::Texture::Save(const std::string& fn, bool dds) const
|
bool GSTextureCacheSW::Texture::Save(const std::string& fn, bool dds) const
|
||||||
{
|
{
|
||||||
const uint32* RESTRICT clut = m_state->m_mem.m_clut;
|
const u32* RESTRICT clut = m_state->m_mem.m_clut;
|
||||||
|
|
||||||
int w = 1 << m_TEX0.TW;
|
int w = 1 << m_TEX0.TW;
|
||||||
int h = 1 << m_TEX0.TH;
|
int h = 1 << m_TEX0.TH;
|
||||||
|
@ -304,20 +304,20 @@ bool GSTextureCacheSW::Texture::Save(const std::string& fn, bool dds) const
|
||||||
{
|
{
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
|
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
|
||||||
|
|
||||||
const uint8* RESTRICT src = (uint8*)m_buff;
|
const u8* RESTRICT src = (u8*)m_buff;
|
||||||
int pitch = 1 << (m_tw + (psm.pal == 0 ? 2 : 0));
|
int pitch = 1 << (m_tw + (psm.pal == 0 ? 2 : 0));
|
||||||
|
|
||||||
for (int j = 0; j < h; j++, src += pitch, m.bits += m.pitch)
|
for (int j = 0; j < h; j++, src += pitch, m.bits += m.pitch)
|
||||||
{
|
{
|
||||||
if (psm.pal == 0)
|
if (psm.pal == 0)
|
||||||
{
|
{
|
||||||
memcpy(m.bits, src, sizeof(uint32) * w);
|
memcpy(m.bits, src, sizeof(u32) * w);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = 0; i < w; i++)
|
for (int i = 0; i < w; i++)
|
||||||
{
|
{
|
||||||
((uint32*)m.bits)[i] = clut[src[i]];
|
((u32*)m.bits)[i] = clut[src[i]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,20 +30,20 @@ public:
|
||||||
GIFRegTEX0 m_TEX0;
|
GIFRegTEX0 m_TEX0;
|
||||||
GIFRegTEXA m_TEXA;
|
GIFRegTEXA m_TEXA;
|
||||||
void* m_buff;
|
void* m_buff;
|
||||||
uint32 m_tw;
|
u32 m_tw;
|
||||||
uint32 m_age;
|
u32 m_age;
|
||||||
bool m_complete;
|
bool m_complete;
|
||||||
bool m_repeating;
|
bool m_repeating;
|
||||||
std::vector<GSVector2i>* m_p2t;
|
std::vector<GSVector2i>* m_p2t;
|
||||||
uint32 m_valid[MAX_PAGES];
|
u32 m_valid[MAX_PAGES];
|
||||||
std::array<uint16, MAX_PAGES> m_erase_it;
|
std::array<u16, MAX_PAGES> m_erase_it;
|
||||||
const uint32* RESTRICT m_sharedbits;
|
const u32* RESTRICT m_sharedbits;
|
||||||
|
|
||||||
// m_valid
|
// m_valid
|
||||||
// fast mode: each uint32 bits map to the 32 blocks of that page
|
// fast mode: each u32 bits map to the 32 blocks of that page
|
||||||
// repeating mode: 1 bpp image of the texture tiles (8x8), also having 512 elements is just a coincidence (worst case: (1024*1024)/(8*8)/(sizeof(uint32)*8))
|
// repeating mode: 1 bpp image of the texture tiles (8x8), also having 512 elements is just a coincidence (worst case: (1024*1024)/(8*8)/(sizeof(u32)*8))
|
||||||
|
|
||||||
Texture(GSState* state, uint32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
Texture(GSState* state, u32 tw0, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
||||||
virtual ~Texture();
|
virtual ~Texture();
|
||||||
|
|
||||||
bool Update(const GSVector4i& r);
|
bool Update(const GSVector4i& r);
|
||||||
|
@ -59,9 +59,9 @@ public:
|
||||||
GSTextureCacheSW(GSState* state);
|
GSTextureCacheSW(GSState* state);
|
||||||
virtual ~GSTextureCacheSW();
|
virtual ~GSTextureCacheSW();
|
||||||
|
|
||||||
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint32 tw0 = 0);
|
Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, u32 tw0 = 0);
|
||||||
|
|
||||||
void InvalidatePages(const GSOffset::PageLooper& pages, uint32 psm);
|
void InvalidatePages(const GSOffset::PageLooper& pages, u32 psm);
|
||||||
|
|
||||||
void RemoveAll();
|
void RemoveAll();
|
||||||
void IncAge();
|
void IncAge();
|
||||||
|
|
|
@ -38,8 +38,8 @@ bool GSTextureSW::Update(const GSVector4i& r, const void* data, int pitch, int l
|
||||||
|
|
||||||
if (m_data != NULL && Map(m, &r))
|
if (m_data != NULL && Map(m, &r))
|
||||||
{
|
{
|
||||||
uint8* RESTRICT src = (uint8*)data;
|
u8* RESTRICT src = (u8*)data;
|
||||||
uint8* RESTRICT dst = m.bits;
|
u8* RESTRICT dst = m.bits;
|
||||||
|
|
||||||
int rowbytes = r.width() << 2;
|
int rowbytes = r.width() << 2;
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ bool GSTextureSW::Map(GSMap& m, const GSVector4i* r, int layer)
|
||||||
{
|
{
|
||||||
if (!m_mapped.test_and_set(std::memory_order_acquire))
|
if (!m_mapped.test_and_set(std::memory_order_acquire))
|
||||||
{
|
{
|
||||||
m.bits = (uint8*)m_data + m_pitch * r2.top + (r2.left << 2);
|
m.bits = (u8*)m_data + m_pitch * r2.top + (r2.left << 2);
|
||||||
m.pitch = m_pitch;
|
m.pitch = m_pitch;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -87,5 +87,5 @@ bool GSTextureSW::Save(const std::string& fn)
|
||||||
GSPng::Format fmt = GSPng::RGB_PNG;
|
GSPng::Format fmt = GSPng::RGB_PNG;
|
||||||
#endif
|
#endif
|
||||||
int compression = theApp.GetConfigI("png_compression_level");
|
int compression = theApp.GetConfigI("png_compression_level");
|
||||||
return GSPng::Save(fmt, fn, static_cast<uint8*>(m_data), m_size.x, m_size.y, m_pitch, compression);
|
return GSPng::Save(fmt, fn, static_cast<u8*>(m_data), m_size.x, m_size.y, m_pitch, compression);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
static void swizzle(const uint8* table, uint8* dst, const uint8* src, int bpp, bool deswizzle)
|
static void swizzle(const u8* table, u8* dst, const u8* src, int bpp, bool deswizzle)
|
||||||
{
|
{
|
||||||
int pxbytes = bpp / 8;
|
int pxbytes = bpp / 8;
|
||||||
for (int i = 0; i < (256 / pxbytes); i++)
|
for (int i = 0; i < (256 / pxbytes); i++)
|
||||||
|
@ -30,21 +30,21 @@ static void swizzle(const uint8* table, uint8* dst, const uint8* src, int bpp, b
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void swizzle4(const uint16* table, uint8* dst, const uint8* src, bool deswizzle)
|
static void swizzle4(const u16* table, u8* dst, const u8* src, bool deswizzle)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 512; i++)
|
for (int i = 0; i < 512; i++)
|
||||||
{
|
{
|
||||||
int soff = (deswizzle ? table[i] : i);
|
int soff = (deswizzle ? table[i] : i);
|
||||||
int doff = (deswizzle ? i : table[i]);
|
int doff = (deswizzle ? i : table[i]);
|
||||||
int spx = src[soff >> 1] >> ((soff & 1) * 4) & 0xF;
|
int spx = src[soff >> 1] >> ((soff & 1) * 4) & 0xF;
|
||||||
uint8* dpx = &dst[doff >> 1];
|
u8* dpx = &dst[doff >> 1];
|
||||||
int dshift = (doff & 1) * 4;
|
int dshift = (doff & 1) * 4;
|
||||||
*dpx &= (0xF0 >> dshift);
|
*dpx &= (0xF0 >> dshift);
|
||||||
*dpx |= (spx << dshift);
|
*dpx |= (spx << dshift);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void swizzleH(const uint8* table, uint32* dst, const uint8* src, int bpp, int shift)
|
static void swizzleH(const u8* table, u32* dst, const u8* src, int bpp, int shift)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 64; i++)
|
for (int i = 0; i < 64; i++)
|
||||||
{
|
{
|
||||||
|
@ -58,7 +58,7 @@ static void swizzleH(const uint8* table, uint32* dst, const uint8* src, int bpp,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void expand16(uint32* dst, const uint16* src, const GIFRegTEXA& texa)
|
static void expand16(u32* dst, const u16* src, const GIFRegTEXA& texa)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 128; i++)
|
for (int i = 0; i < 128; i++)
|
||||||
{
|
{
|
||||||
|
@ -77,7 +77,7 @@ static void expand16(uint32* dst, const uint16* src, const GIFRegTEXA& texa)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void expand8(uint32* dst, const uint8* src, const uint32* palette)
|
static void expand8(u32* dst, const u8* src, const u32* palette)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 256; i++)
|
for (int i = 0; i < 256; i++)
|
||||||
{
|
{
|
||||||
|
@ -85,7 +85,7 @@ static void expand8(uint32* dst, const uint8* src, const uint32* palette)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void expand4(uint32* dst, const uint8* src, const uint32* palette)
|
static void expand4(u32* dst, const u8* src, const u32* palette)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 512; i++)
|
for (int i = 0; i < 512; i++)
|
||||||
{
|
{
|
||||||
|
@ -93,7 +93,7 @@ static void expand4(uint32* dst, const uint8* src, const uint32* palette)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void expand4P(uint8* dst, const uint8* src)
|
static void expand4P(u8* dst, const u8* src)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 512; i++)
|
for (int i = 0; i < 512; i++)
|
||||||
{
|
{
|
||||||
|
@ -101,7 +101,7 @@ static void expand4P(uint8* dst, const uint8* src)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void expandH(uint32* dst, const uint32* src, const uint32* palette, int shift, int mask)
|
static void expandH(u32* dst, const u32* src, const u32* palette, int shift, int mask)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 64; i++)
|
for (int i = 0; i < 64; i++)
|
||||||
{
|
{
|
||||||
|
@ -109,7 +109,7 @@ static void expandH(uint32* dst, const uint32* src, const uint32* palette, int s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void expandHP(uint8* dst, const uint32* src, int shift, int mask)
|
static void expandHP(u8* dst, const u32* src, int shift, int mask)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 64; i++)
|
for (int i = 0; i < 64; i++)
|
||||||
{
|
{
|
||||||
|
@ -117,7 +117,7 @@ static void expandHP(uint8* dst, const uint32* src, int shift, int mask)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string image2hex(const uint8* bin, int rows, int columns, int bpp)
|
static std::string image2hex(const u8* bin, int rows, int columns, int bpp)
|
||||||
{
|
{
|
||||||
std::string out;
|
std::string out;
|
||||||
const char* hex = "0123456789ABCDEF";
|
const char* hex = "0123456789ABCDEF";
|
||||||
|
@ -159,10 +159,10 @@ static std::string image2hex(const uint8* bin, int rows, int columns, int bpp)
|
||||||
|
|
||||||
struct TestData
|
struct TestData
|
||||||
{
|
{
|
||||||
alignas(64) uint8 block[256];
|
alignas(64) u8 block[256];
|
||||||
alignas(64) uint8 output[256 * (32 / 4)];
|
alignas(64) u8 output[256 * (32 / 4)];
|
||||||
alignas(64) uint32 clut32[256];
|
alignas(64) u32 clut32[256];
|
||||||
alignas(64) uint64 clut64[256];
|
alignas(64) u64 clut64[256];
|
||||||
|
|
||||||
/// Get some input data with pixel values counting up from 0
|
/// Get some input data with pixel values counting up from 0
|
||||||
static TestData Linear()
|
static TestData Linear()
|
||||||
|
@ -202,39 +202,39 @@ struct TestData
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static TestData swizzle(const uint8* table, TestData data, int bpp, bool deswizzle)
|
static TestData swizzle(const u8* table, TestData data, int bpp, bool deswizzle)
|
||||||
{
|
{
|
||||||
swizzle(table, data.output, data.block, bpp, deswizzle);
|
swizzle(table, data.output, data.block, bpp, deswizzle);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TestData swizzle4(const uint16* table, TestData data, bool deswizzle)
|
static TestData swizzle4(const u16* table, TestData data, bool deswizzle)
|
||||||
{
|
{
|
||||||
swizzle4(table, data.output, data.block, deswizzle);
|
swizzle4(table, data.output, data.block, deswizzle);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TestData swizzleH(const uint8* table, TestData data, int bpp, int shift)
|
static TestData swizzleH(const u8* table, TestData data, int bpp, int shift)
|
||||||
{
|
{
|
||||||
swizzleH(table, reinterpret_cast<uint32*>(data.output), data.block, bpp, shift);
|
swizzleH(table, reinterpret_cast<u32*>(data.output), data.block, bpp, shift);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TestData expand16(TestData data, const GIFRegTEXA& texa)
|
static TestData expand16(TestData data, const GIFRegTEXA& texa)
|
||||||
{
|
{
|
||||||
expand16(reinterpret_cast<uint32*>(data.output), reinterpret_cast<const uint16*>(data.block), texa);
|
expand16(reinterpret_cast<u32*>(data.output), reinterpret_cast<const u16*>(data.block), texa);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TestData expand8(TestData data)
|
static TestData expand8(TestData data)
|
||||||
{
|
{
|
||||||
expand8(reinterpret_cast<uint32*>(data.output), data.block, data.clut32);
|
expand8(reinterpret_cast<u32*>(data.output), data.block, data.clut32);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TestData expand4(TestData data)
|
static TestData expand4(TestData data)
|
||||||
{
|
{
|
||||||
expand4(reinterpret_cast<uint32*>(data.output), data.block, data.clut32);
|
expand4(reinterpret_cast<u32*>(data.output), data.block, data.clut32);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,13 +246,13 @@ static TestData expand4P(TestData data)
|
||||||
|
|
||||||
static TestData expandH(TestData data, int shift, int mask)
|
static TestData expandH(TestData data, int shift, int mask)
|
||||||
{
|
{
|
||||||
expandH(reinterpret_cast<uint32*>(data.output), reinterpret_cast<const uint32*>(data.block), data.clut32, shift, mask);
|
expandH(reinterpret_cast<u32*>(data.output), reinterpret_cast<const u32*>(data.block), data.clut32, shift, mask);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static TestData expandHP(TestData data, int shift, int mask)
|
static TestData expandHP(TestData data, int shift, int mask)
|
||||||
{
|
{
|
||||||
expandHP(data.output, reinterpret_cast<uint32*>(data.block), shift, mask);
|
expandHP(data.output, reinterpret_cast<u32*>(data.block), shift, mask);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -318,7 +318,7 @@ TEST(ReadAndExpandTest, Read16AEM)
|
||||||
runTest([](TestData data)
|
runTest([](TestData data)
|
||||||
{
|
{
|
||||||
// Actually test AEM
|
// Actually test AEM
|
||||||
uint8 idx = data.block[0] >> 1;
|
u8 idx = data.block[0] >> 1;
|
||||||
data.block[idx * 2 + 0] = 0;
|
data.block[idx * 2 + 0] = 0;
|
||||||
data.block[idx * 2 + 1] = 0;
|
data.block[idx * 2 + 1] = 0;
|
||||||
GIFRegTEXA texa = {0};
|
GIFRegTEXA texa = {0};
|
||||||
|
|
Loading…
Reference in New Issue