VertexLoader: Add a VertexLoader pointer to each function call

This commit is contained in:
degasus 2014-12-13 10:57:46 +01:00
parent 7edf6ec4e4
commit 7c486a8c24
9 changed files with 178 additions and 172 deletions

View File

@ -30,7 +30,7 @@ static TVtxDesc vertexDesc;
static PortableVertexDeclaration vertexDecl; static PortableVertexDeclaration vertexDecl;
// Gets the pointer to the current buffer position // Gets the pointer to the current buffer position
void LOADERDECL SetVertexBufferPosition() void LOADERDECL SetVertexBufferPosition(VertexLoader* loader)
{ {
bufferPos = g_vertex_manager_write_ptr; bufferPos = g_vertex_manager_write_ptr;
} }
@ -76,7 +76,7 @@ void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const Por
} }
// Updates the bounding box // Updates the bounding box
void LOADERDECL Update() void LOADERDECL Update(VertexLoader* loader)
{ {
if (!active) if (!active)
return; return;

View File

@ -31,8 +31,8 @@ extern u8 posMtxIdx;
// Texture matrix indexes // Texture matrix indexes
extern u8 texMtxIdx[8]; extern u8 texMtxIdx[8];
void LOADERDECL SetVertexBufferPosition(); void LOADERDECL SetVertexBufferPosition(VertexLoader* loader);
void LOADERDECL Update(); void LOADERDECL Update(VertexLoader* loader);
void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const PortableVertexDeclaration & vtxDecl); void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const PortableVertexDeclaration & vtxDecl);
// Save state // Save state

View File

@ -29,79 +29,64 @@
#define inline #define inline
#endif #endif
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
// when decoding each vertex.
static u8 s_curposmtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
static u8 s_curtexmtx[8];
static int s_texmtxwrite = 0;
static int s_texmtxread = 0;
// Vertex loaders read these. Although the scale ones should be baked into the shader.
int tcIndex;
int colIndex;
int colElements[2];
// Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT
GC_ALIGNED128(float posScale[4]);
GC_ALIGNED64(float tcScale[8][2]);
// This pointer is used as the source/dst for all fixed function loader calls // This pointer is used as the source/dst for all fixed function loader calls
u8* g_video_buffer_read_ptr; u8* g_video_buffer_read_ptr;
u8* g_vertex_manager_write_ptr; u8* g_vertex_manager_write_ptr;
// Reciprocal power-of-two scale factors: fractionTable[n] == 1 / 2^n for n in [0, 31].
// Looked up with the Frac fields of the vertex attribute format (PosFrac, texCoord[i].Frac).
// The shift is done on an unsigned value so that n == 31 does not overflow.
static const float fractionTable[32] = {
	1.0f / float(1U <<  0), 1.0f / float(1U <<  1),
	1.0f / float(1U <<  2), 1.0f / float(1U <<  3),
	1.0f / float(1U <<  4), 1.0f / float(1U <<  5),
	1.0f / float(1U <<  6), 1.0f / float(1U <<  7),
	1.0f / float(1U <<  8), 1.0f / float(1U <<  9),
	1.0f / float(1U << 10), 1.0f / float(1U << 11),
	1.0f / float(1U << 12), 1.0f / float(1U << 13),
	1.0f / float(1U << 14), 1.0f / float(1U << 15),
	1.0f / float(1U << 16), 1.0f / float(1U << 17),
	1.0f / float(1U << 18), 1.0f / float(1U << 19),
	1.0f / float(1U << 20), 1.0f / float(1U << 21),
	1.0f / float(1U << 22), 1.0f / float(1U << 23),
	1.0f / float(1U << 24), 1.0f / float(1U << 25),
	1.0f / float(1U << 26), 1.0f / float(1U << 27),
	1.0f / float(1U << 28), 1.0f / float(1U << 29),
	1.0f / float(1U << 30), 1.0f / float(1U << 31),
};
using namespace Gen; using namespace Gen;
static void LOADERDECL PosMtx_ReadDirect_UByte()
void* VertexLoader::operator new (size_t size)
{ {
BoundingBox::posMtxIdx = s_curposmtx = DataReadU8() & 0x3f; return AllocateAlignedMemory(size, 16);
PRIM_LOG("posmtx: %d, ", s_curposmtx);
} }
static void LOADERDECL PosMtx_Write() void VertexLoader::operator delete (void *p)
{
FreeAlignedMemory(p);
}
// Pipeline stage: reads one byte from the vertex stream, keeps its low 6 bits
// as the loader's current position matrix index, and mirrors that value into
// BoundingBox::posMtxIdx.
static void LOADERDECL PosMtx_ReadDirect_UByte(VertexLoader* loader)
{
	// Store on the loader first; the bounding box copy is taken from the stored value.
	loader->m_curposmtx = DataReadU8() & 0x3f;
	BoundingBox::posMtxIdx = loader->m_curposmtx;
	PRIM_LOG("posmtx: %d, ", loader->m_curposmtx);
}
static void LOADERDECL PosMtx_Write(VertexLoader* loader)
{ {
// u8, 0, 0, 0 // u8, 0, 0, 0
DataWrite<u32>(s_curposmtx); DataWrite<u32>(loader->m_curposmtx);
} }
static void LOADERDECL TexMtx_ReadDirect_UByte() static void LOADERDECL TexMtx_ReadDirect_UByte(VertexLoader* loader)
{ {
BoundingBox::texMtxIdx[s_texmtxread] = s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f; BoundingBox::texMtxIdx[loader->m_texmtxread] = loader->m_curtexmtx[loader->m_texmtxread] = DataReadU8() & 0x3f;
PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]); PRIM_LOG("texmtx%d: %d, ", loader->m_texmtxread, loader->m_curtexmtx[loader->m_texmtxread]);
s_texmtxread++; loader->m_texmtxread++;
} }
static void LOADERDECL TexMtx_Write_Float() static void LOADERDECL TexMtx_Write_Float(VertexLoader* loader)
{ {
DataWrite(float(s_curtexmtx[s_texmtxwrite++])); DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++]));
} }
static void LOADERDECL TexMtx_Write_Float2() static void LOADERDECL TexMtx_Write_Float2(VertexLoader* loader)
{ {
DataWrite(0.f); DataWrite(0.f);
DataWrite(float(s_curtexmtx[s_texmtxwrite++])); DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++]));
} }
static void LOADERDECL TexMtx_Write_Float4() static void LOADERDECL TexMtx_Write_Float4(VertexLoader* loader)
{ {
#if _M_SSE >= 0x200 #if _M_SSE >= 0x200
__m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), s_curtexmtx[s_texmtxwrite++]); __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), loader->m_curtexmtx[loader->m_texmtxwrite++]);
_mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */)); _mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */));
g_vertex_manager_write_ptr += sizeof(float) * 4; g_vertex_manager_write_ptr += sizeof(float) * 4;
#else #else
DataWrite(0.f); DataWrite(0.f);
DataWrite(0.f); DataWrite(0.f);
DataWrite(float(s_curtexmtx[s_texmtxwrite++])); DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++]));
// Just to fill out with 0. // Just to fill out with 0.
DataWrite(0.f); DataWrite(0.f);
#endif #endif
@ -123,6 +108,14 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
m_numPipelineStages = 0; m_numPipelineStages = 0;
CompileVertexTranslator(); CompileVertexTranslator();
#endif #endif
// generate frac factors
m_posScale[0] = m_posScale[1] = m_posScale[2] = m_posScale[3] = 1.0f / (1U << m_VtxAttr.PosFrac);
for (int i = 0; i < 8; i++)
m_tcScale[i][0] = m_tcScale[i][1] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac);
for (int i = 0; i < 2; i++)
m_colElements[i] = m_VtxAttr.color[i].Elements;
} }
VertexLoader::~VertexLoader() VertexLoader::~VertexLoader()
@ -143,11 +136,14 @@ void VertexLoader::CompileVertexTranslator()
m_compiledCode = GetCodePtr(); m_compiledCode = GetCodePtr();
// We only use RAX (caller saved) and RBX (callee saved). // We only use RAX (caller saved) and RBX (callee saved).
ABI_PushRegistersAndAdjustStack({RBX}, 8); ABI_PushRegistersAndAdjustStack({RBX, RBP}, 8);
// save count // save count
MOV(64, R(RBX), R(ABI_PARAM1)); MOV(64, R(RBX), R(ABI_PARAM1));
// save loader
MOV(64, R(RBP), R(ABI_PARAM2));
// Start loop here // Start loop here
const u8 *loop_start = GetCodePtr(); const u8 *loop_start = GetCodePtr();
@ -155,17 +151,17 @@ void VertexLoader::CompileVertexTranslator()
if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord || if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord ||
m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord) m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord)
{ {
WriteSetVariable(32, &tcIndex, Imm32(0)); WriteSetVariable(32, &m_tcIndex, Imm32(0));
} }
if (m_VtxDesc.Color0 || m_VtxDesc.Color1) if (m_VtxDesc.Color0 || m_VtxDesc.Color1)
{ {
WriteSetVariable(32, &colIndex, Imm32(0)); WriteSetVariable(32, &m_colIndex, Imm32(0));
} }
if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx || if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx ||
m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx) m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx)
{ {
WriteSetVariable(32, &s_texmtxwrite, Imm32(0)); WriteSetVariable(32, &m_texmtxwrite, Imm32(0));
WriteSetVariable(32, &s_texmtxread, Imm32(0)); WriteSetVariable(32, &m_texmtxread, Imm32(0));
} }
#else #else
// Reset pipeline // Reset pipeline
@ -405,7 +401,7 @@ void VertexLoader::CompileVertexTranslator()
SUB(64, R(RBX), Imm8(1)); SUB(64, R(RBX), Imm8(1));
J_CC(CC_NZ, loop_start); J_CC(CC_NZ, loop_start);
ABI_PopRegistersAndAdjustStack({RBX}, 8); ABI_PopRegistersAndAdjustStack({RBX, RBP}, 8);
RET(); RET();
#endif #endif
} }
@ -413,6 +409,7 @@ void VertexLoader::CompileVertexTranslator()
void VertexLoader::WriteCall(TPipelineFunction func) void VertexLoader::WriteCall(TPipelineFunction func)
{ {
#ifdef USE_VERTEX_LOADER_JIT #ifdef USE_VERTEX_LOADER_JIT
MOV(64, R(ABI_PARAM1), R(RBP));
ABI_CallFunction((const void*)func); ABI_CallFunction((const void*)func);
#else #else
m_PipelineStages[m_numPipelineStages++] = func; m_PipelineStages[m_numPipelineStages++] = func;
@ -441,13 +438,6 @@ void VertexLoader::SetupRunVertices(int primitive, int const count)
{ {
m_numLoadedVertices += count; m_numLoadedVertices += count;
posScale[0] = posScale[1] = posScale[2] = posScale[3] = fractionTable[m_VtxAttr.PosFrac];
if (m_native_components & VB_HAS_UVALL)
for (int i = 0; i < 8; i++)
tcScale[i][0] = tcScale[i][1] = fractionTable[m_VtxAttr.texCoord[i].Frac];
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
// Prepare bounding box // Prepare bounding box
if (!g_ActiveConfig.backend_info.bSupportsBBox) if (!g_ActiveConfig.backend_info.bSupportsBBox)
BoundingBox::Prepare(m_vat, primitive, m_VtxDesc, m_native_vtx_decl); BoundingBox::Prepare(m_vat, primitive, m_VtxDesc, m_native_vtx_decl);
@ -458,16 +448,16 @@ void VertexLoader::ConvertVertices ( int count )
#ifdef USE_VERTEX_LOADER_JIT #ifdef USE_VERTEX_LOADER_JIT
if (count > 0) if (count > 0)
{ {
((void (*)(int))(void*)m_compiledCode)(count); ((void (*)(int, VertexLoader* loader))(void*)m_compiledCode)(count, this);
} }
#else #else
for (int s = 0; s < count; s++) for (int s = 0; s < count; s++)
{ {
tcIndex = 0; m_tcIndex = 0;
colIndex = 0; m_colIndex = 0;
s_texmtxwrite = s_texmtxread = 0; m_texmtxwrite = m_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++) for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i](); m_PipelineStages[i](this);
PRIM_LOG("\n"); PRIM_LOG("\n");
} }
#endif #endif

View File

@ -36,14 +36,8 @@
#define LOADERDECL #define LOADERDECL
#endif #endif
typedef void (LOADERDECL *TPipelineFunction)(); class VertexLoader;
typedef void (LOADERDECL *TPipelineFunction)(VertexLoader* loader);
// They are used for the communication with the loader functions
extern int tcIndex;
extern int colIndex;
extern int colElements[2];
GC_ALIGNED128(extern float posScale[4]);
GC_ALIGNED64(extern float tcScale[8][2]);
// ARMTODO: This should be done in a better way // ARMTODO: This should be done in a better way
#ifndef _M_GENERIC #ifndef _M_GENERIC
@ -53,6 +47,11 @@ class VertexLoader : public VertexLoaderBase
#endif #endif
{ {
public: public:
// This class needs 16-byte alignment. As this is broken on
// MSVC right now (Dec 2014), we use custom allocation.
void* operator new (size_t size);
void operator delete (void *p);
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
~VertexLoader(); ~VertexLoader();
@ -60,6 +59,21 @@ public:
std::string GetName() const override { return "OldLoader"; } std::string GetName() const override { return "OldLoader"; }
bool IsInitialized() override { return true; } // This vertex loader supports all formats bool IsInitialized() override { return true; } // This vertex loader supports all formats
// They are used for the communication with the loader functions
// Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT
GC_ALIGNED128(float m_posScale[4]);
GC_ALIGNED64(float m_tcScale[8][2]);
int m_tcIndex;
int m_colIndex;
int m_colElements[2];
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
// when decoding each vertex.
u8 m_curposmtx;
u8 m_curtexmtx[8];
int m_texmtxwrite;
int m_texmtxread;
private: private:
#ifndef USE_VERTEX_LOADER_JIT #ifndef USE_VERTEX_LOADER_JIT
// Pipeline. // Pipeline.

View File

@ -16,46 +16,46 @@
#define ASHIFT 24 #define ASHIFT 24
#define AMASK 0xFF000000 #define AMASK 0xFF000000
__forceinline void _SetCol(u32 val) __forceinline void _SetCol(VertexLoader* loader, u32 val)
{ {
DataWrite(val); DataWrite(val);
colIndex++; loader->m_colIndex++;
} }
//color comes in format BARG in 16 bits //color comes in format BARG in 16 bits
//BARG -> AABBGGRR //BARG -> AABBGGRR
__forceinline void _SetCol4444(u16 val) __forceinline void _SetCol4444(VertexLoader* loader, u16 val)
{ {
u32 col = (val & 0xF0); // col = 000000R0; u32 col = (val & 0xF0); // col = 000000R0;
col |= (val & 0xF ) << 12; // col |= 0000G000; col |= (val & 0xF ) << 12; // col |= 0000G000;
col |= (((u32)val) & 0xF000) << 8; // col |= 00B00000; col |= (((u32)val) & 0xF000) << 8; // col |= 00B00000;
col |= (((u32)val) & 0x0F00) << 20; // col |= A0000000; col |= (((u32)val) & 0x0F00) << 20; // col |= A0000000;
col |= col >> 4; // col = A0B0G0R0 | 0A0B0G0R; col |= col >> 4; // col = A0B0G0R0 | 0A0B0G0R;
_SetCol(col); _SetCol(loader, col);
} }
//color comes in format RGBA //color comes in format RGBA
//RRRRRRGG GGGGBBBB BBAAAAAA //RRRRRRGG GGGGBBBB BBAAAAAA
__forceinline void _SetCol6666(u32 val) __forceinline void _SetCol6666(VertexLoader* loader, u32 val)
{ {
u32 col = (val >> 16) & 0xFC; u32 col = (val >> 16) & 0xFC;
col |= (val >> 2) & 0xFC00; col |= (val >> 2) & 0xFC00;
col |= (val << 12) & 0xFC0000; col |= (val << 12) & 0xFC0000;
col |= (val << 26) & 0xFC000000; col |= (val << 26) & 0xFC000000;
col |= (col >> 6) & 0x03030303; col |= (col >> 6) & 0x03030303;
_SetCol(col); _SetCol(loader, col);
} }
//color comes in RGB //color comes in RGB
//RRRRRGGG GGGBBBBB //RRRRRGGG GGGBBBBB
__forceinline void _SetCol565(u16 val) __forceinline void _SetCol565(VertexLoader* loader, u16 val)
{ {
u32 col = (val >> 8) & 0xF8; u32 col = (val >> 8) & 0xF8;
col |= (val << 5) & 0xFC00; col |= (val << 5) & 0xFC00;
col |=(((u32)val) << 19) & 0xF80000; col |=(((u32)val) << 19) & 0xF80000;
col |= (col >> 5) & 0x070007; col |= (col >> 5) & 0x070007;
col |= (col >> 6) & 0x000300; col |= (col >> 6) & 0x000300;
_SetCol(col | AMASK); _SetCol(loader, col | AMASK);
} }
__forceinline u32 _Read24(const u8 *addr) __forceinline u32 _Read24(const u8 *addr)
@ -69,29 +69,29 @@ __forceinline u32 _Read32(const u8 *addr)
} }
void LOADERDECL Color_ReadDirect_24b_888() void LOADERDECL Color_ReadDirect_24b_888(VertexLoader* loader)
{ {
_SetCol(_Read24(DataGetPosition())); _SetCol(loader, _Read24(DataGetPosition()));
DataSkip(3); DataSkip(3);
} }
void LOADERDECL Color_ReadDirect_32b_888x() void LOADERDECL Color_ReadDirect_32b_888x(VertexLoader* loader)
{ {
_SetCol(_Read24(DataGetPosition())); _SetCol(loader, _Read24(DataGetPosition()));
DataSkip(4); DataSkip(4);
} }
void LOADERDECL Color_ReadDirect_16b_565() void LOADERDECL Color_ReadDirect_16b_565(VertexLoader* loader)
{ {
_SetCol565(DataReadU16()); _SetCol565(loader, DataReadU16());
} }
void LOADERDECL Color_ReadDirect_16b_4444() void LOADERDECL Color_ReadDirect_16b_4444(VertexLoader* loader)
{ {
_SetCol4444(*(u16*)DataGetPosition()); _SetCol4444(loader, *(u16*)DataGetPosition());
DataSkip(2); DataSkip(2);
} }
void LOADERDECL Color_ReadDirect_24b_6666() void LOADERDECL Color_ReadDirect_24b_6666(VertexLoader* loader)
{ {
_SetCol6666(Common::swap32(DataGetPosition() - 1)); _SetCol6666(loader, Common::swap32(DataGetPosition() - 1));
DataSkip(3); DataSkip(3);
} }
// F|RES: i am not 100 percent sure, but the colElements seems to be important for rendering only // F|RES: i am not 100 percent sure, but the colElements seems to be important for rendering only
@ -101,77 +101,77 @@ void LOADERDECL Color_ReadDirect_24b_6666()
// else // else
// col |= 0xFF<<ASHIFT; // col |= 0xFF<<ASHIFT;
// //
void LOADERDECL Color_ReadDirect_32b_8888() void LOADERDECL Color_ReadDirect_32b_8888(VertexLoader* loader)
{ {
// TODO (mb2): check this // TODO (mb2): check this
u32 col = DataReadU32Unswapped(); u32 col = DataReadU32Unswapped();
// "kill" the alpha // "kill" the alpha
if (!colElements[colIndex]) if (!loader->m_colElements[loader->m_colIndex])
col |= 0xFF << ASHIFT; col |= 0xFF << ASHIFT;
_SetCol(col); _SetCol(loader, col);
} }
template <typename I> template <typename I>
void Color_ReadIndex_16b_565() void Color_ReadIndex_16b_565(VertexLoader* loader)
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]))); u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex])));
_SetCol565(val); _SetCol565(loader, val);
} }
template <typename I> template <typename I>
void Color_ReadIndex_24b_888() void Color_ReadIndex_24b_888(VertexLoader* loader)
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]);
_SetCol(_Read24(iAddress)); _SetCol(loader, _Read24(iAddress));
} }
template <typename I> template <typename I>
void Color_ReadIndex_32b_888x() void Color_ReadIndex_32b_888x(VertexLoader* loader)
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]);
_SetCol(_Read24(iAddress)); _SetCol(loader, _Read24(iAddress));
} }
template <typename I> template <typename I>
void Color_ReadIndex_16b_4444() void Color_ReadIndex_16b_4444(VertexLoader* loader)
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex])); u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]));
_SetCol4444(val); _SetCol4444(loader, val);
} }
template <typename I> template <typename I>
void Color_ReadIndex_24b_6666() void Color_ReadIndex_24b_6666(VertexLoader* loader)
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]) - 1; const u8* pData = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]) - 1;
u32 val = Common::swap32(pData); u32 val = Common::swap32(pData);
_SetCol6666(val); _SetCol6666(loader, val);
} }
template <typename I> template <typename I>
void Color_ReadIndex_32b_8888() void Color_ReadIndex_32b_8888(VertexLoader* loader)
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]);
_SetCol(_Read32(iAddress)); _SetCol(loader, _Read32(iAddress));
} }
void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565<u8>(); } void LOADERDECL Color_ReadIndex8_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565<u8>(loader); }
void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888<u8>(); } void LOADERDECL Color_ReadIndex8_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888<u8>(loader); }
void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x<u8>(); } void LOADERDECL Color_ReadIndex8_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x<u8>(loader); }
void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444<u8>(); } void LOADERDECL Color_ReadIndex8_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444<u8>(loader); }
void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666<u8>(); } void LOADERDECL Color_ReadIndex8_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666<u8>(loader); }
void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888<u8>(); } void LOADERDECL Color_ReadIndex8_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888<u8>(loader); }
void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565<u16>(); } void LOADERDECL Color_ReadIndex16_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565<u16>(loader); }
void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888<u16>(); } void LOADERDECL Color_ReadIndex16_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888<u16>(loader); }
void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x<u16>(); } void LOADERDECL Color_ReadIndex16_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x<u16>(loader); }
void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444<u16>(); } void LOADERDECL Color_ReadIndex16_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444<u16>(loader); }
void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666<u16>(); } void LOADERDECL Color_ReadIndex16_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666<u16>(loader); }
void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888<u16>(); } void LOADERDECL Color_ReadIndex16_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888<u16>(loader); }

View File

@ -6,23 +6,25 @@
#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/NativeVertexFormat.h"
void LOADERDECL Color_ReadDirect_24b_888(); class VertexLoader;
void LOADERDECL Color_ReadDirect_32b_888x();
void LOADERDECL Color_ReadDirect_16b_565();
void LOADERDECL Color_ReadDirect_16b_4444();
void LOADERDECL Color_ReadDirect_24b_6666();
void LOADERDECL Color_ReadDirect_32b_8888();
void LOADERDECL Color_ReadIndex8_16b_565(); void LOADERDECL Color_ReadDirect_24b_888(VertexLoader* loader);
void LOADERDECL Color_ReadIndex8_24b_888(); void LOADERDECL Color_ReadDirect_32b_888x(VertexLoader* loader);
void LOADERDECL Color_ReadIndex8_32b_888x(); void LOADERDECL Color_ReadDirect_16b_565(VertexLoader* loader);
void LOADERDECL Color_ReadIndex8_16b_4444(); void LOADERDECL Color_ReadDirect_16b_4444(VertexLoader* loader);
void LOADERDECL Color_ReadIndex8_24b_6666(); void LOADERDECL Color_ReadDirect_24b_6666(VertexLoader* loader);
void LOADERDECL Color_ReadIndex8_32b_8888(); void LOADERDECL Color_ReadDirect_32b_8888(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_16b_565(); void LOADERDECL Color_ReadIndex8_16b_565(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_24b_888(); void LOADERDECL Color_ReadIndex8_24b_888(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_32b_888x(); void LOADERDECL Color_ReadIndex8_32b_888x(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_16b_4444(); void LOADERDECL Color_ReadIndex8_16b_4444(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_24b_6666(); void LOADERDECL Color_ReadIndex8_24b_6666(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_32b_8888(); void LOADERDECL Color_ReadIndex8_32b_8888(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_16b_565(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_24b_888(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_32b_888x(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_16b_4444(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_24b_6666(VertexLoader* loader);
void LOADERDECL Color_ReadIndex16_32b_8888(VertexLoader* loader);

View File

@ -57,7 +57,7 @@ __forceinline void ReadIndirect(const T* data)
template <typename T, int N> template <typename T, int N>
struct Normal_Direct struct Normal_Direct
{ {
static void LOADERDECL function() static void LOADERDECL function(VertexLoader* loader)
{ {
auto const source = reinterpret_cast<const T*>(DataGetPosition()); auto const source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source); ReadIndirect<T, N * 3>(source);
@ -81,7 +81,7 @@ __forceinline void Normal_Index_Offset()
template <typename I, typename T, int N> template <typename I, typename T, int N>
struct Normal_Index struct Normal_Index
{ {
static void LOADERDECL function() static void LOADERDECL function(VertexLoader* loader)
{ {
Normal_Index_Offset<I, T, N, 0>(); Normal_Index_Offset<I, T, N, 0>();
} }
@ -92,7 +92,7 @@ struct Normal_Index
template <typename I, typename T> template <typename I, typename T>
struct Normal_Index_Indices3 struct Normal_Index_Indices3
{ {
static void LOADERDECL function() static void LOADERDECL function(VertexLoader* loader)
{ {
Normal_Index_Offset<I, T, 1, 0>(); Normal_Index_Offset<I, T, 1, 0>();
Normal_Index_Offset<I, T, 1, 1>(); Normal_Index_Offset<I, T, 1, 1>();
@ -106,7 +106,7 @@ struct Normal_Index_Indices3
template <typename T, int N> template <typename T, int N>
struct Normal_Direct_SSSE3 struct Normal_Direct_SSSE3
{ {
static void LOADERDECL function() static void LOADERDECL function(VertexLoader* loader)
{ {
const T* pData = reinterpret_cast<const T*>(DataGetPosition()); const T* pData = reinterpret_cast<const T*>(DataGetPosition());
const float frac = 1. / float(1u << (sizeof(T) * 8 - std::is_signed<T>::value - 1)); const float frac = 1. / float(1u << (sizeof(T) * 8 - std::is_signed<T>::value - 1));
@ -136,7 +136,7 @@ __forceinline void Normal_Index_Offset_SSSE3()
template <typename I, typename T, int N> template <typename I, typename T, int N>
struct Normal_Index_SSSE3 struct Normal_Index_SSSE3
{ {
static void LOADERDECL function() static void LOADERDECL function(VertexLoader* loader)
{ {
Normal_Index_Offset_SSSE3<I, T, N, 0>(); Normal_Index_Offset_SSSE3<I, T, N, 0>();
} }
@ -147,7 +147,7 @@ struct Normal_Index_SSSE3
template <typename I, typename T> template <typename I, typename T>
struct Normal_Index_Indices3_SSSE3 struct Normal_Index_Indices3_SSSE3
{ {
static void LOADERDECL function() static void LOADERDECL function(VertexLoader* loader)
{ {
Normal_Index_Offset_SSSE3<I, T, 1, 0>(); Normal_Index_Offset_SSSE3<I, T, 1, 0>();
Normal_Index_Offset_SSSE3<I, T, 1, 1>(); Normal_Index_Offset_SSSE3<I, T, 1, 1>();

View File

@ -71,10 +71,10 @@ float PosScale(float val, float scale)
} }
template <typename T, int N> template <typename T, int N>
void LOADERDECL Pos_ReadDirect() void LOADERDECL Pos_ReadDirect(VertexLoader* loader)
{ {
static_assert(N <= 3, "N > 3 is not sane!"); static_assert(N <= 3, "N > 3 is not sane!");
auto const scale = posScale[0]; auto const scale = loader->m_posScale[0];;
DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader dst(g_vertex_manager_write_ptr, nullptr);
DataReader src(g_video_buffer_read_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr);
@ -87,14 +87,14 @@ void LOADERDECL Pos_ReadDirect()
} }
template <typename I, typename T, int N> template <typename I, typename T, int N>
void LOADERDECL Pos_ReadIndex() void LOADERDECL Pos_ReadIndex(VertexLoader* loader)
{ {
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!"); static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
static_assert(N <= 3, "N > 3 is not sane!"); static_assert(N <= 3, "N > 3 is not sane!");
auto const index = DataRead<I>(); auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
auto const scale = posScale[0]; auto const scale = loader->m_posScale[0];
DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (int i = 0; i < 3; ++i) for (int i = 0; i < 3; ++i)
@ -106,21 +106,21 @@ void LOADERDECL Pos_ReadIndex()
#if _M_SSE >= 0x301 #if _M_SSE >= 0x301
template <typename T, bool three> template <typename T, bool three>
void LOADERDECL Pos_ReadDirect_SSSE3() void LOADERDECL Pos_ReadDirect_SSSE3(VertexLoader* loader)
{ {
const T* pData = reinterpret_cast<const T*>(DataGetPosition()); const T* pData = reinterpret_cast<const T*>(DataGetPosition());
Vertex_Read_SSSE3<T, three, true>(pData, *(__m128*)posScale); Vertex_Read_SSSE3<T, three, true>(pData, *(__m128*)loader->m_posScale);
DataSkip<(2 + three) * sizeof(T)>(); DataSkip<(2 + three) * sizeof(T)>();
LOG_VTX(); LOG_VTX();
} }
template <typename I, typename T, bool three> template <typename I, typename T, bool three>
void LOADERDECL Pos_ReadIndex_SSSE3() void LOADERDECL Pos_ReadIndex_SSSE3(VertexLoader* loader)
{ {
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!"); static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
auto const index = DataRead<I>(); auto const index = DataRead<I>();
const T* pData = (const T*)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); const T* pData = (const T*)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
Vertex_Read_SSSE3<T, three, true>(pData, *(__m128*)posScale); Vertex_Read_SSSE3<T, three, true>(pData, *(__m128*)loader->m_posScale);
LOG_VTX(); LOG_VTX();
} }
#endif #endif

View File

@ -29,9 +29,9 @@ __forceinline void LOG_TEX<2>()
// PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]); // PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
} }
static void LOADERDECL TexCoord_Read_Dummy() static void LOADERDECL TexCoord_Read_Dummy(VertexLoader* loader)
{ {
tcIndex++; loader->m_tcIndex++;
} }
template <typename T> template <typename T>
@ -47,9 +47,9 @@ float TCScale(float val, float scale)
} }
template <typename T, int N> template <typename T, int N>
void LOADERDECL TexCoord_ReadDirect() void LOADERDECL TexCoord_ReadDirect(VertexLoader* loader)
{ {
auto const scale = tcScale[tcIndex][0]; auto const scale = loader->m_tcScale[loader->m_tcIndex][0];
DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader dst(g_vertex_manager_write_ptr, nullptr);
DataReader src(g_video_buffer_read_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr);
@ -60,18 +60,18 @@ void LOADERDECL TexCoord_ReadDirect()
src.WritePointer(&g_video_buffer_read_ptr); src.WritePointer(&g_video_buffer_read_ptr);
LOG_TEX<N>(); LOG_TEX<N>();
++tcIndex; ++loader->m_tcIndex;
} }
template <typename I, typename T, int N> template <typename I, typename T, int N>
void LOADERDECL TexCoord_ReadIndex() void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader)
{ {
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!"); static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
auto const index = DataRead<I>(); auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex]
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
auto const scale = tcScale[tcIndex][0]; auto const scale = loader->m_tcScale[loader->m_tcIndex][0];
DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (int i = 0; i != N; ++i) for (int i = 0; i != N; ++i)
@ -79,32 +79,32 @@ void LOADERDECL TexCoord_ReadIndex()
dst.WritePointer(&g_vertex_manager_write_ptr); dst.WritePointer(&g_vertex_manager_write_ptr);
LOG_TEX<N>(); LOG_TEX<N>();
++tcIndex; ++loader->m_tcIndex;
} }
#if _M_SSE >= 0x301 #if _M_SSE >= 0x301
template <typename T> template <typename T>
void LOADERDECL TexCoord_ReadDirect2_SSSE3() void LOADERDECL TexCoord_ReadDirect2_SSSE3(VertexLoader* loader)
{ {
const T* pData = reinterpret_cast<const T*>(DataGetPosition()); const T* pData = reinterpret_cast<const T*>(DataGetPosition());
__m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)tcScale[tcIndex])); __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex]));
Vertex_Read_SSSE3<T, false, false>(pData, scale); Vertex_Read_SSSE3<T, false, false>(pData, scale);
DataSkip<2 * sizeof(T)>(); DataSkip<2 * sizeof(T)>();
LOG_TEX<2>(); LOG_TEX<2>();
tcIndex++; loader->m_tcIndex++;
} }
template <typename I, typename T> template <typename I, typename T>
void LOADERDECL TexCoord_ReadIndex2_SSSE3() void LOADERDECL TexCoord_ReadIndex2_SSSE3(VertexLoader* loader)
{ {
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!"); static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
auto const index = DataRead<I>(); auto const index = DataRead<I>();
const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
__m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)tcScale[tcIndex])); __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex]));
Vertex_Read_SSSE3<T, false, false>(pData, scale); Vertex_Read_SSSE3<T, false, false>(pData, scale);
LOG_TEX<2>(); LOG_TEX<2>();
tcIndex++; loader->m_tcIndex++;
} }
#endif #endif