VertexLoader: remove non-JIT SSE code
This commit is contained in:
parent
38cb9bbfae
commit
873902b4a3
|
@ -17,8 +17,6 @@ namespace FifoAnalyzer
|
||||||
void Init()
|
void Init()
|
||||||
{
|
{
|
||||||
VertexLoader_Normal::Init();
|
VertexLoader_Normal::Init();
|
||||||
VertexLoader_Position::Init();
|
|
||||||
VertexLoader_TextCoord::Init();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 ReadFifo8(u8 *&data)
|
u8 ReadFifo8(u8 *&data)
|
||||||
|
|
|
@ -28,16 +28,6 @@
|
||||||
u8* g_video_buffer_read_ptr;
|
u8* g_video_buffer_read_ptr;
|
||||||
u8* g_vertex_manager_write_ptr;
|
u8* g_vertex_manager_write_ptr;
|
||||||
|
|
||||||
void* VertexLoader::operator new (size_t size)
|
|
||||||
{
|
|
||||||
return AllocateAlignedMemory(size, 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
void VertexLoader::operator delete (void *p)
|
|
||||||
{
|
|
||||||
FreeAlignedMemory(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void LOADERDECL PosMtx_ReadDirect_UByte(VertexLoader* loader)
|
static void LOADERDECL PosMtx_ReadDirect_UByte(VertexLoader* loader)
|
||||||
{
|
{
|
||||||
u8 posmtx = BoundingBox::posMtxIdx = DataReadU8() & 0x3f;
|
u8 posmtx = BoundingBox::posMtxIdx = DataReadU8() & 0x3f;
|
||||||
|
@ -66,15 +56,9 @@ static void LOADERDECL TexMtx_Write_Float2(VertexLoader* loader)
|
||||||
|
|
||||||
static void LOADERDECL TexMtx_Write_Float3(VertexLoader* loader)
|
static void LOADERDECL TexMtx_Write_Float3(VertexLoader* loader)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x200
|
|
||||||
__m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), loader->m_curtexmtx[loader->m_texmtxwrite++]);
|
|
||||||
_mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */));
|
|
||||||
g_vertex_manager_write_ptr += sizeof(float) * 3;
|
|
||||||
#else
|
|
||||||
DataWrite(0.f);
|
DataWrite(0.f);
|
||||||
DataWrite(0.f);
|
DataWrite(0.f);
|
||||||
DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++]));
|
DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++]));
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void LOADERDECL SkipVertex(VertexLoader* loader)
|
static void LOADERDECL SkipVertex(VertexLoader* loader)
|
||||||
|
@ -92,15 +76,13 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
||||||
: VertexLoaderBase(vtx_desc, vtx_attr)
|
: VertexLoaderBase(vtx_desc, vtx_attr)
|
||||||
{
|
{
|
||||||
VertexLoader_Normal::Init();
|
VertexLoader_Normal::Init();
|
||||||
VertexLoader_Position::Init();
|
|
||||||
VertexLoader_TextCoord::Init();
|
|
||||||
|
|
||||||
CompileVertexTranslator();
|
CompileVertexTranslator();
|
||||||
|
|
||||||
// generate frac factors
|
// generate frac factors
|
||||||
m_posScale[0] = m_posScale[1] = m_posScale[2] = m_posScale[3] = 1.0f / (1U << m_VtxAttr.PosFrac);
|
m_posScale = 1.0f / (1U << m_VtxAttr.PosFrac);
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
m_tcScale[i][0] = m_tcScale[i][1] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac);
|
m_tcScale[i] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac);
|
||||||
|
|
||||||
for (int i = 0; i < 2; i++)
|
for (int i = 0; i < 2; i++)
|
||||||
m_colElements[i] = m_VtxAttr.color[i].Elements;
|
m_colElements[i] = m_VtxAttr.color[i].Elements;
|
||||||
|
|
|
@ -18,13 +18,6 @@
|
||||||
#include "VideoCommon/VertexLoaderBase.h"
|
#include "VideoCommon/VertexLoaderBase.h"
|
||||||
#include "VideoCommon/VertexLoaderUtils.h"
|
#include "VideoCommon/VertexLoaderUtils.h"
|
||||||
|
|
||||||
#if _M_SSE >= 0x401
|
|
||||||
#include <smmintrin.h>
|
|
||||||
#include <emmintrin.h>
|
|
||||||
#elif _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
|
|
||||||
#include <tmmintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#define LOADERDECL __cdecl
|
#define LOADERDECL __cdecl
|
||||||
#else
|
#else
|
||||||
|
@ -37,11 +30,6 @@ typedef void (LOADERDECL *TPipelineFunction)(VertexLoader* loader);
|
||||||
class VertexLoader : public VertexLoaderBase
|
class VertexLoader : public VertexLoaderBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
// This class need a 16 byte alignment. As this is broken on
|
|
||||||
// MSVC right now (Dec 2014), we use custom allocation.
|
|
||||||
void* operator new (size_t size);
|
|
||||||
void operator delete (void *p);
|
|
||||||
|
|
||||||
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
|
VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr);
|
||||||
|
|
||||||
int RunVertices(int primitive, int count, DataReader src, DataReader dst) override;
|
int RunVertices(int primitive, int count, DataReader src, DataReader dst) override;
|
||||||
|
@ -49,9 +37,8 @@ public:
|
||||||
bool IsInitialized() override { return true; } // This vertex loader supports all formats
|
bool IsInitialized() override { return true; } // This vertex loader supports all formats
|
||||||
|
|
||||||
// They are used for the communication with the loader functions
|
// They are used for the communication with the loader functions
|
||||||
// Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT
|
float m_posScale;
|
||||||
GC_ALIGNED128(float m_posScale[4]);
|
float m_tcScale[8];
|
||||||
GC_ALIGNED64(float m_tcScale[8][2]);
|
|
||||||
int m_tcIndex;
|
int m_tcIndex;
|
||||||
int m_colIndex;
|
int m_colIndex;
|
||||||
int m_colElements[2];
|
int m_colElements[2];
|
||||||
|
@ -73,61 +60,3 @@ private:
|
||||||
|
|
||||||
void WriteCall(TPipelineFunction);
|
void WriteCall(TPipelineFunction);
|
||||||
};
|
};
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
|
||||||
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
|
|
||||||
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
|
|
||||||
static const __m128i kMaskSwap16to32l_3 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFF0405L, 0xFFFF0203L, 0xFFFF0001L);
|
|
||||||
static const __m128i kMaskSwap16to32l_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0203L, 0xFFFF0001L);
|
|
||||||
static const __m128i kMaskSwap16to32h_3 = _mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL);
|
|
||||||
static const __m128i kMaskSwap16to32h_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL);
|
|
||||||
static const __m128i kMask8to32l_3 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L);
|
|
||||||
static const __m128i kMask8to32l_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L);
|
|
||||||
static const __m128i kMask8to32h_3 = _mm_set_epi32(0xFFFFFFFFL, 0x02FFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL);
|
|
||||||
static const __m128i kMask8to32h_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL);
|
|
||||||
|
|
||||||
template <typename T, bool threeIn, bool threeOut>
|
|
||||||
__forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale)
|
|
||||||
{
|
|
||||||
__m128i coords, mask;
|
|
||||||
|
|
||||||
int loadBytes = sizeof(T) * (2 + threeIn);
|
|
||||||
if (loadBytes > 8)
|
|
||||||
coords = _mm_loadu_si128((__m128i*)pData);
|
|
||||||
else if (loadBytes > 4)
|
|
||||||
coords = _mm_loadl_epi64((__m128i*)pData);
|
|
||||||
else
|
|
||||||
coords = _mm_cvtsi32_si128(*(u32*)pData);
|
|
||||||
|
|
||||||
// Float case (no scaling)
|
|
||||||
if (sizeof(T) == 4)
|
|
||||||
{
|
|
||||||
coords = _mm_shuffle_epi8(coords, threeIn ? kMaskSwap32_3 : kMaskSwap32_2);
|
|
||||||
if (threeOut)
|
|
||||||
_mm_storeu_si128((__m128i*)g_vertex_manager_write_ptr, coords);
|
|
||||||
else
|
|
||||||
_mm_storel_epi64((__m128i*)g_vertex_manager_write_ptr, coords);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Byte swap, unpack, and move to high bytes for sign extend.
|
|
||||||
if (std::is_unsigned<T>::value)
|
|
||||||
mask = sizeof(T) == 2 ? (threeIn ? kMaskSwap16to32l_3 : kMaskSwap16to32l_2) : (threeIn ? kMask8to32l_3 : kMask8to32l_2);
|
|
||||||
else
|
|
||||||
mask = sizeof(T) == 2 ? (threeIn ? kMaskSwap16to32h_3 : kMaskSwap16to32h_2) : (threeIn ? kMask8to32h_3 : kMask8to32h_2);
|
|
||||||
coords = _mm_shuffle_epi8(coords, mask);
|
|
||||||
|
|
||||||
// Sign extend
|
|
||||||
if (std::is_signed<T>::value)
|
|
||||||
coords = _mm_srai_epi32(coords, 32 - sizeof(T) * 8);
|
|
||||||
|
|
||||||
__m128 out = _mm_mul_ps(_mm_cvtepi32_ps(coords), scale);
|
|
||||||
if (threeOut)
|
|
||||||
_mm_storeu_ps((float*)g_vertex_manager_write_ptr, out);
|
|
||||||
else
|
|
||||||
_mm_storel_pi((__m64*)g_vertex_manager_write_ptr, out);
|
|
||||||
}
|
|
||||||
|
|
||||||
g_vertex_manager_write_ptr += sizeof(float) * (2 + threeOut);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -6,8 +6,6 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/CPUDetect.h"
|
|
||||||
|
|
||||||
#include "VideoCommon/VertexLoader.h"
|
#include "VideoCommon/VertexLoader.h"
|
||||||
#include "VideoCommon/VertexLoader_Normal.h"
|
#include "VideoCommon/VertexLoader_Normal.h"
|
||||||
#include "VideoCommon/VertexManagerBase.h"
|
#include "VideoCommon/VertexManagerBase.h"
|
||||||
|
@ -102,63 +100,6 @@ struct Normal_Index_Indices3
|
||||||
static const int size = sizeof(I) * 3;
|
static const int size = sizeof(I) * 3;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
|
||||||
template <typename T, int N>
|
|
||||||
struct Normal_Direct_SSSE3
|
|
||||||
{
|
|
||||||
static void LOADERDECL function(VertexLoader* loader)
|
|
||||||
{
|
|
||||||
const T* pData = reinterpret_cast<const T*>(DataGetPosition());
|
|
||||||
const float frac = 1. / float(1u << (sizeof(T) * 8 - std::is_signed<T>::value - 1));
|
|
||||||
const __m128 scale = _mm_set_ps1(frac);
|
|
||||||
for (int i = 0; i < N; i++, pData += 3)
|
|
||||||
Vertex_Read_SSSE3<T, true, true>(pData, scale);
|
|
||||||
DataSkip<N * 3 * sizeof(T)>();
|
|
||||||
}
|
|
||||||
|
|
||||||
static const int size = sizeof(T) * N * 3;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename I, typename T, int N, int Offset>
|
|
||||||
__forceinline void Normal_Index_Offset_SSSE3()
|
|
||||||
{
|
|
||||||
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
|
|
||||||
|
|
||||||
auto const index = DataRead<I>();
|
|
||||||
const T* pData = (const T*)(cached_arraybases[ARRAY_NORMAL]
|
|
||||||
+ (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
|
|
||||||
const float frac = 1. / float(1u << (sizeof(T) * 8 - std::is_signed<T>::value - 1));
|
|
||||||
const __m128 scale = _mm_set_ps1(frac);
|
|
||||||
for (int i = 0; i < N; i++, pData += 3)
|
|
||||||
Vertex_Read_SSSE3<T, true, true>(pData, scale);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename I, typename T, int N>
|
|
||||||
struct Normal_Index_SSSE3
|
|
||||||
{
|
|
||||||
static void LOADERDECL function(VertexLoader* loader)
|
|
||||||
{
|
|
||||||
Normal_Index_Offset_SSSE3<I, T, N, 0>();
|
|
||||||
}
|
|
||||||
|
|
||||||
static const int size = sizeof(I);
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename I, typename T>
|
|
||||||
struct Normal_Index_Indices3_SSSE3
|
|
||||||
{
|
|
||||||
static void LOADERDECL function(VertexLoader* loader)
|
|
||||||
{
|
|
||||||
Normal_Index_Offset_SSSE3<I, T, 1, 0>();
|
|
||||||
Normal_Index_Offset_SSSE3<I, T, 1, 1>();
|
|
||||||
Normal_Index_Offset_SSSE3<I, T, 1, 2>();
|
|
||||||
}
|
|
||||||
|
|
||||||
static const int size = sizeof(I) * 3;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void VertexLoader_Normal::Init()
|
void VertexLoader_Normal::Init()
|
||||||
|
@ -231,77 +172,6 @@ void VertexLoader_Normal::Init()
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
|
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
|
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
|
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
|
||||||
if (cpu_info.bSSSE3)
|
|
||||||
{
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct_SSSE3<u8, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct_SSSE3<s8, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct_SSSE3<u16, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct_SSSE3<s16, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct_SSSE3<float, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct_SSSE3<u8, 3>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct_SSSE3<s8, 3>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct_SSSE3<u16, 3>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct_SSSE3<s16, 3>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct_SSSE3<float, 3>();
|
|
||||||
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct_SSSE3<u8, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct_SSSE3<s8, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct_SSSE3<u16, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct_SSSE3<s16, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct_SSSE3<float, 1>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct_SSSE3<u8, 3>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct_SSSE3<s8, 3>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct_SSSE3<u16, 3>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct_SSSE3<s16, 3>();
|
|
||||||
m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct_SSSE3<float, 3>();
|
|
||||||
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3<u8, u8, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3<u8, s8, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3<u8, u16, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3<u8, s16, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3<u8, float, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_SSSE3<u8, u8, 3>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index_SSSE3<u8, s8, 3>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index_SSSE3<u8, u16, 3>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index_SSSE3<u8, s16, 3>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_SSSE3<u8, float, 3>();
|
|
||||||
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3<u8, u8, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3<u8, s8, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3<u8, u16, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3<u8, s16, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3<u8, float, 1>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3_SSSE3<u8, u8>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3_SSSE3<u8, s8>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3_SSSE3<u8, u16>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3_SSSE3<u8, s16>();
|
|
||||||
m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3_SSSE3<u8, float>();
|
|
||||||
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3<u16, u8, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3<u16, s8, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3<u16, u16, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3<u16, s16, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3<u16, float, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_SSSE3<u16, u8, 3>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index_SSSE3<u16, s8, 3>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index_SSSE3<u16, u16, 3>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index_SSSE3<u16, s16, 3>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_SSSE3<u16, float, 3>();
|
|
||||||
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3<u16, u8, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3<u16, s8, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3<u16, u16, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3<u16, s16, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3<u16, float, 1>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3_SSSE3<u16, u8>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3_SSSE3<u16, s8>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3_SSSE3<u16, u16>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3_SSSE3<u16, s16>();
|
|
||||||
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3_SSSE3<u16, float>();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int VertexLoader_Normal::GetSize(u64 _type,
|
unsigned int VertexLoader_Normal::GetSize(u64 _type,
|
||||||
|
|
|
@ -5,8 +5,6 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/CPUDetect.h"
|
|
||||||
|
|
||||||
#include "VideoCommon/VertexLoader.h"
|
#include "VideoCommon/VertexLoader.h"
|
||||||
#include "VideoCommon/VertexLoader_Position.h"
|
#include "VideoCommon/VertexLoader_Position.h"
|
||||||
#include "VideoCommon/VertexManagerBase.h"
|
#include "VideoCommon/VertexManagerBase.h"
|
||||||
|
@ -28,7 +26,7 @@ template <typename T, int N>
|
||||||
void LOADERDECL Pos_ReadDirect(VertexLoader* loader)
|
void LOADERDECL Pos_ReadDirect(VertexLoader* loader)
|
||||||
{
|
{
|
||||||
static_assert(N <= 3, "N > 3 is not sane!");
|
static_assert(N <= 3, "N > 3 is not sane!");
|
||||||
auto const scale = loader->m_posScale[0];;
|
auto const scale = loader->m_posScale;
|
||||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||||
DataReader src(g_video_buffer_read_ptr, nullptr);
|
DataReader src(g_video_buffer_read_ptr, nullptr);
|
||||||
|
|
||||||
|
@ -49,7 +47,7 @@ void LOADERDECL Pos_ReadIndex(VertexLoader* loader)
|
||||||
auto const index = DataRead<I>();
|
auto const index = DataRead<I>();
|
||||||
loader->m_vertexSkip = index == std::numeric_limits<I>::max();
|
loader->m_vertexSkip = index == std::numeric_limits<I>::max();
|
||||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
|
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
|
||||||
auto const scale = loader->m_posScale[0];
|
auto const scale = loader->m_posScale;
|
||||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||||
|
|
||||||
for (int i = 0; i < 3; ++i)
|
for (int i = 0; i < 3; ++i)
|
||||||
|
@ -59,28 +57,6 @@ void LOADERDECL Pos_ReadIndex(VertexLoader* loader)
|
||||||
LOG_VTX();
|
LOG_VTX();
|
||||||
}
|
}
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
|
||||||
template <typename T, bool three>
|
|
||||||
void LOADERDECL Pos_ReadDirect_SSSE3(VertexLoader* loader)
|
|
||||||
{
|
|
||||||
const T* pData = reinterpret_cast<const T*>(DataGetPosition());
|
|
||||||
Vertex_Read_SSSE3<T, three, true>(pData, *(__m128*)loader->m_posScale);
|
|
||||||
DataSkip<(2 + three) * sizeof(T)>();
|
|
||||||
LOG_VTX();
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename I, typename T, bool three>
|
|
||||||
void LOADERDECL Pos_ReadIndex_SSSE3(VertexLoader* loader)
|
|
||||||
{
|
|
||||||
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
|
|
||||||
auto const index = DataRead<I>();
|
|
||||||
loader->m_vertexSkip = index == std::numeric_limits<I>::max();
|
|
||||||
const T* pData = (const T*)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
|
|
||||||
Vertex_Read_SSSE3<T, three, true>(pData, *(__m128*)loader->m_posScale);
|
|
||||||
LOG_VTX();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static TPipelineFunction tableReadPosition[4][8][2] = {
|
static TPipelineFunction tableReadPosition[4][8][2] = {
|
||||||
{
|
{
|
||||||
{nullptr, nullptr,},
|
{nullptr, nullptr,},
|
||||||
|
@ -127,48 +103,6 @@ static int tableReadPositionVertexSize[4][8][2] = {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
void VertexLoader_Position::Init()
|
|
||||||
{
|
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
|
||||||
if (cpu_info.bSSSE3)
|
|
||||||
{
|
|
||||||
tableReadPosition[1][0][0] = Pos_ReadDirect_SSSE3<u8, false>;
|
|
||||||
tableReadPosition[1][0][1] = Pos_ReadDirect_SSSE3<u8, true>;
|
|
||||||
tableReadPosition[1][1][0] = Pos_ReadDirect_SSSE3<s8, false>;
|
|
||||||
tableReadPosition[1][1][1] = Pos_ReadDirect_SSSE3<s8, true>;
|
|
||||||
tableReadPosition[1][2][0] = Pos_ReadDirect_SSSE3<u16, false>;
|
|
||||||
tableReadPosition[1][2][1] = Pos_ReadDirect_SSSE3<u16, true>;
|
|
||||||
tableReadPosition[1][3][0] = Pos_ReadDirect_SSSE3<s16, false>;
|
|
||||||
tableReadPosition[1][3][1] = Pos_ReadDirect_SSSE3<s16, true>;
|
|
||||||
tableReadPosition[1][4][0] = Pos_ReadDirect_SSSE3<float, false>;
|
|
||||||
tableReadPosition[1][4][1] = Pos_ReadDirect_SSSE3<float, true>;
|
|
||||||
tableReadPosition[2][0][0] = Pos_ReadIndex_SSSE3<u8, u8, false>;
|
|
||||||
tableReadPosition[2][0][1] = Pos_ReadIndex_SSSE3<u8, u8, true>;
|
|
||||||
tableReadPosition[3][0][0] = Pos_ReadIndex_SSSE3<u16, u8, false>;
|
|
||||||
tableReadPosition[3][0][1] = Pos_ReadIndex_SSSE3<u16, u8, true>;
|
|
||||||
tableReadPosition[2][1][0] = Pos_ReadIndex_SSSE3<u8, s8, false>;
|
|
||||||
tableReadPosition[2][1][1] = Pos_ReadIndex_SSSE3<u8, s8, true>;
|
|
||||||
tableReadPosition[3][1][0] = Pos_ReadIndex_SSSE3<u16, s8, false>;
|
|
||||||
tableReadPosition[3][1][1] = Pos_ReadIndex_SSSE3<u16, s8, true>;
|
|
||||||
tableReadPosition[2][2][0] = Pos_ReadIndex_SSSE3<u8, u16, false>;
|
|
||||||
tableReadPosition[2][2][1] = Pos_ReadIndex_SSSE3<u8, u16, true>;
|
|
||||||
tableReadPosition[3][2][0] = Pos_ReadIndex_SSSE3<u16, u16, false>;
|
|
||||||
tableReadPosition[3][2][1] = Pos_ReadIndex_SSSE3<u16, u16, true>;
|
|
||||||
tableReadPosition[2][3][0] = Pos_ReadIndex_SSSE3<u8, s16, false>;
|
|
||||||
tableReadPosition[2][3][1] = Pos_ReadIndex_SSSE3<u8, s16, true>;
|
|
||||||
tableReadPosition[3][3][0] = Pos_ReadIndex_SSSE3<u16, s16, false>;
|
|
||||||
tableReadPosition[3][3][1] = Pos_ReadIndex_SSSE3<u16, s16, true>;
|
|
||||||
tableReadPosition[2][4][0] = Pos_ReadIndex_SSSE3<u8, float, false>;
|
|
||||||
tableReadPosition[2][4][1] = Pos_ReadIndex_SSSE3<u8, float, true>;
|
|
||||||
tableReadPosition[3][4][0] = Pos_ReadIndex_SSSE3<u16, float, false>;
|
|
||||||
tableReadPosition[3][4][1] = Pos_ReadIndex_SSSE3<u16, float, true>;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int VertexLoader_Position::GetSize(u64 _type, unsigned int _format, unsigned int _elements)
|
unsigned int VertexLoader_Position::GetSize(u64 _type, unsigned int _format, unsigned int _elements)
|
||||||
{
|
{
|
||||||
return tableReadPositionVertexSize[_type][_format][_elements];
|
return tableReadPositionVertexSize[_type][_format][_elements];
|
||||||
|
|
|
@ -5,8 +5,6 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/CPUDetect.h"
|
|
||||||
|
|
||||||
#include "VideoCommon/VertexLoader.h"
|
#include "VideoCommon/VertexLoader.h"
|
||||||
#include "VideoCommon/VertexLoader_TextCoord.h"
|
#include "VideoCommon/VertexLoader_TextCoord.h"
|
||||||
#include "VideoCommon/VertexManagerBase.h"
|
#include "VideoCommon/VertexManagerBase.h"
|
||||||
|
@ -49,7 +47,7 @@ float TCScale(float val, float scale)
|
||||||
template <typename T, int N>
|
template <typename T, int N>
|
||||||
void LOADERDECL TexCoord_ReadDirect(VertexLoader* loader)
|
void LOADERDECL TexCoord_ReadDirect(VertexLoader* loader)
|
||||||
{
|
{
|
||||||
auto const scale = loader->m_tcScale[loader->m_tcIndex][0];
|
auto const scale = loader->m_tcScale[loader->m_tcIndex];
|
||||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||||
DataReader src(g_video_buffer_read_ptr, nullptr);
|
DataReader src(g_video_buffer_read_ptr, nullptr);
|
||||||
|
|
||||||
|
@ -71,7 +69,7 @@ void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader)
|
||||||
auto const index = DataRead<I>();
|
auto const index = DataRead<I>();
|
||||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex]
|
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex]
|
||||||
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
|
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
|
||||||
auto const scale = loader->m_tcScale[loader->m_tcIndex][0];
|
auto const scale = loader->m_tcScale[loader->m_tcIndex];
|
||||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||||
|
|
||||||
for (int i = 0; i != N; ++i)
|
for (int i = 0; i != N; ++i)
|
||||||
|
@ -82,32 +80,6 @@ void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader)
|
||||||
++loader->m_tcIndex;
|
++loader->m_tcIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
|
||||||
template <typename T>
|
|
||||||
void LOADERDECL TexCoord_ReadDirect2_SSSE3(VertexLoader* loader)
|
|
||||||
{
|
|
||||||
const T* pData = reinterpret_cast<const T*>(DataGetPosition());
|
|
||||||
__m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex]));
|
|
||||||
Vertex_Read_SSSE3<T, false, false>(pData, scale);
|
|
||||||
DataSkip<2 * sizeof(T)>();
|
|
||||||
LOG_TEX<2>();
|
|
||||||
loader->m_tcIndex++;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename I, typename T>
|
|
||||||
void LOADERDECL TexCoord_ReadIndex2_SSSE3(VertexLoader* loader)
|
|
||||||
{
|
|
||||||
static_assert(std::is_unsigned<I>::value, "Only unsigned I is sane!");
|
|
||||||
|
|
||||||
auto const index = DataRead<I>();
|
|
||||||
const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex]));
|
|
||||||
__m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex]));
|
|
||||||
Vertex_Read_SSSE3<T, false, false>(pData, scale);
|
|
||||||
LOG_TEX<2>();
|
|
||||||
loader->m_tcIndex++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static TPipelineFunction tableReadTexCoord[4][8][2] = {
|
static TPipelineFunction tableReadTexCoord[4][8][2] = {
|
||||||
{
|
{
|
||||||
{nullptr, nullptr,},
|
{nullptr, nullptr,},
|
||||||
|
@ -154,32 +126,6 @@ static int tableReadTexCoordVertexSize[4][8][2] = {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
void VertexLoader_TextCoord::Init()
|
|
||||||
{
|
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
|
||||||
if (cpu_info.bSSSE3)
|
|
||||||
{
|
|
||||||
tableReadTexCoord[1][0][1] = TexCoord_ReadDirect2_SSSE3<u8>;
|
|
||||||
tableReadTexCoord[1][1][1] = TexCoord_ReadDirect2_SSSE3<s8>;
|
|
||||||
tableReadTexCoord[1][2][1] = TexCoord_ReadDirect2_SSSE3<u16>;
|
|
||||||
tableReadTexCoord[1][3][1] = TexCoord_ReadDirect2_SSSE3<s16>;
|
|
||||||
tableReadTexCoord[1][4][1] = TexCoord_ReadDirect2_SSSE3<float>;
|
|
||||||
tableReadTexCoord[2][0][1] = TexCoord_ReadIndex2_SSSE3<u8, u8>;
|
|
||||||
tableReadTexCoord[2][1][1] = TexCoord_ReadIndex2_SSSE3<u8, s8>;
|
|
||||||
tableReadTexCoord[2][2][1] = TexCoord_ReadIndex2_SSSE3<u8, u16>;
|
|
||||||
tableReadTexCoord[2][3][1] = TexCoord_ReadIndex2_SSSE3<u8, s16>;
|
|
||||||
tableReadTexCoord[2][4][1] = TexCoord_ReadIndex2_SSSE3<u8, float>;
|
|
||||||
tableReadTexCoord[3][0][1] = TexCoord_ReadIndex2_SSSE3<u16, u8>;
|
|
||||||
tableReadTexCoord[3][1][1] = TexCoord_ReadIndex2_SSSE3<u16, s8>;
|
|
||||||
tableReadTexCoord[3][2][1] = TexCoord_ReadIndex2_SSSE3<u16, u16>;
|
|
||||||
tableReadTexCoord[3][3][1] = TexCoord_ReadIndex2_SSSE3<u16, s16>;
|
|
||||||
tableReadTexCoord[3][4][1] = TexCoord_ReadIndex2_SSSE3<u16, float>;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int VertexLoader_TextCoord::GetSize(u64 _type, unsigned int _format, unsigned int _elements)
|
unsigned int VertexLoader_TextCoord::GetSize(u64 _type, unsigned int _format, unsigned int _elements)
|
||||||
{
|
{
|
||||||
return tableReadTexCoordVertexSize[_type][_format][_elements];
|
return tableReadTexCoordVertexSize[_type][_format][_elements];
|
||||||
|
|
Loading…
Reference in New Issue