From 873902b4a3cc40b6a03bef197e54c5165f98d842 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Mon, 19 Jan 2015 18:33:08 +0100 Subject: [PATCH] VertexLoader: remove non-JIT SSE code --- Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp | 2 - Source/Core/VideoCommon/VertexLoader.cpp | 22 +-- Source/Core/VideoCommon/VertexLoader.h | 75 +--------- .../Core/VideoCommon/VertexLoader_Normal.cpp | 130 ------------------ .../VideoCommon/VertexLoader_Position.cpp | 70 +--------- .../VideoCommon/VertexLoader_TextCoord.cpp | 58 +------- 6 files changed, 8 insertions(+), 349 deletions(-) diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp index aeecca8d00..31beb60107 100644 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp @@ -17,8 +17,6 @@ namespace FifoAnalyzer void Init() { VertexLoader_Normal::Init(); - VertexLoader_Position::Init(); - VertexLoader_TextCoord::Init(); } u8 ReadFifo8(u8 *&data) diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index 1122a04f72..732f0db696 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -28,16 +28,6 @@ u8* g_video_buffer_read_ptr; u8* g_vertex_manager_write_ptr; -void* VertexLoader::operator new (size_t size) -{ - return AllocateAlignedMemory(size, 16); -} - -void VertexLoader::operator delete (void *p) -{ - FreeAlignedMemory(p); -} - static void LOADERDECL PosMtx_ReadDirect_UByte(VertexLoader* loader) { u8 posmtx = BoundingBox::posMtxIdx = DataReadU8() & 0x3f; @@ -66,15 +56,9 @@ static void LOADERDECL TexMtx_Write_Float2(VertexLoader* loader) static void LOADERDECL TexMtx_Write_Float3(VertexLoader* loader) { -#if _M_SSE >= 0x200 - __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), loader->m_curtexmtx[loader->m_texmtxwrite++]); - _mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */)); - g_vertex_manager_write_ptr += sizeof(float) * 3; -#else DataWrite(0.f); DataWrite(0.f); DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); -#endif } static void LOADERDECL SkipVertex(VertexLoader* loader) @@ -92,15 +76,13 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) : VertexLoaderBase(vtx_desc, vtx_attr) { VertexLoader_Normal::Init(); - VertexLoader_Position::Init(); - VertexLoader_TextCoord::Init(); CompileVertexTranslator(); // generate frac factors - m_posScale[0] = m_posScale[1] = m_posScale[2] = m_posScale[3] = 1.0f / (1U << m_VtxAttr.PosFrac); + m_posScale = 1.0f / (1U << m_VtxAttr.PosFrac); for (int i = 0; i < 8; i++) - m_tcScale[i][0] = m_tcScale[i][1] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac); + m_tcScale[i] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac); for (int i = 0; i < 2; i++) m_colElements[i] = m_VtxAttr.color[i].Elements; diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index a7acd5dba1..247dce8f41 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -18,13 +18,6 @@ #include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderUtils.h" -#if _M_SSE >= 0x401 -#include -#include -#elif _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__) -#include -#endif - #ifdef WIN32 #define LOADERDECL __cdecl #else @@ -37,11 +30,6 @@ typedef void (LOADERDECL *TPipelineFunction)(VertexLoader* loader); class VertexLoader : public VertexLoaderBase { public: - // This class need a 16 byte alignment. As this is broken on - // MSVC right now (Dec 2014), we use custom allocation. - void* operator new (size_t size); - void operator delete (void *p); - VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); int RunVertices(int primitive, int count, DataReader src, DataReader dst) override; @@ -49,9 +37,8 @@ public: bool IsInitialized() override { return true; } // This vertex loader supports all formats // They are used for the communication with the loader functions - // Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT - GC_ALIGNED128(float m_posScale[4]); - GC_ALIGNED64(float m_tcScale[8][2]); + float m_posScale; + float m_tcScale[8]; int m_tcIndex; int m_colIndex; int m_colElements[2]; @@ -73,61 +60,3 @@ private: void WriteCall(TPipelineFunction); }; - -#if _M_SSE >= 0x301 -static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L); -static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); -static const __m128i kMaskSwap16to32l_3 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFF0405L, 0xFFFF0203L, 0xFFFF0001L); -static const __m128i kMaskSwap16to32l_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFF0203L, 0xFFFF0001L); -static const __m128i kMaskSwap16to32h_3 = _mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL); -static const __m128i kMaskSwap16to32h_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL); -static const __m128i kMask8to32l_3 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L); -static const __m128i kMask8to32l_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L); -static const __m128i kMask8to32h_3 = _mm_set_epi32(0xFFFFFFFFL, 0x02FFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL); -static const __m128i kMask8to32h_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x01FFFFFFL, 0x00FFFFFFL); - -template -__forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale) -{ - __m128i coords, mask; - - int loadBytes = sizeof(T) * (2 + threeIn); - if (loadBytes > 8) - coords = _mm_loadu_si128((__m128i*)pData); - else if (loadBytes > 4) - coords = _mm_loadl_epi64((__m128i*)pData); - else - coords = _mm_cvtsi32_si128(*(u32*)pData); - - // Float case (no scaling) - if (sizeof(T) == 4) - { - coords = _mm_shuffle_epi8(coords, threeIn ? kMaskSwap32_3 : kMaskSwap32_2); - if (threeOut) - _mm_storeu_si128((__m128i*)g_vertex_manager_write_ptr, coords); - else - _mm_storel_epi64((__m128i*)g_vertex_manager_write_ptr, coords); - } - else - { - // Byte swap, unpack, and move to high bytes for sign extend. - if (std::is_unsigned::value) - mask = sizeof(T) == 2 ? (threeIn ? kMaskSwap16to32l_3 : kMaskSwap16to32l_2) : (threeIn ? kMask8to32l_3 : kMask8to32l_2); - else - mask = sizeof(T) == 2 ? (threeIn ? kMaskSwap16to32h_3 : kMaskSwap16to32h_2) : (threeIn ? kMask8to32h_3 : kMask8to32h_2); - coords = _mm_shuffle_epi8(coords, mask); - - // Sign extend - if (std::is_signed::value) - coords = _mm_srai_epi32(coords, 32 - sizeof(T) * 8); - - __m128 out = _mm_mul_ps(_mm_cvtepi32_ps(coords), scale); - if (threeOut) - _mm_storeu_ps((float*)g_vertex_manager_write_ptr, out); - else - _mm_storel_pi((__m64*)g_vertex_manager_write_ptr, out); - } - - g_vertex_manager_write_ptr += sizeof(float) * (2 + threeOut); -} -#endif diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index 0d794d9944..298d01ed01 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -6,8 +6,6 @@ #include #include "Common/CommonTypes.h" -#include "Common/CPUDetect.h" - #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader_Normal.h" #include "VideoCommon/VertexManagerBase.h" @@ -102,63 +100,6 @@ struct Normal_Index_Indices3 static const int size = sizeof(I) * 3; }; -#if _M_SSE >= 0x301 -template -struct Normal_Direct_SSSE3 -{ - static void LOADERDECL function(VertexLoader* loader) - { - const T* pData = reinterpret_cast(DataGetPosition()); - const float frac = 1. / float(1u << (sizeof(T) * 8 - std::is_signed::value - 1)); - const __m128 scale = _mm_set_ps1(frac); - for (int i = 0; i < N; i++, pData += 3) - Vertex_Read_SSSE3(pData, scale); - DataSkip(); - } - - static const int size = sizeof(T) * N * 3; -}; - -template -__forceinline void Normal_Index_Offset_SSSE3() -{ - static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); - - auto const index = DataRead(); - const T* pData = (const T*)(cached_arraybases[ARRAY_NORMAL] - + (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset); - const float frac = 1. / float(1u << (sizeof(T) * 8 - std::is_signed::value - 1)); - const __m128 scale = _mm_set_ps1(frac); - for (int i = 0; i < N; i++, pData += 3) - Vertex_Read_SSSE3(pData, scale); -} - -template -struct Normal_Index_SSSE3 -{ - static void LOADERDECL function(VertexLoader* loader) - { - Normal_Index_Offset_SSSE3(); - } - - static const int size = sizeof(I); -}; - -template -struct Normal_Index_Indices3_SSSE3 -{ - static void LOADERDECL function(VertexLoader* loader) - { - Normal_Index_Offset_SSSE3(); - Normal_Index_Offset_SSSE3(); - Normal_Index_Offset_SSSE3(); - } - - static const int size = sizeof(I) * 3; -}; - -#endif - } void VertexLoader_Normal::Init() @@ -231,77 +172,6 @@ void VertexLoader_Normal::Init() m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); - -#if _M_SSE >= 0x301 - if (cpu_info.bSSSE3) - { - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct_SSSE3(); - - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct_SSSE3(); - m_Table[NRM_DIRECT][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct_SSSE3(); - - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_SSSE3(); - - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3_SSSE3(); - m_Table[NRM_INDEX8][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3_SSSE3(); - - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_SSSE3(); - - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_UBYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_BYTE] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_USHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_SHORT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT][FORMAT_FLOAT] = Normal_Index_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3_SSSE3(); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3_SSSE3(); - } -#endif } unsigned int VertexLoader_Normal::GetSize(u64 _type, diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 43e56119ea..3dac5a55b1 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -5,8 +5,6 @@ #include #include "Common/CommonTypes.h" -#include "Common/CPUDetect.h" - #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader_Position.h" #include "VideoCommon/VertexManagerBase.h" @@ -28,7 +26,7 @@ template void LOADERDECL Pos_ReadDirect(VertexLoader* loader) { static_assert(N <= 3, "N > 3 is not sane!"); - auto const scale = loader->m_posScale[0];; + auto const scale = loader->m_posScale; DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr); @@ -49,7 +47,7 @@ void LOADERDECL Pos_ReadIndex(VertexLoader* loader) auto const index = DataRead(); loader->m_vertexSkip = index == std::numeric_limits::max(); auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); - auto const scale = loader->m_posScale[0]; + auto const scale = loader->m_posScale; DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i < 3; ++i) @@ -59,28 +57,6 @@ void LOADERDECL Pos_ReadIndex(VertexLoader* loader) LOG_VTX(); } -#if _M_SSE >= 0x301 -template -void LOADERDECL Pos_ReadDirect_SSSE3(VertexLoader* loader) -{ - const T* pData = reinterpret_cast(DataGetPosition()); - Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); - DataSkip<(2 + three) * sizeof(T)>(); - LOG_VTX(); -} - -template -void LOADERDECL Pos_ReadIndex_SSSE3(VertexLoader* loader) -{ - static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); - auto const index = DataRead(); - loader->m_vertexSkip = index == std::numeric_limits::max(); - const T* pData = (const T*)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); - Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); - LOG_VTX(); -} -#endif - static TPipelineFunction tableReadPosition[4][8][2] = { { {nullptr, nullptr,}, @@ -127,48 +103,6 @@ static int tableReadPositionVertexSize[4][8][2] = { }, }; - -void VertexLoader_Position::Init() -{ - -#if _M_SSE >= 0x301 - if (cpu_info.bSSSE3) - { - tableReadPosition[1][0][0] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][0][1] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][1][0] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][1][1] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][2][0] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][2][1] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][3][0] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][3][1] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][4][0] = Pos_ReadDirect_SSSE3; - tableReadPosition[1][4][1] = Pos_ReadDirect_SSSE3; - tableReadPosition[2][0][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][0][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][0][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][0][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][1][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][1][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][1][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][1][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][2][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][2][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][2][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][2][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][3][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][3][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][3][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][3][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][4][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[2][4][1] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][4][0] = Pos_ReadIndex_SSSE3; - tableReadPosition[3][4][1] = Pos_ReadIndex_SSSE3; - } -#endif - -} - unsigned int VertexLoader_Position::GetSize(u64 _type, unsigned int _format, unsigned int _elements) { return tableReadPositionVertexSize[_type][_format][_elements]; diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp index 6e807a46e8..69e8ca155e 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp @@ -5,8 +5,6 @@ #include #include "Common/CommonTypes.h" -#include "Common/CPUDetect.h" - #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader_TextCoord.h" #include "VideoCommon/VertexManagerBase.h" @@ -49,7 +47,7 @@ float TCScale(float val, float scale) template void LOADERDECL TexCoord_ReadDirect(VertexLoader* loader) { - auto const scale = loader->m_tcScale[loader->m_tcIndex][0]; + auto const scale = loader->m_tcScale[loader->m_tcIndex]; DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr); @@ -71,7 +69,7 @@ void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader) auto const index = DataRead(); auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); - auto const scale = loader->m_tcScale[loader->m_tcIndex][0]; + auto const scale = loader->m_tcScale[loader->m_tcIndex]; DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i != N; ++i) @@ -82,32 +80,6 @@ void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader) ++loader->m_tcIndex; } -#if _M_SSE >= 0x301 -template -void LOADERDECL TexCoord_ReadDirect2_SSSE3(VertexLoader* loader) -{ - const T* pData = reinterpret_cast(DataGetPosition()); - __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex])); - Vertex_Read_SSSE3(pData, scale); - DataSkip<2 * sizeof(T)>(); - LOG_TEX<2>(); - loader->m_tcIndex++; -} - -template -void LOADERDECL TexCoord_ReadIndex2_SSSE3(VertexLoader* loader) -{ - static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); - - auto const index = DataRead(); - const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); - __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex])); - Vertex_Read_SSSE3(pData, scale); - LOG_TEX<2>(); - loader->m_tcIndex++; -} -#endif - static TPipelineFunction tableReadTexCoord[4][8][2] = { { {nullptr, nullptr,}, @@ -154,32 +126,6 @@ static int tableReadTexCoordVertexSize[4][8][2] = { }, }; -void VertexLoader_TextCoord::Init() -{ - -#if _M_SSE >= 0x301 - if (cpu_info.bSSSE3) - { - tableReadTexCoord[1][0][1] = TexCoord_ReadDirect2_SSSE3; - tableReadTexCoord[1][1][1] = TexCoord_ReadDirect2_SSSE3; - tableReadTexCoord[1][2][1] = TexCoord_ReadDirect2_SSSE3; - tableReadTexCoord[1][3][1] = TexCoord_ReadDirect2_SSSE3; - tableReadTexCoord[1][4][1] = TexCoord_ReadDirect2_SSSE3; - tableReadTexCoord[2][0][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[2][1][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[2][2][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[2][3][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[2][4][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[3][0][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[3][1][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[3][2][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[3][3][1] = TexCoord_ReadIndex2_SSSE3; - tableReadTexCoord[3][4][1] = TexCoord_ReadIndex2_SSSE3; - } -#endif - -} - unsigned int VertexLoader_TextCoord::GetSize(u64 _type, unsigned int _format, unsigned int _elements) { return tableReadTexCoordVertexSize[_type][_format][_elements];