Merge pull request #2192 from Tilka/sse2
VertexLoaderX64: support SSE2 as a fallback
This commit is contained in:
commit
39c41f5c70
|
@ -1638,6 +1638,7 @@ void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, ar
|
||||||
void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
|
void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
|
||||||
void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
|
void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
|
||||||
void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);}
|
void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);}
|
||||||
|
void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6C, dest, arg);}
|
||||||
|
|
||||||
void XEmitter::PSRLW(X64Reg reg, int shift)
|
void XEmitter::PSRLW(X64Reg reg, int shift)
|
||||||
{
|
{
|
||||||
|
|
|
@ -680,6 +680,7 @@ public:
|
||||||
void PUNPCKLBW(X64Reg dest, const OpArg &arg);
|
void PUNPCKLBW(X64Reg dest, const OpArg &arg);
|
||||||
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
|
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
|
||||||
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
|
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
|
||||||
|
void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
|
||||||
|
|
||||||
void PTEST(X64Reg dest, OpArg arg);
|
void PTEST(X64Reg dest, OpArg arg);
|
||||||
void PAND(X64Reg dest, OpArg arg);
|
void PAND(X64Reg dest, OpArg arg);
|
||||||
|
|
|
@ -100,16 +100,17 @@ static T ReadNormalized(I value)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, bool swap = false>
|
template <typename T, bool swap = false>
|
||||||
static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& format, int base_component, int max_components, bool reverse)
|
static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& format, int base_component, int components, bool reverse)
|
||||||
{
|
{
|
||||||
if (format.enable)
|
if (format.enable)
|
||||||
{
|
{
|
||||||
src.Skip(format.offset);
|
src.Skip(format.offset);
|
||||||
src.Skip(base_component * (1<<(format.type>>1)));
|
src.Skip(base_component * (1<<(format.type>>1)));
|
||||||
|
|
||||||
for (int i = 0; i < std::min(format.components - base_component, max_components); i++)
|
int i;
|
||||||
|
for (i = 0; i < std::min(format.components - base_component, components); i++)
|
||||||
{
|
{
|
||||||
int i_dst = reverse ? max_components - i - 1 : i;
|
int i_dst = reverse ? components - i - 1 : i;
|
||||||
switch (format.type)
|
switch (format.type)
|
||||||
{
|
{
|
||||||
case VAR_UNSIGNED_BYTE:
|
case VAR_UNSIGNED_BYTE:
|
||||||
|
@ -131,6 +132,11 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f
|
||||||
|
|
||||||
_assert_msg_(VIDEO, !format.integer || format.type != VAR_FLOAT, "only non-float values are allowed to be streamed as integer");
|
_assert_msg_(VIDEO, !format.integer || format.type != VAR_FLOAT, "only non-float values are allowed to be streamed as integer");
|
||||||
}
|
}
|
||||||
|
for (; i < components; i++)
|
||||||
|
{
|
||||||
|
int i_dst = reverse ? components - i - 1 : i;
|
||||||
|
dst[i_dst] = i == 3;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -131,12 +131,13 @@ void VertexLoader::CompileVertexTranslator()
|
||||||
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
|
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
|
||||||
|
|
||||||
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
|
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
|
||||||
m_native_vtx_decl.position.components = 3;
|
int pos_elements = m_VtxAttr.PosElements + 2;
|
||||||
|
m_native_vtx_decl.position.components = pos_elements;
|
||||||
m_native_vtx_decl.position.enable = true;
|
m_native_vtx_decl.position.enable = true;
|
||||||
m_native_vtx_decl.position.offset = nat_offset;
|
m_native_vtx_decl.position.offset = nat_offset;
|
||||||
m_native_vtx_decl.position.type = VAR_FLOAT;
|
m_native_vtx_decl.position.type = VAR_FLOAT;
|
||||||
m_native_vtx_decl.position.integer = false;
|
m_native_vtx_decl.position.integer = false;
|
||||||
nat_offset += 12;
|
nat_offset += pos_elements * sizeof(float);
|
||||||
|
|
||||||
// Normals
|
// Normals
|
||||||
if (m_VtxDesc.Normal != NOT_PRESENT)
|
if (m_VtxDesc.Normal != NOT_PRESENT)
|
||||||
|
|
|
@ -370,8 +370,9 @@ void VertexLoaderARM64::GenerateVertexLoader()
|
||||||
load_size <<= 3;
|
load_size <<= 3;
|
||||||
|
|
||||||
s32 offset = GetAddressImm(ARRAY_POSITION, m_VtxDesc.Position, EncodeRegTo64(scratch1_reg), load_size);
|
s32 offset = GetAddressImm(ARRAY_POSITION, m_VtxDesc.Position, EncodeRegTo64(scratch1_reg), load_size);
|
||||||
ReadVertex(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements + 2, 3,
|
int pos_elements = m_VtxAttr.PosElements + 2;
|
||||||
m_VtxAttr.ByteDequant, m_VtxAttr.PosFrac, &m_native_vtx_decl.position, offset);
|
ReadVertex(m_VtxDesc.Position, m_VtxAttr.PosFormat, pos_elements, pos_elements,
|
||||||
|
m_VtxAttr.ByteDequant, m_VtxAttr.PosFrac, &m_native_vtx_decl.position, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_VtxDesc.Normal)
|
if (m_VtxDesc.Normal)
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
#include "Common/BitSet.h"
|
||||||
#include "Common/CPUDetect.h"
|
#include "Common/CPUDetect.h"
|
||||||
#include "Common/Intrinsics.h"
|
#include "Common/Intrinsics.h"
|
||||||
#include "Common/JitRegister.h"
|
#include "Common/JitRegister.h"
|
||||||
|
@ -6,8 +7,6 @@
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
|
|
||||||
#define VERTEX_LOADER_REGS {XMM0+16}
|
|
||||||
|
|
||||||
static const X64Reg src_reg = ABI_PARAM1;
|
static const X64Reg src_reg = ABI_PARAM1;
|
||||||
static const X64Reg dst_reg = ABI_PARAM2;
|
static const X64Reg dst_reg = ABI_PARAM2;
|
||||||
static const X64Reg scratch1 = RAX;
|
static const X64Reg scratch1 = RAX;
|
||||||
|
@ -66,7 +65,7 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, u64 attribute)
|
||||||
|
|
||||||
int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format)
|
int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format)
|
||||||
{
|
{
|
||||||
static const __m128i shuffle_lut[5][3] = {
|
static const __m128i shuffle_lut[4][3] = {
|
||||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L), // 1x u8
|
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L), // 1x u8
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L), // 2x u8
|
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L), // 2x u8
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)}, // 3x u8
|
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)}, // 3x u8
|
||||||
|
@ -79,9 +78,6 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
|
||||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x0001FFFFL), // 1x s16
|
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x0001FFFFL), // 1x s16
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL), // 2x s16
|
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL), // 2x s16
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL)}, // 3x s16
|
_mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL)}, // 3x s16
|
||||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L), // 1x float
|
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L), // 2x float
|
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)}, // 3x float
|
|
||||||
};
|
};
|
||||||
static const __m128 scale_factors[32] = {
|
static const __m128 scale_factors[32] = {
|
||||||
_mm_set_ps1(1./(1u<< 0)), _mm_set_ps1(1./(1u<< 1)), _mm_set_ps1(1./(1u<< 2)), _mm_set_ps1(1./(1u<< 3)),
|
_mm_set_ps1(1./(1u<< 0)), _mm_set_ps1(1./(1u<< 1)), _mm_set_ps1(1./(1u<< 2)), _mm_set_ps1(1./(1u<< 3)),
|
||||||
|
@ -98,47 +94,109 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
|
||||||
|
|
||||||
int elem_size = 1 << (format / 2);
|
int elem_size = 1 << (format / 2);
|
||||||
int load_bytes = elem_size * count_in;
|
int load_bytes = elem_size * count_in;
|
||||||
if (load_bytes > 8)
|
|
||||||
MOVDQU(coords, data);
|
|
||||||
else if (load_bytes > 4)
|
|
||||||
MOVQ_xmm(coords, data);
|
|
||||||
else
|
|
||||||
MOVD_xmm(coords, data);
|
|
||||||
|
|
||||||
PSHUFB(coords, M(&shuffle_lut[format][count_in - 1]));
|
|
||||||
|
|
||||||
if (format != FORMAT_FLOAT)
|
|
||||||
{
|
|
||||||
// Sign extend
|
|
||||||
if (format == FORMAT_BYTE)
|
|
||||||
PSRAD(coords, 24);
|
|
||||||
if (format == FORMAT_SHORT)
|
|
||||||
PSRAD(coords, 16);
|
|
||||||
|
|
||||||
CVTDQ2PS(coords, R(coords));
|
|
||||||
|
|
||||||
if (dequantize && scaling_exponent)
|
|
||||||
MULPS(coords, M(&scale_factors[scaling_exponent]));
|
|
||||||
}
|
|
||||||
|
|
||||||
OpArg dest = MDisp(dst_reg, m_dst_ofs);
|
OpArg dest = MDisp(dst_reg, m_dst_ofs);
|
||||||
switch (count_out)
|
|
||||||
{
|
|
||||||
case 1: MOVSS(dest, coords); break;
|
|
||||||
case 2: MOVLPS(dest, coords); break;
|
|
||||||
case 3: MOVUPS(dest, coords); break;
|
|
||||||
}
|
|
||||||
|
|
||||||
native_format->components = count_out;
|
native_format->components = count_out;
|
||||||
native_format->enable = true;
|
native_format->enable = true;
|
||||||
native_format->offset = m_dst_ofs;
|
native_format->offset = m_dst_ofs;
|
||||||
native_format->type = VAR_FLOAT;
|
native_format->type = VAR_FLOAT;
|
||||||
native_format->integer = false;
|
native_format->integer = false;
|
||||||
|
|
||||||
m_dst_ofs += sizeof(float) * count_out;
|
m_dst_ofs += sizeof(float) * count_out;
|
||||||
|
|
||||||
if (attribute == DIRECT)
|
if (attribute == DIRECT)
|
||||||
m_src_ofs += load_bytes;
|
m_src_ofs += load_bytes;
|
||||||
|
|
||||||
|
if (format == FORMAT_FLOAT)
|
||||||
|
{
|
||||||
|
// Floats don't need to be scaled or converted,
|
||||||
|
// so we can just load/swap/store them directly
|
||||||
|
// and return early.
|
||||||
|
for (int i = 0; i < count_in; i++)
|
||||||
|
{
|
||||||
|
LoadAndSwap(32, scratch3, data);
|
||||||
|
MOV(32, dest, R(scratch3));
|
||||||
|
data.offset += sizeof(float);
|
||||||
|
dest.offset += sizeof(float);
|
||||||
|
}
|
||||||
|
return load_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cpu_info.bSSSE3)
|
||||||
|
{
|
||||||
|
if (load_bytes > 8)
|
||||||
|
MOVDQU(coords, data);
|
||||||
|
else if (load_bytes > 4)
|
||||||
|
MOVQ_xmm(coords, data);
|
||||||
|
else
|
||||||
|
MOVD_xmm(coords, data);
|
||||||
|
|
||||||
|
PSHUFB(coords, M(&shuffle_lut[format][count_in - 1]));
|
||||||
|
|
||||||
|
// Sign-extend.
|
||||||
|
if (format == FORMAT_BYTE)
|
||||||
|
PSRAD(coords, 24);
|
||||||
|
if (format == FORMAT_SHORT)
|
||||||
|
PSRAD(coords, 16);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// SSE2
|
||||||
|
X64Reg temp = XMM1;
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case FORMAT_UBYTE:
|
||||||
|
MOVD_xmm(coords, data);
|
||||||
|
PXOR(temp, R(temp));
|
||||||
|
PUNPCKLBW(coords, R(temp));
|
||||||
|
PUNPCKLWD(coords, R(temp));
|
||||||
|
break;
|
||||||
|
case FORMAT_BYTE:
|
||||||
|
MOVD_xmm(coords, data);
|
||||||
|
PUNPCKLBW(coords, R(coords));
|
||||||
|
PUNPCKLWD(coords, R(coords));
|
||||||
|
PSRAD(coords, 24);
|
||||||
|
break;
|
||||||
|
case FORMAT_USHORT:
|
||||||
|
case FORMAT_SHORT:
|
||||||
|
switch (count_in)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
LoadAndSwap(32, scratch3, data);
|
||||||
|
MOVD_xmm(coords, R(scratch3)); // ......X.
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
LoadAndSwap(32, scratch3, data);
|
||||||
|
MOVD_xmm(coords, R(scratch3)); // ......XY
|
||||||
|
PSHUFLW(coords, R(coords), 0x24); // ....Y.X.
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
LoadAndSwap(64, scratch3, data);
|
||||||
|
MOVQ_xmm(coords, R(scratch3)); // ....XYZ.
|
||||||
|
PUNPCKLQDQ(coords, R(coords)); // ..Z.XYZ.
|
||||||
|
PSHUFLW(coords, R(coords), 0xAC); // ..Z.Y.X.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (format == FORMAT_SHORT)
|
||||||
|
PSRAD(coords, 16);
|
||||||
|
else
|
||||||
|
PSRLD(coords, 16);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CVTDQ2PS(coords, R(coords));
|
||||||
|
|
||||||
|
if (dequantize && scaling_exponent)
|
||||||
|
MULPS(coords, M(&scale_factors[scaling_exponent]));
|
||||||
|
|
||||||
|
switch (count_out)
|
||||||
|
{
|
||||||
|
case 1: MOVSS(dest, coords); break;
|
||||||
|
case 2: MOVLPS(dest, coords); break;
|
||||||
|
case 3: MOVUPS(dest, coords); break;
|
||||||
|
}
|
||||||
|
|
||||||
return load_bytes;
|
return load_bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -290,7 +348,10 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
|
||||||
|
|
||||||
void VertexLoaderX64::GenerateVertexLoader()
|
void VertexLoaderX64::GenerateVertexLoader()
|
||||||
{
|
{
|
||||||
ABI_PushRegistersAndAdjustStack(VERTEX_LOADER_REGS, 8);
|
BitSet32 xmm_regs;
|
||||||
|
xmm_regs[XMM0+16] = true;
|
||||||
|
xmm_regs[XMM1+16] = !cpu_info.bSSSE3;
|
||||||
|
ABI_PushRegistersAndAdjustStack(xmm_regs, 8);
|
||||||
|
|
||||||
// Backup count since we're going to count it down.
|
// Backup count since we're going to count it down.
|
||||||
PUSH(32, R(ABI_PARAM3));
|
PUSH(32, R(ABI_PARAM3));
|
||||||
|
@ -332,7 +393,8 @@ void VertexLoaderX64::GenerateVertexLoader()
|
||||||
}
|
}
|
||||||
|
|
||||||
OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.Position);
|
OpArg data = GetVertexAddr(ARRAY_POSITION, m_VtxDesc.Position);
|
||||||
ReadVertex(data, m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements + 2, 3,
|
int pos_elements = 2 + m_VtxAttr.PosElements;
|
||||||
|
ReadVertex(data, m_VtxDesc.Position, m_VtxAttr.PosFormat, pos_elements, pos_elements,
|
||||||
m_VtxAttr.ByteDequant, m_VtxAttr.PosFrac, &m_native_vtx_decl.position);
|
m_VtxAttr.ByteDequant, m_VtxAttr.PosFrac, &m_native_vtx_decl.position);
|
||||||
|
|
||||||
if (m_VtxDesc.Normal)
|
if (m_VtxDesc.Normal)
|
||||||
|
@ -408,7 +470,7 @@ void VertexLoaderX64::GenerateVertexLoader()
|
||||||
m_native_vtx_decl.texcoords[i].offset = m_dst_ofs;
|
m_native_vtx_decl.texcoords[i].offset = m_dst_ofs;
|
||||||
PXOR(XMM0, R(XMM0));
|
PXOR(XMM0, R(XMM0));
|
||||||
CVTSI2SS(XMM0, R(scratch1));
|
CVTSI2SS(XMM0, R(scratch1));
|
||||||
SHUFPS(XMM0, R(XMM0), 0x45);
|
SHUFPS(XMM0, R(XMM0), 0x45); // 000X -> 0X00
|
||||||
MOVUPS(MDisp(dst_reg, m_dst_ofs), XMM0);
|
MOVUPS(MDisp(dst_reg, m_dst_ofs), XMM0);
|
||||||
m_dst_ofs += sizeof(float) * 3;
|
m_dst_ofs += sizeof(float) * 3;
|
||||||
}
|
}
|
||||||
|
@ -426,7 +488,7 @@ void VertexLoaderX64::GenerateVertexLoader()
|
||||||
// Get the original count.
|
// Get the original count.
|
||||||
POP(32, R(ABI_RETURN));
|
POP(32, R(ABI_RETURN));
|
||||||
|
|
||||||
ABI_PopRegistersAndAdjustStack(VERTEX_LOADER_REGS, 8);
|
ABI_PopRegistersAndAdjustStack(xmm_regs, 8);
|
||||||
|
|
||||||
if (m_VtxDesc.Position & MASK_INDEXED)
|
if (m_VtxDesc.Position & MASK_INDEXED)
|
||||||
{
|
{
|
||||||
|
@ -446,12 +508,6 @@ void VertexLoaderX64::GenerateVertexLoader()
|
||||||
m_native_vtx_decl.stride = m_dst_ofs;
|
m_native_vtx_decl.stride = m_dst_ofs;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VertexLoaderX64::IsInitialized()
|
|
||||||
{
|
|
||||||
// Uses PSHUFB.
|
|
||||||
return cpu_info.bSSSE3;
|
|
||||||
}
|
|
||||||
|
|
||||||
int VertexLoaderX64::RunVertices(DataReader src, DataReader dst, int count, int primitive)
|
int VertexLoaderX64::RunVertices(DataReader src, DataReader dst, int count, int primitive)
|
||||||
{
|
{
|
||||||
m_numLoadedVertices += count;
|
m_numLoadedVertices += count;
|
||||||
|
|
|
@ -8,7 +8,7 @@ public:
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::string GetName() const override { return "VertexLoaderX64"; }
|
std::string GetName() const override { return "VertexLoaderX64"; }
|
||||||
bool IsInitialized() override;
|
bool IsInitialized() override { return true; }
|
||||||
int RunVertices(DataReader src, DataReader dst, int count, int primitive) override;
|
int RunVertices(DataReader src, DataReader dst, int count, int primitive) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -30,8 +30,8 @@ void LOADERDECL Pos_ReadDirect(VertexLoader* loader)
|
||||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||||
DataReader src(g_video_buffer_read_ptr, nullptr);
|
DataReader src(g_video_buffer_read_ptr, nullptr);
|
||||||
|
|
||||||
for (int i = 0; i < 3; ++i)
|
for (int i = 0; i < N; ++i)
|
||||||
dst.Write(i < N ? PosScale(src.Read<T>(), scale) : 0.f);
|
dst.Write(PosScale(src.Read<T>(), scale));
|
||||||
|
|
||||||
g_vertex_manager_write_ptr = dst.GetPointer();
|
g_vertex_manager_write_ptr = dst.GetPointer();
|
||||||
g_video_buffer_read_ptr = src.GetPointer();
|
g_video_buffer_read_ptr = src.GetPointer();
|
||||||
|
@ -50,8 +50,8 @@ void LOADERDECL Pos_ReadIndex(VertexLoader* loader)
|
||||||
auto const scale = loader->m_posScale;
|
auto const scale = loader->m_posScale;
|
||||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||||
|
|
||||||
for (int i = 0; i < 3; ++i)
|
for (int i = 0; i < N; ++i)
|
||||||
dst.Write(i < N ? PosScale(Common::FromBigEndian(data[i]), scale) : 0.f);
|
dst.Write(PosScale(Common::FromBigEndian(data[i]), scale));
|
||||||
|
|
||||||
g_vertex_manager_write_ptr = dst.GetPointer();
|
g_vertex_manager_write_ptr = dst.GetPointer();
|
||||||
LOG_VTX();
|
LOG_VTX();
|
||||||
|
|
|
@ -810,6 +810,7 @@ TWO_OP_SSE_TEST(PACKUSWB, "dqword")
|
||||||
TWO_OP_SSE_TEST(PUNPCKLBW, "dqword")
|
TWO_OP_SSE_TEST(PUNPCKLBW, "dqword")
|
||||||
TWO_OP_SSE_TEST(PUNPCKLWD, "dqword")
|
TWO_OP_SSE_TEST(PUNPCKLWD, "dqword")
|
||||||
TWO_OP_SSE_TEST(PUNPCKLDQ, "dqword")
|
TWO_OP_SSE_TEST(PUNPCKLDQ, "dqword")
|
||||||
|
TWO_OP_SSE_TEST(PUNPCKLQDQ, "dqword")
|
||||||
|
|
||||||
TWO_OP_SSE_TEST(PTEST, "dqword")
|
TWO_OP_SSE_TEST(PTEST, "dqword")
|
||||||
TWO_OP_SSE_TEST(PAND, "dqword")
|
TWO_OP_SSE_TEST(PAND, "dqword")
|
||||||
|
|
|
@ -1,13 +1,18 @@
|
||||||
|
#include <limits>
|
||||||
|
#include <memory>
|
||||||
|
#include <tuple>
|
||||||
|
#include <type_traits>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
#include "Common/Common.h"
|
|
||||||
#include "VideoCommon/DataReader.h"
|
|
||||||
#include "VideoCommon/VertexLoaderBase.h"
|
|
||||||
|
|
||||||
// Needs to be included later because it defines a TEST macro that conflicts
|
|
||||||
// with a TEST method definition in x64Emitter.h.
|
|
||||||
#include <gtest/gtest.h> // NOLINT
|
#include <gtest/gtest.h> // NOLINT
|
||||||
|
|
||||||
|
#include "Common/Common.h"
|
||||||
|
#include "Common/MathUtil.h"
|
||||||
|
#include "VideoCommon/CPMemory.h"
|
||||||
|
#include "VideoCommon/DataReader.h"
|
||||||
|
#include "VideoCommon/OpcodeDecoding.h"
|
||||||
|
#include "VideoCommon/VertexLoaderBase.h"
|
||||||
|
|
||||||
TEST(VertexLoaderUID, UniqueEnough)
|
TEST(VertexLoaderUID, UniqueEnough)
|
||||||
{
|
{
|
||||||
std::unordered_set<VertexLoaderUID> uids;
|
std::unordered_set<VertexLoaderUID> uids;
|
||||||
|
@ -38,181 +43,207 @@ protected:
|
||||||
|
|
||||||
void SetUp() override
|
void SetUp() override
|
||||||
{
|
{
|
||||||
memset(&input_memory[0], 0, sizeof(input_memory));
|
memset(input_memory, 0, sizeof(input_memory));
|
||||||
memset(&output_memory[0], 0, sizeof(input_memory));
|
memset(output_memory, 0xFF, sizeof(input_memory));
|
||||||
|
|
||||||
memset(&m_vtx_desc, 0, sizeof(m_vtx_desc));
|
memset(&m_vtx_desc, 0, sizeof(m_vtx_desc));
|
||||||
memset(&m_vtx_attr, 0, sizeof(m_vtx_attr));
|
memset(&m_vtx_attr, 0, sizeof(m_vtx_attr));
|
||||||
|
|
||||||
|
m_loader = nullptr;
|
||||||
|
|
||||||
ResetPointers();
|
ResetPointers();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pushes a value to the input stream.
|
void CreateAndCheckSizes(size_t input_size, size_t output_size)
|
||||||
|
{
|
||||||
|
m_loader.reset(VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr));
|
||||||
|
ASSERT_EQ((int)input_size, m_loader->m_VertexSize);
|
||||||
|
ASSERT_EQ((int)output_size, m_loader->m_native_vtx_decl.stride);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void Input(T val)
|
void Input(T val)
|
||||||
{
|
{
|
||||||
// Converts *to* big endian, not from.
|
// Write swapped.
|
||||||
*(T*)(&input_memory[m_input_pos]) = Common::FromBigEndian(val);
|
m_src.Write<T, true>(val);
|
||||||
m_input_pos += sizeof(val);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads a value from the output stream.
|
void ExpectOut(float val)
|
||||||
template <typename T>
|
|
||||||
T Output()
|
|
||||||
{
|
{
|
||||||
T out = *(T*)&output_memory[m_output_pos];
|
// Read unswapped.
|
||||||
m_output_pos += sizeof(out);
|
MathUtil::IntFloat expected(val), actual(m_dst.Read<float, false>());
|
||||||
return out;
|
if (!actual.f || actual.f != actual.f)
|
||||||
|
EXPECT_EQ(expected.i, actual.i);
|
||||||
|
else
|
||||||
|
EXPECT_EQ(expected.f, actual.f);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Combination of EXPECT_EQ and Output.
|
void RunVertices(int count, int expected_count = -1)
|
||||||
template <typename T>
|
|
||||||
void ExpectOut(T val)
|
|
||||||
{
|
{
|
||||||
EXPECT_EQ(val, Output<T>());
|
if (expected_count == -1)
|
||||||
|
expected_count = count;
|
||||||
|
ResetPointers();
|
||||||
|
int actual_count = m_loader->RunVertices(m_src, m_dst, count, GX_DRAW_POINTS);
|
||||||
|
EXPECT_EQ(actual_count, expected_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ResetPointers()
|
void ResetPointers()
|
||||||
{
|
{
|
||||||
m_input_pos = m_output_pos = 0;
|
m_src = DataReader(input_memory, input_memory + sizeof(input_memory));
|
||||||
src = DataReader(input_memory, input_memory + sizeof(input_memory));
|
m_dst = DataReader(output_memory, output_memory + sizeof(output_memory));
|
||||||
dst = DataReader(output_memory, output_memory + sizeof(output_memory));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 m_input_pos, m_output_pos;
|
DataReader m_src;
|
||||||
DataReader src;
|
DataReader m_dst;
|
||||||
DataReader dst;
|
|
||||||
|
|
||||||
TVtxDesc m_vtx_desc;
|
TVtxDesc m_vtx_desc;
|
||||||
VAT m_vtx_attr;
|
VAT m_vtx_attr;
|
||||||
|
std::unique_ptr<VertexLoaderBase> m_loader;
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(VertexLoaderTest, PositionDirectFloatXYZ)
|
class VertexLoaderParamTest : public VertexLoaderTest, public ::testing::WithParamInterface<std::tuple<int, int, int, int>> {};
|
||||||
|
extern int gtest_AllCombinationsVertexLoaderParamTest_dummy_;
|
||||||
|
INSTANTIATE_TEST_CASE_P(
|
||||||
|
AllCombinations, VertexLoaderParamTest,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::Values(DIRECT, INDEX8, INDEX16),
|
||||||
|
::testing::Values(FORMAT_UBYTE, FORMAT_BYTE, FORMAT_USHORT, FORMAT_SHORT, FORMAT_FLOAT),
|
||||||
|
::testing::Values(0, 1), // elements
|
||||||
|
::testing::Values(0, 1, 31) // frac
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TEST_P(VertexLoaderParamTest, PositionAll)
|
||||||
{
|
{
|
||||||
m_vtx_desc.Position = 1; // Direct
|
int addr, format, elements, frac;
|
||||||
m_vtx_attr.g0.PosElements = 1; // XYZ
|
std::tie(addr, format, elements, frac) = GetParam();
|
||||||
m_vtx_attr.g0.PosFormat = 4; // Float
|
this->m_vtx_desc.Position = addr;
|
||||||
|
this->m_vtx_attr.g0.PosFormat = format;
|
||||||
|
this->m_vtx_attr.g0.PosElements = elements;
|
||||||
|
this->m_vtx_attr.g0.PosFrac = frac;
|
||||||
|
this->m_vtx_attr.g0.ByteDequant = true;
|
||||||
|
elements += 2;
|
||||||
|
|
||||||
VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
|
std::vector<float> values = {
|
||||||
|
std::numeric_limits<float>::lowest(),
|
||||||
|
std::numeric_limits<float>::denorm_min(),
|
||||||
|
std::numeric_limits<float>::min(),
|
||||||
|
std::numeric_limits<float>::max(),
|
||||||
|
std::numeric_limits<float>::quiet_NaN(),
|
||||||
|
std::numeric_limits<float>::infinity(),
|
||||||
|
-0x8000, -0x80, -1, -0, 0, 1, 123, 0x7F, 0xFF, 0x7FFF, 0xFFFF, 12345678,
|
||||||
|
};
|
||||||
|
ASSERT_EQ(0u, values.size() % 2);
|
||||||
|
ASSERT_EQ(0u, values.size() % 3);
|
||||||
|
|
||||||
ASSERT_EQ(3 * sizeof(float), (u32)loader->m_native_vtx_decl.stride);
|
int count = (int)values.size() / elements;
|
||||||
ASSERT_EQ(3 * sizeof(float), (u32)loader->m_VertexSize);
|
u32 elem_size = 1 << (format / 2);
|
||||||
|
size_t input_size = elements * elem_size;
|
||||||
// Write some vertices.
|
if (addr & MASK_INDEXED)
|
||||||
Input(0.0f); Input(0.0f); Input(0.0f);
|
|
||||||
Input(1.0f); Input(0.0f); Input(0.0f);
|
|
||||||
Input(0.0f); Input(1.0f); Input(0.0f);
|
|
||||||
Input(0.0f); Input(0.0f); Input(1.0f);
|
|
||||||
|
|
||||||
// Convert 4 points. "7" -> primitive are points.
|
|
||||||
int count = loader->RunVertices(src, dst, 4, 7);
|
|
||||||
src.Skip(4 * loader->m_VertexSize);
|
|
||||||
dst.Skip(count * loader->m_native_vtx_decl.stride);
|
|
||||||
delete loader;
|
|
||||||
|
|
||||||
ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f);
|
|
||||||
ExpectOut(1.0f); ExpectOut(0.0f); ExpectOut(0.0f);
|
|
||||||
ExpectOut(0.0f); ExpectOut(1.0f); ExpectOut(0.0f);
|
|
||||||
ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(1.0f);
|
|
||||||
|
|
||||||
// Test that scale does nothing for floating point inputs.
|
|
||||||
Input(1.0f); Input(2.0f); Input(4.0f);
|
|
||||||
m_vtx_attr.g0.PosFrac = 1;
|
|
||||||
loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
|
|
||||||
count = loader->RunVertices(src, dst, 1, 7);
|
|
||||||
src.Skip(1 * loader->m_VertexSize);
|
|
||||||
dst.Skip(count * loader->m_native_vtx_decl.stride);
|
|
||||||
ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(4.0f);
|
|
||||||
delete loader;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(VertexLoaderTest, PositionDirectU16XY)
|
|
||||||
{
|
|
||||||
m_vtx_desc.Position = 1; // Direct
|
|
||||||
m_vtx_attr.g0.PosElements = 0; // XY
|
|
||||||
m_vtx_attr.g0.PosFormat = 2; // U16
|
|
||||||
|
|
||||||
VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
|
|
||||||
|
|
||||||
ASSERT_EQ(3 * sizeof(float), (u32)loader->m_native_vtx_decl.stride);
|
|
||||||
ASSERT_EQ(2 * sizeof(u16), (u32)loader->m_VertexSize);
|
|
||||||
|
|
||||||
// Write some vertices.
|
|
||||||
Input<u16>(0); Input<u16>(0);
|
|
||||||
Input<u16>(1); Input<u16>(2);
|
|
||||||
Input<u16>(256); Input<u16>(257);
|
|
||||||
Input<u16>(65535); Input<u16>(65534);
|
|
||||||
Input<u16>(12345); Input<u16>(54321);
|
|
||||||
|
|
||||||
// Convert 5 points. "7" -> primitive are points.
|
|
||||||
int count = loader->RunVertices(src, dst, 5, 7);
|
|
||||||
src.Skip(5 * loader->m_VertexSize);
|
|
||||||
dst.Skip(count * loader->m_native_vtx_decl.stride);
|
|
||||||
delete loader;
|
|
||||||
|
|
||||||
ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f);
|
|
||||||
ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(0.0f);
|
|
||||||
ExpectOut(256.0f); ExpectOut(257.0f); ExpectOut(0.0f);
|
|
||||||
ExpectOut(65535.0f); ExpectOut(65534.0f); ExpectOut(0.0f);
|
|
||||||
ExpectOut(12345.0f); ExpectOut(54321.0f); ExpectOut(0.0f);
|
|
||||||
|
|
||||||
// Test that scale works on U16 inputs.
|
|
||||||
Input<u16>(42); Input<u16>(24);
|
|
||||||
m_vtx_attr.g0.PosFrac = 1;
|
|
||||||
m_vtx_attr.g0.ByteDequant = 1;
|
|
||||||
loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
|
|
||||||
count = loader->RunVertices(src, dst, 1, 7);
|
|
||||||
src.Skip(1 * loader->m_VertexSize);
|
|
||||||
dst.Skip(count * loader->m_native_vtx_decl.stride);
|
|
||||||
ExpectOut(21.0f); ExpectOut(12.0f); ExpectOut(0.0f);
|
|
||||||
delete loader;
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed)
|
|
||||||
{
|
|
||||||
m_vtx_desc.Position = 1; // Direct
|
|
||||||
m_vtx_attr.g0.PosElements = 1; // XYZ
|
|
||||||
m_vtx_attr.g0.PosFormat = 4; // Float
|
|
||||||
|
|
||||||
VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
|
|
||||||
|
|
||||||
ASSERT_EQ(3 * sizeof(float), (u32)loader->m_native_vtx_decl.stride);
|
|
||||||
ASSERT_EQ(3 * sizeof(float), (u32)loader->m_VertexSize);
|
|
||||||
|
|
||||||
for (int i = 0; i < 1000; ++i)
|
|
||||||
{
|
{
|
||||||
ResetPointers();
|
input_size = addr - 1;
|
||||||
int count = loader->RunVertices(src, dst, 100000, 7);
|
for (int i = 0; i < count; i++)
|
||||||
src.Skip(100000 * loader->m_VertexSize);
|
if (addr == INDEX8)
|
||||||
dst.Skip(count * loader->m_native_vtx_decl.stride);
|
Input<u8>(i);
|
||||||
|
else
|
||||||
|
Input<u16>(i);
|
||||||
|
cached_arraybases[ARRAY_POSITION] = m_src.GetPointer();
|
||||||
|
g_main_cp_state.array_strides[ARRAY_POSITION] = elements * elem_size;
|
||||||
|
}
|
||||||
|
CreateAndCheckSizes(input_size, elements * sizeof(float));
|
||||||
|
for (float value : values)
|
||||||
|
{
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case FORMAT_UBYTE: Input((u8)value); break;
|
||||||
|
case FORMAT_BYTE: Input((s8)value); break;
|
||||||
|
case FORMAT_USHORT: Input((u16)value); break;
|
||||||
|
case FORMAT_SHORT: Input((s16)value); break;
|
||||||
|
case FORMAT_FLOAT: Input(value); break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RunVertices(count);
|
||||||
|
|
||||||
|
float scale = 1.f / (1u << (format == FORMAT_FLOAT ? 0 : frac));
|
||||||
|
for (auto iter = values.begin(); iter != values.end();)
|
||||||
|
{
|
||||||
|
float f, g;
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case FORMAT_UBYTE: f = (u8)*iter++; g = (u8)*iter++; break;
|
||||||
|
case FORMAT_BYTE: f = (s8)*iter++; g = (s8)*iter++; break;
|
||||||
|
case FORMAT_USHORT: f = (u16)*iter++; g = (u16)*iter++; break;
|
||||||
|
case FORMAT_SHORT: f = (s16)*iter++; g = (s16)*iter++; break;
|
||||||
|
case FORMAT_FLOAT: f = *iter++; g = *iter++; break;
|
||||||
|
}
|
||||||
|
ExpectOut(f * scale);
|
||||||
|
ExpectOut(g * scale);
|
||||||
}
|
}
|
||||||
delete loader;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed)
|
TEST_F(VertexLoaderTest, PositionIndex16FloatXY)
|
||||||
{
|
{
|
||||||
m_vtx_desc.Position = 1; // Direct
|
m_vtx_desc.Position = INDEX16;
|
||||||
m_vtx_attr.g0.PosElements = 0; // XY
|
m_vtx_attr.g0.PosFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g0.PosFormat = 2; // U16
|
CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float));
|
||||||
|
Input<u16>(1); Input<u16>(0);
|
||||||
|
cached_arraybases[ARRAY_POSITION] = m_src.GetPointer();
|
||||||
|
g_main_cp_state.array_strides[ARRAY_POSITION] = sizeof(float); // ;)
|
||||||
|
Input(1.f); Input(2.f); Input(3.f);
|
||||||
|
RunVertices(2);
|
||||||
|
ExpectOut(2); ExpectOut(3);
|
||||||
|
ExpectOut(1); ExpectOut(2);
|
||||||
|
}
|
||||||
|
|
||||||
VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
|
class VertexLoaderSpeedTest : public VertexLoaderTest, public ::testing::WithParamInterface<std::tuple<int, int>> {};
|
||||||
|
extern int gtest_FormatsAndElementsVertexLoaderSpeedTest_dummy_;
|
||||||
ASSERT_EQ(3 * sizeof(float), (u32)loader->m_native_vtx_decl.stride);
|
INSTANTIATE_TEST_CASE_P(
|
||||||
ASSERT_EQ(2 * sizeof(u16), (u32)loader->m_VertexSize);
|
FormatsAndElements, VertexLoaderSpeedTest,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::Values(FORMAT_UBYTE, FORMAT_BYTE, FORMAT_USHORT, FORMAT_SHORT, FORMAT_FLOAT),
|
||||||
|
::testing::Values(0, 1) // elements
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
TEST_P(VertexLoaderSpeedTest, PositionDirectAll)
|
||||||
|
{
|
||||||
|
int format, elements;
|
||||||
|
std::tie(format, elements) = GetParam();
|
||||||
|
const char* map[] = { "u8", "s8", "u16", "s16", "float" };
|
||||||
|
printf("format: %s, elements: %d\n", map[format], elements);
|
||||||
|
m_vtx_desc.Position = DIRECT;
|
||||||
|
m_vtx_attr.g0.PosFormat = format;
|
||||||
|
m_vtx_attr.g0.PosElements = elements;
|
||||||
|
elements += 2;
|
||||||
|
size_t elem_size = 1 << (format / 2);
|
||||||
|
CreateAndCheckSizes(elements * elem_size, elements * sizeof(float));
|
||||||
for (int i = 0; i < 1000; ++i)
|
for (int i = 0; i < 1000; ++i)
|
||||||
{
|
RunVertices(100000);
|
||||||
ResetPointers();
|
}
|
||||||
int count = loader->RunVertices(src, dst, 100000, 7);
|
|
||||||
src.Skip(100000 * loader->m_VertexSize);
|
TEST_P(VertexLoaderSpeedTest, TexCoordSingleElement)
|
||||||
dst.Skip(count * loader->m_native_vtx_decl.stride);
|
{
|
||||||
}
|
int format, elements;
|
||||||
delete loader;
|
std::tie(format, elements) = GetParam();
|
||||||
|
const char* map[] = { "u8", "s8", "u16", "s16", "float" };
|
||||||
|
printf("format: %s, elements: %d\n", map[format], elements);
|
||||||
|
m_vtx_desc.Position = DIRECT;
|
||||||
|
m_vtx_attr.g0.PosFormat = FORMAT_BYTE;
|
||||||
|
m_vtx_desc.Tex0Coord = DIRECT;
|
||||||
|
m_vtx_attr.g0.Tex0CoordFormat = format;
|
||||||
|
m_vtx_attr.g0.Tex0CoordElements = elements;
|
||||||
|
elements += 1;
|
||||||
|
size_t elem_size = 1 << (format / 2);
|
||||||
|
CreateAndCheckSizes(2 * sizeof(s8) + elements * elem_size,
|
||||||
|
2 * sizeof(float) + elements * sizeof(float));
|
||||||
|
for (int i = 0; i < 1000; ++i)
|
||||||
|
RunVertices(100000);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
|
TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
|
||||||
{
|
{
|
||||||
// Enables most attributes in floating point direct mode to test speed.
|
// Enables most attributes in floating point indexed mode to test speed.
|
||||||
m_vtx_desc.PosMatIdx = 1;
|
m_vtx_desc.PosMatIdx = 1;
|
||||||
m_vtx_desc.Tex0MatIdx = 1;
|
m_vtx_desc.Tex0MatIdx = 1;
|
||||||
m_vtx_desc.Tex1MatIdx = 1;
|
m_vtx_desc.Tex1MatIdx = 1;
|
||||||
|
@ -222,54 +253,54 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
|
||||||
m_vtx_desc.Tex5MatIdx = 1;
|
m_vtx_desc.Tex5MatIdx = 1;
|
||||||
m_vtx_desc.Tex6MatIdx = 1;
|
m_vtx_desc.Tex6MatIdx = 1;
|
||||||
m_vtx_desc.Tex7MatIdx = 1;
|
m_vtx_desc.Tex7MatIdx = 1;
|
||||||
m_vtx_desc.Position = 1;
|
m_vtx_desc.Position = INDEX16;
|
||||||
m_vtx_desc.Normal = 1;
|
m_vtx_desc.Normal = INDEX16;
|
||||||
m_vtx_desc.Color0 = 1;
|
m_vtx_desc.Color0 = INDEX16;
|
||||||
m_vtx_desc.Color1 = 1;
|
m_vtx_desc.Color1 = INDEX16;
|
||||||
m_vtx_desc.Tex0Coord = 1;
|
m_vtx_desc.Tex0Coord = INDEX16;
|
||||||
m_vtx_desc.Tex1Coord = 1;
|
m_vtx_desc.Tex1Coord = INDEX16;
|
||||||
m_vtx_desc.Tex2Coord = 1;
|
m_vtx_desc.Tex2Coord = INDEX16;
|
||||||
m_vtx_desc.Tex3Coord = 1;
|
m_vtx_desc.Tex3Coord = INDEX16;
|
||||||
m_vtx_desc.Tex4Coord = 1;
|
m_vtx_desc.Tex4Coord = INDEX16;
|
||||||
m_vtx_desc.Tex5Coord = 1;
|
m_vtx_desc.Tex5Coord = INDEX16;
|
||||||
m_vtx_desc.Tex6Coord = 1;
|
m_vtx_desc.Tex6Coord = INDEX16;
|
||||||
m_vtx_desc.Tex7Coord = 1;
|
m_vtx_desc.Tex7Coord = INDEX16;
|
||||||
|
|
||||||
m_vtx_attr.g0.PosElements = 1; // XYZ
|
m_vtx_attr.g0.PosElements = 1; // XYZ
|
||||||
m_vtx_attr.g0.PosFormat = 4; // Float
|
m_vtx_attr.g0.PosFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g0.NormalElements = 1; // NBT
|
m_vtx_attr.g0.NormalElements = 1; // NBT
|
||||||
m_vtx_attr.g0.NormalFormat = 4; // Float
|
m_vtx_attr.g0.NormalFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g0.Color0Elements = 1; // Has Alpha
|
m_vtx_attr.g0.Color0Elements = 1; // Has Alpha
|
||||||
m_vtx_attr.g0.Color0Comp = 5; // RGBA8888
|
m_vtx_attr.g0.Color0Comp = FORMAT_32B_8888;
|
||||||
m_vtx_attr.g0.Color1Elements = 1; // Has Alpha
|
m_vtx_attr.g0.Color1Elements = 1; // Has Alpha
|
||||||
m_vtx_attr.g0.Color1Comp = 5; // RGBA8888
|
m_vtx_attr.g0.Color1Comp = FORMAT_32B_8888;
|
||||||
m_vtx_attr.g0.Tex0CoordElements = 1; // ST
|
m_vtx_attr.g0.Tex0CoordElements = 1; // ST
|
||||||
m_vtx_attr.g0.Tex0CoordFormat = 4; // Float
|
m_vtx_attr.g0.Tex0CoordFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g1.Tex1CoordElements = 1; // ST
|
m_vtx_attr.g1.Tex1CoordElements = 1; // ST
|
||||||
m_vtx_attr.g1.Tex1CoordFormat = 4; // Float
|
m_vtx_attr.g1.Tex1CoordFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g1.Tex2CoordElements = 1; // ST
|
m_vtx_attr.g1.Tex2CoordElements = 1; // ST
|
||||||
m_vtx_attr.g1.Tex2CoordFormat = 4; // Float
|
m_vtx_attr.g1.Tex2CoordFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g1.Tex3CoordElements = 1; // ST
|
m_vtx_attr.g1.Tex3CoordElements = 1; // ST
|
||||||
m_vtx_attr.g1.Tex3CoordFormat = 4; // Float
|
m_vtx_attr.g1.Tex3CoordFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g1.Tex4CoordElements = 1; // ST
|
m_vtx_attr.g1.Tex4CoordElements = 1; // ST
|
||||||
m_vtx_attr.g1.Tex4CoordFormat = 4; // Float
|
m_vtx_attr.g1.Tex4CoordFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g2.Tex5CoordElements = 1; // ST
|
m_vtx_attr.g2.Tex5CoordElements = 1; // ST
|
||||||
m_vtx_attr.g2.Tex5CoordFormat = 4; // Float
|
m_vtx_attr.g2.Tex5CoordFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g2.Tex6CoordElements = 1; // ST
|
m_vtx_attr.g2.Tex6CoordElements = 1; // ST
|
||||||
m_vtx_attr.g2.Tex6CoordFormat = 4; // Float
|
m_vtx_attr.g2.Tex6CoordFormat = FORMAT_FLOAT;
|
||||||
m_vtx_attr.g2.Tex7CoordElements = 1; // ST
|
m_vtx_attr.g2.Tex7CoordElements = 1; // ST
|
||||||
m_vtx_attr.g2.Tex7CoordFormat = 4; // Float
|
m_vtx_attr.g2.Tex7CoordFormat = FORMAT_FLOAT;
|
||||||
|
|
||||||
VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr);
|
CreateAndCheckSizes(33, 156);
|
||||||
|
|
||||||
|
for (int i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
cached_arraybases[i] = m_src.GetPointer();
|
||||||
|
g_main_cp_state.array_strides[i] = 129;
|
||||||
|
}
|
||||||
|
|
||||||
// This test is only done 100x in a row since it's ~20x slower using the
|
// This test is only done 100x in a row since it's ~20x slower using the
|
||||||
// current vertex loader implementation.
|
// current vertex loader implementation.
|
||||||
for (int i = 0; i < 100; ++i)
|
for (int i = 0; i < 100; ++i)
|
||||||
{
|
RunVertices(100000);
|
||||||
ResetPointers();
|
|
||||||
int count = loader->RunVertices(src, dst, 100000, 7);
|
|
||||||
src.Skip(100000 * loader->m_VertexSize);
|
|
||||||
dst.Skip(count * loader->m_native_vtx_decl.stride);
|
|
||||||
}
|
|
||||||
delete loader;
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue