Merge pull request #2141 from Tilka/position_cache
zfreeze: cache vertex positions
commit 588de63f9c
@@ -1823,6 +1823,7 @@ void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x66, d
 
 void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg); Write8(subreg);}
 void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg); Write8(subreg);}
+void XEmitter::PINSRD(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSE41Op(0x66, 0x3A22, dest, arg); Write8(subreg);}
 
 void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
 void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}
@@ -711,6 +711,7 @@ public:
 
 	void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg);
 	void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg);
+	void PINSRD(X64Reg dest, const OpArg& arg, u8 subreg);
 
 	void PMADDWD(X64Reg dest, const OpArg& arg);
 	void PSADBW(X64Reg dest, const OpArg& arg);
@@ -14,6 +14,7 @@
 #include "VideoCommon/VertexLoader_Normal.h"
 #include "VideoCommon/VertexLoader_Position.h"
 #include "VideoCommon/VertexLoader_TextCoord.h"
+#include "VideoCommon/VertexLoaderManager.h"
 #include "VideoCommon/VideoCommon.h"
 #include "VideoCommon/VideoConfig.h"
 
@@ -24,6 +25,8 @@ u8* g_vertex_manager_write_ptr;
 static void LOADERDECL PosMtx_ReadDirect_UByte(VertexLoader* loader)
 {
 	u32 posmtx = DataReadU8() & 0x3f;
+	if (loader->m_counter < 3)
+		VertexLoaderManager::position_matrix_index[loader->m_counter] = posmtx;
 	DataWrite<u32>(posmtx);
 	PRIM_LOG("posmtx: %d, ", posmtx);
 }
@@ -316,7 +319,7 @@ int VertexLoader::RunVertices(DataReader src, DataReader dst, int count)
 	m_numLoadedVertices += count;
 	m_skippedVertices = 0;
 
-	for (int s = 0; s < count; s++)
+	for (m_counter = count - 1; m_counter >= 0; m_counter--)
 	{
 		m_tcIndex = 0;
 		m_colIndex = 0;
@@ -49,6 +49,7 @@ public:
 	int m_texmtxread;
 	bool m_vertexSkip;
 	int m_skippedVertices;
+	int m_counter;
 
 private:
 	// Pipeline.
@@ -165,6 +165,18 @@ int VertexLoaderARM64::ReadVertex(u64 attribute, int format, int count_in, int c
 		m_float_emit.ST1(32, 1, coords, EncodeRegTo64(scratch2_reg));
 	}
 
+	// Z-Freeze
+	if (native_format == &m_native_vtx_decl.position)
+	{
+		CMP(count_reg, 3);
+		FixupBranch dont_store = B(CC_GT);
+		MOVI2R(EncodeRegTo64(scratch2_reg), (u64)VertexLoaderManager::position_cache);
+		ORR(scratch1_reg, WSP, count_reg, ArithOption(count_reg, ST_LSL, 4));
+		ADD(EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch1_reg), EncodeRegTo64(scratch2_reg));
+		m_float_emit.STUR(write_size, coords, EncodeRegTo64(scratch1_reg), -16);
+		SetJumpTarget(dont_store);
+	}
+
 	native_format->components = count_out;
 	native_format->enable = true;
 	native_format->offset = m_dst_ofs;
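A quick sanity check of the store address above (my arithmetic, not part of the commit), assuming count_reg holds the number of vertices still to be processed, so it is 3, 2 and 1 for the last three: the ORR with LSL #4 forms count_reg * 16 and the STUR writes 16 bytes below that, while the x64 loader further down reaches the same spot via LEA (count_reg * 4 - 4) scaled by 4. Either way each of the last three vertices gets its own float[4] row, with the final vertex in row 0:

    // Offset check for the zfreeze position stores; assumes count_reg is in {3, 2, 1}.
    constexpr int Arm64Offset(int count_reg) { return count_reg * 16 - 16; }     // ORR ... LSL #4, then STUR #-16
    constexpr int X64Offset(int count_reg) { return (count_reg * 4 - 4) * 4; }   // LEA *4 - 4, then SCALE_4

    static_assert(Arm64Offset(3) == 32 && X64Offset(3) == 32, "third-from-last vertex -> row 2");
    static_assert(Arm64Offset(2) == 16 && X64Offset(2) == 16, "second-from-last vertex -> row 1");
    static_assert(Arm64Offset(1) == 0 && X64Offset(1) == 0, "last vertex -> row 0");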
@@ -342,6 +354,14 @@ void VertexLoaderARM64::GenerateVertexLoader()
 		LDRB(INDEX_UNSIGNED, scratch1_reg, src_reg, m_src_ofs);
 		AND(scratch1_reg, scratch1_reg, 0, 5);
 		STR(INDEX_UNSIGNED, scratch1_reg, dst_reg, m_dst_ofs);
+
+		// Z-Freeze
+		CMP(count_reg, 3);
+		FixupBranch dont_store = B(CC_GT);
+		MOVI2R(EncodeRegTo64(scratch2_reg), (u64)VertexLoaderManager::position_matrix_index - sizeof(u32));
+		STR(INDEX_UNSIGNED, scratch1_reg, EncodeRegTo64(scratch2_reg), 0);
+		SetJumpTarget(dont_store);
+
 		m_native_components |= VB_HAS_POSMTXIDX;
 		m_native_vtx_decl.posmtx.components = 4;
 		m_native_vtx_decl.posmtx.enable = true;
@@ -26,6 +26,9 @@
 namespace VertexLoaderManager
 {
 
+float position_cache[3][4];
+u32 position_matrix_index[3];
+
 typedef std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> NativeVertexFormatMap;
 static NativeVertexFormatMap s_native_vertex_map;
 static NativeVertexFormat* s_current_vtx_fmt;
@@ -28,5 +28,10 @@ namespace VertexLoaderManager
 
 // Resolved pointers to array bases. Used by vertex loaders.
 extern u8 *cached_arraybases[12];
 void UpdateVertexArrayPointers();
+
+// Position cache for zfreeze (3 vertices, 4 floats each to allow SIMD overwrite).
+// These arrays are in reverse order.
+extern float position_cache[3][4];
+extern u32 position_matrix_index[3];
 }
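To make the "reverse order" remark concrete (an illustration of mine, not code from the commit): RunVertices now counts m_counter down from count - 1 to 0, so the loaders index the cache with a value that hits 0 on the final vertex of the draw, and CalculateZSlope can always read the last triangle as rows 2, 1, 0. A self-contained sketch of that convention:

    // Standalone illustration of the reverse-order cache layout; not Dolphin code.
    #include <cstdio>

    static float position_cache[3][4]; // rows 2, 1, 0 = third-last, second-last, last vertex

    static void LoadPositions(const float (*src)[3], int count)
    {
        // Mirrors the loop change in VertexLoader::RunVertices: the counter runs count-1 .. 0.
        for (int counter = count - 1; counter >= 0; counter--)
        {
            const float* v = src[count - 1 - counter]; // vertex currently being loaded
            if (counter < 3)                           // only the last three vertices are cached
                for (int i = 0; i < 3; i++)
                    position_cache[counter][i] = v[i];
        }
    }

    int main()
    {
        const float verts[5][3] = {{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}};
        LoadPositions(verts, 5);
        // The last triangle is verts[2..4]; reading rows 2, 1, 0 recovers it in draw order.
        for (int i = 0; i < 3; i++)
            printf("row %d -> x = %.0f\n", 2 - i, position_cache[2 - i][0]);
        return 0;
    }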
@@ -23,6 +23,11 @@ static const X64Reg base_reg = RBX;
 
 static const u8* memory_base_ptr = (u8*)&g_main_cp_state.array_strides;
 
+static OpArg MPIC(const void* ptr, X64Reg scale_reg, int scale = SCALE_1)
+{
+	return MComplex(base_reg, scale_reg, scale, (s32)((u8*)ptr - memory_base_ptr));
+}
+
 static OpArg MPIC(const void* ptr)
 {
 	return MDisp(base_reg, (s32)((u8*)ptr - memory_base_ptr));
|
||||||
|
|
||||||
int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format)
|
int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count_in, int count_out, bool dequantize, u8 scaling_exponent, AttributeFormat* native_format)
|
||||||
{
|
{
|
||||||
static const __m128i shuffle_lut[4][3] = {
|
static const __m128i shuffle_lut[5][3] = {
|
||||||
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L), // 1x u8
|
{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF00L), // 1x u8
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L), // 2x u8
|
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFF01L, 0xFFFFFF00L), // 2x u8
|
||||||
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)}, // 3x u8
|
_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFF02L, 0xFFFFFF01L, 0xFFFFFF00L)}, // 3x u8
|
||||||
|
@@ -90,6 +95,9 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
 		{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x0001FFFFL), // 1x s16
 		 _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0203FFFFL, 0x0001FFFFL), // 2x s16
 		 _mm_set_epi32(0xFFFFFFFFL, 0x0405FFFFL, 0x0203FFFFL, 0x0001FFFFL)}, // 3x s16
+		{_mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x00010203L), // 1x float
+		 _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L), // 2x float
+		 _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L)}, // 3x float
 	};
 	static const __m128 scale_factors[32] = {
 		_mm_set_ps1(1./(1u<< 0)), _mm_set_ps1(1./(1u<< 1)), _mm_set_ps1(1./(1u<< 2)), _mm_set_ps1(1./(1u<< 3)),
@@ -119,21 +127,6 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
 	if (attribute == DIRECT)
 		m_src_ofs += load_bytes;
 
-	if (format == FORMAT_FLOAT)
-	{
-		// Floats don't need to be scaled or converted,
-		// so we can just load/swap/store them directly
-		// and return early.
-		for (int i = 0; i < count_in; i++)
-		{
-			LoadAndSwap(32, scratch3, data);
-			MOV(32, dest, R(scratch3));
-			data.AddMemOffset(sizeof(float));
-			dest.AddMemOffset(sizeof(float));
-		}
-		return load_bytes;
-	}
-
 	if (cpu_info.bSSSE3)
 	{
 		if (load_bytes > 8)
@@ -194,13 +187,54 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
 			else
 				PSRLD(coords, 16);
 			break;
+		case FORMAT_FLOAT:
+			// Floats don't need to be scaled or converted,
+			// so we can just load/swap/store them directly
+			// and return early.
+			// (In SSSE3 we still need to store them.)
+			for (int i = 0; i < count_in; i++)
+			{
+				LoadAndSwap(32, scratch3, data);
+				MOV(32, dest, R(scratch3));
+				data.AddMemOffset(sizeof(float));
+				dest.AddMemOffset(sizeof(float));
+
+				// zfreeze
+				if (native_format == &m_native_vtx_decl.position)
+				{
+					if (cpu_info.bSSE4_1)
+					{
+						PINSRD(coords, R(scratch3), i);
+					}
+					else
+					{
+						PINSRW(coords, R(scratch3), 2 * i + 0);
+						SHR(32, R(scratch3), Imm8(16));
+						PINSRW(coords, R(scratch3), 2 * i + 1);
+					}
+				}
 			}
 		}
+
+		// zfreeze
+		if (native_format == &m_native_vtx_decl.position)
+		{
+			CMP(32, R(count_reg), Imm8(3));
+			FixupBranch dont_store = J_CC(CC_A);
+			LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4));
+			MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords);
+			SetJumpTarget(dont_store);
+		}
+		return load_bytes;
+	}
+
+	if (format != FORMAT_FLOAT)
+	{
 		CVTDQ2PS(coords, R(coords));
 
 		if (dequantize && scaling_exponent)
 			MULPS(coords, MPIC(&scale_factors[scaling_exponent]));
+	}
 
 	switch (count_out)
 	{
@@ -209,6 +243,16 @@ int VertexLoaderX64::ReadVertex(OpArg data, u64 attribute, int format, int count
 	case 3: MOVUPS(dest, coords); break;
 	}
 
+	// zfreeze
+	if (native_format == &m_native_vtx_decl.position)
+	{
+		CMP(32, R(count_reg), Imm8(3));
+		FixupBranch dont_store = J_CC(CC_A);
+		LEA(32, scratch3, MScaled(count_reg, SCALE_4, -4));
+		MOVUPS(MPIC(VertexLoaderManager::position_cache, scratch3, SCALE_4), coords);
+		SetJumpTarget(dont_store);
+	}
+
 	return load_bytes;
 }
@@ -384,6 +428,13 @@ void VertexLoaderX64::GenerateVertexLoader()
 		MOVZX(32, 8, scratch1, MDisp(src_reg, m_src_ofs));
 		AND(32, R(scratch1), Imm8(0x3F));
 		MOV(32, MDisp(dst_reg, m_dst_ofs), R(scratch1));
+
+		// zfreeze
+		CMP(32, R(count_reg), Imm8(3));
+		FixupBranch dont_store = J_CC(CC_A);
+		MOV(32, MPIC(VertexLoaderManager::position_matrix_index - 1, count_reg, SCALE_4), R(scratch1));
+		SetJumpTarget(dont_store);
+
 		m_native_components |= VB_HAS_POSMTXIDX;
 		m_native_vtx_decl.posmtx.components = 4;
 		m_native_vtx_decl.posmtx.enable = true;
@@ -32,7 +32,12 @@ void LOADERDECL Pos_ReadDirect(VertexLoader* loader)
 	DataReader src(g_video_buffer_read_ptr, nullptr);
 
 	for (int i = 0; i < N; ++i)
-		dst.Write(PosScale(src.Read<T>(), scale));
+	{
+		float value = PosScale(src.Read<T>(), scale);
+		if (loader->m_counter < 3)
+			VertexLoaderManager::position_cache[loader->m_counter][i] = value;
+		dst.Write(value);
+	}
 
 	g_vertex_manager_write_ptr = dst.GetPointer();
 	g_video_buffer_read_ptr = src.GetPointer();
@@ -52,7 +57,12 @@ void LOADERDECL Pos_ReadIndex(VertexLoader* loader)
 	DataReader dst(g_vertex_manager_write_ptr, nullptr);
 
 	for (int i = 0; i < N; ++i)
-		dst.Write(PosScale(Common::FromBigEndian(data[i]), scale));
+	{
+		float value = PosScale(Common::FromBigEndian(data[i]), scale);
+		if (loader->m_counter < 3)
+			VertexLoaderManager::position_cache[loader->m_counter][i] = value;
+		dst.Write(value);
+	}
 
 	g_vertex_manager_write_ptr = dst.GetPointer();
 	LOG_VTX();
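One detail worth flagging before the CalculateZSlope hunks below (my reading, hedged): the loaders above only write as many floats per row as the position actually has components, so a two-component position leaves the cached Z stale; the consumer has to zero it, just as the removed vtx[2 + i * 3] = 0 path did. A tiny hypothetical helper expressing that rule:

    // Hypothetical helper mirroring the components == 2 handling added to CalculateZSlope.
    static void SanitizeCachedRow(float row[4], int position_components)
    {
        if (position_components == 2)
            row[2] = 0.0f; // the loader never wrote Z for 2-component positions
        // row[3] is only slack for 16-byte SIMD stores and is never read.
    }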
@@ -279,7 +279,6 @@ void VertexManager::DoState(PointerWrap& p)
 
 void VertexManager::CalculateZSlope(NativeVertexFormat* format)
 {
-	float vtx[9];
 	float out[12];
 	float viewOffset[2] = { xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
 	                        xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};
@@ -290,31 +289,24 @@ void VertexManager::CalculateZSlope(NativeVertexFormat* format)
 	// Global matrix ID.
 	u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
 	const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();
-	size_t posOff = vert_decl.position.offset;
-	size_t mtxOff = vert_decl.posmtx.offset;
 
 	// Make sure the buffer contains at least 3 vertices.
 	if ((s_pCurBufferPointer - s_pBaseBufferPointer) < (vert_decl.stride * 3))
 		return;
 
 	// Lookup vertices of the last rendered triangle and software-transform them
-	// This allows us to determine the depth slope, which will be used if z--freeze
+	// This allows us to determine the depth slope, which will be used if z-freeze
 	// is enabled in the following flush.
 	for (unsigned int i = 0; i < 3; ++i)
 	{
-		u8* vtx_ptr = s_pCurBufferPointer - vert_decl.stride * (3 - i);
-		vtx[0 + i * 3] = ((float*)(vtx_ptr + posOff))[0];
-		vtx[1 + i * 3] = ((float*)(vtx_ptr + posOff))[1];
-		if (vert_decl.position.components == 3)
-			vtx[2 + i * 3] = ((float*)(vtx_ptr + posOff))[2];
-		else
-			vtx[2 + i * 3] = 0;
-
 		// If this vertex format has per-vertex position matrix IDs, look it up.
 		if (vert_decl.posmtx.enable)
-			mtxIdx = *((u32*)(vtx_ptr + mtxOff));
+			mtxIdx = VertexLoaderManager::position_matrix_index[2 - i];
 
-		VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4], mtxIdx);
+		if (vert_decl.position.components == 2)
+			VertexLoaderManager::position_cache[2 - i][2] = 0;
+
+		VertexShaderManager::TransformToClipSpace(&VertexLoaderManager::position_cache[2 - i][0], &out[i * 4], mtxIdx);
 
 		// Transform to Screenspace
 		float inv_w = 1.0f / out[3 + i * 4];