From 8e3b9b434cca5b2f6e69b44bf3bbb4b785c72de5 Mon Sep 17 00:00:00 2001 From: Stefanos Kornilios Mitsis Poiitidis Date: Tue, 25 Apr 2017 13:20:33 +0200 Subject: [PATCH] Cleanup/optimize ta_thd_data32_i - Use 128-bit copies - read pcw from memory --- core/hw/pvr/ta.cpp | 50 +++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/core/hw/pvr/ta.cpp b/core/hw/pvr/ta.cpp index 23c4d3db0..a0c72cd82 100644 --- a/core/hw/pvr/ta.cpp +++ b/core/hw/pvr/ta.cpp @@ -18,7 +18,19 @@ extern u32 SQW,DMAW; #else #define SQWC(x) #define DMAWC(x) +#endif +#if HOST_CPU == CPU_X86 +#include <xmmintrin.h> +typedef __m128 simd128_t; +#elif HOST_CPU == CPU_ARM && defined(__ARM_NEON__) +#include <arm_neon.h> +typedef uint64x2_t simd128_t; +#else +struct simd128_t +{ +DECL_ALIGN(32) u64 data[2]; +}; #endif /* @@ -273,34 +285,36 @@ void ta_vtx_SoftReset() ta_cur_state=TAS_NS; } - INLINE void DYNACALL ta_thd_data32_i(void* data) -{ - f64* dst=(f64*)ta_tad.thd_data; - f64* src=(f64*)data; +{ + simd128_t* dst = (simd128_t*)ta_tad.thd_data; + simd128_t* src = (simd128_t*)data; - ta_tad.thd_data+=32; + PCW pcw = *(PCW*)src; + + dst[0] = src[0]; + dst[1] = src[1]; - f64 t = src[0]; - dst[0]=t; - dst[1]=src[1]; - dst[2]=src[2]; - dst[3]=src[3]; + ta_tad.thd_data += 32; - PCW pcw=(PCW&)t; - u32 state_in = (ta_cur_state<<8) | (pcw.ParaType<<5) | (pcw.obj_ctrl>>2)%32; - - u8 trans = ta_fsm[state_in]; - ta_cur_state = (ta_state)trans; - bool must_handle=trans&0xF0; + u32 state_in = (ta_cur_state << 8) | (pcw.ParaType << 5) | ((pcw.obj_ctrl >> 2) & 31); + + u32 trans = ta_fsm[state_in]; + ta_cur_state = (ta_state)trans; + bool must_handle = trans & 0xF0; - if (unlikely(must_handle)) + if (!unlikely(must_handle)) + { + return; + } + else + { ta_handle_cmd(trans); + } } - void DYNACALL ta_vtx_data32(void* data) { SQWC(1);