From 37379d5d1dffca271b35c968a27eb01355fb8ce3 Mon Sep 17 00:00:00 2001
From: Gregory Hainaut
Date: Sun, 20 Nov 2016 19:21:24 +0100
Subject: [PATCH] gsdx sw x64: shuffle memory allocation

tex address is a3
vm address is a1

Could help to avoid REX prefix
Reduce prologue/epilogue register copy

Byte code size 41893 => 38912 (on my testcase)
---
 .../GSDrawScanlineCodeGenerator.x64.avx.cpp   | 47 ++++++++++---------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp
index cb35f22881..1371ccb90c 100644
--- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp
+++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp
@@ -26,11 +26,11 @@
 #if _M_SSE < 0x501 && (defined(_M_AMD64) || defined(_WIN64))
 
 // Ease the reading of the code
-#define _m_local r11
-#define _m_local__gd r12
-#define _m_local__gd__vm r13
-#define _m_local__gd__clut r14
-#define _m_local__gd__tex r15
+#define _m_local r12
+#define _m_local__gd r13
+#define _m_local__gd__vm a1
+#define _m_local__gd__clut r11
+#define _m_local__gd__tex a3
 // More pretty name
 #define _z xmm8
 #define _f xmm9
@@ -89,23 +89,22 @@ void GSDrawScanlineCodeGenerator::Generate_AVX()
 	// No reservation on the stack as a red zone is available
 	push(rbp);
 	mov(ptr[rsp + _rz_rbx], rbx);
-	mov(ptr[rsp + _rz_r12], r12);
-	mov(ptr[rsp + _rz_r13], r13);
-	if(need_clut)
-		mov(ptr[rsp + _rz_r14], r14);
-	if(need_tex)
-		mov(ptr[rsp + _rz_r15], r15);
+	if (!m_rip)
+	{
+		mov(ptr[rsp + _rz_r12], r12);
+		mov(ptr[rsp + _rz_r13], r13);
+	}
 #endif
 
 	mov(r10, (size_t)&m_test[0]);
-	mov(_m_local, (size_t)&m_local);
-	mov(_m_local__gd, _rip_local(gd));
+	if (!m_rip)
+	{
+		mov(_m_local, (size_t)&m_local);
+		mov(_m_local__gd, _rip_local(gd));
+	}
 
-	mov(_m_local__gd__vm, _rip_global(vm));
 	if(need_clut)
 		mov(_m_local__gd__clut, _rip_global(clut));
-	if(need_tex)
-		mov(_m_local__gd__tex, _rip_global(tex));
 
 	Init_AVX();
 
@@ -272,12 +271,11 @@ L("exit");
 	pop(rbx);
 #else
 	mov(rbx, ptr[rsp + _rz_rbx]);
-	mov(r12, ptr[rsp + _rz_r12]);
-	mov(r13, ptr[rsp + _rz_r13]);
-	if(need_clut)
-		mov(r14, ptr[rsp + _rz_r14]);
-	if(need_tex)
-		mov(r15, ptr[rsp + _rz_r15]);
+	if (!m_rip)
+	{
+		mov(r12, ptr[rsp + _rz_r12]);
+		mov(r13, ptr[rsp + _rz_r13]);
+	}
 	pop(rbp);
 #endif
 
@@ -482,13 +480,16 @@ void GSDrawScanlineCodeGenerator::Init_AVX()
 		}
 	}
 
-
 	if(m_sel.fwrite && m_sel.fpsm == 2 && m_sel.dthe)
 	{
 		// On linux, a2 is edx which will be used for fzm
 		// In all case, it will require a mov in dthe code, so let's keep the value on the stack
 		mov(ptr[rsp + _rz_top], a2);
 	}
+
+	mov(_m_local__gd__vm, _rip_global(vm));
+	if(m_sel.fb && m_sel.tfx != TFX_NONE)
+		mov(_m_local__gd__tex, _rip_global(tex));
 }
 
 void GSDrawScanlineCodeGenerator::Step_AVX()