From 6018936dc272c72ec4e9306bdf14423332f4cb5a Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 25 Dec 2022 22:14:30 +1000 Subject: [PATCH] x86/microVU: Optimize loadstores to offsets from vi00 --- pcsx2/x86/microVU_Lower.inl | 133 +++++++++++++++++++----------------- pcsx2/x86/microVU_Misc.inl | 21 ++++++ 2 files changed, 92 insertions(+), 62 deletions(-) diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 2fcb3122c2..d68313baee 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -1077,14 +1077,17 @@ mVUop(mVU_ILW) pass2 { void* ptr = mVU.regs().Mem + offsetSS; - - mVU.regAlloc->moveVIToGPR(gprT1, _Is_); - if (_Imm11_ != 0) - xADD(gprT1, _Imm11_); - mVUaddrFix(mVU, gprT1q); + std::optional optaddr(mVUoptimizeConstantAddr(mVU, _Is_, _Imm11_, offsetSS)); + if (!optaddr.has_value()) + { + mVU.regAlloc->moveVIToGPR(gprT1, _Is_); + if (_Imm11_ != 0) + xADD(gprT1, _Imm11_); + mVUaddrFix(mVU, gprT1q); + } const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI); - xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]); + xMOVZX(regT, ptr16[optaddr.has_value() ? optaddr.value() : xComplexAddress(gprT2q, ptr, gprT1q)]); mVU.regAlloc->clearNeeded(regT); mVU.profiler.EmitOp(opILW); } @@ -1128,40 +1131,6 @@ mVUop(mVU_ILWR) // ISW/ISWR //------------------------------------------------------------------ -static void writeBackISW(microVU& mVU, void* base_ptr, xAddressReg reg, const xRegister32& val) -{ - if (!reg.IsEmpty() && (sptr)base_ptr != (s32)(sptr)base_ptr) - { - int register_offset = -1; - auto writeBackAt = [&](int offset) { - if (register_offset == -1) - { - xLEA(gprT2q, ptr[(void*)((sptr)base_ptr + offset)]); - register_offset = offset; - } - xMOV(ptr32[gprT2q + reg + (offset - register_offset)], val); - }; - if (_X) writeBackAt(0); - if (_Y) writeBackAt(4); - if (_Z) writeBackAt(8); - if (_W) writeBackAt(12); - } - else if (reg.IsEmpty()) - { - if (_X) xMOV(ptr32[(void*)((uptr)base_ptr )], val); - if (_Y) xMOV(ptr32[(void*)((uptr)base_ptr + 4)], val); - if (_Z) xMOV(ptr32[(void*)((uptr)base_ptr + 8)], val); - if (_W) xMOV(ptr32[(void*)((uptr)base_ptr + 12)], val); - } - else - { - if (_X) xMOV(ptr32[base_ptr+reg ], val); - if (_Y) xMOV(ptr32[base_ptr+reg + 4], val); - if (_Z) xMOV(ptr32[base_ptr+reg + 8], val); - if (_W) xMOV(ptr32[base_ptr+reg + 12], val); - } -} - mVUop(mVU_ISW) { pass1 @@ -1172,16 +1141,22 @@ mVUop(mVU_ISW) } pass2 { - void* ptr = mVU.regs().Mem; - - mVU.regAlloc->moveVIToGPR(gprT1, _Is_); - if (_Imm11_ != 0) - xADD(gprT1, _Imm11_); - mVUaddrFix(mVU, gprT1q); + std::optional optaddr(mVUoptimizeConstantAddr(mVU, _Is_, _Imm11_, 0)); + if (!optaddr.has_value()) + { + mVU.regAlloc->moveVIToGPR(gprT1, _Is_); + if (_Imm11_ != 0) + xADD(gprT1, _Imm11_); + mVUaddrFix(mVU, gprT1q); + } // If regT is dirty, the high bits might not be zero. const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true); - writeBackISW(mVU, ptr, gprT1q, regT); + const xAddressVoid ptr(optaddr.has_value() ? optaddr.value() : xComplexAddress(gprT2q, mVU.regs().Mem, gprT1q)); + if (_X) xMOV(ptr32[ptr], regT); + if (_Y) xMOV(ptr32[ptr + 4], regT); + if (_Z) xMOV(ptr32[ptr + 8], regT); + if (_W) xMOV(ptr32[ptr + 12], regT); mVU.regAlloc->clearNeeded(regT); mVU.profiler.EmitOp(opISW); } @@ -1198,7 +1173,7 @@ mVUop(mVU_ISWR) } pass2 { - void* ptr = mVU.regs().Mem; + void* base = mVU.regs().Mem; xAddressReg is = xEmptyReg; if (_Is_) { @@ -1207,7 +1182,36 @@ mVUop(mVU_ISWR) is = gprT1q; } const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true); - writeBackISW(mVU, ptr, is, regT); + if (!is.IsEmpty() && (sptr)base != (s32)(sptr)base) + { + int register_offset = -1; + auto writeBackAt = [&](int offset) { + if (register_offset == -1) + { + xLEA(gprT2q, ptr[(void*)((sptr)base + offset)]); + register_offset = offset; + } + xMOV(ptr32[gprT2q + is + (offset - register_offset)], regT); + }; + if (_X) writeBackAt(0); + if (_Y) writeBackAt(4); + if (_Z) writeBackAt(8); + if (_W) writeBackAt(12); + } + else if (is.IsEmpty()) + { + if (_X) xMOV(ptr32[(void*)((uptr)base)], regT); + if (_Y) xMOV(ptr32[(void*)((uptr)base + 4)], regT); + if (_Z) xMOV(ptr32[(void*)((uptr)base + 8)], regT); + if (_W) xMOV(ptr32[(void*)((uptr)base + 12)], regT); + } + else + { + if (_X) xMOV(ptr32[base + is], regT); + if (_Y) xMOV(ptr32[base + is + 4], regT); + if (_Z) xMOV(ptr32[base + is + 8], regT); + if (_W) xMOV(ptr32[base + is + 12], regT); + } mVU.regAlloc->clearNeeded(regT); mVU.profiler.EmitOp(opISWR); @@ -1224,14 +1228,17 @@ mVUop(mVU_LQ) pass1 { mVUanalyzeLQ(mVU, _Ft_, _Is_, false); } pass2 { - void* ptr = mVU.regs().Mem; - mVU.regAlloc->moveVIToGPR(gprT1, _Is_); - if (_Imm11_ != 0) - xADD(gprT1, _Imm11_); - mVUaddrFix(mVU, gprT1q); + const std::optional optaddr(mVUoptimizeConstantAddr(mVU, _Is_, _Imm11_, 0)); + if (!optaddr.has_value()) + { + mVU.regAlloc->moveVIToGPR(gprT1, _Is_); + if (_Imm11_ != 0) + xADD(gprT1, _Imm11_); + mVUaddrFix(mVU, gprT1q); + } const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); - mVUloadReg(Ft, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W); + mVUloadReg(Ft, optaddr.has_value() ? optaddr.value() : xComplexAddress(gprT2q, mVU.regs().Mem, gprT1q), _X_Y_Z_W); mVU.regAlloc->clearNeeded(Ft); mVU.profiler.EmitOp(opLQ); } @@ -1315,15 +1322,17 @@ mVUop(mVU_SQ) pass1 { mVUanalyzeSQ(mVU, _Fs_, _It_, false); } pass2 { - void* ptr = mVU.regs().Mem; - - mVU.regAlloc->moveVIToGPR(gprT1, _It_); - if (_Imm11_ != 0) - xADD(gprT1, _Imm11_); - mVUaddrFix(mVU, gprT1q); + const std::optional optptr(mVUoptimizeConstantAddr(mVU, _It_, _Imm11_, 0)); + if (!optptr.has_value()) + { + mVU.regAlloc->moveVIToGPR(gprT1, _It_); + if (_Imm11_ != 0) + xADD(gprT1, _Imm11_); + mVUaddrFix(mVU, gprT1q); + } const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W); - mVUsaveReg(Fs, xComplexAddress(gprT2q, ptr, gprT1q), _X_Y_Z_W, 1); + mVUsaveReg(Fs, optptr.has_value() ? optptr.value() : xComplexAddress(gprT2q, mVU.regs().Mem, gprT1q), _X_Y_Z_W, 1); mVU.regAlloc->clearNeeded(Fs); mVU.profiler.EmitOp(opSQ); } diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index cd8e57c2f6..9bb2a6e4ab 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -15,6 +15,7 @@ #pragma once #include +#include //------------------------------------------------------------------ // Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging... @@ -337,6 +338,26 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg) } } +__fi std::optional mVUoptimizeConstantAddr(mV, u32 srcreg, s32 offset, s32 offsetSS_) +{ + // if we had const prop for VIs, we could do that here.. + if (srcreg != 0) + return std::nullopt; + + const s32 addr = 0 + offset; + if (isVU1) + { + return ptr[mVU.regs().Mem + ((addr & 0x3FFu) << 4) + offsetSS_]; + } + else + { + if (addr & 0x400) + return std::nullopt; + + return ptr[mVU.regs().Mem + ((addr & 0xFFu) << 4) + offsetSS_]; + } +} + //------------------------------------------------------------------ // Micro VU - Custom SSE Instructions //------------------------------------------------------------------