diff --git a/bin/GameIndex.dbf b/bin/GameIndex.dbf index ce917b0e91..958f87de2b 100644 --- a/bin/GameIndex.dbf +++ b/bin/GameIndex.dbf @@ -56,7 +56,6 @@ -- Game Fixes (gameFixName = 1) --------------------------------------------- -- VuAddSubHack = 1 // Tri-ace games, they use an encryption algorithm that requires VU ADDI opcode to be bit-accurate. --- VuClipFlagHack = 1 // Persona games, maybe others. It's to do with the VU clip flag (sVU-only). -- FpuCompareHack = 1 // Digimon Rumble Arena 2, fixes spinning/hanging on intro-menu. -- FpuMulHack = 1 // Tales of Destiny hangs. -- FpuNegDivHack = 1 // Gundam games messed up camera-view. Dakar 2's sky showing over 3D. Others... @@ -3125,7 +3124,6 @@ Region = NTSC-K Serial = SCKA-20099 Name = Persona 3 Region = NTSC-K -VuClipFlagHack = 1 --------------------------------------------- Serial = SCKA-20101 Name = Seiken Densetsu 4 (Dawn of Mana) @@ -3139,7 +3137,6 @@ Region = NTSC-K Serial = SCKA-20109 Name = Persona 3 FES [Independent Starting Version] Region = NTSC-K -VuClipFlagHack = 1 MemCardFilter = SCKA-20109/SCKA-20099 --------------------------------------------- Serial = SCKA-20114 @@ -3158,7 +3155,6 @@ Serial = SCKA-20132 Name = Shin Megami Tensei: Persona 4 Region = NTSC-K Compat = 5 -VuClipFlagHack = 1 --------------------------------------------- Serial = SCKA-24008 Name = SOCOM - U.S. Navy SEALs @@ -17695,7 +17691,6 @@ Serial = SLES-55018 Name = Shin Megami Tensei: Persona 3 Region = PAL-E Compat = 5 -VuClipFlagHack = 1 --------------------------------------------- Serial = SLES-55019 Name = Ratchet & Clank - Size Matters @@ -18081,7 +18076,6 @@ Compat = 5 Serial = SLES-55354 Name = Shin Megami Tensei: Persona 3 FES Region = PAL-E -VuClipFlagHack = 1 MemCardFilter = SLES-55354/SLES-55018 --------------------------------------------- Serial = SLES-55355 @@ -18233,7 +18227,6 @@ Region = PAL-M5 Serial = SLES-55474 Name = Shin Megami Tensei: Persona 4 Region = PAL-E -VuClipFlagHack = 1 --------------------------------------------- Serial = SLES-55476 Name = Scooby-Doo! First Frights @@ -27428,7 +27421,6 @@ Serial = SLPM-66445 Name = Persona 3 Region = NTSC-J Compat = 5 -VuClipFlagHack = 1 --------------------------------------------- Serial = SLPM-66446 Name = Beat Down - Fists of Vengeance [CapKore] @@ -28359,13 +28351,11 @@ Region = NTSC-J Serial = SLPM-66689 Name = Persona 3 FES [Append Edition] Region = NTSC-J -VuClipFlagHack = 1 MemCardFilter = SLPM-66445/SLPM-66689/SLPM-66690 --------------------------------------------- Serial = SLPM-66690 Name = Persona 3 FES [Independent Starting Version] Region = NTSC-J -VuClipFlagHack = 1 MemCardFilter = SLPM-66445/SLPM-66689/SLPM-66690 --------------------------------------------- Serial = SLPM-66691 @@ -29447,7 +29437,6 @@ Serial = SLPM-66978 Name = Persona 4 [Konami-style Special Edition] Region = NTSC-J Compat = 5 -VuClipFlagHack = 1 --------------------------------------------- Serial = SLPM-66987 Name = Kowloon Youma Gakuen Ki [Best Version] @@ -29923,12 +29912,10 @@ Serial = SLPM-74277 Name = Persona 3 FES [PlayStation 2 The Best] Region = NTSC-J Compat = 5 -VuClipFlagHack = 1 --------------------------------------------- Serial = SLPM-74278 Name = Persona 4 [PlayStation 2 The Best] Region = NTSC-J -VuClipFlagHack = 1 --------------------------------------------- Serial = SLPM-74286 Name = Shin Sangoku Musou 5 Special [PlayStation 2 The Best] @@ -42478,7 +42465,6 @@ Serial = SLUS-21569 Name = Shin Megami Tensei: Persona 3 Region = NTSC-U Compat = 5 -VuClipFlagHack = 1 --------------------------------------------- Serial = SLUS-21570 Name = Heatseeker @@ -42748,7 +42734,6 @@ Serial = SLUS-21621 Name = Shin Megami Tensei: Persona 3 FES Region = NTSC-U Compat = 5 -VuClipFlagHack = 1 MemCardFilter = SLUS-21621/SLUS-21569 --------------------------------------------- Serial = SLUS-21622 @@ -43525,13 +43510,11 @@ Serial = SLUS-21782 Name = Shin Megami Tensei: Persona 4 Region = NTSC-U Compat = 5 -VuClipFlagHack = 1 --------------------------------------------- Serial = SLUS-21782B Name = Shin Megami Tensei: Persona 4 Region = NTSC-U Compat = 5 -VuClipFlagHack = 1 --------------------------------------------- Serial = SLUS-21783 Name = Space Chimps diff --git a/build.sh b/build.sh index 8c04d5d3a6..0791cd9656 100755 --- a/build.sh +++ b/build.sh @@ -126,7 +126,7 @@ run_clangtidy() compile_json=compile_commands.json cpp_list=cpp_file.txt summary=clang_tidy_summary.txt - grep '"file"' $compile_json | sed -e 's/"//g' -e 's/^\s*file\s*:\s*//' | grep -v "aVUzerorec.S" | sort -u > $cpp_list + grep '"file"' $compile_json | sed -e 's/"//g' -e 's/^\s*file\s*:\s*//' | sort -u > $cpp_list # EXAMPLE # diff --git a/cmake/BuildParameters.cmake b/cmake/BuildParameters.cmake index 8c41725623..e1ca1ee922 100644 --- a/cmake/BuildParameters.cmake +++ b/cmake/BuildParameters.cmake @@ -20,7 +20,6 @@ #------------------------------------------------------------------------------- # Misc option #------------------------------------------------------------------------------- -option(DISABLE_SVU "Disable superVU (don't use it)") option(DISABLE_BUILD_DATE "Disable including the binary compile date") if(DISABLE_BUILD_DATE OR openSUSE) @@ -222,11 +221,6 @@ if(${PCSX2_TARGET_ARCHITECTURES} MATCHES "i386") endif() endif() - # Don't bother porting SuperVU - if (NOT Linux) - set(DISABLE_SVU TRUE) - endif() - add_definitions(-D_ARCH_32=1 -D_M_X86=1 -D_M_X86_32=1) set(_ARCH_32 1) set(_M_X86 1) @@ -235,9 +229,6 @@ elseif(${PCSX2_TARGET_ARCHITECTURES} MATCHES "x86_64") # x86_64 requires -fPIC set(CMAKE_POSITION_INDEPENDENT_CODE ON) - # SuperVU will not be ported - set(DISABLE_SVU TRUE) - if(NOT DEFINED ARCH_FLAG) if (DISABLE_ADVANCE_SIMD) if (USE_ICC) @@ -309,9 +300,6 @@ option(USE_PGO_OPTIMIZE "Enable PGO optimization (use profile)") # Note1: Builtin strcmp/memcmp was proved to be slower on Mesa than stdlib version. # Note2: float operation SSE is impacted by the PCSX2 SSE configuration. In particular, flush to zero denormal. set(COMMON_FLAG "-pipe -fvisibility=hidden -pthread -fno-builtin-strcmp -fno-builtin-memcmp -mfpmath=sse") -if (DISABLE_SVU) - set(COMMON_FLAG "${COMMON_FLAG} -DDISABLE_SVU") -endif() if(USE_VTUNE) set(COMMON_FLAG "${COMMON_FLAG} -DENABLE_VTUNE") diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 2c7cda13a2..88673bdb2c 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -153,7 +153,6 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h - NakedAsm.h Patch.h PathDefs.h Plugins.h @@ -562,14 +561,6 @@ set(pcsx2x86Sources x86/newVif_Unpack.cpp x86/newVif_UnpackSSE.cpp ) -if (NOT DISABLE_SVU) - set(pcsx2x86Sources ${pcsx2x86Sources} - x86/sVU_Lower.cpp - x86/sVU_Micro.cpp - x86/sVU_Upper.cpp - x86/sVU_zerorec.cpp - ) -endif() # x86 headers set(pcsx2x86Headers @@ -610,20 +601,8 @@ set(pcsx2x86Headers x86/newVif_HashBucket.h x86/newVif_UnpackSSE.h x86/R5900_Profiler.h - x86/sVU_Micro.h - x86/sVU_zerorec.h ) -# collect .S files -if (NOT DISABLE_SVU) - set(pcsx2SSources x86/aVUzerorec.S) - # change language of .S-files to c++ - set_source_files_properties(${pcsx2SSources} PROPERTIES LANGUAGE CXX) -else() - set(pcsx2SSources "") -endif() - - # common Sources set(Common ${pcsx2Sources} @@ -647,8 +626,7 @@ set(Common ${pcsx2x86Sources} ${pcsx2x86Headers} ${pcsx2ZipToolsSources} - ${pcsx2ZipToolsHeaders} - ${pcsx2SSources}) + ${pcsx2ZipToolsHeaders}) # platform sources # Linux diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 4c5302140f..614947b963 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -43,7 +43,6 @@ enum GamefixId GamefixId_FIRST = 0, Fix_VuAddSub = GamefixId_FIRST, - Fix_VuClipFlag, Fix_FpuCompare, Fix_FpuMultiply, Fix_FpuNegDiv, @@ -347,7 +346,6 @@ struct Pcsx2Config BITFIELD32() bool VuAddSubHack : 1, // Tri-ace games, they use an encryption algorithm that requires VU ADDI opcode to be bit-accurate. - VuClipFlagHack : 1, // Persona games, maybe others. It's to do with the VU clip flag (again). FpuCompareHack : 1, // Digimon Rumble Arena 2, fixes spinning/hanging on intro-menu. FpuMulHack : 1, // Tales of Destiny hangs. FpuNegDivHack : 1, // Gundam games messed up camera-view. @@ -537,7 +535,6 @@ TraceLogFilters& SetTraceConfig(); //------------ SPECIAL GAME FIXES!!! --------------- #define CHECK_VUADDSUBHACK (EmuConfig.Gamefixes.VuAddSubHack) // Special Fix for Tri-ace games, they use an encryption algorithm that requires VU addi opcode to be bit-accurate. #define CHECK_FPUCOMPAREHACK (EmuConfig.Gamefixes.FpuCompareHack) // Special Fix for Digimon Rumble Arena 2, fixes spinning/hanging on intro-menu. -#define CHECK_VUCLIPFLAGHACK (EmuConfig.Gamefixes.VuClipFlagHack) // Special Fix for Persona games, maybe others. It's to do with the VU clip flag (again). #define CHECK_FPUMULHACK (EmuConfig.Gamefixes.FpuMulHack) // Special Fix for Tales of Destiny hangs. #define CHECK_FPUNEGDIVHACK (EmuConfig.Gamefixes.FpuNegDivHack) // Special Fix for Gundam games messed up camera-view. #define CHECK_XGKICKHACK (EmuConfig.Gamefixes.XgKickHack) // Special Fix for Erementar Gerad, adds more delay to VU XGkick instructions. Corrects the color of some graphics. diff --git a/pcsx2/NakedAsm.h b/pcsx2/NakedAsm.h deleted file mode 100644 index b8cd585c40..0000000000 --- a/pcsx2/NakedAsm.h +++ /dev/null @@ -1,30 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - - // Externs for various routines that are defined in assembly files on Linux. -#ifndef NAKED_ASM_H -#define NAKED_ASM_H - -#ifdef __POSIX__ - -extern "C" -{ - // aVUzerorec.S - void* SuperVUGetProgram(u32 startpc, int vuindex); - void SuperVUCleanupProgram(u32 startpc, int vuindex); -} -#endif - -#endif diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 8138720158..76d167f77b 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -316,7 +316,6 @@ void Pcsx2Config::GamefixOptions::Set( GamefixId id, bool enabled ) switch(id) { case Fix_VuAddSub: VuAddSubHack = enabled; break; - case Fix_VuClipFlag: VuClipFlagHack = enabled; break; case Fix_FpuCompare: FpuCompareHack = enabled; break; case Fix_FpuMultiply: FpuMulHack = enabled; break; case Fix_FpuNegDiv: FpuNegDivHack = enabled; break; @@ -343,7 +342,6 @@ bool Pcsx2Config::GamefixOptions::Get( GamefixId id ) const switch(id) { case Fix_VuAddSub: return VuAddSubHack; - case Fix_VuClipFlag: return VuClipFlagHack; case Fix_FpuCompare: return FpuCompareHack; case Fix_FpuMultiply: return FpuMulHack; case Fix_FpuNegDiv: return FpuNegDivHack; @@ -370,7 +368,6 @@ void Pcsx2Config::GamefixOptions::LoadSave( IniInterface& ini ) ScopedIniGroup path( ini, L"Gamefixes" ); IniBitBool( VuAddSubHack ); - IniBitBool( VuClipFlagHack ); IniBitBool( FpuCompareHack ); IniBitBool( FpuMulHack ); IniBitBool( FpuNegDivHack ); diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp index 7a950a0205..5331bb4084 100644 --- a/pcsx2/System.cpp +++ b/pcsx2/System.cpp @@ -101,8 +101,7 @@ RecompiledCodeReserve& RecompiledCodeReserve::SetProfilerName( const wxString& s return *this; } -// This error message is shared by R5900, R3000, and microVU recompilers. It is not used by the -// SuperVU recompiler, since it has its own customized message. +// This error message is shared by R5900, R3000, and microVU recompilers. void RecompiledCodeReserve::ThrowIfNotOk() const { if (IsOk()) return; @@ -334,12 +333,6 @@ CpuInitializer< CpuType >::~CpuInitializer() class CpuInitializerSet { public: -#ifndef DISABLE_SVU - // Note: Allocate sVU first -- it's the most picky. - CpuInitializer superVU0; - CpuInitializer superVU1; -#endif - CpuInitializer microVU0; CpuInitializer microVU1; @@ -501,14 +494,6 @@ bool SysCpuProviderPack::IsRecAvailable_MicroVU1() const { return CpuProviders-> BaseException* SysCpuProviderPack::GetException_MicroVU0() const { return CpuProviders->microVU0.ExThrown.get(); } BaseException* SysCpuProviderPack::GetException_MicroVU1() const { return CpuProviders->microVU1.ExThrown.get(); } -#ifndef DISABLE_SVU -bool SysCpuProviderPack::IsRecAvailable_SuperVU0() const { return CpuProviders->superVU0.IsAvailable(); } -bool SysCpuProviderPack::IsRecAvailable_SuperVU1() const { return CpuProviders->superVU1.IsAvailable(); } -BaseException* SysCpuProviderPack::GetException_SuperVU0() const { return CpuProviders->superVU0.ExThrown.get(); } -BaseException* SysCpuProviderPack::GetException_SuperVU1() const { return CpuProviders->superVU1.ExThrown.get(); } -#endif - - void SysCpuProviderPack::CleanupMess() noexcept { try @@ -534,15 +519,8 @@ bool SysCpuProviderPack::HadSomeFailures( const Pcsx2Config::RecompilerOptions& { return (recOpts.EnableEE && !IsRecAvailable_EE()) || (recOpts.EnableIOP && !IsRecAvailable_IOP()) || -#ifndef DISABLE_SVU - (recOpts.EnableVU0 && recOpts.UseMicroVU0 && !IsRecAvailable_MicroVU0()) || - (recOpts.EnableVU1 && recOpts.UseMicroVU0 && !IsRecAvailable_MicroVU1()) || - (recOpts.EnableVU0 && !recOpts.UseMicroVU0 && !IsRecAvailable_SuperVU0()) || - (recOpts.EnableVU1 && !recOpts.UseMicroVU1 && !IsRecAvailable_SuperVU1()) -#else (recOpts.EnableVU0 && !IsRecAvailable_MicroVU0()) || (recOpts.EnableVU1 && !IsRecAvailable_MicroVU1()) -#endif ; } @@ -559,32 +537,12 @@ void SysCpuProviderPack::ApplyConfig() const CpuVU1 = CpuProviders->interpVU1; if( EmuConfig.Cpu.Recompiler.EnableVU0 ) -#ifndef DISABLE_SVU - CpuVU0 = EmuConfig.Cpu.Recompiler.UseMicroVU0 ? (BaseVUmicroCPU*)CpuProviders->microVU0 : (BaseVUmicroCPU*)CpuProviders->superVU0; -#else CpuVU0 = (BaseVUmicroCPU*)CpuProviders->microVU0; -#endif if( EmuConfig.Cpu.Recompiler.EnableVU1 ) -#ifndef DISABLE_SVU - CpuVU1 = EmuConfig.Cpu.Recompiler.UseMicroVU1 ? (BaseVUmicroCPU*)CpuProviders->microVU1 : (BaseVUmicroCPU*)CpuProviders->superVU1; -#else CpuVU1 = (BaseVUmicroCPU*)CpuProviders->microVU1; -#endif } -#ifndef DISABLE_SVU -// This is a semi-hacky function for convenience -BaseVUmicroCPU* SysCpuProviderPack::getVUprovider(int whichProvider, int vuIndex) const { - switch (whichProvider) { - case 0: return vuIndex ? (BaseVUmicroCPU*)CpuProviders->interpVU1 : (BaseVUmicroCPU*)CpuProviders->interpVU0; - case 1: return vuIndex ? (BaseVUmicroCPU*)CpuProviders->superVU1 : (BaseVUmicroCPU*)CpuProviders->superVU0; - case 2: return vuIndex ? (BaseVUmicroCPU*)CpuProviders->microVU1 : (BaseVUmicroCPU*)CpuProviders->microVU0; - } - return NULL; -} -#endif - // Resets all PS2 cpu execution caches, which does not affect that actual PS2 state/condition. // This can be called at any time outside the context of a Cpu->Execute() block without // bad things happening (recompilers will slow down for a brief moment since rec code blocks diff --git a/pcsx2/System.h b/pcsx2/System.h index 0a3ef3b044..6875f6cee9 100644 --- a/pcsx2/System.h +++ b/pcsx2/System.h @@ -41,11 +41,6 @@ class RecompiledCodeReserve; namespace HostMemoryMap { - // superVU is OLD SCHOOL, and it requires its allocation to be in the lower 256mb - // of the virtual memory space. (8mb each) - static const uptr sVU0rec = _256mb - (_8mb*3); - static const uptr sVU1rec = _256mb - (_8mb*2); - #ifdef ASAN_WORKAROUND // address sanitizer uses a shadow memory to monitor the state of the memory. Shadow is computed // as S = (M >> 3) + 0x20000000. So PCSX2 can't use 0x20000000 to 0x3FFFFFFF... Just add another @@ -141,9 +136,6 @@ public: virtual ~SysCpuProviderPack(); void ApplyConfig() const; -#ifndef DISABLE_SVU - BaseVUmicroCPU* getVUprovider(int whichProvider, int vuIndex) const; -#endif bool HadSomeFailures( const Pcsx2Config::RecompilerOptions& recOpts ) const; @@ -158,11 +150,6 @@ public: BaseException* GetException_MicroVU0() const; BaseException* GetException_MicroVU1() const; - bool IsRecAvailable_SuperVU0() const; - bool IsRecAvailable_SuperVU1() const; - BaseException* GetException_SuperVU0() const; - BaseException* GetException_SuperVU1() const; - protected: void CleanupMess() noexcept; }; diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 360c4ac738..f668064516 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -131,7 +131,7 @@ struct __aligned16 VURegs { u32 cycle; u32 flags; - // Current opcode being interpreted or recompiled (this var is used by Interps and superVU + // Current opcode being interpreted or recompiled (this var is used by Interps // but not microVU. Would like to have it local to their respective classes... someday) u32 code; @@ -142,7 +142,7 @@ struct __aligned16 VURegs { u32 delaybranchpc; bool takedelaybranch; - // MAC/Status flags -- these are used by interpreters and superVU, but are kind of hacky + // MAC/Status flags -- these are used by interpreters but are kind of hacky // and shouldn't be relied on for any useful/valid info. Would like to move them out of // this struct eventually. u32 macflag; diff --git a/pcsx2/VUmicro.h b/pcsx2/VUmicro.h index 240dd9bb26..8c6104ae91 100644 --- a/pcsx2/VUmicro.h +++ b/pcsx2/VUmicro.h @@ -254,47 +254,6 @@ public: void SetCacheReserve( uint reserveInMegs ) const; }; -// -------------------------------------------------------------------------------------- -// recSuperVU0 / recSuperVU1 -// -------------------------------------------------------------------------------------- - -class recSuperVU0 : public BaseVUmicroCPU -{ -public: - recSuperVU0(); - - const char* GetShortName() const { return "sVU0"; } - wxString GetLongName() const { return L"SuperVU0 Recompiler"; } - - void Reserve(); - void Shutdown() noexcept; - void Reset(); - void Execute(u32 cycles); - void Clear(u32 Addr, u32 Size); - - uint GetCacheReserve() const; - void SetCacheReserve( uint reserveInMegs ) const; -}; - -class recSuperVU1 : public BaseVUmicroCPU -{ -public: - recSuperVU1(); - - const char* GetShortName() const { return "sVU1"; } - wxString GetLongName() const { return L"SuperVU1 Recompiler"; } - - void Reserve(); - void Shutdown() noexcept; - void Reset(); - void Execute(u32 cycles); - void Clear(u32 Addr, u32 Size); - void ResumeXGkick() { Console.Warning("ResumeXGkick() Not implemented!"); } - - uint GetCacheReserve() const; - void SetCacheReserve( uint reserveInMegs ) const; -}; - extern BaseVUmicroCPU* CpuVU0; extern BaseVUmicroCPU* CpuVU1; diff --git a/pcsx2/gui/AppCoreThread.cpp b/pcsx2/gui/AppCoreThread.cpp index 1d1684ba53..260abb4cbd 100644 --- a/pcsx2/gui/AppCoreThread.cpp +++ b/pcsx2/gui/AppCoreThread.cpp @@ -283,10 +283,6 @@ static int loadGameSettings(Pcsx2Config& dest, const Game_Data& game) { if (game.keyExists(key)) { - // VuClipFlag hack is only used by the SuperVu1 Recompiler. - if (id == Fix_VuClipFlag && (!g_Conf->EmuOptions.Cpu.Recompiler.EnableVU1 || g_Conf->EmuOptions.Cpu.Recompiler.UseMicroVU1)) - continue; - bool enableIt = game.getBool(key); dest.Gamefixes.Set(id, enableIt); PatchesCon->WriteLn(L"(GameDB) %s Gamefix: " + key, enableIt ? L"Enabled" : L"Disabled"); diff --git a/pcsx2/gui/AppInit.cpp b/pcsx2/gui/AppInit.cpp index a76b2ad16e..a39c9a586d 100644 --- a/pcsx2/gui/AppInit.cpp +++ b/pcsx2/gui/AppInit.cpp @@ -184,40 +184,16 @@ void Pcsx2App::AllocateCoreStuffs() { scrollableTextArea->AppendText( L"* microVU0\n\t" + ex->FormatDisplayMessage() + L"\n\n" ); recOps.UseMicroVU0 = false; -#ifndef DISABLE_SVU - recOps.EnableVU0 = recOps.EnableVU0 && m_CpuProviders->IsRecAvailable_SuperVU0(); -#else recOps.EnableVU0 = false; -#endif } if( BaseException* ex = m_CpuProviders->GetException_MicroVU1() ) { scrollableTextArea->AppendText( L"* microVU1\n\t" + ex->FormatDisplayMessage() + L"\n\n" ); recOps.UseMicroVU1 = false; -#ifndef DISABLE_SVU - recOps.EnableVU1 = recOps.EnableVU1 && m_CpuProviders->IsRecAvailable_SuperVU1(); -#else recOps.EnableVU1 = false; -#endif } -#ifndef DISABLE_SVU - if( BaseException* ex = m_CpuProviders->GetException_SuperVU0() ) - { - scrollableTextArea->AppendText( L"* SuperVU0\n\t" + ex->FormatDisplayMessage() + L"\n\n" ); - recOps.UseMicroVU0 = m_CpuProviders->IsRecAvailable_MicroVU0(); - recOps.EnableVU0 = recOps.EnableVU0 && recOps.UseMicroVU0; - } - - if( BaseException* ex = m_CpuProviders->GetException_SuperVU1() ) - { - scrollableTextArea->AppendText( L"* SuperVU1\n\t" + ex->FormatDisplayMessage() + L"\n\n" ); - recOps.UseMicroVU1 = m_CpuProviders->IsRecAvailable_MicroVU1(); - recOps.EnableVU1 = recOps.EnableVU1 && recOps.UseMicroVU1; - } -#endif - exconf += exconf.Heading(pxE( L"Note: Recompilers are not necessary for PCSX2 to run, however they typically improve emulation speed substantially. You may have to manually re-enable the recompilers listed above, if you resolve the errors." )); pxIssueConfirmation( exconf, MsgButtons().OK() ); diff --git a/pcsx2/gui/Panels/CpuPanel.cpp b/pcsx2/gui/Panels/CpuPanel.cpp index 9b4459ce03..115e56f1cc 100644 --- a/pcsx2/gui/Panels/CpuPanel.cpp +++ b/pcsx2/gui/Panels/CpuPanel.cpp @@ -186,12 +186,7 @@ Panels::CpuPanelVU::CpuPanelVU( wxWindow* parent ) .SetToolTip(_("Vector Unit Interpreter. Slow and not very compatible. Only use for diagnostics.")), RadioPanelItem(_("microVU Recompiler")) - .SetToolTip(_("New Vector Unit recompiler with much improved compatibility. Recommended.")), - -#ifndef DISABLE_SVU - RadioPanelItem(_("superVU Recompiler [legacy]")) - .SetToolTip(_("Useful for diagnosing bugs or clamping issues in the new mVU recompiler.")) -#endif + .SetToolTip(_("New Vector Unit recompiler with much improved compatibility. Recommended.")) }; m_panel_VU0 = &(new pxRadioPanel( this, tbl_CpuTypes_VU )) ->SetDefaultItem( 1 ); @@ -280,11 +275,6 @@ void Panels::CpuPanelVU::Apply() Pcsx2Config::RecompilerOptions& recOps( g_Conf->EmuOptions.Cpu.Recompiler ); recOps.EnableVU0 = m_panel_VU0->GetSelection() > 0; recOps.EnableVU1 = m_panel_VU1->GetSelection() > 0; - -#ifndef DISABLE_SVU - recOps.UseMicroVU0 = m_panel_VU0->GetSelection() == 1; - recOps.UseMicroVU1 = m_panel_VU1->GetSelection() == 1; -#endif } void Panels::CpuPanelVU::AppStatusEvent_OnSettingsApplied() @@ -295,24 +285,8 @@ void Panels::CpuPanelVU::AppStatusEvent_OnSettingsApplied() void Panels::CpuPanelVU::ApplyConfigToGui( AppConfig& configToApply, int flags ) { Pcsx2Config::RecompilerOptions& recOps( configToApply.EmuOptions.Cpu.Recompiler ); -#ifndef DISABLE_SVU - if( recOps.UseMicroVU0 ) - m_panel_VU0->SetSelection( recOps.EnableVU0 ? 1 : 0 ); - else - m_panel_VU0->SetSelection( recOps.EnableVU0 ? 2 : 0 ); -#else m_panel_VU0->SetSelection( recOps.EnableVU0 ? 1 : 0 ); -#endif - -#ifndef DISABLE_SVU - if( recOps.UseMicroVU1 ) - m_panel_VU1->SetSelection( recOps.EnableVU1 ? 1 : 0 ); - else - m_panel_VU1->SetSelection( recOps.EnableVU1 ? 2 : 0 ); -#else m_panel_VU1->SetSelection( recOps.EnableVU1 ? 1 : 0 ); -#endif - m_panel_VU0->Enable(!configToApply.EnablePresets); m_panel_VU1->Enable(!configToApply.EnablePresets); m_button_RestoreDefaults->Enable(!configToApply.EnablePresets); diff --git a/pcsx2/gui/Panels/GameFixesPanel.cpp b/pcsx2/gui/Panels/GameFixesPanel.cpp index fc8b0d3410..3368c43b7d 100644 --- a/pcsx2/gui/Panels/GameFixesPanel.cpp +++ b/pcsx2/gui/Panels/GameFixesPanel.cpp @@ -40,10 +40,6 @@ Panels::GameFixesPanel::GameFixesPanel( wxWindow* parent ) _("VU Add Hack - Fixes Tri-Ace games boot crash."), _("Games that need this hack to boot:\n * Star Ocean 3\n * Radiata Stories\n * Valkyrie Profile 2") }, - { - _("VU Clip Flag Hack - For Persona games (SuperVU recompiler only!)"), - wxEmptyString - }, { _("FPU Compare Hack - For Digimon Rumble Arena 2."), wxEmptyString diff --git a/pcsx2/gui/Panels/SpeedhacksPanel.cpp b/pcsx2/gui/Panels/SpeedhacksPanel.cpp index 2d57b7dfb2..6c51419ff2 100644 --- a/pcsx2/gui/Panels/SpeedhacksPanel.cpp +++ b/pcsx2/gui/Panels/SpeedhacksPanel.cpp @@ -176,7 +176,7 @@ Panels::SpeedHacksPanel::SpeedHacksPanel( wxWindow* parent ) m_check_vuThread = new pxCheckBox( vuHacksPanel, _("MTVU (Multi-Threaded microVU1)"), _("Good Speedup and High Compatibility; may cause hanging... [Recommended if 3+ cores]") ); - m_check_vuFlagHack->SetToolTip( pxEt( L"Updates Status Flags only on blocks which will read them, instead of all the time. This is safe most of the time, and Super VU does something similar by default." + m_check_vuFlagHack->SetToolTip( pxEt( L"Updates Status Flags only on blocks which will read them, instead of all the time. This is safe most of the time, and Super VU did something similar by default." ) ); m_check_vuThread->SetToolTip( pxEt( L"Runs VU1 on its own thread (microVU1-only). Generally a speedup on CPUs with 3 or more cores. This is safe for most games, but a few games are incompatible and may hang. In the case of GS limited games, it may be a slowdown (especially on dual core CPUs)." diff --git a/pcsx2/windows/VCprojects/pcsx2.vcxproj b/pcsx2/windows/VCprojects/pcsx2.vcxproj index f8671a2de3..5ea0fb5a9b 100644 --- a/pcsx2/windows/VCprojects/pcsx2.vcxproj +++ b/pcsx2/windows/VCprojects/pcsx2.vcxproj @@ -57,7 +57,6 @@ $(ProjectRootDir)/gui;%(AdditionalIncludeDirectories) - DISABLE_SVU;%(PreprocessorDefinitions) Async Use PrecompiledHeader.h @@ -70,7 +69,6 @@ - @@ -230,18 +228,6 @@ - - true - - - true - - - true - - - true - @@ -462,7 +448,6 @@ - @@ -482,8 +467,6 @@ - - diff --git a/pcsx2/windows/VCprojects/pcsx2.vcxproj.filters b/pcsx2/windows/VCprojects/pcsx2.vcxproj.filters index 3b650d5b07..e8603ce7e4 100644 --- a/pcsx2/windows/VCprojects/pcsx2.vcxproj.filters +++ b/pcsx2/windows/VCprojects/pcsx2.vcxproj.filters @@ -162,9 +162,6 @@ Docs - - System\Linux - System\Ps2\EmotionEngine\VU\Dynarec\microVU @@ -296,18 +293,6 @@ System\Ps2\EmotionEngine\VU\Dynarec\microVU - - System\Ps2\EmotionEngine\VU\Dynarec\Super VU - - - System\Ps2\EmotionEngine\VU\Dynarec\Super VU - - - System\Ps2\EmotionEngine\VU\Dynarec\Super VU - - - System\Ps2\EmotionEngine\VU\Dynarec\Super VU - System\Ps2\EmotionEngine\VU\Interpreter @@ -918,9 +903,6 @@ System\Include - - System\Include - System\Include @@ -972,12 +954,6 @@ System\Ps2\EmotionEngine\VU\Dynarec\microVU - - System\Ps2\EmotionEngine\VU\Dynarec\Super VU - - - System\Ps2\EmotionEngine\VU\Dynarec\Super VU - System\Ps2\EmotionEngine\VU\Interpreter diff --git a/pcsx2/x86/aVUzerorec.S b/pcsx2/x86/aVUzerorec.S deleted file mode 100644 index 37cd85b658..0000000000 --- a/pcsx2/x86/aVUzerorec.S +++ /dev/null @@ -1,78 +0,0 @@ -// iVUzerorec.cpp assembly routines -// zerofrog(@gmail.com) - -#ifdef __APPLE__ - #define svudispfntemp _svudispfntemp - #define s_TotalVUCycles _s_TotalVUCycles - #define s_callstack _s_callstack - #define s_vu1ebp _s_vu1ebp - #define s_vu1esp _s_vu1esp - #define s_vu1esi _s_vu1esi - #define s_vuedi _s_vuedi - #define s_vuebx _s_vuebx - #define s_saveebx _s_saveebx - #define s_saveecx _s_saveecx - #define s_saveedx _s_saveedx - #define s_saveesi _s_saveesi - #define s_saveedi _s_saveedi - #define s_saveebp _s_saveebp - #define s_writeQ _s_writeQ - #define s_writeP _s_writeP - #define g_curdebugvu _g_curdebugvu - #define SuperVUGetProgram _SuperVUGetProgram - #define SuperVUCleanupProgram _SuperVUCleanupProgram - #define SuperVUExecuteProgram _SuperVUExecuteProgram - #define SuperVUEndProgram _SuperVUEndProgram - #define g_sseVUMXCSR _g_sseVUMXCSR - #define g_sseMXCSR _g_sseMXCSR -#endif - - -.intel_syntax noprefix -.extern s_TotalVUCycles -.extern s_callstack -.extern s_vu1esp -.extern s_writeQ -.extern s_writeP -.extern g_curdebugvu -.extern SuperVUGetProgram -.extern SuperVUCleanupProgram -.extern g_sseVUMXCSR -.extern g_sseMXCSR - -// SuperVUExecuteProgram(u32 startpc, int vuindex) -.globl SuperVUExecuteProgram -SuperVUExecuteProgram: - mov eax, [esp] - mov dword ptr s_TotalVUCycles, 0 - add esp, 4 - mov dword ptr [s_callstack], eax - call SuperVUGetProgram - mov s_vu1esi, esi - mov s_vuedi, edi - mov s_vuebx, ebx - - mov s_vu1esp, esp - and esp, -16 // align stack for GCC compilance - - ldmxcsr g_sseVUMXCSR - mov dword ptr s_writeQ, 0xffffffff - mov dword ptr s_writeP, 0xffffffff - jmp eax - -.globl SuperVUEndProgram -SuperVUEndProgram: - // restore cpu state - ldmxcsr g_sseMXCSR - mov esi, s_vu1esi - mov edi, s_vuedi - mov ebx, s_vuebx - - mov esp, s_vu1esp - - call SuperVUCleanupProgram - jmp [s_callstack] // so returns correctly - -#if defined(__POSIX__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index d06e4b75bf..94a0ec3921 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -167,62 +167,6 @@ int _allocTempXMMreg(XMMSSEType type, int xmmreg) { return xmmreg; } -#ifndef DISABLE_SVU -int _allocVFtoXMMreg(VURegs *VU, int xmmreg, int vfreg, int mode) { - int i; - int readfromreg = -1; - - for (i=0; (uint)i= 0 ) { - // requested specific reg, so return that instead - if( i != xmmreg ) { - if( xmmregs[i].mode & MODE_READ ) readfromreg = i; - //if( xmmregs[i].mode & MODE_WRITE ) mode |= MODE_WRITE; - mode |= xmmregs[i].mode&MODE_WRITE; - xmmregs[i].inuse = 0; - break; - } - } - - xmmregs[i].needed = 1; - - if( !(xmmregs[i].mode & MODE_READ) && (mode&MODE_READ) ) { - xMOVAPS(xRegisterSSE(i), ptr[(void*)(VU_VFx_ADDR(vfreg))]); - xmmregs[i].mode |= MODE_READ; - } - - g_xmmtypes[i] = XMMT_FPS; - xmmregs[i].counter = g_xmmAllocCounter++; // update counter - xmmregs[i].mode|= mode; - return i; - } - - if (xmmreg == -1) - xmmreg = _getFreeXMMreg(); - else - _freeXMMreg(xmmreg); - - g_xmmtypes[xmmreg] = XMMT_FPS; - xmmregs[xmmreg].inuse = 1; - xmmregs[xmmreg].type = XMMTYPE_VFREG; - xmmregs[xmmreg].reg = vfreg; - xmmregs[xmmreg].mode = mode; - xmmregs[xmmreg].needed = 1; - xmmregs[xmmreg].VU = XMM_CONV_VU(VU); - xmmregs[xmmreg].counter = g_xmmAllocCounter++; - if (mode & MODE_READ) { - if( readfromreg >= 0 ) xMOVAPS(xRegisterSSE(xmmreg), xRegisterSSE(readfromreg)); - else xMOVAPS(xRegisterSSE(xmmreg), ptr[(void*)(VU_VFx_ADDR(xmmregs[xmmreg].reg))]); - } - - return xmmreg; -} -#endif - // Search register "reg" of type "type" which is inuse // If register doesn't have the read flag but mode is read // then populate the register from the memory @@ -258,65 +202,6 @@ int _checkXMMreg(int type, int reg, int mode) return -1; } -#ifndef DISABLE_SVU -int _allocACCtoXMMreg(VURegs *VU, int xmmreg, int mode) { - int i; - int readfromreg = -1; - - for (i=0; (uint)i= 0 ) { - // requested specific reg, so return that instead - if( i != xmmreg ) { - if( xmmregs[i].mode & MODE_READ ) readfromreg = i; - //if( xmmregs[i].mode & MODE_WRITE ) mode |= MODE_WRITE; - mode |= xmmregs[i].mode&MODE_WRITE; - xmmregs[i].inuse = 0; - break; - } - } - - if( !(xmmregs[i].mode & MODE_READ) && (mode&MODE_READ)) { - xMOVAPS(xRegisterSSE(i), ptr[(void*)(VU_ACCx_ADDR)]); - xmmregs[i].mode |= MODE_READ; - } - - g_xmmtypes[i] = XMMT_FPS; - xmmregs[i].counter = g_xmmAllocCounter++; // update counter - xmmregs[i].needed = 1; - xmmregs[i].mode|= mode; - return i; - } - - if (xmmreg == -1) - xmmreg = _getFreeXMMreg(); - else - _freeXMMreg(xmmreg); - - g_xmmtypes[xmmreg] = XMMT_FPS; - xmmregs[xmmreg].inuse = 1; - xmmregs[xmmreg].type = XMMTYPE_ACC; - xmmregs[xmmreg].mode = mode; - xmmregs[xmmreg].needed = 1; - xmmregs[xmmreg].VU = XMM_CONV_VU(VU); - xmmregs[xmmreg].counter = g_xmmAllocCounter++; - xmmregs[xmmreg].reg = 0; - - if (mode & MODE_READ) - { - if( readfromreg >= 0 ) - xMOVAPS(xRegisterSSE(xmmreg), xRegisterSSE(readfromreg)); - else - xMOVAPS(xRegisterSSE(xmmreg), ptr[(void*)(VU_ACCx_ADDR)]); - } - - return xmmreg; -} -#endif - // Fully allocate a FPU register // first trial: // search an already reserved reg then populate it if we read it @@ -479,21 +364,6 @@ int _allocFPACCtoXMMreg(int xmmreg, int mode) return xmmreg; } -#ifndef DISABLE_SVU -void _addNeededVFtoXMMreg(int vfreg) { - int i; - - for (i=0; (uint)i 0) { - for(i = 0; i < ArraySize(pinst->writeType); ++i) { - if( pinst->writeType[i] == xmmtype && pinst->writeReg[i] == reg ) - return inst; - } - for(i = 0; i < ArraySize(pinst->readType); ++i) { - if( pinst->readType[i] == xmmtype && pinst->readReg[i] == reg ) - return inst; - } - ++inst; - pinst++; - } - - return 0; -} -#endif - void _recFillRegister(EEINST& pinst, int type, int reg, int write) { u32 i = 0; diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 4fe0296580..25c284ce0f 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -50,9 +50,6 @@ // used in VU recs #define PROCESS_VU_UPDATEFLAGS 0x10 -#ifndef DISABLE_SVU -#define PROCESS_VU_SUPER 0x40 // set if using supervu recompilation -#endif #define PROCESS_VU_COP2 0x80 // simple cop2 #define EEREC_S (((info)>>8)&0xf) @@ -180,14 +177,6 @@ void _flushXMMregs(); u8 _hasFreeXMMreg(); void _freeXMMregs(); int _getNumXMMwrite(); -#ifndef DISABLE_SVU -int _allocVFtoXMMreg(VURegs *VU, int xmmreg, int vfreg, int mode); -int _allocACCtoXMMreg(VURegs *VU, int xmmreg, int mode); -void _addNeededVFtoXMMreg(int vfreg); -void _addNeededACCtoXMMreg(); -void _deleteVFtoXMMreg(int reg, int vu, int flush); -#endif - void _signExtendSFtoM(uptr mem); // returns new index of reg, lower 32 bits already in mmx diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index afcba2ede2..643488137f 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -21,10 +21,6 @@ #include "iR5900.h" #include "iFPU.h" -#ifndef DISABLE_SVU -#include "sVU_Micro.h" -#endif - using namespace x86Emitter; const __aligned16 u32 g_minvals[4] = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index e2e557b7b2..9a773bfddf 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -22,10 +22,6 @@ #include "iR5900.h" #include "iFPU.h" -#ifndef DISABLE_SVU -#include "sVU_Micro.h" -#endif - /* This is a version of the FPU that emulates an exponent of 0xff and overflow/underflow flags */ /* Can be made faster by not converting stuff back and forth between instructions. */ diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 8c87e566f2..607f66cfe5 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -33,7 +33,6 @@ #include "IopCommon.h" #include "iCore.h" -#include "NakedAsm.h" #include "AppConfig.h" #include "Utilities/Perf.h" diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index c03c3ba405..9c24fe769d 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -23,8 +23,8 @@ #include "R5900_Profiler.h" extern u32 maxrecmem; -extern u32 pc; // recompiler pc (also used by the SuperVU! .. why? (air)) -extern int g_branch; // set for branch (also used by the SuperVU! .. why? (air)) +extern u32 pc; // recompiler pc +extern int g_branch; // set for branch extern u32 target; // branch target extern u32 s_nBlockCycles; // cycles of current block recompiling diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 1224ca1cb9..222168c6c1 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1951,12 +1951,6 @@ StartRecomp: g_pCurInstInfo = s_pInstCache; for(i = startpc; i < s_nEndBlock; i += 4) { - -#ifndef DISABLE_SVU - // superVU hack: it needs vucycles, for some reason. >_< - extern int vucycle; -#endif - g_pCurInstInfo++; cpuRegs.code = *(u32*)PSM(i); @@ -1965,9 +1959,6 @@ StartRecomp: if( !usecop2 ) { // init -#ifndef DISABLE_SVU - vucycle = 0; -#endif usecop2 = 1; } @@ -1975,13 +1966,6 @@ StartRecomp: _vuRegsCOP22( &VU0, &g_pCurInstInfo->vuregs ); continue; } - -#ifndef DISABLE_SVU - // fixme - This should be based on the cycle count of the current EE - // instruction being analyzed. - if( usecop2 ) vucycle++; -#endif - } // This *is* important because g_pCurInstInfo is checked a bit later on and // if it's not equal to s_pInstCache it handles recompilation differently. diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index abbdee7f37..2f0730685f 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -573,10 +573,6 @@ void recSWC1() void recLQC2() { -#ifndef DISABLE_SVU - _deleteVFtoXMMreg(_Ft_, 0, 2); -#endif - if (_Rt_) xMOV(edx, (uptr)&VU0.VF[_Ft_].UD[0]); else @@ -606,10 +602,6 @@ void recLQC2() void recSQC2() { -#ifndef DISABLE_SVU - _deleteVFtoXMMreg(_Ft_, 0, 1); //Want to flush it but not clear it -#endif - xMOV(edx, (uptr)&VU0.VF[_Ft_].UD[0]); if (GPR_IS_CONST1(_Rs_)) diff --git a/pcsx2/x86/ix86-32/iR5900Templates.cpp b/pcsx2/x86/ix86-32/iR5900Templates.cpp index 3589ceebde..f31c8eb9ef 100644 --- a/pcsx2/x86/ix86-32/iR5900Templates.cpp +++ b/pcsx2/x86/ix86-32/iR5900Templates.cpp @@ -25,11 +25,6 @@ #include "VU.h" #include "VUmicro.h" -#ifndef DISABLE_SVU -#include "sVU_Micro.h" -#include "sVU_zerorec.h" -#endif - #include "vtlb.h" using namespace x86Emitter; diff --git a/pcsx2/x86/sVU_Lower.cpp b/pcsx2/x86/sVU_Lower.cpp deleted file mode 100644 index 094208762d..0000000000 --- a/pcsx2/x86/sVU_Lower.cpp +++ /dev/null @@ -1,1996 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "GS.h" -#include "R5900OpcodeTables.h" -#include "iR5900.h" -#include "iMMI.h" -#include "iFPU.h" -#include "iCOP0.h" -#include "VUmicro.h" -#include "sVU_Micro.h" -#include "sVU_zerorec.h" -#include "Gif_Unit.h" - -using namespace x86Emitter; -//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define _Ft_ (( VU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ (( VU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ (( VU->code >> 6) & 0x1F) // The sa part of the instruction register -#define _It_ (_Ft_ & 15) -#define _Is_ (_Fs_ & 15) -#define _Id_ (_Fd_ & 15) - -#define _X (( VU->code>>24) & 0x1) -#define _Y (( VU->code>>23) & 0x1) -#define _Z (( VU->code>>22) & 0x1) -#define _W (( VU->code>>21) & 0x1) - -#define _XYZW_SS (_X+_Y+_Z+_W==1) - -#define _Fsf_ (( VU->code >> 21) & 0x03) -#define _Ftf_ (( VU->code >> 23) & 0x03) - -#define _Imm11_ (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) -#define _UImm11_ (s32)(VU->code & 0x7ff) - -#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] -#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1] -#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2] -#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3] - -#define VU_REGR_ADDR (uptr)&VU->VI[REG_R] -#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q] -#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG] - -#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info) - -#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] -#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1] -#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2] -#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3] - -#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) - - -static const __aligned16 u32 VU_ONE[4] = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff}; -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// *VU Lower Instructions!* -// -// Note: * = Checked for errors by cottonvibes -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// DIV* -//------------------------------------------------------------------ -void recVUMI_DIV(VURegs *VU, int info) -{ - u8 *pjmp, *pjmp1; - u32 *ajmp32, *bjmp32; - - //Console.WriteLn("recVUMI_DIV()"); - xAND(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0xFCF); // Clear D/I flags - - // FT can be zero here! so we need to check if its zero and set the correct flag. - xXOR.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); // Clear EEREC_TEMP - xCMPEQ.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); // Set all F's if each vector is zero - - xMOVMSKPS(eax, xRegisterSSE(EEREC_TEMP)); // Move the sign bits of the previous calculation - - xAND(eax, (1<<_Ftf_) ); // Grab "Is Zero" bits from the previous calculation - ajmp32 = JZ32(0); // Skip if none are - - xXOR.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); // Clear EEREC_TEMP - xCMPEQ.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); // Set all F's if each vector is zero - xMOVMSKPS(eax, xRegisterSSE(EEREC_TEMP)); // Move the sign bits of the previous calculation - - xAND(eax, (1<<_Fsf_) ); // Grab "Is Zero" bits from the previous calculation - pjmp = JZ8(0); - xOR(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0x410 ); // Set invalid flag (0/0) - pjmp1 = JMP8(0); - x86SetJ8(pjmp); - xOR(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0x820 ); // Zero divide (only when not 0/0) - x86SetJ8(pjmp1); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); - xXOR.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); - - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&g_maxvals[0]]); // If division by zero, then EEREC_TEMP = +/- fmax - - bjmp32 = JMP32(0); - - x86SetJ32(ajmp32); - - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat5_useEAX(EEREC_S, EEREC_TEMP, (1 << (3-_Fsf_))); - vuFloat5_useEAX(EEREC_T, EEREC_TEMP, (1 << (3-_Ftf_))); - } - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); - xDIV.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); - - vuFloat_useEAX(info, EEREC_TEMP, 0x8); - - x86SetJ32(bjmp32); - - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_Q, 0))], xRegisterSSE(EEREC_TEMP)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SQRT* -//------------------------------------------------------------------ -void recVUMI_SQRT( VURegs *VU, int info ) -{ - u8* pjmp; - //Console.WriteLn("recVUMI_SQRT()"); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); - xAND(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0xFCF); // Clear D/I flags - - /* Check for negative sqrt */ - xMOVMSKPS(eax, xRegisterSSE(EEREC_TEMP)); - xAND(eax, 1); //Check sign - pjmp = JZ8(0); //Skip if none are - xOR(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0x410); // Invalid Flag - Negative number sqrt - x86SetJ8(pjmp); - - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[const_clip]); // Do a cardinal sqrt - if (CHECK_VU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_TEMP), ptr[g_maxvals]); // Clamp infinities (only need to do positive clamp since EEREC_TEMP is positive) - xSQRT.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_Q, 0))], xRegisterSSE(EEREC_TEMP)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RSQRT* -//------------------------------------------------------------------ -__aligned16 u64 RSQRT_TEMP_XMM[2]; -void recVUMI_RSQRT(VURegs *VU, int info) -{ - u8 *ajmp8, *bjmp8; - u8 *qjmp1, *qjmp2; - int t1reg, t1boolean; - //Console.WriteLn("recVUMI_RSQRT()"); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); - xAND(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0xFCF); // Clear D/I flags - - /* Check for negative divide */ - xMOVMSKPS(eax, xRegisterSSE(EEREC_TEMP)); - xAND(eax, 1); //Check sign - ajmp8 = JZ8(0); //Skip if none are - xOR(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0x410); // Invalid Flag - Negative number sqrt - x86SetJ8(ajmp8); - - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[const_clip]); // Do a cardinal sqrt - if (CHECK_VU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_TEMP), ptr[g_maxvals]); // Clamp Infinities to Fmax - xSQRT.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - - t1reg = _vuGetTempXMMreg(info); - if( t1reg < 0 ) { - for (t1reg = 0; ( (t1reg == EEREC_TEMP) || (t1reg == EEREC_S) ); t1reg++) - ; // Makes t1reg not be EEREC_TEMP or EEREC_S. - xMOVAPS(ptr[&RSQRT_TEMP_XMM[0]], xRegisterSSE(t1reg )); // backup data in t1reg to a temp address - t1boolean = 1; - } - else t1boolean = 0; - - // Ft can still be zero here! so we need to check if its zero and set the correct flag. - xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); // Clear t1reg - xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); // Set all F's if each vector is zero - - xMOVMSKPS(eax, xRegisterSSE(t1reg)); // Move the sign bits of the previous calculation - - xAND(eax, 0x01 ); // Grab "Is Zero" bits from the previous calculation - ajmp8 = JZ8(0); // Skip if none are - - //check for 0/0 - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); // Clear EEREC_TEMP - xCMPEQ.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); // Set all F's if each vector is zero - xMOVMSKPS(eax, xRegisterSSE(t1reg)); // Move the sign bits of the previous calculation - - xAND(eax, 0x01 ); // Grab "Is Zero" bits from the previous calculation - qjmp1 = JZ8(0); - xOR(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0x410 ); // Set invalid flag (0/0) - qjmp2 = JMP8(0); - x86SetJ8(qjmp1); - xOR(ptr32[(u32*)(VU_VI_ADDR(REG_STATUS_FLAG, 2))], 0x820 ); // Zero divide (only when not 0/0) - x86SetJ8(qjmp2); - - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&g_maxvals[0]]); // If division by zero, then EEREC_TEMP = +/- fmax - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_Q, 0))], xRegisterSSE(EEREC_TEMP)); - bjmp8 = JMP8(0); - x86SetJ8(ajmp8); - - _unpackVFSS_xyzw(t1reg, EEREC_S, _Fsf_); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat_useEAX(info, t1reg, 0x8); // Clamp Infinities - xDIV.SS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - vuFloat_useEAX(info, t1reg, 0x8); - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_Q, 0))], xRegisterSSE(t1reg)); - - x86SetJ8(bjmp8); - - if (t1boolean) xMOVAPS(xRegisterSSE(t1reg), ptr[&RSQRT_TEMP_XMM[0] ]); // restore t1reg data - else _freeXMMreg(t1reg); // free t1reg -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// _addISIMMtoIT() - Used in IADDI, IADDIU, and ISUBIU instructions -//------------------------------------------------------------------ -void _addISIMMtoIT(VURegs *VU, s16 imm, int info) -{ - int isreg = -1, itreg; - if (_It_ == 0) return; - - if( _Is_ == 0 ) { - itreg = ALLOCVI(_It_, MODE_WRITE); - xMOV(xRegister32(itreg), imm&0xffff); - return; - } - - ADD_VI_NEEDED(_It_); - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_WRITE); - - if ( _It_ == _Is_ ) { - if (imm != 0 ) xADD(xRegister16(itreg), imm); - } - else { - if( imm ) { - xLEA(xRegister32(itreg), ptr[xAddressReg(isreg)+imm]); - xMOVZX(xRegister32(itreg), xRegister16(itreg)); - } - else xMOV(xRegister32(itreg), xRegister32(isreg)); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IADDI -//------------------------------------------------------------------ -void recVUMI_IADDI(VURegs *VU, int info) -{ - s16 imm; - - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_IADDI"); - imm = ( VU->code >> 6 ) & 0x1f; - imm = ( imm & 0x10 ? 0xfff0 : 0) | ( imm & 0xf ); - _addISIMMtoIT(VU, imm, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IADDIU -//------------------------------------------------------------------ -void recVUMI_IADDIU(VURegs *VU, int info) -{ - s16 imm; - - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_IADDIU"); - imm = ( ( VU->code >> 10 ) & 0x7800 ) | ( VU->code & 0x7ff ); - _addISIMMtoIT(VU, imm, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IADD -//------------------------------------------------------------------ -void recVUMI_IADD( VURegs *VU, int info ) -{ - int idreg, isreg = -1, itreg = -1; - if ( _Id_ == 0 ) return; - //Console.WriteLn("recVUMI_IADD"); - if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { - idreg = ALLOCVI(_Id_, MODE_WRITE); - xXOR(xRegister32(idreg), xRegister32(idreg)); - return; - } - - ADD_VI_NEEDED(_Is_); - ADD_VI_NEEDED(_It_); - idreg = ALLOCVI(_Id_, MODE_WRITE); - - if ( _Is_ == 0 ) - { - if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { - if( idreg != itreg ) xMOV(xRegister32(idreg), xRegister32(itreg)); - } - else xMOVZX(xRegister32(idreg), ptr16[(u16*)(VU_VI_ADDR(_It_, 1))]); - } - else if ( _It_ == 0 ) - { - if( (isreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { - if( idreg != isreg ) xMOV(xRegister32(idreg), xRegister32(isreg)); - } - else xMOVZX(xRegister32(idreg), ptr16[(u16*)(VU_VI_ADDR(_Is_, 1))]); - } - else { - //ADD_VI_NEEDED(_It_); - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - if( idreg == isreg ) xADD(xRegister32(idreg), xRegister32(itreg)); - else if( idreg == itreg ) xADD(xRegister32(idreg), xRegister32(isreg)); - else xLEA(xRegister32(idreg), ptr[xAddressReg(isreg) + xAddressReg(itreg)]); - xMOVZX(xRegister32(idreg), xRegister16(idreg)); // needed since don't know if idreg's upper bits are 0 - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IAND -//------------------------------------------------------------------ -void recVUMI_IAND( VURegs *VU, int info ) -{ - int idreg, isreg = -1, itreg = -1; - if ( _Id_ == 0 ) return; - //Console.WriteLn("recVUMI_IAND"); - if ( ( _Is_ == 0 ) || ( _It_ == 0 ) ) { - idreg = ALLOCVI(_Id_, MODE_WRITE); - xXOR(xRegister32(idreg), xRegister32(idreg)); - return; - } - - ADD_VI_NEEDED(_Is_); - ADD_VI_NEEDED(_It_); - idreg = ALLOCVI(_Id_, MODE_WRITE); - - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - if( idreg == isreg ) xAND(xRegister16(idreg), xRegister16(itreg)); - else if( idreg == itreg ) xAND(xRegister16(idreg), xRegister16(isreg)); - else { - xMOV(xRegister32(idreg), xRegister32(itreg)); - xAND(xRegister32(idreg), xRegister32(isreg)); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IOR -//------------------------------------------------------------------ -void recVUMI_IOR( VURegs *VU, int info ) -{ - int idreg, isreg = -1, itreg = -1; - if ( _Id_ == 0 ) return; - //Console.WriteLn("recVUMI_IOR"); - if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { - idreg = ALLOCVI(_Id_, MODE_WRITE); - xXOR(xRegister32(idreg), xRegister32(idreg)); - return; - } - - ADD_VI_NEEDED(_Is_); - ADD_VI_NEEDED(_It_); - idreg = ALLOCVI(_Id_, MODE_WRITE); - - if ( _Is_ == 0 ) - { - if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { - if( idreg != itreg ) xMOV(xRegister32(idreg), xRegister32(itreg)); - } - else xMOVZX(xRegister32(idreg), ptr16[(u16*)(VU_VI_ADDR(_It_, 1))]); - } - else if ( _It_ == 0 ) - { - if( (isreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { - if( idreg != isreg ) xMOV(xRegister32(idreg), xRegister32(isreg)); - } - else xMOVZX(xRegister32(idreg), ptr16[(u16*)(VU_VI_ADDR(_Is_, 1))]); - } - else - { - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - if( idreg == isreg ) xOR(xRegister16(idreg), xRegister16(itreg)); - else if( idreg == itreg ) xOR(xRegister16(idreg), xRegister16(isreg)); - else { - xMOV(xRegister32(idreg), xRegister32(isreg)); - xOR(xRegister32(idreg), xRegister32(itreg)); - } - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ISUB -//------------------------------------------------------------------ -void recVUMI_ISUB( VURegs *VU, int info ) -{ - int idreg, isreg = -1, itreg = -1; - if ( _Id_ == 0 ) return; - //Console.WriteLn("recVUMI_ISUB"); - if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { - idreg = ALLOCVI(_Id_, MODE_WRITE); - xXOR(xRegister32(idreg), xRegister32(idreg)); - return; - } - - ADD_VI_NEEDED(_Is_); - ADD_VI_NEEDED(_It_); - idreg = ALLOCVI(_Id_, MODE_WRITE); - - if ( _Is_ == 0 ) - { - if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { - if( idreg != itreg ) xMOV(xRegister32(idreg), xRegister32(itreg)); - } - else xMOVZX(xRegister32(idreg), ptr16[(u16*)(VU_VI_ADDR(_It_, 1))]); - xNEG(xRegister16(idreg)); - } - else if ( _It_ == 0 ) - { - if( (isreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { - if( idreg != isreg ) xMOV(xRegister32(idreg), xRegister32(isreg)); - } - else xMOVZX(xRegister32(idreg), ptr16[(u16*)(VU_VI_ADDR(_Is_, 1))]); - } - else - { - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - if( idreg == isreg ) xSUB(xRegister16(idreg), xRegister16(itreg)); - else if( idreg == itreg ) { - xSUB(xRegister16(idreg), xRegister16(isreg)); - xNEG(xRegister16(idreg)); - } - else { - xMOV(xRegister32(idreg), xRegister32(isreg)); - xSUB(xRegister16(idreg), xRegister16(itreg)); - } - } -} -//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// ISUBIU -//------------------------------------------------------------------ -void recVUMI_ISUBIU( VURegs *VU, int info ) -{ - s16 imm; - - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_ISUBIU"); - imm = ( ( VU->code >> 10 ) & 0x7800 ) | ( VU->code & 0x7ff ); - imm = -imm; - _addISIMMtoIT(VU, imm, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MOVE* -//------------------------------------------------------------------ -void recVUMI_MOVE( VURegs *VU, int info ) -{ - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - //Console.WriteLn("recVUMI_MOVE"); - if (_X_Y_Z_W == 0x8) xMOVSS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_S)); - else if (_X_Y_Z_W == 0xf) xMOVAPS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_S)); - else { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MFIR* -//------------------------------------------------------------------ -void recVUMI_MFIR( VURegs *VU, int info ) -{ - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - //Console.WriteLn("recVUMI_MFIR"); - _deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, 1); - - if( _XYZW_SS ) { - xMOVDZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(VU_VI_ADDR(_Is_, 1)-2)]); - - _vuFlipRegSS(VU, EEREC_T); - xPSRA.D(xRegisterSSE(EEREC_TEMP), 16); - xMOVSS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_TEMP)); - _vuFlipRegSS(VU, EEREC_T); - } - else if (_X_Y_Z_W != 0xf) { - xMOVDZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(VU_VI_ADDR(_Is_, 1)-2)]); - - xPSRA.D(xRegisterSSE(EEREC_TEMP), 16); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - xMOVDZX(xRegisterSSE(EEREC_T), ptr[(void*)(VU_VI_ADDR(_Is_, 1)-2)]); - - xPSRA.D(xRegisterSSE(EEREC_T), 16); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MTIR* -//------------------------------------------------------------------ -void recVUMI_MTIR( VURegs *VU, int info ) -{ - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_MTIR"); - _deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, 2); - - if( _Fsf_ == 0 ) { - xMOVSS(ptr[(void*)(VU_VI_ADDR(_It_, 0))], xRegisterSSE(EEREC_S)); - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - xMOVSS(ptr[(void*)(VU_VI_ADDR(_It_, 0))], xRegisterSSE(EEREC_TEMP)); - } - - xAND(ptr32[(u32*)(VU_VI_ADDR(_It_, 0))], 0xffff); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MR32* -//------------------------------------------------------------------ -void recVUMI_MR32( VURegs *VU, int info ) -{ - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - //Console.WriteLn("recVUMI_MR32"); - if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x39); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - xMOVAPS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_S)); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0x39); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// _loadEAX() -// -// NOTE: If x86reg < 0, reads directly from offset -//------------------------------------------------------------------ -void _loadEAX(VURegs *VU, int x86reg, uptr offset, int info) -{ - pxAssert( offset < 0x80000000 ); - - if( x86reg >= 0 ) { - switch(_X_Y_Z_W) { - case 3: // ZW - xMOVH.PS(xRegisterSSE(EEREC_T), ptr[xAddressReg(x86reg)+offset+8]); - break; - case 6: // YZ - xSHUF.PS(xRegisterSSE(EEREC_T), ptr[xAddressReg(x86reg)+offset], 0x9c); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0x78); - break; - - case 8: // X - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[xAddressReg(x86reg)+offset]); - xMOVSS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_TEMP)); - break; - case 9: // XW - xSHUF.PS(xRegisterSSE(EEREC_T), ptr[xAddressReg(x86reg)+offset], 0xc9); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0xd2); - break; - case 12: // XY - xMOVL.PS(xRegisterSSE(EEREC_T), ptr[xAddressReg(x86reg)+offset]); - break; - case 15: - if( VU == &VU1 ) xMOVAPS(xRegisterSSE(EEREC_T), ptr[xAddressReg(x86reg)+offset]); - else xMOVUPS(xRegisterSSE(EEREC_T), ptr[xAddressReg(x86reg)+offset]); - break; - default: - if( VU == &VU1 ) xMOVAPS(xRegisterSSE(EEREC_TEMP), ptr[xAddressReg(x86reg)+offset]); - else xMOVUPS(xRegisterSSE(EEREC_TEMP), ptr[xAddressReg(x86reg)+offset]); - - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - break; - } - } - else { - switch(_X_Y_Z_W) { - case 3: // ZW - xMOVH.PS(xRegisterSSE(EEREC_T), ptr[(void*)(offset+8)]); - break; - case 6: // YZ - xSHUF.PS(xRegisterSSE(EEREC_T), ptr[(void*)(offset)], 0x9c); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0x78); - break; - case 8: // X - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(offset)]); - xMOVSS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_TEMP)); - break; - case 9: // XW - xSHUF.PS(xRegisterSSE(EEREC_T), ptr[(void*)(offset)], 0xc9); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0xd2); - break; - case 12: // XY - xMOVL.PS(xRegisterSSE(EEREC_T), ptr[(void*)(offset)]); - break; - case 15: - if( VU == &VU1 ) xMOVAPS(xRegisterSSE(EEREC_T), ptr[(void*)(offset)]); - else xMOVUPS(xRegisterSSE(EEREC_T), ptr[(void*)(offset)]); - break; - default: - if( VU == &VU1 ) xMOVAPS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(offset)]); - else xMOVUPS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(offset)]); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - break; - } - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// recVUTransformAddr() -//------------------------------------------------------------------ -int recVUTransformAddr(int x86reg, VURegs* VU, int vireg, int imm) -{ - if( x86reg == eax.GetId() ) { - if (imm) xADD(xRegister32(x86reg), imm); - } - else { - if( imm ) xLEA(eax, ptr[xAddressReg(x86reg)+imm]); - else xMOV(eax, xRegister32(x86reg)); - } - - if( VU == &VU1 ) { - xAND(eax, 0x3ff); // wrap around - xSHL(eax, 4); - } - else { - - // VU0 has a somewhat interesting memory mapping: - // if addr & 0x4000, reads VU1's VF regs and VI regs - // otherwise, wrap around at 0x1000 - - xTEST(eax, 0x400); - xForwardJNZ8 vu1regs; // if addr & 0x4000, reads VU1's VF regs and VI regs - xAND(eax, 0xff); // if !(addr & 0x4000), wrap around - xForwardJump8 done; - vu1regs.SetTarget(); - xAND(eax, 0x3f); - xADD(eax, (u128*)VU1.VF - (u128*)VU0.Mem); - done.SetTarget(); - - xSHL(eax, 4); // multiply by 16 (shift left by 4) - } - - return eax.GetId(); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// LQ -//------------------------------------------------------------------ -void recVUMI_LQ(VURegs *VU, int info) -{ - s16 imm; - if ( _Ft_ == 0 ) return; - //Console.WriteLn("recVUMI_LQ"); - imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - if (_Is_ == 0) { - _loadEAX(VU, -1, (uptr)GET_VU_MEM(VU, (u32)imm*16), info); - } - else { - int isreg = ALLOCVI(_Is_, MODE_READ); - _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, imm), (uptr)VU->Mem, info); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// LQD -//------------------------------------------------------------------ -void recVUMI_LQD( VURegs *VU, int info ) -{ - int isreg = 0; - //Console.WriteLn("recVUMI_LQD"); - if ( _Is_ != 0 ) { - isreg = ALLOCVI(_Is_, MODE_READ|MODE_WRITE); - xSUB(xRegister16(isreg), 1 ); - } - - if ( _Ft_ == 0 ) return; - - if ( _Is_ == 0 ) _loadEAX(VU, -1, (uptr)VU->Mem, info); - else _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, 0), (uptr)VU->Mem, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// LQI -//------------------------------------------------------------------ -void recVUMI_LQI(VURegs *VU, int info) -{ - int isreg; - //Console.WriteLn("recVUMI_LQI"); - if ( _Ft_ == 0 ) { - if( _Is_ != 0 ) { - if( (isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_WRITE|MODE_READ)) >= 0 ) { - xADD(xRegister16(isreg), 1); - } - else { - xADD(ptr16[(u16*)(VU_VI_ADDR( _Is_, 0 ))], 1 ); - } - } - return; - } - - if (_Is_ == 0) { - _loadEAX(VU, -1, (uptr)VU->Mem, info); - } - else { - isreg = ALLOCVI(_Is_, MODE_READ|MODE_WRITE); - _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, 0), (uptr)VU->Mem, info); - xADD(xRegister16(isreg), 1 ); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// _saveEAX() -//------------------------------------------------------------------ -void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) -{ - pxAssert( offset < 0x80000000 ); - - if ( _Fs_ == 0 ) { - if ( _XYZW_SS ) { - u32 c = _W ? 0x3f800000 : 0; - if ( x86reg >= 0 ) xMOV(ptr32[xAddressReg(x86reg)+offset+(_W?12:(_Z?8:(_Y?4:0)))], c); - else xMOV(ptr32[(u32*)(offset+(_W?12:(_Z?8:(_Y?4:0))))], c); - } - else { - - // (this is one of my test cases for the new emitter --air) - using namespace x86Emitter; - xAddressVoid indexer( offset ); - if( x86reg != -1 ) indexer.Add( xAddressReg( x86reg ) ); - - if ( _X ) xMOV(ptr32[indexer], 0x00000000); - if ( _Y ) xMOV(ptr32[indexer+4], 0x00000000); - if ( _Z ) xMOV(ptr32[indexer+8], 0x00000000); - if ( _W ) xMOV(ptr32[indexer+12], 0x3f800000); - } - return; - } - - switch ( _X_Y_Z_W ) { - case 1: // W - xPSHUF.D(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S), 0x27); - if ( x86reg >= 0 ) xMOVSS(ptr[xAddressReg(x86reg)+offset+12], xRegisterSSE(EEREC_TEMP)); - else xMOVSS(ptr[(void*)(offset+12)], xRegisterSSE(EEREC_TEMP)); - break; - case 2: // Z - xMOVHL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if ( x86reg >= 0 ) xMOVSS(ptr[xAddressReg(x86reg)+offset+8], xRegisterSSE(EEREC_TEMP)); - else xMOVSS(ptr[(void*)(offset+8)], xRegisterSSE(EEREC_TEMP)); - break; - case 3: // ZW - if ( x86reg >= 0 ) xMOVH.PS(ptr[xAddressReg(x86reg)+offset+8], xRegisterSSE(EEREC_S)); - else xMOVH.PS(ptr[(void*)(offset+8)], xRegisterSSE(EEREC_S)); - break; - case 4: // Y - xPSHUF.LW(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S), 0x4e); - if ( x86reg >= 0 ) xMOVSS(ptr[xAddressReg(x86reg)+offset+4], xRegisterSSE(EEREC_TEMP)); - else xMOVSS(ptr[(void*)(offset+4)], xRegisterSSE(EEREC_TEMP)); - break; - case 5: // YW - xSHUF.PS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_S), 0xB1); - xMOVHL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if ( x86reg >= 0 ) { - xMOVSS(ptr[xAddressReg(x86reg)+offset+4], xRegisterSSE(EEREC_S)); - xMOVSS(ptr[xAddressReg(x86reg)+offset+12], xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVSS(ptr[(void*)(offset+4)], xRegisterSSE(EEREC_S)); - xMOVSS(ptr[(void*)(offset+12)], xRegisterSSE(EEREC_TEMP)); - } - xSHUF.PS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_S), 0xB1); - break; - case 6: // YZ - xPSHUF.D(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S), 0xc9); - if ( x86reg >= 0 ) xMOVL.PS(ptr[xAddressReg(x86reg)+offset+4], xRegisterSSE(EEREC_TEMP)); - else xMOVL.PS(ptr[(void*)(offset+4)], xRegisterSSE(EEREC_TEMP)); - break; - case 7: // YZW - xPSHUF.D(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S), 0x93); //ZYXW - if ( x86reg >= 0 ) { - xMOVH.PS(ptr[xAddressReg(x86reg)+offset+4], xRegisterSSE(EEREC_TEMP)); - xMOVSS(ptr[xAddressReg(x86reg)+offset+12], xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVH.PS(ptr[(void*)(offset+4)], xRegisterSSE(EEREC_TEMP)); - xMOVSS(ptr[(void*)(offset+12)], xRegisterSSE(EEREC_TEMP)); - } - break; - case 8: // X - if ( x86reg >= 0 ) xMOVSS(ptr[xAddressReg(x86reg)+offset], xRegisterSSE(EEREC_S)); - else xMOVSS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - break; - case 9: // XW - if ( x86reg >= 0 ) xMOVSS(ptr[xAddressReg(x86reg)+offset], xRegisterSSE(EEREC_S)); - else xMOVSS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - - xPSHUF.D(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S), 0xff); //WWWW - - if ( x86reg >= 0 ) xMOVSS(ptr[xAddressReg(x86reg)+offset+12], xRegisterSSE(EEREC_TEMP)); - else xMOVSS(ptr[(void*)(offset+12)], xRegisterSSE(EEREC_TEMP)); - - break; - case 10: //XZ - xMOVHL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if ( x86reg >= 0 ) { - xMOVSS(ptr[xAddressReg(x86reg)+offset], xRegisterSSE(EEREC_S)); - xMOVSS(ptr[xAddressReg(x86reg)+offset+8], xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVSS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - xMOVSS(ptr[(void*)(offset+8)], xRegisterSSE(EEREC_TEMP)); - } - break; - case 11: //XZW - if ( x86reg >= 0 ) { - xMOVSS(ptr[xAddressReg(x86reg)+offset], xRegisterSSE(EEREC_S)); - xMOVH.PS(ptr[xAddressReg(x86reg)+offset+8], xRegisterSSE(EEREC_S)); - } - else { - xMOVSS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - xMOVH.PS(ptr[(void*)(offset+8)], xRegisterSSE(EEREC_S)); - } - break; - case 12: // XY - if ( x86reg >= 0 ) xMOVL.PS(ptr[xAddressReg(x86reg)+offset+0], xRegisterSSE(EEREC_S)); - else xMOVL.PS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - break; - case 13: // XYW - xPSHUF.D(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S), 0x4b); //YXZW - if ( x86reg >= 0 ) { - xMOVH.PS(ptr[xAddressReg(x86reg)+offset+0], xRegisterSSE(EEREC_TEMP)); - xMOVSS(ptr[xAddressReg(x86reg)+offset+12], xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVH.PS(ptr[(void*)(offset)], xRegisterSSE(EEREC_TEMP)); - xMOVSS(ptr[(void*)(offset+12)], xRegisterSSE(EEREC_TEMP)); - } - break; - case 14: // XYZ - xMOVHL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if ( x86reg >= 0 ) { - xMOVL.PS(ptr[xAddressReg(x86reg)+offset+0], xRegisterSSE(EEREC_S)); - xMOVSS(ptr[xAddressReg(x86reg)+offset+8], xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVL.PS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - xMOVSS(ptr[(void*)(offset+8)], xRegisterSSE(EEREC_TEMP)); - } - break; - case 15: // XYZW - if ( VU == &VU1 ) { - if( x86reg >= 0 ) xMOVAPS(ptr[xAddressReg(x86reg)+offset+0], xRegisterSSE(EEREC_S)); - else xMOVAPS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - } - else { - if( x86reg >= 0 ) xMOVUPS(ptr[xAddressReg(x86reg)+offset+0], xRegisterSSE(EEREC_S)); - else { - if( offset & 15 ) xMOVUPS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - else xMOVAPS(ptr[(void*)(offset)], xRegisterSSE(EEREC_S)); - } - } - break; - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SQ -//------------------------------------------------------------------ -void recVUMI_SQ(VURegs *VU, int info) -{ - s16 imm; - //Console.WriteLn("recVUMI_SQ"); - imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); - if ( _It_ == 0 ) _saveEAX(VU, -1, (uptr)GET_VU_MEM(VU, (int)imm * 16), info); - else { - int itreg = ALLOCVI(_It_, MODE_READ); - _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, imm), (uptr)VU->Mem, info); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SQD -//------------------------------------------------------------------ -void recVUMI_SQD(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_SQD"); - if (_It_ == 0) _saveEAX(VU, -1, (uptr)VU->Mem, info); - else { - int itreg = ALLOCVI(_It_, MODE_READ|MODE_WRITE); - xSUB(xRegister16(itreg), 1 ); - _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, 0), (uptr)VU->Mem, info); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SQI -//------------------------------------------------------------------ -void recVUMI_SQI(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_SQI"); - if (_It_ == 0) _saveEAX(VU, -1, (uptr)VU->Mem, info); - else { - int itreg = ALLOCVI(_It_, MODE_READ|MODE_WRITE); - _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, 0), (uptr)VU->Mem, info); - xADD(xRegister16(itreg), 1 ); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ILW -//------------------------------------------------------------------ -void recVUMI_ILW(VURegs *VU, int info) -{ - int itreg; - s16 imm, off = 0; - - if ( ( _It_ == 0 ) || ( _X_Y_Z_W == 0 ) ) return; - //Console.WriteLn("recVUMI_ILW"); - imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); - if (_X) off = 0; - else if (_Y) off = 4; - else if (_Z) off = 8; - else if (_W) off = 12; - else pxAssertMsg(0, "off is 0 as default value, could be incorrect"); - - ADD_VI_NEEDED(_Is_); - itreg = ALLOCVI(_It_, MODE_WRITE); - - if ( _Is_ == 0 ) { - xMOVZX(xRegister32(itreg), ptr16[GET_VU_MEM(VU, (int)imm * 16 + off)]); - } - else { - int isreg = ALLOCVI(_Is_, MODE_READ); - xMOV(xRegister32(itreg), ptr[xAddressReg(recVUTransformAddr(isreg, VU, _Is_, imm))+(uptr)VU->Mem + off]); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ISW -//------------------------------------------------------------------ -void recVUMI_ISW( VURegs *VU, int info ) -{ - s16 imm; - //Console.WriteLn("recVUMI_ISW"); - imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); - - if (_Is_ == 0) { - uptr off = (uptr)GET_VU_MEM(VU, (int)imm * 16); - int itreg = ALLOCVI(_It_, MODE_READ); - - if (_X) xMOV(ptr[(void*)(off)], xRegister32(itreg)); - if (_Y) xMOV(ptr[(void*)(off+4)], xRegister32(itreg)); - if (_Z) xMOV(ptr[(void*)(off+8)], xRegister32(itreg)); - if (_W) xMOV(ptr[(void*)(off+12)], xRegister32(itreg)); - } - else { - int x86reg, isreg, itreg; - - ADD_VI_NEEDED(_It_); - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - x86reg = recVUTransformAddr(isreg, VU, _Is_, imm); - - if (_X) xMOV(ptr[xAddressReg(x86reg)+(uptr)VU->Mem], xRegister32(itreg)); - if (_Y) xMOV(ptr[xAddressReg(x86reg)+(uptr)VU->Mem+4], xRegister32(itreg)); - if (_Z) xMOV(ptr[xAddressReg(x86reg)+(uptr)VU->Mem+8], xRegister32(itreg)); - if (_W) xMOV(ptr[xAddressReg(x86reg)+(uptr)VU->Mem+12], xRegister32(itreg)); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ILWR -//------------------------------------------------------------------ -void recVUMI_ILWR( VURegs *VU, int info ) -{ - int off = 0; - int itreg = 0; - - if ( ( _It_ == 0 ) || ( _X_Y_Z_W == 0 ) ) return; - //Console.WriteLn("recVUMI_ILWR"); - if (_X) off = 0; - else if (_Y) off = 4; - else if (_Z) off = 8; - else if (_W) off = 12; - else pxAssertMsg(0, "off is 0 as default value, could be incorrect"); - - ADD_VI_NEEDED(_Is_); - itreg = ALLOCVI(_It_, MODE_WRITE); - - if ( _Is_ == 0 ) { - xMOVZX(xRegister32(itreg), ptr16[(u16*)((uptr)VU->Mem + off )]); - } - else { - int isreg = ALLOCVI(_Is_, MODE_READ); - xMOVZX(xRegister32(itreg), ptr16[xAddressReg( recVUTransformAddr(isreg, VU, _Is_, 0) ) + (uptr)VU->Mem + off]); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ISWR -//------------------------------------------------------------------ -void recVUMI_ISWR( VURegs *VU, int info ) -{ - int itreg; - //Console.WriteLn("recVUMI_ISWR"); - ADD_VI_NEEDED(_Is_); - itreg = ALLOCVI(_It_, MODE_READ); - - if (_Is_ == 0) { - if (_X) xMOV(ptr[(VU->Mem)], xRegister32(itreg)); - if (_Y) xMOV(ptr[(void*)((uptr)VU->Mem+4)], xRegister32(itreg)); - if (_Z) xMOV(ptr[(void*)((uptr)VU->Mem+8)], xRegister32(itreg)); - if (_W) xMOV(ptr[(void*)((uptr)VU->Mem+12)], xRegister32(itreg)); - } - else { - int x86reg; - int isreg = ALLOCVI(_Is_, MODE_READ); - x86reg = recVUTransformAddr(isreg, VU, _Is_, 0); - - if (_X) xMOV(ptr[xAddressReg(x86reg)+(uptr)VU->Mem], xRegister32(itreg)); - if (_Y) xMOV(ptr[xAddressReg(x86reg)+(uptr)VU->Mem+4], xRegister32(itreg)); - if (_Z) xMOV(ptr[xAddressReg(x86reg)+(uptr)VU->Mem+8], xRegister32(itreg)); - if (_W) xMOV(ptr[xAddressReg(x86reg)+(uptr)VU->Mem+12], xRegister32(itreg)); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RINIT* -//------------------------------------------------------------------ -void recVUMI_RINIT(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_RINIT()"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) { - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 2); - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[s_mask]); - xOR.PS(xRegisterSSE(EEREC_TEMP), ptr[VU_ONE]); - xMOVSS(ptr[(void*)(VU_REGR_ADDR)], xRegisterSSE(EEREC_TEMP)); - } - else { - int rreg = ALLOCVI(REG_R, MODE_WRITE); - - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - xMOVAPS(ptr[(&VU->VF[_Fs_])], xRegisterSSE(EEREC_S)); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - xMOV(xRegister32(rreg), ptr[(void*)(VU_VFx_ADDR( _Fs_ ) + 4 * _Fsf_ )]); - xAND(xRegister32(rreg), 0x7fffff ); - xOR(xRegister32(rreg), 0x7f << 23 ); - - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RGET* -//------------------------------------------------------------------ -void recVUMI_RGET(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_RGET()"); - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - - if (_X_Y_Z_W != 0xf) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(VU_REGR_ADDR)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - xMOVSSZX(xRegisterSSE(EEREC_T), ptr[(void*)(VU_REGR_ADDR)]); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RNEXT* -//------------------------------------------------------------------ -void recVUMI_RNEXT( VURegs *VU, int info ) -{ - int rreg, x86temp0, x86temp1; - //Console.WriteLn("recVUMI_RNEXT()"); - - rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ); - - x86temp0 = ALLOCTEMPX86(0); - x86temp1 = ALLOCTEMPX86(0); - - // code from www.project-fao.org - //xMOV(xRegister32(rreg), ptr[(void*)(VU_REGR_ADDR)]); - xMOV(xRegister32(x86temp0), xRegister32(rreg)); - xSHR(xRegister32(x86temp0), 4); - xAND(xRegister32(x86temp0), 1); - - xMOV(xRegister32(x86temp1), xRegister32(rreg)); - xSHR(xRegister32(x86temp1), 22); - xAND(xRegister32(x86temp1), 1); - - xSHL(xRegister32(rreg), 1); - xXOR(xRegister32(x86temp0), xRegister32(x86temp1)); - xXOR(xRegister32(rreg), xRegister32(x86temp0)); - xAND(xRegister32(rreg), 0x7fffff); - xOR(xRegister32(rreg), 0x3f800000); - - _freeX86reg(x86temp0); - _freeX86reg(x86temp1); - - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) { - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - return; - } - - recVUMI_RGET(VU, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RXOR* -//------------------------------------------------------------------ -void recVUMI_RXOR( VURegs *VU, int info ) -{ - //Console.WriteLn("recVUMI_RXOR()"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) { - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - xXOR.PS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(VU_REGR_ADDR)]); - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[s_mask]); - xOR.PS(xRegisterSSE(EEREC_TEMP), ptr[s_fones]); - xMOVSS(ptr[(void*)(VU_REGR_ADDR)], xRegisterSSE(EEREC_TEMP)); - } - else { - int rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ); - - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - xMOVAPS(ptr[(&VU->VF[_Fs_])], xRegisterSSE(EEREC_S)); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - xXOR(xRegister32(rreg), ptr[(void*)(VU_VFx_ADDR( _Fs_ ) + 4 * _Fsf_ )]); - xAND(xRegister32(rreg), 0x7fffff ); - xOR(xRegister32(rreg), 0x3f800000 ); - - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// WAITQ -//------------------------------------------------------------------ -void recVUMI_WAITQ( VURegs *VU, int info ) -{ - //Console.WriteLn("recVUMI_WAITQ"); -// if( info & PROCESS_VU_SUPER ) { -// //xCALL((void*)waitqfn); -// SuperVUFlush(0, 1); -// } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FSAND -//------------------------------------------------------------------ -void recVUMI_FSAND( VURegs *VU, int info ) -{ - int itreg; - u16 imm; - //Console.WriteLn("recVUMI_FSAND"); - imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); - if(_It_ == 0) return; - - itreg = ALLOCVI(_It_, MODE_WRITE); - xMOV(xRegister32(itreg), ptr[(void*)(VU_VI_ADDR(REG_STATUS_FLAG, 1))]); - xAND(xRegister32(itreg), imm ); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FSEQ -//------------------------------------------------------------------ -void recVUMI_FSEQ( VURegs *VU, int info ) -{ - int itreg; - u16 imm; - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_FSEQ"); - imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); - - itreg = ALLOCVI(_It_, MODE_WRITE|MODE_8BITREG); - - xMOVZX(eax, ptr16[(u16*)(VU_VI_ADDR(REG_STATUS_FLAG, 1))]); - xXOR(xRegister32(itreg), xRegister32(itreg)); - xCMP(ax, imm); - xSETE(xRegister8(itreg)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FSOR -//------------------------------------------------------------------ -void recVUMI_FSOR( VURegs *VU, int info ) -{ - int itreg; - u32 imm; - if(_It_ == 0) return; - //Console.WriteLn("recVUMI_FSOR"); - imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); - - itreg = ALLOCVI(_It_, MODE_WRITE); - - xMOVZX(xRegister32(itreg), ptr16[(u16*)(VU_VI_ADDR(REG_STATUS_FLAG, 1))]); - xOR(xRegister32(itreg), imm ); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FSSET -//------------------------------------------------------------------ -void recVUMI_FSSET(VURegs *VU, int info) -{ - u32 writeaddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); - u32 prevaddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); - - u16 imm = 0; - //Console.WriteLn("recVUMI_FSSET"); - imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7FF); - - // keep the low 6 bits ONLY if the upper instruction is an fmac instruction (otherwise rewrite) - metal gear solid 3 - //if( (info & PROCESS_VU_SUPER) && VUREC_FMAC ) { - xMOV(eax, ptr[(void*)(prevaddr)]); - xAND(eax, 0x3f); - if ((imm&0xfc0) != 0) xOR(eax, imm & 0xFC0); - xMOV(ptr[(void*)(writeaddr ? writeaddr : prevaddr)], eax); - //} - //else { - // xMOV(ptr32[(u32*)(writeaddr ? writeaddr : prevaddr)], imm&0xfc0); - //} -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FMAND -//------------------------------------------------------------------ -void recVUMI_FMAND( VURegs *VU, int info ) -{ - int isreg, itreg; - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_FMAND"); - isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_WRITE);//|MODE_8BITREG); - - if( isreg >= 0 ) { - if( itreg != isreg ) xMOV(xRegister32(itreg), xRegister32(isreg)); - } - else xMOVZX(xRegister32(itreg), ptr16[(u16*)(VU_VI_ADDR(_Is_, 1))]); - - xAND(xRegister16(itreg), ptr[(void*)(VU_VI_ADDR(REG_MAC_FLAG, 1))]); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FMEQ -//------------------------------------------------------------------ -void recVUMI_FMEQ( VURegs *VU, int info ) -{ - int itreg, isreg; - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_FMEQ"); - if( _It_ == _Is_ ) { - itreg = ALLOCVI(_It_, MODE_WRITE|MODE_READ);//|MODE_8BITREG - - xCMP(xRegister16(itreg), ptr[(void*)(VU_VI_ADDR(REG_MAC_FLAG, 1))]); - xSETE(al); - xMOVZX(xRegister32(itreg), al); - } - else { - ADD_VI_NEEDED(_Is_); - itreg = ALLOCVI(_It_, MODE_WRITE|MODE_8BITREG); - isreg = ALLOCVI(_Is_, MODE_READ); - - xXOR(xRegister32(itreg), xRegister32(itreg)); - - xCMP(xRegister16(isreg), ptr[(void*)(VU_VI_ADDR(REG_MAC_FLAG, 1))]); - xSETE(xRegister8(itreg)); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FMOR -//------------------------------------------------------------------ -void recVUMI_FMOR( VURegs *VU, int info ) -{ - int isreg, itreg; - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_FMOR"); - if( _Is_ == 0 ) { - itreg = ALLOCVI(_It_, MODE_WRITE);//|MODE_8BITREG); - xMOVZX(xRegister32(itreg), ptr16[(u16*)(VU_VI_ADDR(REG_MAC_FLAG, 1))]); - } - else if( _It_ == _Is_ ) { - itreg = ALLOCVI(_It_, MODE_WRITE|MODE_READ);//|MODE_8BITREG); - xOR(xRegister16(itreg), ptr[(void*)(VU_VI_ADDR(REG_MAC_FLAG, 1))]); - } - else { - isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_WRITE); - - xMOVZX(xRegister32(itreg), ptr16[(u16*)(VU_VI_ADDR(REG_MAC_FLAG, 1))]); - - if( isreg >= 0 ) - xOR(xRegister16(itreg), xRegister16(isreg )); - else - xOR(xRegister16(itreg), ptr[(void*)(VU_VI_ADDR(_Is_, 1))]); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCAND -//------------------------------------------------------------------ -void recVUMI_FCAND( VURegs *VU, int info ) -{ - int itreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG); - //Console.WriteLn("recVUMI_FCAND"); - xMOV(eax, ptr[(void*)(VU_VI_ADDR(REG_CLIP_FLAG, 1))]); - xXOR(xRegister32(itreg), xRegister32(itreg )); - xAND(eax, VU->code & 0xFFFFFF ); - - xSETNZ(xRegister8(itreg)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCEQ -//------------------------------------------------------------------ -void recVUMI_FCEQ( VURegs *VU, int info ) -{ - int itreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG); - //Console.WriteLn("recVUMI_FCEQ"); - xMOV(eax, ptr[(void*)(VU_VI_ADDR(REG_CLIP_FLAG, 1))]); - xAND(eax, 0xffffff ); - xXOR(xRegister32(itreg), xRegister32(itreg )); - xCMP(eax, VU->code&0xffffff ); - - xSETE(xRegister8(itreg)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCOR -//------------------------------------------------------------------ -void recVUMI_FCOR( VURegs *VU, int info ) -{ - int itreg; - //Console.WriteLn("recVUMI_FCOR"); - itreg = ALLOCVI(1, MODE_WRITE); - xMOV(xRegister32(itreg), ptr[(void*)(VU_VI_ADDR(REG_CLIP_FLAG, 1))]); - xOR(xRegister32(itreg), VU->code ); - xAND(xRegister32(itreg), 0xffffff ); - xADD(xRegister32(itreg), 1 ); // If 24 1's will make 25th bit 1, else 0 - xSHR(xRegister32(itreg), 24 ); // Get the 25th bit (also clears the rest of the garbage in the reg) -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCSET -//------------------------------------------------------------------ -void recVUMI_FCSET( VURegs *VU, int info ) -{ - u32 addr = VU_VI_ADDR(REG_CLIP_FLAG, 0); - //Console.WriteLn("recVUMI_FCSET"); - xMOV(ptr32[(u32*)(addr ? addr : VU_VI_ADDR(REG_CLIP_FLAG, 2))], VU->code&0xffffff); - - if( !(info & (PROCESS_VU_SUPER|PROCESS_VU_COP2)) ) - xMOV(ptr32[(u32*)(VU_VI_ADDR(REG_CLIP_FLAG, 1))], VU->code&0xffffff ); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCGET -//------------------------------------------------------------------ -void recVUMI_FCGET( VURegs *VU, int info ) -{ - int itreg; - if(_It_ == 0) return; - //Console.WriteLn("recVUMI_FCGET"); - itreg = ALLOCVI(_It_, MODE_WRITE); - - xMOV(xRegister32(itreg), ptr[(void*)(VU_VI_ADDR(REG_CLIP_FLAG, 1))]); - xAND(xRegister32(itreg), 0x0fff); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// _recbranchAddr() -// -// NOTE: Due to static var dependencies, several SuperVU branch instructions -// are still located in iVUzerorec.cpp. -//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// MFP* -//------------------------------------------------------------------ -void recVUMI_MFP(VURegs *VU, int info) -{ - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - //Console.WriteLn("recVUMI_MFP"); - if( _XYZW_SS ) { - _vuFlipRegSS(VU, EEREC_T); - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(VU_VI_ADDR(REG_P, 1))]); - xMOVSS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_TEMP)); - _vuFlipRegSS(VU, EEREC_T); - } - else if (_X_Y_Z_W != 0xf) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(VU_VI_ADDR(REG_P, 1))]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - xMOVSSZX(xRegisterSSE(EEREC_T), ptr[(void*)(VU_VI_ADDR(REG_P, 1))]); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// WAITP -//------------------------------------------------------------------ -static __aligned16 float s_tempmem[4]; -void recVUMI_WAITP(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_WAITP"); -// if( info & PROCESS_VU_SUPER ) -// SuperVUFlush(1, 1); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// vuSqSumXYZ()* -// -// NOTE: In all EFU insts, EEREC_D is a temp reg -//------------------------------------------------------------------ -void vuSqSumXYZ(int regd, int regs, int regtemp) // regd.x = x ^ 2 + y ^ 2 + z ^ 2 -{ - //Console.WriteLn("VU: SUMXYZ"); - if( x86caps.hasStreamingSIMD4Extensions ) - { - xMOVAPS(xRegisterSSE(regd), xRegisterSSE(regs)); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat2(regd, regtemp, 0xf); - xDP.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x71); - } - else - { - xMOVAPS(xRegisterSSE(regtemp), xRegisterSSE(regs)); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat2(regtemp, regd, 0xf); - xMUL.PS(xRegisterSSE(regtemp), xRegisterSSE(regtemp)); // xyzw ^ 2 - - if( x86caps.hasStreamingSIMD3Extensions ) { - xHADD.PS(xRegisterSSE(regd), xRegisterSSE(regtemp)); - xADD.PS(xRegisterSSE(regd), xRegisterSSE(regtemp)); // regd.z = x ^ 2 + y ^ 2 + z ^ 2 - xMOVHL.PS(xRegisterSSE(regd), xRegisterSSE(regd)); // regd.x = regd.z - } - else { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(regtemp)); - xPSHUF.LW(xRegisterSSE(regtemp), xRegisterSSE(regtemp), 0x4e); // wzyx -> wzxy - xADD.SS(xRegisterSSE(regd), xRegisterSSE(regtemp)); // x ^ 2 + y ^ 2 - xSHUF.PS(xRegisterSSE(regtemp), xRegisterSSE(regtemp), 0xD2); // wzxy -> wxyz - xADD.SS(xRegisterSSE(regd), xRegisterSSE(regtemp)); // x ^ 2 + y ^ 2 + z ^ 2 - } - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ESADD* -//------------------------------------------------------------------ -void recVUMI_ESADD( VURegs *VU, int info) -{ - //Console.WriteLn("VU: ESADD"); - pxAssert( VU == &VU1 ); - if( EEREC_TEMP == EEREC_D ) { // special code to reset P ( FixMe: don't know if this is still needed! (cottonvibes) ) - Console.Warning("ESADD: Resetting P reg!!!\n"); - xMOV(ptr32[(u32*)(VU_VI_ADDR(REG_P, 0))], 0); - return; - } - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - if (CHECK_VU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_D), ptr[g_maxvals]); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_D)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ERSADD* -//------------------------------------------------------------------ -void recVUMI_ERSADD( VURegs *VU, int info ) -{ - //Console.WriteLn("VU: ERSADD"); - pxAssert( VU == &VU1 ); - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - // don't use RCPSS (very bad precision) - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[VU_ONE]); - xDIV.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_D)); - if (CHECK_VU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_TEMP), ptr[g_maxvals]); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_TEMP)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ELENG* -//------------------------------------------------------------------ -void recVUMI_ELENG( VURegs *VU, int info ) -{ - //Console.WriteLn("VU: ELENG"); - pxAssert( VU == &VU1 ); - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - if (CHECK_VU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_D), ptr[g_maxvals]); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - xSQRT.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_D)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ERLENG* -//------------------------------------------------------------------ -void recVUMI_ERLENG( VURegs *VU, int info ) -{ - //Console.WriteLn("VU: ERLENG"); - pxAssert( VU == &VU1 ); - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - if (CHECK_VU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_D), ptr[g_maxvals]); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - xSQRT.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); // regd <- sqrt(x^2 + y^2 + z^2) - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[VU_ONE]); // temp <- 1 - xDIV.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_D)); // temp = 1 / sqrt(x^2 + y^2 + z^2) - if (CHECK_VU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_TEMP), ptr[g_maxvals]); // Only need to do positive clamp - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_TEMP)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// EATANxy -//------------------------------------------------------------------ -void recVUMI_EATANxy( VURegs *VU, int info ) -{ - pxAssert( VU == &VU1 ); - //Console.WriteLn("recVUMI_EATANxy"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - xMOVL.PS(ptr[s_tempmem], xRegisterSSE(EEREC_S)); - FLD32((uptr)&s_tempmem[0]); - FLD32((uptr)&s_tempmem[1]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - xMOVAPS(ptr[(&VU->VF[_Fs_])], xRegisterSSE(EEREC_S)); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - FLD32((uptr)&VU->VF[_Fs_].UL[0]); - FLD32((uptr)&VU->VF[_Fs_].UL[1]); - } - - FPATAN(); - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// EATANxz -//------------------------------------------------------------------ -void recVUMI_EATANxz( VURegs *VU, int info ) -{ - pxAssert( VU == &VU1 ); - //Console.WriteLn("recVUMI_EATANxz"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - xMOVL.PS(ptr[s_tempmem], xRegisterSSE(EEREC_S)); - FLD32((uptr)&s_tempmem[0]); - FLD32((uptr)&s_tempmem[2]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - xMOVAPS(ptr[(&VU->VF[_Fs_])], xRegisterSSE(EEREC_S)); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - FLD32((uptr)&VU->VF[_Fs_].UL[0]); - FLD32((uptr)&VU->VF[_Fs_].UL[2]); - } - FPATAN(); - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ESUM* -//------------------------------------------------------------------ -void recVUMI_ESUM( VURegs *VU, int info ) -{ - //Console.WriteLn("VU: ESUM"); - pxAssert( VU == &VU1 ); - - if( x86caps.hasStreamingSIMD3Extensions ) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat_useEAX(info, EEREC_TEMP, 0xf); - xHADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - xHADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVHL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); // z, w, z, w - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); // z+x, w+y, z+z, w+w - xUNPCK.LPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); // z+x, z+x, w+y, w+y - xMOVHL.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); // w+y, w+y, w+y, w+y - xADD.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_D)); // x+y+z+w, w+y, w+y, w+y - } - - vuFloat_useEAX(info, EEREC_TEMP, 8); - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_TEMP)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ERCPR* -//------------------------------------------------------------------ -void recVUMI_ERCPR( VURegs *VU, int info ) -{ - pxAssert( VU == &VU1 ); - //Console.WriteLn("VU1: ERCPR"); - - // don't use RCPSS (very bad precision) - switch ( _Fsf_ ) { - case 0: //0001 - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[VU_ONE]); // temp <- 1 - xDIV.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - break; - case 1: //0010 - xPSHUF.LW(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_S), 0x4e); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[VU_ONE]); // temp <- 1 - xDIV.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xPSHUF.LW(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_S), 0x4e); - break; - case 2: //0100 - xSHUF.PS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_S), 0xc6); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[VU_ONE]); // temp <- 1 - xDIV.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSHUF.PS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_S), 0xc6); - break; - case 3: //1000 - xSHUF.PS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_S), 0x27); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[VU_ONE]); // temp <- 1 - xDIV.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSHUF.PS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_S), 0x27); - break; - } - - vuFloat_useEAX(info, EEREC_TEMP, 8); - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_TEMP)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ESQRT* -//------------------------------------------------------------------ -void recVUMI_ESQRT( VURegs *VU, int info ) -{ - pxAssert( VU == &VU1 ); - - //Console.WriteLn("VU1: ESQRT"); - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[const_clip]); // abs(x) - if (CHECK_VU_OVERFLOW) xMIN.SS(xRegisterSSE(EEREC_TEMP), ptr[g_maxvals]); // Only need to do positive clamp - xSQRT.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_TEMP)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ERSQRT* -//------------------------------------------------------------------ -void recVUMI_ERSQRT( VURegs *VU, int info ) -{ - int t1reg = _vuGetTempXMMreg(info); - - pxAssert( VU == &VU1 ); - //Console.WriteLn("VU1: ERSQRT"); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[const_clip]); // abs(x) - xMIN.SS(xRegisterSSE(EEREC_TEMP), ptr[g_maxvals]); // Clamp Infinities to Fmax - xSQRT.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); // SQRT(abs(x)) - - if( t1reg >= 0 ) - { - xMOVSSZX(xRegisterSSE(t1reg), ptr[VU_ONE]); - xDIV.SS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - vuFloat_useEAX(info, t1reg, 8); - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(t1reg)); - _freeXMMreg(t1reg); - } - else - { - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_TEMP)); - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[VU_ONE]); - xDIV.SS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(VU_VI_ADDR(REG_P, 0))]); - vuFloat_useEAX(info, EEREC_TEMP, 8); - xMOVSS(ptr[(void*)(VU_VI_ADDR(REG_P, 0))], xRegisterSSE(EEREC_TEMP)); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ESIN -//------------------------------------------------------------------ -void recVUMI_ESIN( VURegs *VU, int info ) -{ - pxAssert( VU == &VU1 ); - - //Console.WriteLn("recVUMI_ESIN"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - switch(_Fsf_) { - case 0: xMOVSS(ptr[s_tempmem], xRegisterSSE(EEREC_S)); break; - case 1: xMOVL.PS(ptr[s_tempmem], xRegisterSSE(EEREC_S)); break; - default: xMOVH.PS(ptr[&s_tempmem[2]], xRegisterSSE(EEREC_S)); break; - } - FLD32((uptr)&s_tempmem[_Fsf_]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - xMOVAPS(ptr[(&VU->VF[_Fs_])], xRegisterSSE(EEREC_S)); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - FLD32((uptr)&VU->VF[_Fs_].UL[_Fsf_]); - } - - FSIN(); - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// EATAN -//------------------------------------------------------------------ -void recVUMI_EATAN( VURegs *VU, int info ) -{ - pxAssert( VU == &VU1 ); - - //Console.WriteLn("recVUMI_EATAN"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - switch(_Fsf_) { - case 0: xMOVSS(ptr[s_tempmem], xRegisterSSE(EEREC_S)); break; - case 1: xMOVL.PS(ptr[s_tempmem], xRegisterSSE(EEREC_S)); break; - default: xMOVH.PS(ptr[&s_tempmem[2]], xRegisterSSE(EEREC_S)); break; - } - FLD32((uptr)&s_tempmem[_Fsf_]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - xMOVAPS(ptr[(&VU->VF[_Fs_])], xRegisterSSE(EEREC_S)); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - } - - FLD1(); - FLD32((uptr)&VU->VF[_Fs_].UL[_Fsf_]); - FPATAN(); - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// EEXP -//------------------------------------------------------------------ -void recVUMI_EEXP( VURegs *VU, int info ) -{ - pxAssert( VU == &VU1 ); - //Console.WriteLn("recVUMI_EEXP"); - FLDL2E(); - - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - switch(_Fsf_) { - case 0: xMOVSS(ptr[s_tempmem], xRegisterSSE(EEREC_S)); break; - case 1: xMOVL.PS(ptr[s_tempmem], xRegisterSSE(EEREC_S)); break; - default: xMOVH.PS(ptr[&s_tempmem[2]], xRegisterSSE(EEREC_S)); break; - } - FMUL32((uptr)&s_tempmem[_Fsf_]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - xMOVAPS(ptr[(&VU->VF[_Fs_])], xRegisterSSE(EEREC_S)); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - FMUL32((uptr)&VU->VF[_Fs_].UL[_Fsf_]); - } - - // basically do 2^(log_2(e) * val) - FLD(0); - FRNDINT(); - FXCH(1); - FSUB32Rto0(1); - F2XM1(); - FLD1(); - FADD320toR(1); - FSCALE(); - FSTP(1); - - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// XITOP -//------------------------------------------------------------------ -void recVUMI_XITOP( VURegs *VU, int info ) -{ - int itreg; - if (_It_ == 0) return; - //Console.WriteLn("recVUMI_XITOP"); - itreg = ALLOCVI(_It_, MODE_WRITE); - xMOVZX(xRegister32(itreg), ptr16[(u16*)((uptr)&VU->GetVifRegs().itop )]); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// XTOP -//------------------------------------------------------------------ -void recVUMI_XTOP( VURegs *VU, int info ) -{ - int itreg; - if ( _It_ == 0 ) return; - //Console.WriteLn("recVUMI_XTOP"); - itreg = ALLOCVI(_It_, MODE_WRITE); - xMOVZX(xRegister32(itreg), ptr16[(u16*)((uptr)&VU->GetVifRegs().top )]); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// VU1XGKICK_MTGSTransfer() - Called by ivuZerorec.cpp -//------------------------------------------------------------------ - -void __fastcall VU1XGKICK_MTGSTransfer(u32 addr) -{ - addr &= 0x3fff; - u32 diff = 0x4000 - addr; - u32 size = gifUnit.GetGSPacketSize(GIF_PATH_1, vuRegs[1].Mem, addr); - - if (size > diff) { - //DevCon.WriteLn(Color_Green, "superVU1: XGkick Wrap!"); - gifUnit.gifPath[0].CopyGSPacketData(&vuRegs[1].Mem[addr], diff, true); - gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[0],size-diff,true); - } - else { - gifUnit.TransferGSPacketData(GIF_TRANS_XGKICK, &vuRegs[1].Mem[addr], size, true); - } -} -//------------------------------------------------------------------ diff --git a/pcsx2/x86/sVU_Micro.cpp b/pcsx2/x86/sVU_Micro.cpp deleted file mode 100644 index 4b426235d7..0000000000 --- a/pcsx2/x86/sVU_Micro.cpp +++ /dev/null @@ -1,1742 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "GS.h" -#include "R5900OpcodeTables.h" -#include "iR5900.h" -#include "iMMI.h" -#include "iFPU.h" -#include "iCOP0.h" -#include "VUmicro.h" -#include "VUflags.h" -#include "sVU_Micro.h" -#include "sVU_zerorec.h" - -using namespace x86Emitter; - -#ifdef _WIN32 -#pragma warning(disable:4244) -#pragma warning(disable:4761) -#endif -//------------------------------------------------------------------ - -// fixme - VUmicro should really use its own static vars for pc and branch. -// Sharing with the EE's copies of pc and branch is not cool! (air) - -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define _Ft_ (( VU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ (( VU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ (( VU->code >> 6) & 0x1F) // The sa part of the instruction register -#define _It_ (_Ft_ & 15) -#define _Is_ (_Fs_ & 15) -#define _Id_ (_Fd_ & 15) - -#define _X (( VU->code>>24) & 0x1) -#define _Y (( VU->code>>23) & 0x1) -#define _Z (( VU->code>>22) & 0x1) -#define _W (( VU->code>>21) & 0x1) - -#define _XYZW_SS (_X+_Y+_Z+_W==1) - -#define _Fsf_ (( VU->code >> 21) & 0x03) -#define _Ftf_ (( VU->code >> 23) & 0x03) - -#define _Imm11_ (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) -#define _UImm11_ (s32)(VU->code & 0x7ff) - -#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] -#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1] -#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2] -#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3] - -#define VU_REGR_ADDR (uptr)&VU->VI[REG_R] -#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q] -#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG] - -#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info) - -#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] -#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1] -#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2] -#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3] - -#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Global Variables -//------------------------------------------------------------------ -int vucycle; - -const __aligned16 float s_fones[8] = {1.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f}; -const __aligned16 u32 s_mask[4] = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; -const __aligned16 u32 s_expmask[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -const __aligned16 u32 const_clip[8] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, - 0x80000000, 0x80000000, 0x80000000, 0x80000000}; - -const __aligned(64) u32 g_ones[4] = {0x00000001, 0x00000001, 0x00000001, 0x00000001}; - -const __aligned16 u32 g_minvals_XYZW[16][4] = -{ - { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000 - { 0xffffffff, 0xffffffff, 0xffffffff, 0xff7fffff }, //0001 - { 0xffffffff, 0xffffffff, 0xff7fffff, 0xffffffff }, //0010 - { 0xffffffff, 0xffffffff, 0xff7fffff, 0xff7fffff }, //0011 - { 0xffffffff, 0xff7fffff, 0xffffffff, 0xffffffff }, //0100 - { 0xffffffff, 0xff7fffff, 0xffffffff, 0xff7fffff }, //0101 - { 0xffffffff, 0xff7fffff, 0xff7fffff, 0xffffffff }, //0110 - { 0xffffffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //0111 - { 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000 - { 0xff7fffff, 0xffffffff, 0xffffffff, 0xff7fffff }, //1001 - { 0xff7fffff, 0xffffffff, 0xff7fffff, 0xffffffff }, //1010 - { 0xff7fffff, 0xffffffff, 0xff7fffff, 0xff7fffff }, //1011 - { 0xff7fffff, 0xff7fffff, 0xffffffff, 0xffffffff }, //1100 - { 0xff7fffff, 0xff7fffff, 0xffffffff, 0xff7fffff }, //1101 - { 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xffffffff }, //1110 - { 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //1111 -}; -const __aligned16 u32 g_maxvals_XYZW[16][4] = -{ - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //0000 - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7f7fffff }, //0001 - { 0x7fffffff, 0x7fffffff, 0x7f7fffff, 0x7fffffff }, //0010 - { 0x7fffffff, 0x7fffffff, 0x7f7fffff, 0x7f7fffff }, //0011 - { 0x7fffffff, 0x7f7fffff, 0x7fffffff, 0x7fffffff }, //0100 - { 0x7fffffff, 0x7f7fffff, 0x7fffffff, 0x7f7fffff }, //0101 - { 0x7fffffff, 0x7f7fffff, 0x7f7fffff, 0x7fffffff }, //0110 - { 0x7fffffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //0111 - { 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1000 - { 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7f7fffff }, //1001 - { 0x7f7fffff, 0x7fffffff, 0x7f7fffff, 0x7fffffff }, //1010 - { 0x7f7fffff, 0x7fffffff, 0x7f7fffff, 0x7f7fffff }, //1011 - { 0x7f7fffff, 0x7f7fffff, 0x7fffffff, 0x7fffffff }, //1100 - { 0x7f7fffff, 0x7f7fffff, 0x7fffffff, 0x7f7fffff }, //1101 - { 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7fffffff }, //1110 - { 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //1111 -}; -//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// VU Pipeline/Test Stalls/Analyzing Functions -//------------------------------------------------------------------ -void _recvuFMACflush(VURegs * VU, bool intermediate) { - int i; - - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 0) continue; - - if( intermediate ) { - if ((vucycle - VU->fmac[i].sCycle) > VU->fmac[i].Cycle) { -// VUM_LOG("flushing FMAC pipe[%d]", i); - VU->fmac[i].enable = 0; - } - } - else { - if ((vucycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) { -// VUM_LOG("flushing FMAC pipe[%d]", i); - VU->fmac[i].enable = 0; - } - } - } -} - -void _recvuFDIVflush(VURegs * VU, bool intermediate) { - if (VU->fdiv.enable == 0) return; - - if( intermediate ) { - if ((vucycle - VU->fdiv.sCycle) > VU->fdiv.Cycle) { -// Console.WriteLn("flushing FDIV pipe"); - VU->fdiv.enable = 0; - } - } - else { - if ((vucycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) { -// Console.WriteLn("flushing FDIV pipe"); - VU->fdiv.enable = 0; - } - } -} - -void _recvuEFUflush(VURegs * VU, bool intermediate) { - if (VU->efu.enable == 0) return; - - if( intermediate ) { - if ((vucycle - VU->efu.sCycle) > VU->efu.Cycle) { -// Console.WriteLn("flushing FDIV pipe"); - VU->efu.enable = 0; - } - } - else { - if ((vucycle - VU->efu.sCycle) >= VU->efu.Cycle) { -// Console.WriteLn("flushing FDIV pipe"); - VU->efu.enable = 0; - } - } -} - -void _recvuIALUflush(VURegs * VU, bool intermediate) { - int i; - - for (i=0; i<8; i++) { - if (VU->ialu[i].enable == 0) continue; - - if( intermediate ) { - if ((vucycle - VU->ialu[i].sCycle) > VU->ialu[i].Cycle) { -// VUM_LOG("flushing IALU pipe[%d]", i); - VU->ialu[i].enable = 0; - } - } - else { - if ((vucycle - VU->ialu[i].sCycle) >= VU->ialu[i].Cycle) { -// VUM_LOG("flushing IALU pipe[%d]", i); - VU->ialu[i].enable = 0; - } - } - } -} - -void _recvuTestPipes(VURegs * VU, bool intermediate) { // intermediate = true if called by upper FMAC stall detection - _recvuFMACflush(VU, intermediate); - _recvuFDIVflush(VU, intermediate); - _recvuEFUflush(VU, intermediate); - _recvuIALUflush(VU, intermediate); -} - -void _recvuFMACTestStall(VURegs * VU, int reg, int xyzw) { - int cycle; - int i; - u32 mask = 0; - - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 0) continue; - if (VU->fmac[i].reg == reg && (VU->fmac[i].xyzw & xyzw)) break; - } - - if (i == 8) return; - - // do a perchannel delay - // old code -// cycle = VU->fmac[i].Cycle - (vucycle - VU->fmac[i].sCycle); - - // new code - mask = 4; // w -// if( VU->fmac[i].xyzw & 1 ) mask = 4; // w -// else if( VU->fmac[i].xyzw & 2 ) mask = 3; // z -// else if( VU->fmac[i].xyzw & 4 ) mask = 2; // y -// else { -// assert(VU->fmac[i].xyzw & 8 ); -// mask = 1; // x -// } - -// mask = 0; -// if( VU->fmac[i].xyzw & 1 ) mask++; // w -// else if( VU->fmac[i].xyzw & 2 ) mask++; // z -// else if( VU->fmac[i].xyzw & 4 ) mask++; // y -// else if( VU->fmac[i].xyzw & 8 ) mask++; // x - - assert( (int)VU->fmac[i].sCycle < (int)vucycle ); - cycle = 0; - if( vucycle - VU->fmac[i].sCycle < mask ) - cycle = mask - (vucycle - VU->fmac[i].sCycle); - - VU->fmac[i].enable = 0; - vucycle+= cycle; - _recvuTestPipes(VU, true); // for lower instructions -} - -void _recvuIALUTestStall(VURegs * VU, int reg) { - int cycle; - int i; - u32 latency; - - for (i=0; i<8; i++) { - if (VU->ialu[i].enable == 0) continue; - if (VU->ialu[i].reg == reg) break; - } - - if (i == 8) return; - - latency = VU->ialu[i].Cycle + 1; - cycle = 0; - if( vucycle - VU->ialu[i].sCycle < latency ) - cycle = latency - (vucycle - VU->ialu[i].sCycle); - - VU->ialu[i].enable = 0; - vucycle+= cycle; - _recvuTestPipes(VU, true); -} - -void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) { - int i; - - /* find a free fmac pipe */ - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 1) continue; - break; - } - - if (i==8) { - Console.Error("*PCSX2*: error , out of fmacs"); -// VUM_LOG("adding FMAC pipe[%d]; reg %d", i, reg); - } else { - VU->fmac[i].enable = 1; - VU->fmac[i].sCycle = vucycle; - VU->fmac[i].Cycle = 3; - VU->fmac[i].xyzw = xyzw; - VU->fmac[i].reg = reg; - } -} - -void _recvuFDIVAdd(VURegs * VU, int cycles) { -// Console.WriteLn("adding FDIV pipe"); - VU->fdiv.enable = 1; - VU->fdiv.sCycle = vucycle; - VU->fdiv.Cycle = cycles; -} - -void _recvuEFUAdd(VURegs * VU, int cycles) { -// Console.WriteLn("adding EFU pipe"); - VU->efu.enable = 1; - VU->efu.sCycle = vucycle; - VU->efu.Cycle = cycles; -} - -void _recvuIALUAdd(VURegs * VU, int reg, int cycles) { - int i; - - /* find a free ialu pipe */ - for (i=0; i<8; i++) { - if (VU->ialu[i].enable == 1) continue; - break; - } - - if (i==8) { - Console.Error("*PCSX2*: error , out of ialus"); - } else { - VU->ialu[i].enable = 1; - VU->ialu[i].sCycle = vucycle; - VU->ialu[i].Cycle = cycles; - VU->ialu[i].reg = reg; - } -} - -void _recvuTestIALUStalls(VURegs * VU, _VURegsNum *VUregsn) { - - int VIread0 = 0, VIread1 = 0; // max 2 integer registers are read simulataneously - int i; - - for(i=0;i<16;i++) { // find used integer(vi00-vi15) registers - if( (VUregsn->VIread >> i) & 1 ) { - if( VIread0 ) VIread1 = i; - else VIread0 = i; - } - } - - if( VIread0 ) _recvuIALUTestStall(VU, VIread0); - if( VIread1 ) _recvuIALUTestStall(VU, VIread1); -} - -void _recvuAddIALUStalls(VURegs * VU, _VURegsNum *VUregsn) { - if (VUregsn->VIwrite && VUregsn->cycles) { - int VIWrite0 = 0; - int i; - - for(i=0;i<16;i++) { // find used(vi00-vi15) registers - if( (VUregsn->VIwrite >> i) & 1 ) { - VIWrite0 = i; - } - } - if( VIWrite0 ) _recvuIALUAdd(VU, VIWrite0, VUregsn->cycles); - } -} - -void _recvuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn, bool upper) { - - if( VUregsn->VFread0 && (VUregsn->VFread0 == VUregsn->VFread1) ) { - _recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw|VUregsn->VFr1xyzw); - } - else { - if (VUregsn->VFread0) _recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw); - if (VUregsn->VFread1) _recvuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw); - } - - if( !upper && VUregsn->VIread ) _recvuTestIALUStalls(VU, VUregsn); // for lower instructions which read integer reg -} - -void _recvuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { - - if (VUregsn->VFwrite) _recvuFMACAdd(VU, VUregsn->VFwrite, VUregsn->VFwxyzw); - else if (VUregsn->VIwrite & (1 << REG_CLIP_FLAG)) _recvuFMACAdd(VU, -REG_CLIP_FLAG, 0); // REG_CLIP_FLAG pipe - else _recvuFMACAdd(VU, 0, 0); // cause no data dependency with fp registers -} - -void _recvuFlushFDIV(VURegs * VU) { - int cycle; - - if (VU->fdiv.enable == 0) return; - - cycle = VU->fdiv.Cycle + 1 - (vucycle - VU->fdiv.sCycle); //VU->fdiv.Cycle contains the latency minus 1 (6 or 12) -// Console.WriteLn("waiting FDIV pipe %d", cycle); - VU->fdiv.enable = 0; - vucycle+= cycle; -} - -void _recvuFlushEFU(VURegs * VU) { - int cycle; - - if (VU->efu.enable == 0) return; - - cycle = VU->efu.Cycle - (vucycle - VU->efu.sCycle); -// Console.WriteLn("waiting FDIV pipe %d", cycle); - VU->efu.enable = 0; - vucycle+= cycle; -} - -void _recvuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { - _recvuTestFMACStalls(VU,VUregsn, false); - _recvuFlushFDIV(VU); -} - -void _recvuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { - _recvuTestFMACStalls(VU,VUregsn, false); - _recvuFlushEFU(VU); -} - -void _recvuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { -// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); - if (VUregsn->VIwrite & (1 << REG_Q)) { - _recvuFDIVAdd(VU, VUregsn->cycles); - } -} - -void _recvuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { -// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); - if (VUregsn->VIwrite & (1 << REG_P)) { - _recvuEFUAdd(VU, VUregsn->cycles); - } -} - -void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn, true); break; - } -} - -void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn, false); break; - case VUPIPE_FDIV: _recvuTestFDIVStalls(VU, VUregsn); break; - case VUPIPE_EFU: _recvuTestEFUStalls(VU, VUregsn); break; - case VUPIPE_IALU: _recvuTestIALUStalls(VU, VUregsn); break; - case VUPIPE_BRANCH: _recvuTestIALUStalls(VU, VUregsn); break; - } -} - -void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break; - } -} - -void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break; - case VUPIPE_FDIV: _recvuAddFDIVStalls(VU, VUregsn); break; - case VUPIPE_EFU: _recvuAddEFUStalls(VU, VUregsn); break; - case VUPIPE_IALU: _recvuAddIALUStalls(VU, VUregsn); break; // note: only ILW and ILWR cause stall in IALU pipe - } -} - -void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs) -{ - _VURegsNum* lregs; - _VURegsNum* uregs; - int *code_ptr; - - lregs = pCodeRegs; - uregs = pCodeRegs+1; - - code_ptr = (int*)&VU->Micro[pc]; - pc += 8; - - if (code_ptr[1] & 0x40000000) { // EOP - g_branch |= 8; - } - - VU->code = code_ptr[1]; - if (VU == &VU1) VU1regs_UPPER_OPCODE[VU->code & 0x3f](uregs); - else VU0regs_UPPER_OPCODE[VU->code & 0x3f](uregs); - - _recvuTestUpperStalls(VU, uregs); - switch(VU->code & 0x3f) { - case 0x10: case 0x11: case 0x12: case 0x13: - case 0x14: case 0x15: case 0x16: case 0x17: - case 0x1d: case 0x1f: - case 0x2b: case 0x2f: - break; - - case 0x3c: - switch ((VU->code >> 6) & 0x1f) { - case 0x4: case 0x5: - break; - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - break; - case 0x3d: - switch ((VU->code >> 6) & 0x1f) { - case 0x4: case 0x5: case 0x7: - break; - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - break; - case 0x3e: - switch ((VU->code >> 6) & 0x1f) { - case 0x4: case 0x5: - break; - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - break; - case 0x3f: - switch ((VU->code >> 6) & 0x1f) { - case 0x4: case 0x5: case 0x7: case 0xb: - break; - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - break; - - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - - if (uregs->VIread & (1 << REG_Q)) { info->q |= 2; } - if (uregs->VIread & (1 << REG_P)) { info->p |= 2; assert( VU == &VU1 ); } - - // check upper flags - if (code_ptr[1] & 0x80000000) { // I flag - info->cycle = vucycle; - memzero(*lregs); - } - else { - - VU->code = code_ptr[0]; - if (VU == &VU1) VU1regs_LOWER_OPCODE[VU->code >> 25](lregs); - else VU0regs_LOWER_OPCODE[VU->code >> 25](lregs); - - _recvuTestLowerStalls(VU, lregs); - info->cycle = vucycle; - - if (lregs->pipe == VUPIPE_BRANCH) { - g_branch |= 1; - } - - if (lregs->VIwrite & (1 << REG_Q)) { - info->q |= 4; - info->cycles = lregs->cycles; - info->pqinst = (VU->code&2)>>1; // rsqrt is 2 - } - else if (lregs->pipe == VUPIPE_FDIV) { - info->q |= 8|1; - info->pqinst = 0; - } - - if (lregs->VIwrite & (1 << REG_P)) { - assert( VU == &VU1 ); - info->p |= 4; - info->cycles = lregs->cycles; - - switch( VU->code & 0xff ) { - case 0xfd: info->pqinst = 0; break; //eatan - case 0x7c: info->pqinst = 0; break; //eatanxy - case 0x7d: info->pqinst = 0; break; //eatanzy - case 0xfe: info->pqinst = 1; break; //eexp - case 0xfc: info->pqinst = 2; break; //esin - case 0x3f: info->pqinst = 3; break; //erleng - case 0x3e: info->pqinst = 4; break; //eleng - case 0x3d: info->pqinst = 4; break; //ersadd - case 0xbd: info->pqinst = 4; break; //ersqrt - case 0xbe: info->pqinst = 5; break; //ercpr - case 0xbc: info->pqinst = 5; break; //esqrt - case 0x7e: info->pqinst = 5; break; //esum - case 0x3c: info->pqinst = 6; break; //esadd - default: assert(0); - } - } - else if (lregs->pipe == VUPIPE_EFU) { - info->p |= 8|1; - } - - if (lregs->VIread & (1 << REG_STATUS_FLAG)) info->statusflag|= VUOP_READ; - if (lregs->VIread & (1 << REG_MAC_FLAG)) info->macflag|= VUOP_READ; - - if (lregs->VIwrite & (1 << REG_STATUS_FLAG)) info->statusflag|= VUOP_WRITE; - if (lregs->VIwrite & (1 << REG_MAC_FLAG)) info->macflag|= VUOP_WRITE; - - if (lregs->VIread & (1 << REG_Q)) { info->q |= 2; } - if (lregs->VIread & (1 << REG_P)) { info->p |= 2; assert( VU == &VU1 ); } - - _recvuAddLowerStalls(VU, lregs); - } - - _recvuAddUpperStalls(VU, uregs); - _recvuTestPipes(VU, false); - - vucycle++; -} - -int eeVURecompileCode(VURegs *VU, _VURegsNum* regs) -{ - int info = 0; - int vfread0=-1, vfread1 = -1, vfwrite = -1, vfacc = -1, vftemp=-1; - - assert( regs != NULL ); - - if( regs->VFread0 ) _addNeededVFtoXMMreg(regs->VFread0); - if( regs->VFread1 ) _addNeededVFtoXMMreg(regs->VFread1); - if( regs->VFwrite ) _addNeededVFtoXMMreg(regs->VFwrite); - if( regs->VIread & (1<VIread & (1<VFread0 ) vfread0 = _allocVFtoXMMreg(VU, -1, regs->VFread0, MODE_READ); - else if( regs->VIread & (1<VFread1 ) vfread1 = _allocVFtoXMMreg(VU, -1, regs->VFread1, MODE_READ); - else if( (regs->VIread & (1<VFr1xyzw != 0xff) vfread1 = _allocVFtoXMMreg(VU, -1, 0, MODE_READ); - - if( regs->VIread & (1<VIwrite&(1<VIwrite & (1<VFwxyzw != 0xf?MODE_READ:0)); - } - - if( regs->VFwrite ) { - assert( !(regs->VIwrite&(1<VFwrite, MODE_WRITE|(regs->VFwxyzw != 0xf?MODE_READ:0)); - } - - if( vfacc>= 0 ) info |= PROCESS_EE_SET_ACC(vfacc); - if( vfwrite >= 0 ) { - if( regs->VFwrite == _Ft_ && vfread1 < 0 ) { - info |= PROCESS_EE_SET_T(vfwrite); - } - else { - assert( regs->VFwrite == _Fd_ ); - info |= PROCESS_EE_SET_D(vfwrite); - } - } - - if( vfread0 >= 0 ) info |= PROCESS_EE_SET_S(vfread0); - if( vfread1 >= 0 ) info |= PROCESS_EE_SET_T(vfread1); - - vftemp = _allocTempXMMreg(XMMT_FPS, -1); - info |= PROCESS_VU_SET_TEMP(vftemp); - - if( regs->VIwrite & (1 << REG_CLIP_FLAG) ) { - // CLIP inst, need two extra temp registers, put it EEREC_D and EEREC_ACC - int t1reg = _allocTempXMMreg(XMMT_FPS, -1); - int t2reg = _allocTempXMMreg(XMMT_FPS, -1); - - info |= PROCESS_EE_SET_D(t1reg); - info |= PROCESS_EE_SET_ACC(t2reg); - - _freeXMMreg(t1reg); // don't need - _freeXMMreg(t2reg); // don't need - } - else if( regs->VIwrite & (1<VI[reg].UL; - - if( read != 1 ) { - if( reg == REG_MAC_FLAG ) return (uptr)&VU->macflag; - if( reg == REG_CLIP_FLAG ) return (uptr)&VU->clipflag; - if( reg == REG_STATUS_FLAG ) return (uptr)&VU->statusflag; - if( reg == REG_Q ) return (uptr)&VU->q; - if( reg == REG_P ) return (uptr)&VU->p; - } - - return (uptr)&VU->VI[reg].UL; -} - -// gets a temp reg that is not EEREC_TEMP -int _vuGetTempXMMreg(int info) -{ - int t1reg = -1; - - if( _hasFreeXMMreg() ) { - t1reg = _allocTempXMMreg(XMMT_FPS, -1); - - if( t1reg == EEREC_TEMP ) { - if( _hasFreeXMMreg() ) { - int t = _allocTempXMMreg(XMMT_FPS, -1); - _freeXMMreg(t1reg); - t1reg = t; - } - else { - _freeXMMreg(t1reg); - t1reg = -1; - } - } - } - - return t1reg; -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Misc VU Reg Flipping/Merging Functions -//------------------------------------------------------------------ -void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw) -{ - switch (xyzw) { - case 0: xPSHUF.D(xRegisterSSE(dstreg), xRegisterSSE(srcreg), 0x00); break; - case 1: xPSHUF.D(xRegisterSSE(dstreg), xRegisterSSE(srcreg), 0x55); break; - case 2: xPSHUF.D(xRegisterSSE(dstreg), xRegisterSSE(srcreg), 0xaa); break; - case 3: xPSHUF.D(xRegisterSSE(dstreg), xRegisterSSE(srcreg), 0xff); break; - } -} - -void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw) -{ - switch (xyzw) { - case 0: xMOVSS(xRegisterSSE(dstreg), xRegisterSSE(srcreg)); break; - case 1: if ( x86caps.hasStreamingSIMD4Extensions ) xINSERTPS(xRegisterSSE(dstreg), xRegisterSSE(srcreg), _MM_MK_INSERTPS_NDX(1, 0, 0)); - else xPSHUF.LW(xRegisterSSE(dstreg), xRegisterSSE(srcreg), 0xee); - break; - case 2: xMOVHL.PS(xRegisterSSE(dstreg), xRegisterSSE(srcreg)); break; - case 3: if ( x86caps.hasStreamingSIMD4Extensions ) xINSERTPS(xRegisterSSE(dstreg), xRegisterSSE(srcreg), _MM_MK_INSERTPS_NDX(3, 0, 0)); - else { xMOVHL.PS(xRegisterSSE(dstreg), xRegisterSSE(srcreg)); xPSHUF.LW(xRegisterSSE(dstreg), xRegisterSSE(dstreg), 0xee); } - break; - } -} - -void _vuFlipRegSS(VURegs * VU, int reg) -{ - assert( _XYZW_SS ); - if( _Y ) xPSHUF.LW(xRegisterSSE(reg), xRegisterSSE(reg), 0x4e); - else if( _Z ) xSHUF.PS(xRegisterSSE(reg), xRegisterSSE(reg), 0xc6); - else if( _W ) xSHUF.PS(xRegisterSSE(reg), xRegisterSSE(reg), 0x27); -} - -void _vuFlipRegSS_xyzw(int reg, int xyzw) -{ - switch ( xyzw ) { - case 1: xPSHUF.LW(xRegisterSSE(reg), xRegisterSSE(reg), 0x4e); break; - case 2: xSHUF.PS(xRegisterSSE(reg), xRegisterSSE(reg), 0xc6); break; - case 3: xSHUF.PS(xRegisterSSE(reg), xRegisterSSE(reg), 0x27); break; - } -} - -void _vuMoveSS(VURegs * VU, int dstreg, int srcreg) -{ - assert( _XYZW_SS ); - if( _Y ) _unpackVFSS_xyzw(dstreg, srcreg, 1); - else if( _Z ) _unpackVFSS_xyzw(dstreg, srcreg, 2); - else if( _W ) _unpackVFSS_xyzw(dstreg, srcreg, 3); - else _unpackVFSS_xyzw(dstreg, srcreg, 0); -} - -// 1 - src, 0 - dest wzyx -void VU_MERGE0(int dest, int src) { // 0000s -} -void VU_MERGE1(int dest, int src) { // 1000 - xMOVHL.PS(xRegisterSSE(src), xRegisterSSE(dest)); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0xc4); -} -void VU_MERGE1b(int dest, int src) { // 1000s - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0x27); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x27); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0x27); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x27); -} -void VU_MERGE2(int dest, int src) { // 0100 - xMOVHL.PS(xRegisterSSE(src), xRegisterSSE(dest)); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0x64); -} -void VU_MERGE2b(int dest, int src) { // 0100s - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xC6); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xC6); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xC6); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xC6); -} -void VU_MERGE3(int dest, int src) { // 1100s - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0xe4); -} -void VU_MERGE4(int dest, int src) { // 0010 - xMOVSS(xRegisterSSE(src), xRegisterSSE(dest)); - xMOVSD(xRegisterSSE(dest), xRegisterSSE(src)); -} -void VU_MERGE4b(int dest, int src) { // 0010s - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); -} -void VU_MERGE5(int dest, int src) { // 1010 - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0xd8); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xd8); -} -void VU_MERGE5b(int dest, int src) { // 1010s - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0x27); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x27); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0x27); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x27); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); -} -void VU_MERGE6(int dest, int src) { // 0110 - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0x9c); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x78); -} -void VU_MERGE6b(int dest, int src) { // 0110s - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xC6); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xC6); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xC6); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xC6); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); -} -void VU_MERGE7(int dest, int src) { // 1110 - xMOVSS(xRegisterSSE(src), xRegisterSSE(dest)); - xMOVAPS(xRegisterSSE(dest), xRegisterSSE(src)); -} -void VU_MERGE7b(int dest, int src) { // 1110s - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0xe4); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); -} -void VU_MERGE8(int dest, int src) { // 0001s - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); -} -void VU_MERGE9(int dest, int src) { // 1001 - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0xc9); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xd2); -} -void VU_MERGE9b(int dest, int src) { // 1001s - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0x27); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x27); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0x27); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x27); -} -void VU_MERGE10(int dest, int src) { // 0101 - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0x8d); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x72); -} -void VU_MERGE10b(int dest, int src) { // 0101s - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xC6); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xC6); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xC6); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xC6); -} -void VU_MERGE11(int dest, int src) { // 1101s - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(src), 0xe4); -} -void VU_MERGE12(int dest, int src) { // 0011 - xMOVSD(xRegisterSSE(dest), xRegisterSSE(src)); -} -void VU_MERGE13(int dest, int src) { // 1011 - xMOVHL.PS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(dest), 0x64); - xMOVAPS(xRegisterSSE(dest), xRegisterSSE(src)); -} -void VU_MERGE13b(int dest, int src) { // 1011s - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0x27); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x27); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0x27); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0x27); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); -} -void VU_MERGE14(int dest, int src) { // 0111 - xMOVHL.PS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(dest), 0xc4); - xMOVAPS(xRegisterSSE(dest), xRegisterSSE(src)); -} -void VU_MERGE14b(int dest, int src) { // 0111s - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xE1); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xE1); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xC6); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xC6); - xMOVSS(xRegisterSSE(dest), xRegisterSSE(src)); - xSHUF.PS(xRegisterSSE(src), xRegisterSSE(src), 0xC6); - xSHUF.PS(xRegisterSSE(dest), xRegisterSSE(dest), 0xC6); -} -void VU_MERGE15(int dest, int src) { // 1111s - xMOVAPS(xRegisterSSE(dest), xRegisterSSE(src)); -} - -typedef void (*VUMERGEFN)(int dest, int src); - -static VUMERGEFN s_VuMerge[16] = { - VU_MERGE0, VU_MERGE1, VU_MERGE2, VU_MERGE3, - VU_MERGE4, VU_MERGE5, VU_MERGE6, VU_MERGE7, - VU_MERGE8, VU_MERGE9, VU_MERGE10, VU_MERGE11, - VU_MERGE12, VU_MERGE13, VU_MERGE14, VU_MERGE15 }; - -static VUMERGEFN s_VuMerge2[16] = { - VU_MERGE0, VU_MERGE1b, VU_MERGE2b, VU_MERGE3, - VU_MERGE4b, VU_MERGE5b, VU_MERGE6b, VU_MERGE7b, - VU_MERGE8, VU_MERGE9b, VU_MERGE10b, VU_MERGE11, - VU_MERGE12, VU_MERGE13b, VU_MERGE14b, VU_MERGE15 }; - -// Modifies the Source Reg! -void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw) { - xyzw &= 0xf; - if ( (dest != src) && (xyzw != 0) ) { - if ( x86caps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) { - xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3); - xBLEND.PS(xRegisterSSE(dest), xRegisterSSE(src), xyzw); - } - else s_VuMerge[xyzw](dest, src); - } -} -// Doesn't Modify the Source Reg! (ToDo: s_VuMerge2() has room for optimization) -void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw) { - xyzw &= 0xf; - if ( (dest != src) && (xyzw != 0) ) { - if ( x86caps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) { - xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3); - xBLEND.PS(xRegisterSSE(dest), xRegisterSSE(src), xyzw); - } - else s_VuMerge2[xyzw](dest, src); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Misc VU Reg Clamping/Overflow Functions -//------------------------------------------------------------------ -#define CLAMP_NORMAL_SSE4(n) \ - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd));\ - xPMIN.UD(xRegisterSSE(regd), ptr[&g_minvals_XYZW[n][0]]);\ - xPSUB.D(xRegisterSSE(regTemp), xRegisterSSE(regd));\ - xPCMP.GTD(xRegisterSSE(regTemp), ptr[&g_ones[0]]);\ - xPMIN.SD(xRegisterSSE(regd), ptr[&g_maxvals_XYZW[n][0]]);\ - xPSLL.D(xRegisterSSE(regTemp), 31);\ - xXOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - -#define CLAMP_SIGN_SSE4(n) \ - xPMIN.SD(xRegisterSSE(regd), ptr[&g_maxvals_XYZW[n][0]]);\ - xPMIN.UD(xRegisterSSE(regd), ptr[&g_minvals_XYZW[n][0]]); - -void vFloat0(int regd, int regTemp) { } //0000 -void vFloat1(int regd, int regTemp) { //1000 - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); -} -void vFloat1c(int regd, int regTemp) { //1000 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(1); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat2(int regd, int regTemp) { //0100 - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); -} -void vFloat2c(int regd, int regTemp) { //0100 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(2); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat3(int regd, int regTemp) { //1100 - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x36); -} -void vFloat3b(int regd, int regTemp) { //1100 //regTemp is Modified - xMOVSD(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xMOVSD(xRegisterSSE(regd), xRegisterSSE(regTemp)); -} -void vFloat3c(int regd, int regTemp) { //1100 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(3); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x36); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat4(int regd, int regTemp) { //0010 - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); -} -void vFloat4c(int regd, int regTemp) { //0010 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(4); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat5(int regd, int regTemp) { //1010 - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x2d); -} -void vFloat5b(int regd, int regTemp) { //1010 //regTemp is Modified - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_NORMAL_SSE4(5); - } - else { - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x2d); - } -} -void vFloat5c(int regd, int regTemp) { //1010 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(5); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x2d); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat6(int regd, int regTemp) { //0110 - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc9); -} -void vFloat6b(int regd, int regTemp) { //0110 //regTemp is Modified - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_NORMAL_SSE4(6); - } - else { - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc9); - } -} -void vFloat6c(int regd, int regTemp) { //0110 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(6); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc9); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat7(int regd, int regTemp) { //1110 - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x39); -} -void vFloat7_useEAX(int regd, int regTemp) { //1110 //EAX is Modified - xMOVD(eax, xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - if ( x86caps.hasStreamingSIMD4Extensions ) - xPINSR.D(xRegisterSSE(regd), eax, 0x00); - else { - xPINSR.W(xRegisterSSE(regd), eax, 0); - xSHR(eax, 16); - xPINSR.W(xRegisterSSE(regd), eax, 1); - } -} -void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified - xMOVSS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(regTemp)); -} -void vFloat7c(int regd, int regTemp) { //1110 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(7); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x39); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat7c_useEAX(int regd, int regTemp) { //1110 //EAX is Modified - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(7); - } - else { - xMOVD(eax, xRegisterSSE(regd)); - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - xMOVDZX(xRegisterSSE(regTemp), eax); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat8(int regd, int regTemp) { //0001 - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); -} -void vFloat8c(int regd, int regTemp) { //0001 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(8); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat9(int regd, int regTemp) { //1001 - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); -} -void vFloat9b(int regd, int regTemp) { //1001 //regTemp is Modified - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_NORMAL_SSE4(9); - } - else { - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - } -} -void vFloat9c(int regd, int regTemp) { //1001 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(9); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat10(int regd, int regTemp) { //0101 - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); -} -void vFloat10b(int regd, int regTemp) { //0101 //regTemp is Modified - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_NORMAL_SSE4(10); - } - else { - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - } -} -void vFloat10c(int regd, int regTemp) { //0101 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(10); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat11(int regd, int regTemp) { //1101 - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x36); -} -void vFloat11_useEAX(int regd, int regTemp) { //1101 //EAX is Modified - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMOVD(eax, xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - if ( x86caps.hasStreamingSIMD4Extensions ) - xPINSR.D(xRegisterSSE(regd), eax, 0x00); - else { - xPINSR.W(xRegisterSSE(regd), eax, 0); - xSHR(eax, 16); - xPINSR.W(xRegisterSSE(regd), eax, 1); - } - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); -} -void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xMOVSS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xMOVSD(xRegisterSSE(regd), xRegisterSSE(regTemp)); -} -void vFloat11c(int regd, int regTemp) { //1101 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(11); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x36); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat11c_useEAX(int regd, int regTemp) { //1101 // EAX is modified - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(11); - } - else { - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMOVD(eax, xRegisterSSE(regd)); - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - xMOVDZX(xRegisterSSE(regTemp), eax); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - } -} -void vFloat12(int regd, int regTemp) { //0011 - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); -} -void vFloat12b(int regd, int regTemp) { //0011 //regTemp is Modified - xMOVHL.PS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xPUNPCK.LQDQ(xRegisterSSE(regd), xRegisterSSE(regTemp)); -} -void vFloat12c(int regd, int regTemp) { //0011 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(12); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat13(int regd, int regTemp) { //1011 - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x2d); -} -void vFloat13_useEAX(int regd, int regTemp) { //1011 // EAX is modified - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMOVD(eax, xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - if ( x86caps.hasStreamingSIMD4Extensions ) - xPINSR.D(xRegisterSSE(regd), eax, 0x00); - else { - xPINSR.W(xRegisterSSE(regd), eax, 0); - xSHR(eax, 16); - xPINSR.W(xRegisterSSE(regd), eax, 1); - } - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); -} -void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xMOVHL.PS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regTemp), 0x64); -} -void vFloat13c(int regd, int regTemp) { //1011 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(13); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x2d); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat13c_useEAX(int regd, int regTemp) { //1011 // EAX is modified - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(13); - } - else { - xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMOVD(eax, xRegisterSSE(regd)); - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - xMOVDZX(xRegisterSSE(regTemp), eax); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - } -} -void vFloat14(int regd, int regTemp) { //0111 - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc9); -} -void vFloat14_useEAX(int regd, int regTemp) { //0111 // EAX is modified - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMOVD(eax, xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - if ( x86caps.hasStreamingSIMD4Extensions ) - xPINSR.D(xRegisterSSE(regd), eax, 0x00); - else { - xPINSR.W(xRegisterSSE(regd), eax, 0); - xSHR(eax, 16); - xPINSR.W(xRegisterSSE(regd), eax, 1); - } - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); -} -void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xMOVHL.PS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regTemp), 0xc4); -} -void vFloat14c(int regd, int regTemp) { //0111 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(14); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xPSHUF.LW(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc6); - xMIN.SS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.SS(xRegisterSSE(regd), ptr[g_minvals]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0xc9); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} -void vFloat14c_useEAX(int regd, int regTemp) { //0111 // EAX is modified - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(14); - } - else { - xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - xMOVD(eax, xRegisterSSE(regd)); - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - xMOVDZX(xRegisterSSE(regTemp), eax); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0x27); - } -} -void vFloat15(int regd, int regTemp) { //1111 - xMIN.PS(xRegisterSSE(regd), ptr[g_maxvals]); - xMAX.PS(xRegisterSSE(regd), ptr[g_minvals]); -} -void vFloat15c(int regd, int regTemp) { //1111 - if ( x86caps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(15); - } - else { - xMOVAPS(xRegisterSSE(regTemp), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(regTemp), ptr[&const_clip[4]]); - xMIN.PS(xRegisterSSE(regd), ptr[&g_maxvals[0]]); - xMAX.PS(xRegisterSSE(regd), ptr[&g_minvals[0]]); - xOR.PS(xRegisterSSE(regd), xRegisterSSE(regTemp)); - } -} - -vFloat vFloats1[16] = { //regTemp is not modified - vFloat0, vFloat1, vFloat2, vFloat3, - vFloat4, vFloat5, vFloat6, vFloat7, - vFloat8, vFloat9, vFloat10, vFloat11, - vFloat12, vFloat13, vFloat14, vFloat15 }; - -vFloat vFloats1_useEAX[16] = { //regTemp is not modified but EAX is used - vFloat0, vFloat1, vFloat2, vFloat3, - vFloat4, vFloat5, vFloat6, vFloat7_useEAX, - vFloat8, vFloat9, vFloat10, vFloat11_useEAX, - vFloat12, vFloat13_useEAX, vFloat14_useEAX, vFloat15 }; - -vFloat vFloats2[16] = { //regTemp is modified - vFloat0, vFloat1, vFloat2, vFloat3b, - vFloat4, vFloat5b, vFloat6b, vFloat7b, - vFloat8, vFloat9b, vFloat10b, vFloat11b, - vFloat12b, vFloat13b, vFloat14b, vFloat15 }; - -vFloat vFloats4[16] = { //regTemp is modified - vFloat0, vFloat1c, vFloat2c, vFloat3c, - vFloat4c, vFloat5c, vFloat6c, vFloat7c, - vFloat8c, vFloat9c, vFloat10c, vFloat11c, - vFloat12c, vFloat13c, vFloat14c, vFloat15c }; - -vFloat vFloats4_useEAX[16] = { //regTemp is modified and EAX is used - vFloat0, vFloat1c, vFloat2c, vFloat3c, - vFloat4c, vFloat5c, vFloat6c, vFloat7c_useEAX, - vFloat8c, vFloat9c, vFloat10c, vFloat11c_useEAX, - vFloat12c, vFloat13c_useEAX, vFloat14c_useEAX, vFloat15c }; - -//------------------------------------------------------------------ -// Clamping Functions (wrapper for vFloat* functions) -// vuFloat : "normal" clamping -// vuFloat_useEAX : "normal" clamping (faster but EAX is modified) -// vuFloat2 : "normal" clamping (fastest but regTemp is modified) -// vuFloat3 : "preserve sign" clamping for pointer -// vuFloat4 : "preserve sign" clamping (regTemp is modified; *FASTEST* on SSE4 CPUs) -// vuFloat4_useEAX : "preserve sign" clamping (faster but regTemp and EAX are modified) -// vuFloat5 : wrapper function for vuFloat2 and vuFloat4 -// vuFloat5_useEAX : wrapper function for vuFloat2 and vuFloat4_useEAX -// vuFloatExtra : for debugging -// -// Notice 1: vuFloat*_useEAX may be slower on AMD CPUs, which have independent execution pipeline for -// vector and scalar instructions (need checks) -// Notice 2: recVUMI_MUL_xyzw_toD and recVUMI_MADD_xyzw_toD use vFloats directly! -//------------------------------------------------------------------ - -// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (doesn't use any temp regs) -void vuFloat( int info, int regd, int XYZW) { - if( CHECK_VU_OVERFLOW ) { - /*if ( (XYZW != 0) && (XYZW != 8) && (XYZW != 0xF) ) { - int t1reg = _vuGetTempXMMreg(info); - if (t1reg >= 0) { - vuFloat2( regd, t1reg, XYZW ); - _freeXMMreg( t1reg ); - return; - } - }*/ - //vuFloatExtra(regd, XYZW); - vFloats1[XYZW](regd, regd); - } -} - -// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses EAX as a temp register; faster but **destroys EAX**) -void vuFloat_useEAX( int info, int regd, int XYZW) { - if( CHECK_VU_OVERFLOW ) { - vFloats1_useEAX[XYZW](regd, regd); - } -} - -// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses a temp reg) -void vuFloat2(int regd, int regTemp, int XYZW) { - if( CHECK_VU_OVERFLOW ) { - //vuFloatExtra(regd, XYZW); - vFloats2[XYZW](regd, regTemp); - } -} - -// Clamps +/-NaN and +/-Inf to +/-fMax (uses a temp reg) -void vuFloat4(int regd, int regTemp, int XYZW) { - if( CHECK_VU_OVERFLOW ) { - vFloats4[XYZW](regd, regTemp); - } -} - -// Clamps +/-NaN and +/-Inf to +/-fMax (uses a temp reg, and uses EAX as a temp register; faster but **destroys EAX**) -void vuFloat4_useEAX(int regd, int regTemp, int XYZW) { - if( CHECK_VU_OVERFLOW ) { - vFloats4_useEAX[XYZW](regd, regTemp); - } -} - -// Uses vuFloat4 or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting -void vuFloat5(int regd, int regTemp, int XYZW) { - if (CHECK_VU_SIGN_OVERFLOW) { - vuFloat4(regd, regTemp, XYZW); - } - else vuFloat2(regd, regTemp, XYZW); -} - -// Uses vuFloat4_useEAX or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting (uses EAX as a temp register; faster but **destoroyes EAX**) -void vuFloat5_useEAX(int regd, int regTemp, int XYZW) { - if (CHECK_VU_SIGN_OVERFLOW) { - vuFloat4_useEAX(regd, regTemp, XYZW); - } - else vuFloat2(regd, regTemp, XYZW); -} - -// Clamps +/-infs to +/-fMax, and +/-NaNs to +/-fMax -void vuFloat3(uptr x86ptr) { - u8* pjmp; - - if( CHECK_VU_OVERFLOW ) { - xCMP(ptr32[(u32*)(x86ptr)], 0x7f800000 ); - pjmp = JL8(0); // Signed Comparison - xMOV(ptr32[(u32*)(x86ptr)], 0x7f7fffff ); - x86SetJ8(pjmp); - - xCMP(ptr32[(u32*)(x86ptr)], 0xff800000 ); - pjmp = JB8(0); // Unsigned Comparison - xMOV(ptr32[(u32*)(x86ptr)], 0xff7fffff ); - x86SetJ8(pjmp); - } -} - -__aligned16 u64 vuFloatData[4]; - -// Makes NaN == 0, Infinities stay the same; Very Slow - Use only for debugging -void vuFloatExtra( int regd, int XYZW) { - int t1reg = (regd == 0) ? (regd + 1) : (regd - 1); - int t2reg = (regd <= 1) ? (regd + 2) : (regd - 2); - xMOVAPS(ptr[&vuFloatData[0]], xRegisterSSE(t1reg )); - xMOVAPS(ptr[&vuFloatData[2]], xRegisterSSE(t2reg )); - - xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); - xCMPORD.PS(xRegisterSSE(t1reg), xRegisterSSE(regd)); - xMOVAPS(xRegisterSSE(t2reg), xRegisterSSE(regd)); - xAND.PS(xRegisterSSE(t2reg), xRegisterSSE(t1reg)); - VU_MERGE_REGS_CUSTOM(regd, t2reg, XYZW); - - xMOVAPS(xRegisterSSE(t1reg), ptr[&vuFloatData[0] ]); - xMOVAPS(xRegisterSSE(t2reg), ptr[&vuFloatData[2] ]); -} - -static __aligned16 u32 tempRegX[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; - -// Called by testWhenOverflow() function -void testPrintOverflow() { - tempRegX[0] &= 0xff800000; - tempRegX[1] &= 0xff800000; - tempRegX[2] &= 0xff800000; - tempRegX[3] &= 0xff800000; - if ( (tempRegX[0] == 0x7f800000) || (tempRegX[1] == 0x7f800000) || (tempRegX[2] == 0x7f800000) || (tempRegX[3] == 0x7f800000) ) - Console.Warning( "VU OVERFLOW!: Changing to +Fmax!!!!!!!!!!!!" ); - if ( (tempRegX[0] == 0xff800000) || (tempRegX[1] == 0xff800000) || (tempRegX[2] == 0xff800000) || (tempRegX[3] == 0xff800000) ) - Console.Warning( "VU OVERFLOW!: Changing to -Fmax!!!!!!!!!!!!" ); -} - -// Outputs to the console when overflow has occured. -void testWhenOverflow(int info, int regd, int t0reg) { - xMOVAPS(ptr[tempRegX], xRegisterSSE(regd)); - xCALL((void*)(uptr)testPrintOverflow); -} diff --git a/pcsx2/x86/sVU_Micro.h b/pcsx2/x86/sVU_Micro.h deleted file mode 100644 index 633f3e4562..0000000000 --- a/pcsx2/x86/sVU_Micro.h +++ /dev/null @@ -1,267 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#pragma once - -#include "VUmicro.h" - -extern u32 vudump; - -u32 GetVIAddr(VURegs * VU, int reg, int read, int info); // returns the correct VI addr -void recUpdateFlags(VURegs * VU, int reg, int info); - -void _recvuTestPipes(VURegs * VU); -void _recvuFlushFDIV(VURegs * VU); -void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn); -void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn); -void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn); -void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn); - -#define VUOP_READ 2 -#define VUOP_WRITE 4 - -// save on mem -struct _vuopinfo { - int cycle; - int cycles; - u8 statusflag; - u8 macflag; - u8 clipflag; - u8 dummy; - u8 q; - u8 p; - u16 pqinst; // bit of instruction specifying index (srec only) -}; - -void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs); -int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices -void __fastcall VU1XGKICK_MTGSTransfer(u32 addr); // used for MTGS in XGKICK - -extern int vucycle; -typedef void (*vFloat)(int regd, int regTemp); -extern vFloat vFloats1[16]; -extern vFloat vFloats1_useEAX[16]; -extern vFloat vFloats2[16]; -extern vFloat vFloats4[16]; -extern vFloat vFloats4_useEAX[16]; -extern const __aligned16 float s_fones[8]; -extern const __aligned16 u32 s_mask[4]; -extern const __aligned16 u32 s_expmask[4]; -extern const __aligned16 u32 const_clip[8]; - -u32 GetVIAddr(VURegs * VU, int reg, int read, int info); -int _vuGetTempXMMreg(int info); -void vuFloat(int info, int regd, int XYZW); -void vuFloat_useEAX(int regd, int regTemp, int XYZW); -void vuFloat2(int regd, int regTemp, int XYZW); -void vuFloat3(uptr x86ptr); -void vuFloat4(int regd, int regTemp, int XYZW); -void vuFloat4_useEAX(int regd, int regTemp, int XYZW); -void vuFloat5(int regd, int regTemp, int XYZW); -void vuFloat5_useEAX(int regd, int regTemp, int XYZW); -void _vuFlipRegSS(VURegs * VU, int reg); -void _vuFlipRegSS_xyzw(int reg, int xyzw); -void _vuMoveSS(VURegs * VU, int dstreg, int srcreg); -void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw); -void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw); -void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw); -void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw); -#define VU_MERGE_REGS(dest, src) { \ - VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W); \ -} - -// use for allocating vi regs -#define ALLOCTEMPX86(mode) _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode) -#define ALLOCVI(vi, mode) _allocX86reg(xEmptyReg, X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), vi, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode) -#define ADD_VI_NEEDED(vi) _addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), vi); - -#define SWAP(x, y) *(u32*)&y ^= *(u32*)&x ^= *(u32*)&y ^= *(u32*)&x; - -/***************************************** - VU Micromode Upper instructions -*****************************************/ - -void recVUMI_ABS(VURegs *vuRegs, int info); -void recVUMI_ADD(VURegs *vuRegs, int info); -void recVUMI_ADDi(VURegs *vuRegs, int info); -void recVUMI_ADDq(VURegs *vuRegs, int info); -void recVUMI_ADDx(VURegs *vuRegs, int info); -void recVUMI_ADDy(VURegs *vuRegs, int info); -void recVUMI_ADDz(VURegs *vuRegs, int info); -void recVUMI_ADDw(VURegs *vuRegs, int info); -void recVUMI_ADDA(VURegs *vuRegs, int info); -void recVUMI_ADDAi(VURegs *vuRegs, int info); -void recVUMI_ADDAq(VURegs *vuRegs, int info); -void recVUMI_ADDAx(VURegs *vuRegs, int info); -void recVUMI_ADDAy(VURegs *vuRegs, int info); -void recVUMI_ADDAz(VURegs *vuRegs, int info); -void recVUMI_ADDAw(VURegs *vuRegs, int info); -void recVUMI_SUB(VURegs *vuRegs, int info); -void recVUMI_SUBi(VURegs *vuRegs, int info); -void recVUMI_SUBq(VURegs *vuRegs, int info); -void recVUMI_SUBx(VURegs *vuRegs, int info); -void recVUMI_SUBy(VURegs *vuRegs, int info); -void recVUMI_SUBz(VURegs *vuRegs, int info); -void recVUMI_SUBw(VURegs *vuRegs, int info); -void recVUMI_SUBA(VURegs *vuRegs, int info); -void recVUMI_SUBAi(VURegs *vuRegs, int info); -void recVUMI_SUBAq(VURegs *vuRegs, int info); -void recVUMI_SUBAx(VURegs *vuRegs, int info); -void recVUMI_SUBAy(VURegs *vuRegs, int info); -void recVUMI_SUBAz(VURegs *vuRegs, int info); -void recVUMI_SUBAw(VURegs *vuRegs, int info); -void recVUMI_MUL(VURegs *vuRegs, int info); -void recVUMI_MULi(VURegs *vuRegs, int info); -void recVUMI_MULq(VURegs *vuRegs, int info); -void recVUMI_MULx(VURegs *vuRegs, int info); -void recVUMI_MULy(VURegs *vuRegs, int info); -void recVUMI_MULz(VURegs *vuRegs, int info); -void recVUMI_MULw(VURegs *vuRegs, int info); -void recVUMI_MULA(VURegs *vuRegs, int info); -void recVUMI_MULAi(VURegs *vuRegs, int info); -void recVUMI_MULAq(VURegs *vuRegs, int info); -void recVUMI_MULAx(VURegs *vuRegs, int info); -void recVUMI_MULAy(VURegs *vuRegs, int info); -void recVUMI_MULAz(VURegs *vuRegs, int info); -void recVUMI_MULAw(VURegs *vuRegs, int info); -void recVUMI_MADD(VURegs *vuRegs, int info); -void recVUMI_MADDi(VURegs *vuRegs, int info); -void recVUMI_MADDq(VURegs *vuRegs, int info); -void recVUMI_MADDx(VURegs *vuRegs, int info); -void recVUMI_MADDy(VURegs *vuRegs, int info); -void recVUMI_MADDz(VURegs *vuRegs, int info); -void recVUMI_MADDw(VURegs *vuRegs, int info); -void recVUMI_MADDA(VURegs *vuRegs, int info); -void recVUMI_MADDAi(VURegs *vuRegs, int info); -void recVUMI_MADDAq(VURegs *vuRegs, int info); -void recVUMI_MADDAx(VURegs *vuRegs, int info); -void recVUMI_MADDAy(VURegs *vuRegs, int info); -void recVUMI_MADDAz(VURegs *vuRegs, int info); -void recVUMI_MADDAw(VURegs *vuRegs, int info); -void recVUMI_MSUB(VURegs *vuRegs, int info); -void recVUMI_MSUBi(VURegs *vuRegs, int info); -void recVUMI_MSUBq(VURegs *vuRegs, int info); -void recVUMI_MSUBx(VURegs *vuRegs, int info); -void recVUMI_MSUBy(VURegs *vuRegs, int info); -void recVUMI_MSUBz(VURegs *vuRegs, int info); -void recVUMI_MSUBw(VURegs *vuRegs, int info); -void recVUMI_MSUBA(VURegs *vuRegs, int info); -void recVUMI_MSUBAi(VURegs *vuRegs, int info); -void recVUMI_MSUBAq(VURegs *vuRegs, int info); -void recVUMI_MSUBAx(VURegs *vuRegs, int info); -void recVUMI_MSUBAy(VURegs *vuRegs, int info); -void recVUMI_MSUBAz(VURegs *vuRegs, int info); -void recVUMI_MSUBAw(VURegs *vuRegs, int info); -void recVUMI_MAX(VURegs *vuRegs, int info); -void recVUMI_MAXi(VURegs *vuRegs, int info); -void recVUMI_MAXx(VURegs *vuRegs, int info); -void recVUMI_MAXy(VURegs *vuRegs, int info); -void recVUMI_MAXz(VURegs *vuRegs, int info); -void recVUMI_MAXw(VURegs *vuRegs, int info); -void recVUMI_MINI(VURegs *vuRegs, int info); -void recVUMI_MINIi(VURegs *vuRegs, int info); -void recVUMI_MINIx(VURegs *vuRegs, int info); -void recVUMI_MINIy(VURegs *vuRegs, int info); -void recVUMI_MINIz(VURegs *vuRegs, int info); -void recVUMI_MINIw(VURegs *vuRegs, int info); -void recVUMI_OPMULA(VURegs *vuRegs, int info); -void recVUMI_OPMSUB(VURegs *vuRegs, int info); -void recVUMI_NOP(VURegs *vuRegs, int info); -void recVUMI_FTOI0(VURegs *vuRegs, int info); -void recVUMI_FTOI4(VURegs *vuRegs, int info); -void recVUMI_FTOI12(VURegs *vuRegs, int info); -void recVUMI_FTOI15(VURegs *vuRegs, int info); -void recVUMI_ITOF0(VURegs *vuRegs, int info); -void recVUMI_ITOF4(VURegs *vuRegs, int info); -void recVUMI_ITOF12(VURegs *vuRegs, int info); -void recVUMI_ITOF15(VURegs *vuRegs, int info); -void recVUMI_CLIP(VURegs *vuRegs, int info); - -/***************************************** - VU Micromode Lower instructions -*****************************************/ - -void recVUMI_DIV(VURegs *vuRegs, int info); -void recVUMI_SQRT(VURegs *vuRegs, int info); -void recVUMI_RSQRT(VURegs *vuRegs, int info); -void recVUMI_IADD(VURegs *vuRegs, int info); -void recVUMI_IADDI(VURegs *vuRegs, int info); -void recVUMI_IADDIU(VURegs *vuRegs, int info); -void recVUMI_IAND(VURegs *vuRegs, int info); -void recVUMI_IOR(VURegs *vuRegs, int info); -void recVUMI_ISUB(VURegs *vuRegs, int info); -void recVUMI_ISUBIU(VURegs *vuRegs, int info); -void recVUMI_MOVE(VURegs *vuRegs, int info); -void recVUMI_MFIR(VURegs *vuRegs, int info); -void recVUMI_MTIR(VURegs *vuRegs, int info); -void recVUMI_MR32(VURegs *vuRegs, int info); -void recVUMI_LQ(VURegs *vuRegs, int info); -void recVUMI_LQD(VURegs *vuRegs, int info); -void recVUMI_LQI(VURegs *vuRegs, int info); -void recVUMI_SQ(VURegs *vuRegs, int info); -void recVUMI_SQD(VURegs *vuRegs, int info); -void recVUMI_SQI(VURegs *vuRegs, int info); -void recVUMI_ILW(VURegs *vuRegs, int info); -void recVUMI_ISW(VURegs *vuRegs, int info); -void recVUMI_ILWR(VURegs *vuRegs, int info); -void recVUMI_ISWR(VURegs *vuRegs, int info); -void recVUMI_LOI(VURegs *vuRegs, int info); -void recVUMI_RINIT(VURegs *vuRegs, int info); -void recVUMI_RGET(VURegs *vuRegs, int info); -void recVUMI_RNEXT(VURegs *vuRegs, int info); -void recVUMI_RXOR(VURegs *vuRegs, int info); -void recVUMI_WAITQ(VURegs *vuRegs, int info); -void recVUMI_FSAND(VURegs *vuRegs, int info); -void recVUMI_FSEQ(VURegs *vuRegs, int info); -void recVUMI_FSOR(VURegs *vuRegs, int info); -void recVUMI_FSSET(VURegs *vuRegs, int info); -void recVUMI_FMAND(VURegs *vuRegs, int info); -void recVUMI_FMEQ(VURegs *vuRegs, int info); -void recVUMI_FMOR(VURegs *vuRegs, int info); -void recVUMI_FCAND(VURegs *vuRegs, int info); -void recVUMI_FCEQ(VURegs *vuRegs, int info); -void recVUMI_FCOR(VURegs *vuRegs, int info); -void recVUMI_FCSET(VURegs *vuRegs, int info); -void recVUMI_FCGET(VURegs *vuRegs, int info); -void recVUMI_IBEQ(VURegs *vuRegs, int info); -void recVUMI_IBGEZ(VURegs *vuRegs, int info); -void recVUMI_IBGTZ(VURegs *vuRegs, int info); -void recVUMI_IBLTZ(VURegs *vuRegs, int info); -void recVUMI_IBLEZ(VURegs *vuRegs, int info); -void recVUMI_IBNE(VURegs *vuRegs, int info); -void recVUMI_B(VURegs *vuRegs, int info); -void recVUMI_BAL(VURegs *vuRegs, int info); -void recVUMI_JR(VURegs *vuRegs, int info); -void recVUMI_JALR(VURegs *vuRegs, int info); -void recVUMI_MFP(VURegs *vuRegs, int info); -void recVUMI_WAITP(VURegs *vuRegs, int info); -void recVUMI_ESADD(VURegs *vuRegs, int info); -void recVUMI_ERSADD(VURegs *vuRegs, int info); -void recVUMI_ELENG(VURegs *vuRegs, int info); -void recVUMI_ERLENG(VURegs *vuRegs, int info); -void recVUMI_EATANxy(VURegs *vuRegs, int info); -void recVUMI_EATANxz(VURegs *vuRegs, int info); -void recVUMI_ESUM(VURegs *vuRegs, int info); -void recVUMI_ERCPR(VURegs *vuRegs, int info); -void recVUMI_ESQRT(VURegs *vuRegs, int info); -void recVUMI_ERSQRT(VURegs *vuRegs, int info); -void recVUMI_ESIN(VURegs *vuRegs, int info); -void recVUMI_EATAN(VURegs *vuRegs, int info); -void recVUMI_EEXP(VURegs *vuRegs, int info); -void recVUMI_XGKICK(VURegs *vuRegs, int info); -void recVUMI_XTOP(VURegs *vuRegs, int info); -void recVUMI_XITOP(VURegs *vuRegs, int info); -void recVUMI_XTOP( VURegs *VU , int info); - diff --git a/pcsx2/x86/sVU_Upper.cpp b/pcsx2/x86/sVU_Upper.cpp deleted file mode 100644 index de53ed52c2..0000000000 --- a/pcsx2/x86/sVU_Upper.cpp +++ /dev/null @@ -1,3068 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "GS.h" -#include "R5900OpcodeTables.h" -#include "iR5900.h" -#include "iMMI.h" -#include "iFPU.h" -#include "iCOP0.h" -#include "VUmicro.h" -#include "VUflags.h" -#include "sVU_Micro.h" -#include "sVU_zerorec.h" - -using namespace x86Emitter; - -//------------------------------------------------------------------ -#define MINMAXFIX 1 -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define _Ft_ (( VU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ (( VU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ (( VU->code >> 6) & 0x1F) // The sa part of the instruction register - -#define _X (( VU->code>>24) & 0x1) -#define _Y (( VU->code>>23) & 0x1) -#define _Z (( VU->code>>22) & 0x1) -#define _W (( VU->code>>21) & 0x1) - -#define _XYZW_SS (_X+_Y+_Z+_W==1) - -#define _Fsf_ (( VU->code >> 21) & 0x03) -#define _Ftf_ (( VU->code >> 23) & 0x03) - -#define _Imm11_ (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) -#define _UImm11_ (s32)(VU->code & 0x7ff) - -#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] -#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1] -#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2] -#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3] - -#define VU_REGR_ADDR (uptr)&VU->VI[REG_R] -#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q] -#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG] - -#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info) - -#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] -#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1] -#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2] -#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3] - -#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Global Variables -//------------------------------------------------------------------ -static const __aligned16 uint SSEmovMask[ 16 ][ 4 ] = -{ - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF }, - { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 }, - { 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF }, - { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 }, - { 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF }, - { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }, - { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 }, - { 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF }, - { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 }, - { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 }, - { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF }, - { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }, - { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF } -}; - -static const __aligned16 u32 const_abs_table[16][4] = -{ - { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000 - { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //0001 - { 0xffffffff, 0xffffffff, 0x7fffffff, 0xffffffff }, //0010 - { 0xffffffff, 0xffffffff, 0x7fffffff, 0x7fffffff }, //0011 - { 0xffffffff, 0x7fffffff, 0xffffffff, 0xffffffff }, //0100 - { 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff }, //0101 - { 0xffffffff, 0x7fffffff, 0x7fffffff, 0xffffffff }, //0110 - { 0xffffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //0111 - { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000 - { 0x7fffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //1001 - { 0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff }, //1010 - { 0x7fffffff, 0xffffffff, 0x7fffffff, 0x7fffffff }, //1011 - { 0x7fffffff, 0x7fffffff, 0xffffffff, 0xffffffff }, //1100 - { 0x7fffffff, 0x7fffffff, 0xffffffff, 0x7fffffff }, //1101 - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0xffffffff }, //1110 - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1111 -}; - -static const __aligned16 float recMult_float_to_int4[4] = { 16.0, 16.0, 16.0, 16.0 }; -static const __aligned16 float recMult_float_to_int12[4] = { 4096.0, 4096.0, 4096.0, 4096.0 }; -static const __aligned16 float recMult_float_to_int15[4] = { 32768.0, 32768.0, 32768.0, 32768.0 }; - -static const __aligned16 float recMult_int_to_float4[4] = { 0.0625f, 0.0625f, 0.0625f, 0.0625f }; -static const __aligned16 float recMult_int_to_float12[4] = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 }; -static const __aligned16 float recMult_int_to_float15[4] = { 0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125 }; - -static const __aligned16 u32 VU_Underflow_Mask1[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -static const __aligned16 u32 VU_Underflow_Mask2[4] = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; -static const __aligned16 u32 VU_Zero_Mask[4] = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; -static const __aligned16 u32 VU_Zero_Helper_Mask[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; -static const __aligned16 u32 VU_Signed_Zero_Mask[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; -static const __aligned16 u32 VU_Pos_Infinity[4] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// recUpdateFlags() - Computes the flags for the Upper Opcodes -// -// Note: Computes under/overflow flags if CHECK_VU_EXTRA_FLAGS is 1 -//------------------------------------------------------------------ -static __aligned16 u64 TEMPXMMData[2]; -void recUpdateFlags(VURegs * VU, int reg, int info) -{ - static u8 *pjmp, *pjmp2; - static u32 *pjmp32; - static u32 macaddr, stataddr, prevstataddr; - static int x86macflag, x86statflag, x86temp; - static int t1reg, t1regBoolean; - static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; - - if( !(info & PROCESS_VU_UPDATEFLAGS) ) { - if (CHECK_VU_EXTRA_OVERFLOW) { - if (reg != EEREC_TEMP) vuFloat2(reg, EEREC_TEMP, _X_Y_Z_W); - else vuFloat_useEAX(info, reg, _X_Y_Z_W); - } - return; - } - - //Console.WriteLn ("recUpdateFlags"); - - macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0); - stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address - prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address - - if( stataddr == 0 ) stataddr = prevstataddr; - if( macaddr == 0 ) { - Console.WriteLn( "VU ALLOCATION WARNING: Using Mac Flag Previous Address!" ); - macaddr = VU_VI_ADDR(REG_MAC_FLAG, 2); - } - - x86macflag = ALLOCTEMPX86(0); - x86statflag = ALLOCTEMPX86(0); - - if (reg == EEREC_TEMP) { - t1reg = _vuGetTempXMMreg(info); - if (t1reg < 0) { - //Console.WriteLn( "VU ALLOCATION ERROR: Temp reg can't be allocated!!!!" ); - t1reg = (reg == 0) ? 1 : 0; // Make t1reg != reg - xMOVAPS(ptr[TEMPXMMData], xRegisterSSE(t1reg )); // Backup data to temp address - t1regBoolean = 1; - } - else t1regBoolean = 0; - } - else { - t1reg = EEREC_TEMP; - t1regBoolean = 2; - } - - xSHUF.PS(xRegisterSSE(reg), xRegisterSSE(reg), 0x1B); // Flip wzyx to xyzw - xMOV(xRegister32(x86statflag), ptr[(void*)(prevstataddr)]); // Load the previous status in to x86statflag - xAND(xRegister16(x86statflag), 0xff0); // Keep Sticky and D/I flags - - - if (CHECK_VU_EXTRA_FLAGS) { // Checks all flags - - x86temp = ALLOCTEMPX86(0); - - //-------------------------Check for Overflow flags------------------------------ - - //xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); // Clear t1reg - //xCMPUNORD.PS(xRegisterSSE(t1reg), xRegisterSSE(reg)); // If reg == NaN then set Vector to 0xFFFFFFFF - - //xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(reg)); - //xMIN.PS(xRegisterSSE(t1reg), ptr[g_maxvals]); - //xMAX.PS(xRegisterSSE(t1reg), ptr[g_minvals]); - //xCMPNE.PS(xRegisterSSE(t1reg), xRegisterSSE(reg)); // If they're not equal, then overflow has occured - - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(reg)); - xAND.PS(xRegisterSSE(t1reg), ptr[VU_Zero_Helper_Mask]); - xCMPEQ.PS(xRegisterSSE(t1reg), ptr[VU_Pos_Infinity]); // If infinity, then overflow has occured (NaN's don't report as overflow) - - xMOVMSKPS(xRegister32(x86macflag), xRegisterSSE(t1reg)); // Move the sign bits of the previous calculation - - xAND(xRegister16(x86macflag), _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) - pjmp = JZ8(0); // Skip if none are - xOR(xRegister16(x86statflag), 0x208); // OS, O flags - xSHL(xRegister16(x86macflag), 12); - if (_XYZW_SS) pjmp32 = JMP32(0); // Skip Underflow Check - x86SetJ8(pjmp); - - //-------------------------Check for Underflow flags------------------------------ - - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(reg)); // t1reg <- reg - - xAND.PS(xRegisterSSE(t1reg), ptr[&VU_Underflow_Mask1[ 0 ]]); - xCMPEQ.PS(xRegisterSSE(t1reg), ptr[&VU_Zero_Mask[ 0 ]]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF - - xAND.PS(xRegisterSSE(t1reg), xRegisterSSE(reg)); - xAND.PS(xRegisterSSE(t1reg), ptr[&VU_Underflow_Mask2[ 0 ]]); - xCMPNE.PS(xRegisterSSE(t1reg), ptr[&VU_Zero_Mask[ 0 ]]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF - - xMOVMSKPS(eax, xRegisterSSE(t1reg)); // Move the sign bits of the previous calculation - - xAND(ax, _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation - pjmp = JZ8(0); // Skip if none are - xOR(xRegister16(x86statflag), 0x104); // US, U flags - xSHL(ax, 8); - xOR(xRegister32(x86macflag), eax); - x86SetJ8(pjmp); - - //-------------------------Optional Code: Denormals Are Zero------------------------------ - if (CHECK_VU_UNDERFLOW) { // Sets underflow/denormals to zero - xANDN.PS(xRegisterSSE(t1reg), xRegisterSSE(reg)); // t1reg = !t1reg & reg (t1reg = denormals are positive zero) - VU_MERGE_REGS_SAFE(t1reg, reg, (15 - flipMask[_X_Y_Z_W])); // Send t1reg the vectors that shouldn't be modified (since reg was flipped, we need a mask to get the unmodified vectors) - // Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account - xAND.PS(xRegisterSSE(reg), ptr[&VU_Signed_Zero_Mask[ 0 ]]); // Only keep the sign bit for each vector - xOR.PS(xRegisterSSE(reg), xRegisterSSE(t1reg)); // Denormals are Signed Zero, and unmodified vectors stay the same! - } - - if (_XYZW_SS) x86SetJ32(pjmp32); // If we skipped the Underflow Flag Checking (when we had an Overflow), return here - - vuFloat2(reg, t1reg, flipMask[_X_Y_Z_W]); // Clamp overflowed vectors that were modified (remember reg's vectors have been flipped, so have to use a flipmask) - - //-------------------------Check for Signed flags------------------------------ - - xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); // Clear t1reg - xCMPEQ.PS(xRegisterSSE(t1reg), xRegisterSSE(reg)); // Set all F's if each vector is zero - xMOVMSKPS(xRegister32(x86temp), xRegisterSSE(t1reg)); // Used for Zero Flag Calculation - - xMOVMSKPS(eax, xRegisterSSE(reg)); // Move the sign bits of the t1reg - - xAND(ax, _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation - pjmp = JZ8(0); // Skip if none are - xOR(xRegister16(x86statflag), 0x82); // SS, S flags - xSHL(ax, 4); - xOR(xRegister32(x86macflag), eax); - if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking - x86SetJ8(pjmp); - - //-------------------------Check for Zero flags------------------------------ - - xAND(xRegister16(x86temp), _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation - pjmp = JZ8(0); // Skip if none are - xOR(xRegister16(x86statflag), 0x41); // ZS, Z flags - xOR(xRegister32(x86macflag), xRegister32(x86temp)); - x86SetJ8(pjmp); - - _freeX86reg(x86temp); - } - else { // Only Checks for Sign and Zero Flags - - vuFloat2(reg, t1reg, flipMask[_X_Y_Z_W]); // Clamp overflowed vectors that were modified (remember reg's vectors have been flipped, so have to use a flipmask) - - //-------------------------Check for Signed flags------------------------------ - - // The following code makes sure the Signed Bit isn't set with Negative Zero - xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); // Clear t1reg - xCMPEQ.PS(xRegisterSSE(t1reg), xRegisterSSE(reg)); // Set all F's if each vector is zero - xMOVMSKPS(eax, xRegisterSSE(t1reg)); // Used for Zero Flag Calculation - xANDN.PS(xRegisterSSE(t1reg), xRegisterSSE(reg)); - - xMOVMSKPS(xRegister32(x86macflag), xRegisterSSE(t1reg)); // Move the sign bits of the t1reg - - xAND(xRegister16(x86macflag), _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation - pjmp = JZ8(0); // Skip if none are - xOR(xRegister16(x86statflag), 0x82); // SS, S flags - xSHL(xRegister16(x86macflag), 4); - if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking - x86SetJ8(pjmp); - - //-------------------------Check for Zero flags------------------------------ - - xAND(ax, _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation - pjmp = JZ8(0); // Skip if none are - xOR(xRegister16(x86statflag), 0x41); // ZS, Z flags - xOR(xRegister32(x86macflag), eax); - x86SetJ8(pjmp); - } - //-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------ - - if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here - - if (t1regBoolean == 2) xSHUF.PS(xRegisterSSE(reg), xRegisterSSE(reg), 0x1B); // Flip back reg to wzyx (have to do this because reg != EEREC_TEMP) - else if (t1regBoolean == 1) xMOVAPS(xRegisterSSE(t1reg), ptr[TEMPXMMData ]); // Restore data from temo address - else _freeXMMreg(t1reg); // Free temp reg - - xMOV(ptr[(void*)(macaddr)], xRegister16(x86macflag)); - xMOV(ptr[(void*)(stataddr)], xRegister16(x86statflag)); - - _freeX86reg(x86macflag); - _freeX86reg(x86statflag); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Custom VU ADD/SUB routines by Nneeve -// -// Note: See FPU_ADD_SUB() for more info on what this is doing. -//------------------------------------------------------------------ -static __aligned16 u32 VU_addsuband[2][4]; -static __aligned16 u32 VU_addsub_reg[2][4]; - -static u32 tempECX; - -void VU_ADD_SUB(u32 regd, u32 regt, int is_sub, int info) -{ - u8 *localptr[4][8]; - - xMOV(ptr[&tempECX], ecx); - - int temp1 = ecx.GetId(); //receives regd - int temp2 = ALLOCTEMPX86(0); - - if (temp2 == ecx.GetId()) - { - temp2 = ALLOCTEMPX86(0); - _freeX86reg(ecx); - } - - xMOVAPS(ptr[&VU_addsub_reg[0][0]], xRegisterSSE(regd)); - xMOVAPS(ptr[&VU_addsub_reg[1][0]], xRegisterSSE(regt)); - - xPCMP.EQB(xRegisterSSE(regd), xRegisterSSE(regd)); - xMOVAPS(ptr[&VU_addsuband[0][0]], xRegisterSSE(regd)); - xMOVAPS(ptr[&VU_addsuband[1][0]], xRegisterSSE(regd)); - xMOVAPS(xRegisterSSE(regd), ptr[&VU_addsub_reg[0][0]]); - - xPSLL.D(xRegisterSSE(regd), 1); - xPSLL.D(xRegisterSSE(regt), 1); - - xPSRL.D(xRegisterSSE(regd), 24); - xPSRL.D(xRegisterSSE(regt), 24); - - xPSUB.D(xRegisterSSE(regd), xRegisterSSE(regt)); - -#define PERFORM(i) \ - \ - xPEXTR.W(xRegister32(temp1), xRegisterSSE(regd), i*2); \ - xMOVSX(xRegister32(temp1), xRegister16(temp1)); \ - xCMP(xRegister32(temp1), 25);\ - localptr[i][0] = JGE8(0);\ - xCMP(xRegister32(temp1), 0);\ - localptr[i][1] = JG8(0);\ - localptr[i][2] = JE8(0);\ - xCMP(xRegister32(temp1), -25);\ - localptr[i][3] = JLE8(0);\ - \ - xNEG(xRegister32(temp1)); \ - xDEC(xRegister32(temp1));\ - xMOV(xRegister32(temp2), 0xffffffff); \ - xSHL(xRegister32(temp2), cl); \ - xMOV(ptr[&VU_addsuband[0][i]], xRegister32(temp2));\ - localptr[i][4] = JMP8(0);\ - \ - x86SetJ8(localptr[i][0]);\ - xMOV(ptr32[&VU_addsuband[1][i]], 0x80000000);\ - localptr[i][5] = JMP8(0);\ - \ - x86SetJ8(localptr[i][1]);\ - xDEC(xRegister32(temp1));\ - xMOV(xRegister32(temp2), 0xffffffff);\ - xSHL(xRegister32(temp2), cl); \ - xMOV(ptr[&VU_addsuband[1][i]], xRegister32(temp2));\ - localptr[i][6] = JMP8(0);\ - \ - x86SetJ8(localptr[i][3]);\ - xMOV(ptr32[&VU_addsuband[0][i]], 0x80000000);\ - localptr[i][7] = JMP8(0);\ - \ - x86SetJ8(localptr[i][2]);\ - \ - x86SetJ8(localptr[i][4]);\ - x86SetJ8(localptr[i][5]);\ - x86SetJ8(localptr[i][6]);\ - x86SetJ8(localptr[i][7]); - - PERFORM(0); - PERFORM(1); - PERFORM(2); - PERFORM(3); -#undef PERFORM - - xMOVAPS(xRegisterSSE(regd), ptr[&VU_addsub_reg[0][0]]); - xMOVAPS(xRegisterSSE(regt), ptr[&VU_addsub_reg[1][0]]); - - xAND.PS(xRegisterSSE(regd), ptr[&VU_addsuband[0][0]]); - xAND.PS(xRegisterSSE(regt), ptr[&VU_addsuband[1][0]]); - - if (is_sub) xSUB.PS(xRegisterSSE(regd), xRegisterSSE(regt)); - else xADD.PS(xRegisterSSE(regd), xRegisterSSE(regt)); - - xMOVAPS(xRegisterSSE(regt), ptr[&VU_addsub_reg[1][0]]); - - _freeX86reg(temp2); - - xMOV(ecx, ptr[&tempECX]); -} - -void VU_ADD_SUB_SS(u32 regd, u32 regt, int is_sub, int is_mem, int info) -{ - u8 *localptr[8]; - u32 addrt = regt; //for case is_mem - - xMOV(ptr[&tempECX], ecx); - - int temp1 = ecx.GetId(); //receives regd - int temp2 = ALLOCTEMPX86(0); - - if (temp2 == ecx.GetId()) - { - temp2 = ALLOCTEMPX86(0); - _freeX86reg(ecx); - } - - xMOVAPS(ptr[&VU_addsub_reg[0][0]], xRegisterSSE(regd)); - if (!is_mem) xMOVAPS(ptr[&VU_addsub_reg[1][0]], xRegisterSSE(regt)); - - xMOVD(xRegister32(temp1), xRegisterSSE(regd)); - xSHR(xRegister32(temp1), 23); - - if (is_mem) { - xMOV(xRegister32(temp2), ptr[(void*)(addrt)]); - xMOV(ptr[&VU_addsub_reg[1][0]], xRegister32(temp2)); - xSHR(xRegister32(temp2), 23); - } - else { - xMOVD(xRegister32(temp2), xRegisterSSE(regt)); - xSHR(xRegister32(temp2), 23); - } - - xAND(xRegister32(temp1), 0xff); - xAND(xRegister32(temp2), 0xff); - - xSUB(xRegister32(temp1), xRegister32(temp2)); //temp1 = exponent difference - - xCMP(xRegister32(temp1), 25); - localptr[0] = JGE8(0); - xCMP(xRegister32(temp1), 0); - localptr[1] = JG8(0); - localptr[2] = JE8(0); - xCMP(xRegister32(temp1), -25); - localptr[3] = JLE8(0); - - xNEG(xRegister32(temp1)); - xDEC(xRegister32(temp1)); - xMOV(xRegister32(temp2), 0xffffffff); - xSHL(xRegister32(temp2), cl); - xPCMP.EQB(xRegisterSSE(regd), xRegisterSSE(regd)); - if (is_mem) { - xPINSR.W(xRegisterSSE(regd), xRegister32(temp2), 0); - xSHR(xRegister32(temp2), 16); - xPINSR.W(xRegisterSSE(regd), xRegister32(temp2), 1); - } - else { - xMOVDZX(xRegisterSSE(regt), xRegister32(temp2)); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(regt)); - xPCMP.EQB(xRegisterSSE(regt), xRegisterSSE(regt)); - } - localptr[4] = JMP8(0); - - x86SetJ8(localptr[0]); - xMOV(xRegister32(temp2), 0x80000000); - if (is_mem) - xAND(ptr[&VU_addsub_reg[1][0]], xRegister32(temp2)); - else { - xPCMP.EQB(xRegisterSSE(regt), xRegisterSSE(regt)); - xMOVDZX(xRegisterSSE(regd), xRegister32(temp2)); - xMOVSS(xRegisterSSE(regt), xRegisterSSE(regd)); - } - xPCMP.EQB(xRegisterSSE(regd), xRegisterSSE(regd)); - localptr[5] = JMP8(0); - - x86SetJ8(localptr[1]); - xDEC(xRegister32(temp1)); - xMOV(xRegister32(temp2), 0xffffffff); - xSHL(xRegister32(temp2), cl); - if (is_mem) - xAND(ptr[&VU_addsub_reg[1][0]], xRegister32(temp2)); - else { - xPCMP.EQB(xRegisterSSE(regt), xRegisterSSE(regt)); - xMOVDZX(xRegisterSSE(regd), xRegister32(temp2)); - xMOVSS(xRegisterSSE(regt), xRegisterSSE(regd)); - } - xPCMP.EQB(xRegisterSSE(regd), xRegisterSSE(regd)); - localptr[6] = JMP8(0); - - x86SetJ8(localptr[3]); - xMOV(xRegister32(temp2), 0x80000000); - xPCMP.EQB(xRegisterSSE(regd), xRegisterSSE(regd)); - if (is_mem) { - xPINSR.W(xRegisterSSE(regd), xRegister32(temp2), 0); - xSHR(xRegister32(temp2), 16); - xPINSR.W(xRegisterSSE(regd), xRegister32(temp2), 1); - } - else { - xMOVDZX(xRegisterSSE(regt), xRegister32(temp2)); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(regt)); - xPCMP.EQB(xRegisterSSE(regt), xRegisterSSE(regt)); - } - localptr[7] = JMP8(0); - - x86SetJ8(localptr[2]); - x86SetJ8(localptr[4]); - x86SetJ8(localptr[5]); - x86SetJ8(localptr[6]); - x86SetJ8(localptr[7]); - - if (is_mem) - { - xAND.PS(xRegisterSSE(regd), ptr[&VU_addsub_reg[0][0]]); //regd contains mask - - if (is_sub) xSUB.SS(xRegisterSSE(regd), ptr[&VU_addsub_reg[1][0]]); - else xADD.SS(xRegisterSSE(regd), ptr[&VU_addsub_reg[1][0]]); - } - else - { - xAND.PS(xRegisterSSE(regd), ptr[&VU_addsub_reg[0][0]]); //regd contains mask - xAND.PS(xRegisterSSE(regt), ptr[&VU_addsub_reg[1][0]]); //regt contains mask - - if (is_sub) xSUB.SS(xRegisterSSE(regd), xRegisterSSE(regt)); - else xADD.SS(xRegisterSSE(regd), xRegisterSSE(regt)); - - xMOVAPS(xRegisterSSE(regt), ptr[&VU_addsub_reg[1][0]]); - } - - _freeX86reg(temp2); - - xMOV(ecx, ptr[&tempECX]); -} - -void SSE_ADDPS_XMM_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB(regd, regt, 0, info); - } - else xADD.PS(xRegisterSSE(regd), xRegisterSSE(regt)); -} -void SSE_SUBPS_XMM_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB(regd, regt, 1, info); - } - else xSUB.PS(xRegisterSSE(regd), xRegisterSSE(regt)); -} -void SSE_ADDSS_XMM_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB_SS(regd, regt, 0, 0, info); - } - else xADD.SS(xRegisterSSE(regd), xRegisterSSE(regt)); -} -void SSE_SUBSS_XMM_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB_SS(regd, regt, 1, 0, info); - } - else xSUB.SS(xRegisterSSE(regd), xRegisterSSE(regt)); -} -void SSE_ADDSS_M32_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB_SS(regd, regt, 0, 1, info); - } - else xADD.SS(xRegisterSSE(regd), ptr[(void*)(regt)]); -} -void SSE_SUBSS_M32_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB_SS(regd, regt, 1, 1, info); - } - else xSUB.SS(xRegisterSSE(regd), ptr[(void*)(regt)]); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// *VU Upper Instructions!* -// -// Note: * = Checked for errors by cottonvibes -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ABS* -//------------------------------------------------------------------ -void recVUMI_ABS(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_ABS()"); - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - - if ((_X_Y_Z_W == 0x8) || (_X_Y_Z_W == 0xf)) { - VU_MERGE_REGS(EEREC_T, EEREC_S); - xAND.PS(xRegisterSSE(EEREC_T), ptr[&const_abs_table[ _X_Y_Z_W ][ 0 ] ]); - } - else { // Use a temp reg because VU_MERGE_REGS() modifies source reg! - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_abs_table[ _X_Y_Z_W ][ 0 ] ]); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ADD*, ADD_iq*, ADD_xyzw* -//------------------------------------------------------------------ -static const __aligned16 float s_two[4] = {0,0,0,2}; -void recVUMI_ADD(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_ADD()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; // Don't do anything and just clear flags - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - - if ( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2 - if ( _X_Y_Z_W != 0xf ) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), ptr[s_two]); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else xMOVAPS(xRegisterSSE(EEREC_D), ptr[s_two]); - } - else { - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add - if (EEREC_D == EEREC_S) xADD.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if (EEREC_D == EEREC_T) xADD.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xADD.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - else if (_X_Y_Z_W != 0xf) { // If xyzw != 1111, then we have to use a temp reg - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { // All xyzw being modified (xyzw == 1111) - if (EEREC_D == EEREC_S) xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if (EEREC_D == EEREC_T) xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVAPS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info) -{ - //Console.WriteLn("recVUMI_ADD_iq()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - - if ( _XYZW_SS ) { - if ( EEREC_D == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xADD.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); // have to flip over EEREC_D for computing flags! - } - else if ( EEREC_D == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_D); - xADD.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_D); - } - else { - if ( _X ) { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - SSE_ADDSS_M32_to_XMM_custom(info, EEREC_D, addr); - } - else { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - SSE_ADDPS_XMM_to_XMM_custom(info, EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - } - } - else { - if ( (_X_Y_Z_W != 0xf) || (EEREC_D == EEREC_S) || (EEREC_D == EEREC_TEMP) ) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - } - - if (_X_Y_Z_W != 0xf) { - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if ( EEREC_D == EEREC_TEMP ) xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else if ( EEREC_D == EEREC_S ) xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - else { - xMOVSSZX(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0x00); - xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console.WriteLn("recVUMI_ADD_xyzw()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - - if ( _Ft_ == 0 && xyzw < 3 ) { // just move since adding zero - if ( _X_Y_Z_W == 0x8 ) { VU_MERGE_REGS(EEREC_D, EEREC_S); } - else if ( _X_Y_Z_W != 0xf ) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else xMOVAPS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else if ( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP) ) { - if ( xyzw == 0 ) { - if ( EEREC_D == EEREC_T ) xADD.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xADD.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xADD.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - } - else if( _Fs_ == 0 && !_W ) { // just move - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if ( _X_Y_Z_W != 0xf ) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if( EEREC_D == EEREC_TEMP ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - else if( EEREC_D == EEREC_S ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); } - else { _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_ADDi(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_ADDq(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_ADDx(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 0, info); } -void recVUMI_ADDy(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 1, info); } -void recVUMI_ADDz(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 2, info); } -void recVUMI_ADDw(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ADDA*, ADDA_iq*, ADDA_xyzw* -//------------------------------------------------------------------ -void recVUMI_ADDA(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_ADDA()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _X_Y_Z_W == 8 ) { - if (EEREC_ACC == EEREC_S) xADD.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); // Can this case happen? (cottonvibes) - else if (EEREC_ACC == EEREC_T) xADD.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); // Can this case happen? - else { - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xADD.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); - } - } - else if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - else { - if( EEREC_ACC == EEREC_S ) xADD.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); // Can this case happen? - else if( EEREC_ACC == EEREC_T ) xADD.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); // Can this case happen? - else { - xMOVAPS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xADD.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_ADDA_iq(VURegs *VU, uptr addr, int info) -{ - //Console.WriteLn("recVUMI_ADDA_iq()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _XYZW_SS ) { - assert( EEREC_ACC != EEREC_TEMP ); - if( EEREC_ACC == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_ACC); - xADD.SS(xRegisterSSE(EEREC_ACC), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_ACC); - } - else { - if( _X ) { - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xADD.SS(xRegisterSSE(EEREC_ACC), ptr[(void*)(addr)]); - } - else { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - } - } - else { - if( _X_Y_Z_W != 0xf || EEREC_ACC == EEREC_S ) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - } - - if (_X_Y_Z_W != 0xf) { - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - else { - if( EEREC_ACC == EEREC_S ) xADD.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - else { - xMOVSSZX(xRegisterSSE(EEREC_ACC), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_ACC), 0x00); - xADD.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_ADDA_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console.WriteLn("recVUMI_ADDA_xyzw()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - - if( _X_Y_Z_W == 8 ) { - assert( EEREC_ACC != EEREC_T ); - if( xyzw == 0 ) { - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xADD.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - if( _Fs_ == 0 ) { - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xADD.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - } - } - } - else { - if( _X_Y_Z_W != 0xf || EEREC_ACC == EEREC_S ) - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if (_X_Y_Z_W != 0xf) { - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - else { - if( EEREC_ACC == EEREC_S ) xADD.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - else { - _unpackVF_xyzw(EEREC_ACC, EEREC_T, xyzw); - xADD.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_ADDAi(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_ADDAq(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_ADDAx(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 0, info); } -void recVUMI_ADDAy(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 1, info); } -void recVUMI_ADDAz(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 2, info); } -void recVUMI_ADDAw(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SUB*, SUB_iq*, SUB_xyzw* -//------------------------------------------------------------------ -void recVUMI_SUB(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_SUB()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - - if( EEREC_S == EEREC_T ) { - if (_X_Y_Z_W != 0xf) xAND.PS(xRegisterSSE(EEREC_D), ptr[(&SSEmovMask[15-_X_Y_Z_W][0])]); - else xXOR.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else if( _X_Y_Z_W == 8 ) { - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - if (EEREC_D == EEREC_S) { - if (_Ft_) xSUB.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - else if (EEREC_D == EEREC_T) { - if (_Ft_) { - xMOVSS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - else xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - if (_Ft_) xSUB.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - else { - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if( ( _Ft_ > 0 ) || _W ) xSUB.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if (EEREC_D == EEREC_S) xSUB.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if (EEREC_D == EEREC_T) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - xMOVAPS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVAPS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_SUB_iq(VURegs *VU, uptr addr, int info) -{ - //Console.WriteLn("recVUMI_SUB_iq()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - - if( _XYZW_SS ) { - if( EEREC_D == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); - } - else if( EEREC_D == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_D); - xSUB.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_D); - } - else { - if( _X ) { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - } - else { - _vuMoveSS(VU, EEREC_TEMP, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); - xSUB.SS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - _vuFlipRegSS(VU, EEREC_D); - } - } - } - else { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - - VU_MERGE_REGS(EEREC_D, t1reg); - _freeXMMreg(t1reg); - } - else { - // negate - xXOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - } - else { - if( EEREC_D == EEREC_TEMP ) { - xXOR.PS(xRegisterSSE(EEREC_D), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else { - xMOVAPS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console.WriteLn("recVUMI_SUB_xyzw()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - - if ( _X_Y_Z_W == 8 ) { - if ( (xyzw == 0) && (_Ft_ == _Fs_) ) { - xAND.PS(xRegisterSSE(EEREC_D), ptr[&SSEmovMask[7][0]]); - } - else if ( EEREC_D == EEREC_TEMP ) { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - if ( (_Ft_ > 0) || (xyzw == 3) ) { - _vuFlipRegSS_xyzw(EEREC_T, xyzw); - xSUB.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - _vuFlipRegSS_xyzw(EEREC_T, xyzw); - } - } - else { - if ( (_Ft_ > 0) || (xyzw == 3) ) { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - else xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - } - else { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - - VU_MERGE_REGS(EEREC_D, t1reg); - _freeXMMreg(t1reg); - } - else { - // negate - xXOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - } - else { - if( EEREC_D == EEREC_TEMP ) { - xXOR.PS(xRegisterSSE(EEREC_D), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else { - xMOVAPS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_SUBi(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_SUBq(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_SUBx(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 0, info); } -void recVUMI_SUBy(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 1, info); } -void recVUMI_SUBz(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 2, info); } -void recVUMI_SUBw(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SUBA*, SUBA_iq, SUBA_xyzw -//------------------------------------------------------------------ -void recVUMI_SUBA(VURegs *VU, int info) -{ - //Console.WriteLn("recVUMI_SUBA()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - - if( EEREC_S == EEREC_T ) { - if (_X_Y_Z_W != 0xf) xAND.PS(xRegisterSSE(EEREC_ACC), ptr[(&SSEmovMask[15-_X_Y_Z_W][0])]); - else xXOR.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_ACC)); - } - else if( _X_Y_Z_W == 8 ) { - if (EEREC_ACC == EEREC_S) xSUB.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); - else if (EEREC_ACC == EEREC_T) { - xMOVSS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); - } - } - else if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - else { - if( EEREC_ACC == EEREC_S ) xSUB.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); - else if( EEREC_ACC == EEREC_T ) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - xMOVAPS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVAPS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_SUBA_iq(VURegs *VU, uptr addr, int info) -{ - //Console.WriteLn ("recVUMI_SUBA_iq"); - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _XYZW_SS ) { - if( EEREC_ACC == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_ACC); - xSUB.SS(xRegisterSSE(EEREC_ACC), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_ACC); - } - else { - if( _X ) { - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_ACC), ptr[(void*)(addr)]); - } - else { - _vuMoveSS(VU, EEREC_TEMP, EEREC_S); - _vuFlipRegSS(VU, EEREC_ACC); - xSUB.SS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - _vuFlipRegSS(VU, EEREC_ACC); - } - } - } - else { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - - VU_MERGE_REGS(EEREC_ACC, t1reg); - _freeXMMreg(t1reg); - } - else { - // negate - xXOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - } - else { - xMOVAPS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - } - } - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_SUBA_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console.WriteLn ("recVUMI_SUBA_xyzw"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - - if( _X_Y_Z_W == 8 ) { - if( xyzw == 0 ) { - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_T)); - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMOVSS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xSUB.SS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - } - } - else { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - - VU_MERGE_REGS(EEREC_ACC, t1reg); - _freeXMMreg(t1reg); - } - else { - // negate - xXOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - } - else { - xMOVAPS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_S)); - xSUB.PS(xRegisterSSE(EEREC_ACC), xRegisterSSE(EEREC_TEMP)); - } - } - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_SUBAi(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_SUBAq(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_SUBAx(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 0, info); } -void recVUMI_SUBAy(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 1, info); } -void recVUMI_SUBAz(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 2, info); } -void recVUMI_SUBAw(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MUL -//------------------------------------------------------------------ -void recVUMI_MUL_toD(VURegs *VU, int regd, int info) -{ - //Console.WriteLn ("recVUMI_MUL_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - - if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(_Ft_ ? EEREC_T : EEREC_S)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else if( _Fd_ == _Fs_ && _Fs_ == _Ft_ && _XYZW_SS ) { - _vuFlipRegSS(VU, EEREC_D); - xMUL.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - _vuFlipRegSS(VU, EEREC_D); - } - else if( _X_Y_Z_W == 8 ) { - if (regd == EEREC_S) xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - else if (regd == EEREC_T) xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - else { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - } - } - else if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if (regd == EEREC_S) xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - else if (regd == EEREC_T) xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - else { - xMOVAPS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - } - } -} - -void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info) -{ - //Console.WriteLn ("recVUMI_MUL_iq_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _XYZW_SS ) { - if( regd == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.SS(xRegisterSSE(regd), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_S); - _vuFlipRegSS(VU, regd); - } - else if( regd == EEREC_S ) { - _vuFlipRegSS(VU, regd); - xMUL.SS(xRegisterSSE(regd), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, regd); - } - else { - if( _X ) { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.SS(xRegisterSSE(regd), ptr[(void*)(addr)]); - } - else { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - } - } - else { - if( _X_Y_Z_W != 0xf || regd == EEREC_TEMP || regd == EEREC_S ) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - } - - if (_X_Y_Z_W != 0xf) { - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_TEMP ) xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - else if (regd == EEREC_S) xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - else { - xMOVSSZX(xRegisterSSE(regd), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x00); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - } - } - } -} - -void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) -{ - //Console.WriteLn ("recVUMI_MUL_xyzw_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - if (_Fs_) { // This is needed for alot of games; so always clamp this operand - if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set - else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set - } - if( _Ft_ == 0 ) { - if( xyzw < 3 ) { - if (_X_Y_Z_W != 0xf) { - xXOR.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else xXOR.PS(xRegisterSSE(regd), xRegisterSSE(regd)); - } - else { - assert(xyzw==3); - if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else xMOVAPS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - } - } - else if( _X_Y_Z_W == 8 ) { - if( regd == EEREC_TEMP ) { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - } - else { - if( xyzw == 0 ) { - if( regd == EEREC_T ) { - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - } - else { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - } - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - } - } - else { - if( _X_Y_Z_W != 0xf || regd == EEREC_TEMP || regd == EEREC_S ) - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if (_X_Y_Z_W != 0xf) { - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_TEMP ) xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - else if (regd == EEREC_S) xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - else { - _unpackVF_xyzw(regd, EEREC_T, xyzw); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - } - } - } -} - -void recVUMI_MUL(VURegs *VU, int info) -{ - //Console.WriteLn ("recVUMI_MUL"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MUL_toD(VU, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MUL_iq(VURegs *VU, int addr, int info) -{ - //Console.WriteLn ("recVUMI_MUL_iq"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MUL_iq_toD(VU, addr, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); - // spacefisherman needs overflow checking on MULi.z -} - -void recVUMI_MUL_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console.WriteLn ("recVUMI_MUL_xyzw"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MULi(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MULq(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_MULx(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 0, info); } -void recVUMI_MULy(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 1, info); } -void recVUMI_MULz(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 2, info); } -void recVUMI_MULw(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MULA -//------------------------------------------------------------------ -void recVUMI_MULA( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MULA"); - recVUMI_MUL_toD(VU, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MULA_iq(VURegs *VU, int addr, int info) -{ - //Console.WriteLn ("recVUMI_MULA_iq"); - recVUMI_MUL_iq_toD(VU, addr, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MULA_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console.WriteLn ("recVUMI_MULA_xyzw"); - recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MULAi(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MULAq(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_MULAx(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 0, info); } -void recVUMI_MULAy(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 1, info); } -void recVUMI_MULAz(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 2, info); } -void recVUMI_MULAw(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MADD -//------------------------------------------------------------------ -void recVUMI_MADD_toD(VURegs *VU, int regd, int info) -{ - //Console.WriteLn ("recVUMI_MADD_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - - - if( _X_Y_Z_W == 8 ) { - if( regd == EEREC_ACC ) { - xMOVSS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMUL.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - else if (regd == EEREC_T) { - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else if (regd == EEREC_S) { - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - } - else if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_ACC ) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - else if (regd == EEREC_T) { - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else if (regd == EEREC_S) { - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else { - xMOVAPS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - } -} - -void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info) -{ - //Console.WriteLn ("recVUMI_MADD_iq_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _X_Y_Z_W == 8 ) { - if( _Fs_ == 0 ) { - // do nothing if regd == ACC (ACCx <= ACCx + 0.0 * *addr) - if( regd != EEREC_ACC ) { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - return; - } - - if( regd == EEREC_ACC ) { - assert( EEREC_TEMP < iREGCNT_XMM ); - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xMUL.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - else if( regd == EEREC_S ) { - xMUL.SS(xRegisterSSE(regd), ptr[(void*)(addr)]); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.SS(xRegisterSSE(regd), ptr[(void*)(addr)]); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - } - else { - if( _Fs_ == 0 ) { - if( regd == EEREC_ACC ) { // ACCxyz is unchanged, ACCw <= ACCw + *addr - if( _W ) { // if _W is zero, do nothing - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); // { *addr, 0, 0, 0 } - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x27); // { 0, 0, 0, *addr } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); // { ACCx, ACCy, ACCz, ACCw + *addr } - } - } - else { // DESTxyz <= ACCxyz, DESTw <= ACCw + *addr - if( _W ) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); // { *addr, 0, 0, 0 } - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x27); // { 0, 0, 0, *addr } - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); // { ACCx, ACCy, ACCz, ACCw + *addr } - } - else xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - - return; - } - - if( _X_Y_Z_W != 0xf || regd == EEREC_ACC || regd == EEREC_TEMP || regd == EEREC_S ) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - } - - if (_X_Y_Z_W != 0xf) { - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_ACC ) { - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - else if( regd == EEREC_S ) { - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else if( regd == EEREC_TEMP ) { - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else { - xMOVSSZX(xRegisterSSE(regd), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(regd), xRegisterSSE(regd), 0x00); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - } - } -} - -void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) -{ - //Console.WriteLn ("recVUMI_MADD_xyzw_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - if (_Fs_) { // This is needed for alot of games; so always clamp this operand - if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set - else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set - } - if( _Ft_ == 0 ) { - - if( xyzw == 3 ) { - // just add - if( _X_Y_Z_W == 8 ) { - if( regd == EEREC_S ) xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - else { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - } - } - else { - if( _X_Y_Z_W != 0xf ) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_S ) xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - else { - xMOVAPS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - } - } - } - } - else { - // just move acc to regd - if( _X_Y_Z_W != 0xf ) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else xMOVAPS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - - return; - } - - if( _X_Y_Z_W == 8 ) { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if( regd == EEREC_ACC ) { - xMUL.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - else if( regd == EEREC_S ) { - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else if( regd == EEREC_TEMP ) { - xMUL.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else { - xMOVSS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xMUL.SS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } - xADD.SS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - } - else { - if( _X_Y_Z_W != 0xf || regd == EEREC_ACC || regd == EEREC_TEMP || regd == EEREC_S ) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - } - - if (_X_Y_Z_W != 0xf) { - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_ACC ) { - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - else if( regd == EEREC_S ) { - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else if( regd == EEREC_TEMP ) { - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - else { - _unpackVF_xyzw(regd, EEREC_T, xyzw); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xADD.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - } - } - } -} - -void recVUMI_MADD(VURegs *VU, int info) -{ - //Console.WriteLn ("recVUMI_MADD"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MADD_toD(VU, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MADD_iq(VURegs *VU, int addr, int info) -{ - //Console.WriteLn ("recVUMI_MADD_iq"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MADD_iq_toD(VU, addr, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MADD_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console.WriteLn ("recVUMI_MADD_xyzw"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MADD_xyzw_toD(VU, xyzw, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); - // super bust-a-move arrows needs overflow clamping -} - -void recVUMI_MADDi(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MADDq(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_MADDx(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 0, info); } -void recVUMI_MADDy(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 1, info); } -void recVUMI_MADDz(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 2, info); } -void recVUMI_MADDw(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MADDA -//------------------------------------------------------------------ -void recVUMI_MADDA( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MADDA"); - recVUMI_MADD_toD(VU, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAi( VURegs *VU , int info) -{ - //Console.WriteLn ("recVUMI_MADDAi"); - recVUMI_MADD_iq_toD( VU, VU_VI_ADDR(REG_I, 1), EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAq( VURegs *VU , int info) -{ - //Console.WriteLn ("recVUMI_MADDAq "); - recVUMI_MADD_iq_toD( VU, VU_REGQ_ADDR, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAx( VURegs *VU , int info) -{ - //Console.WriteLn ("recVUMI_MADDAx"); - recVUMI_MADD_xyzw_toD(VU, 0, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAy( VURegs *VU , int info) -{ - //Console.WriteLn ("recVUMI_MADDAy"); - recVUMI_MADD_xyzw_toD(VU, 1, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAz( VURegs *VU , int info) -{ - //Console.WriteLn ("recVUMI_MADDAz"); - recVUMI_MADD_xyzw_toD(VU, 2, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAw( VURegs *VU , int info) -{ - //Console.WriteLn ("recVUMI_MADDAw"); - recVUMI_MADD_xyzw_toD(VU, 3, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MSUB -//------------------------------------------------------------------ -void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) -{ - //Console.WriteLn ("recVUMI_MSUB_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - - if( t1reg >= 0 ) { - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_ACC)); - xSUB.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - - VU_MERGE_REGS(regd, t1reg); - _freeXMMreg(t1reg); - } - else { - xXOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - } - else { - if( regd == EEREC_S ) { - assert( regd != EEREC_ACC ); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xSUB.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xXOR.PS(xRegisterSSE(regd), ptr[&const_clip[4]]); - } - else if( regd == EEREC_T ) { - assert( regd != EEREC_ACC ); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xSUB.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xXOR.PS(xRegisterSSE(regd), ptr[&const_clip[4]]); - } - else if( regd == EEREC_TEMP ) { - xMOVAPS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xSUB.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xXOR.PS(xRegisterSSE(regd), ptr[&const_clip[4]]); - } - else { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMOVAPS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xSUB.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - } -} - -void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info) -{ - //Console.WriteLn ("recVUMI_MSUB_temp_toD"); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - - if( t1reg >= 0 ) { - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_ACC)); - xSUB.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - - if ( regd != EEREC_TEMP ) { VU_MERGE_REGS(regd, t1reg); } - else xMOVAPS(xRegisterSSE(regd), xRegisterSSE(t1reg)); - - _freeXMMreg(t1reg); - } - else { - xXOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - } - else { - if( regd == EEREC_ACC ) { - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xSUB.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - else if( regd == EEREC_S ) { - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xSUB.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xXOR.PS(xRegisterSSE(regd), ptr[&const_clip[4]]); - } - else if( regd == EEREC_TEMP ) { - xMUL.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - xSUB.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xXOR.PS(xRegisterSSE(regd), ptr[&const_clip[4]]); - } - else { - xMOVAPS(xRegisterSSE(regd), xRegisterSSE(EEREC_ACC)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - xSUB.PS(xRegisterSSE(regd), xRegisterSSE(EEREC_TEMP)); - } - } -} - -void recVUMI_MSUB_iq_toD(VURegs *VU, int regd, int addr, int info) -{ - //Console.WriteLn ("recVUMI_MSUB_iq_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - vuFloat3(addr); - } - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - recVUMI_MSUB_temp_toD(VU, regd, info); -} - -void recVUMI_MSUB_xyzw_toD(VURegs *VU, int regd, int xyzw, int info) -{ - //Console.WriteLn ("recVUMI_MSUB_xyzw_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 1 << (3 - xyzw)); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - recVUMI_MSUB_temp_toD(VU, regd, info); -} - -void recVUMI_MSUB(VURegs *VU, int info) -{ - //Console.WriteLn ("recVUMI_MSUB"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_toD(VU, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUB_iq(VURegs *VU, int addr, int info) -{ - //Console.WriteLn ("recVUMI_MSUB_iq"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_iq_toD(VU, EEREC_D, addr, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUBi(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MSUBq(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_MSUBx(VURegs *VU, int info) -{ - //Console.WriteLn ("recVUMI_MSUBx"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 0, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUBy(VURegs *VU, int info) -{ - //Console.WriteLn ("recVUMI_MSUBy"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 1, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUBz(VURegs *VU, int info) -{ - //Console.WriteLn ("recVUMI_MSUBz"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 2, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUBw(VURegs *VU, int info) -{ - //Console.WriteLn ("recVUMI_MSUBw"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 3, info); - recUpdateFlags(VU, EEREC_D, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MSUBA -//------------------------------------------------------------------ -void recVUMI_MSUBA( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MSUBA"); - recVUMI_MSUB_toD(VU, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAi( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MSUBAi "); - recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_VI_ADDR(REG_I, 1), info ); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAq( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MSUBAq"); - recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_REGQ_ADDR, info ); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAx( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MSUBAx"); - recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 0, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAy( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MSUBAy"); - recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 1, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAz( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MSUBAz "); - recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 2, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAw( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_MSUBAw"); - recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 3, info); - recUpdateFlags(VU, EEREC_ACC, info); -} -//------------------------------------------------------------------ - - -static const __aligned16 u32 special_mask[4] = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000}; -static const __aligned16 u32 special_mask2[4] = {0, 0x40000000, 0, 0x40000000}; - -__aligned16 u32 temp_loc[4]; -__aligned16 u32 temp_loc2[4]; - -//MAX/MINI are non-arithmetic operations that implicitly support numbers with the EXP field being 0 ("denormals"). -// -//As such, they are sometimes used for integer move and (positive!) integer max/min, knowing that integers that -//represent denormals will not be flushed to 0. -// -//As such, this implementation performs a non-arithmetic operation that supports "denormals" and "infs/nans". -//There might be an easier way to do it but here, MAX/MIN is performed with PMAXPD/PMINPD. -//Fake double-precision numbers are constructed by copying the sign of the original numbers, clearing the upper 32 bits, -//setting the 62nd bit to 1 (to ensure double-precision number is "normalized") and having the lower 32bits -//being the same as the original number. - -void MINMAXlogical(VURegs *VU, int info, int min, int mode, uptr addr = 0, int xyzw = 0) -//mode1 = iq, mode2 = xyzw, mode0 = normal -{ - int t1regbool = 0; - int t1reg = _vuGetTempXMMreg(info); - if (t1reg < 0) - { - t1regbool = 1; - for (t1reg = 0; ( (t1reg == EEREC_D) || (t1reg == EEREC_S) || (mode != 1 && t1reg == EEREC_T) - || (t1reg == EEREC_TEMP) ); t1reg++); // Find unused reg (For first temp reg) - xMOVAPS(ptr[temp_loc], xRegisterSSE(t1reg)); // Backup t1reg XMM reg - } - int t2regbool = -1; - int t2reg = EEREC_TEMP; - if (EEREC_TEMP == EEREC_D || EEREC_TEMP == EEREC_S || (mode != 1 && EEREC_TEMP == EEREC_T)) - { - t2regbool = 0; - t2reg = _vuGetTempXMMreg(info); - if (t2reg < 0) - { - t2regbool = 1; - for (t2reg = 0; ( (t2reg == EEREC_D) || (t2reg == EEREC_S) || (mode != 1 && t2reg == EEREC_T) || - (t2reg == t1reg) || (t2reg == EEREC_TEMP) ); t2reg++); // Find unused reg (For second temp reg) - xMOVAPS(ptr[temp_loc2], xRegisterSSE(t2reg)); // Backup t2reg XMM reg - } - } - - if (_X || _Y) - { - xPSHUF.D(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S), 0x50); - xPAND(xRegisterSSE(t1reg), ptr[special_mask]); - xPOR(xRegisterSSE(t1reg), ptr[special_mask2]); - if (mode == 0) - xPSHUF.D(xRegisterSSE(t2reg), xRegisterSSE(EEREC_T), 0x50); - else if (mode == 1) - { - xMOVDZX(xRegisterSSE(t2reg), ptr[(void*)(addr)]); - xPSHUF.D(xRegisterSSE(t2reg), xRegisterSSE(t2reg), 0x00); - } - else if (mode == 2) - _unpackVF_xyzw(t2reg, EEREC_T, xyzw); - xPAND(xRegisterSSE(t2reg), ptr[special_mask]); - xPOR(xRegisterSSE(t2reg), ptr[special_mask2]); - if (min) - xMIN.PD(xRegisterSSE(t1reg), xRegisterSSE(t2reg)); - else - xMAX.PD(xRegisterSSE(t1reg), xRegisterSSE(t2reg)); - xPSHUF.D(xRegisterSSE(t1reg), xRegisterSSE(t1reg), 0x88); - VU_MERGE_REGS_CUSTOM(EEREC_D, t1reg, 0xc & _X_Y_Z_W); - } - - if (_Z || _W) - { - xPSHUF.D(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S), 0xfa); - xPAND(xRegisterSSE(t1reg), ptr[special_mask]); - xPOR(xRegisterSSE(t1reg), ptr[special_mask2]); - if (mode == 0) - xPSHUF.D(xRegisterSSE(t2reg), xRegisterSSE(EEREC_T), 0xfa); - else if (mode == 1) - { - xMOVDZX(xRegisterSSE(t2reg), ptr[(void*)(addr)]); - xPSHUF.D(xRegisterSSE(t2reg), xRegisterSSE(t2reg), 0x00); - } - else if (mode == 2) - _unpackVF_xyzw(t2reg, EEREC_T, xyzw); - xPAND(xRegisterSSE(t2reg), ptr[special_mask]); - xPOR(xRegisterSSE(t2reg), ptr[special_mask2]); - if (min) - xMIN.PD(xRegisterSSE(t1reg), xRegisterSSE(t2reg)); - else - xMAX.PD(xRegisterSSE(t1reg), xRegisterSSE(t2reg)); - xPSHUF.D(xRegisterSSE(t1reg), xRegisterSSE(t1reg), 0x88); - VU_MERGE_REGS_CUSTOM(EEREC_D, t1reg, 0x3 & _X_Y_Z_W); - } - - if (t1regbool == 0) - _freeXMMreg(t1reg); - else if (t1regbool == 1) - xMOVAPS(xRegisterSSE(t1reg), ptr[temp_loc]); // Restore t1reg XMM reg - if (t2regbool == 0) - _freeXMMreg(t2reg); - else if (t2regbool == 1) - xMOVAPS(xRegisterSSE(t2reg), ptr[temp_loc2]); // Restore t2reg XMM reg -} - -//------------------------------------------------------------------ -// MAX -//------------------------------------------------------------------ - -void recVUMI_MAX(VURegs *VU, int info) -{ - if ( _Fd_ == 0 ) return; - //Console.WriteLn ("recVUMI_MAX"); - - if (MINMAXFIX) - MINMAXlogical(VU, info, 0, 0); - else - { - - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - - if( _X_Y_Z_W == 8 ) { - if (EEREC_D == EEREC_S) xMAX.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if (EEREC_D == EEREC_T) xMAX.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMAX.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - else if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMAX.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if( EEREC_D == EEREC_S ) xMAX.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xMAX.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVAPS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMAX.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - } -} - -void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info) -{ - if ( _Fd_ == 0 ) return; - //Console.WriteLn ("recVUMI_MAX_iq"); - - if (MINMAXFIX) - MINMAXlogical(VU, info, 0, 1, addr); - else - { - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - vuFloat3(addr); - - if( _XYZW_SS ) { - if( EEREC_D == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMAX.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_S); - - // have to flip over EEREC_D if computing flags! - //if( (info & PROCESS_VU_UPDATEFLAGS) ) - _vuFlipRegSS(VU, EEREC_D); - } - else if( EEREC_D == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_D); - xMAX.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_D); - } - else { - if( _X ) { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMAX.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - } - else { - _vuMoveSS(VU, EEREC_TEMP, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); - xMAX.SS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - _vuFlipRegSS(VU, EEREC_D); - } - } - } - else if (_X_Y_Z_W != 0xf) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - xMAX.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if(EEREC_D == EEREC_S) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - xMAX.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVSSZX(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0x00); - xMAX.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - } - } -} - -void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info) -{ - if ( _Fd_ == 0 ) return; - //Console.WriteLn ("recVUMI_MAX_xyzw"); - - if (_Fs_ == 0 && _Ft_ == 0) - { - if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { - if( xyzw < 3 ) { - xXOR.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[s_fones]); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - } - else if (_X_Y_Z_W != 0xf) { - if( xyzw < 3 ) { - if( _X_Y_Z_W & 1 ) xMOVAPS(xRegisterSSE(EEREC_TEMP), ptr[(&VU->VF[0].UL[0])]); // w included, so insert the whole reg - else xXOR.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); // w not included, can zero out - } - else xMOVAPS(xRegisterSSE(EEREC_TEMP), ptr[s_fones]); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - //If VF0.w isnt chosen as the constant, then its going to be MAX( 0, VF0 ), so the result is VF0 - if( xyzw < 3 ) { xMOVAPS(xRegisterSSE(EEREC_D), ptr[(&VU->VF[0].UL[0])]); } - else xMOVAPS(xRegisterSSE(EEREC_D), ptr[s_fones]); - } - return; - } - - if (MINMAXFIX) - MINMAXlogical(VU, info, 0, 2, 0, xyzw); - else - { - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - - if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { - if( xyzw == 0 ) { - if( EEREC_D == EEREC_S ) xMAX.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xMAX.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMAX.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMAX.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - } - else if (_X_Y_Z_W != 0xf) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMAX.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if (EEREC_D == EEREC_S) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMAX.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - else { - _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); - xMAX.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - } - } -} - -void recVUMI_MAXi(VURegs *VU, int info) { recVUMI_MAX_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MAXx(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 0, info); } -void recVUMI_MAXy(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 1, info); } -void recVUMI_MAXz(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 2, info); } -void recVUMI_MAXw(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MINI -//------------------------------------------------------------------ -void recVUMI_MINI(VURegs *VU, int info) -{ - if ( _Fd_ == 0 ) return; - //Console.WriteLn ("recVUMI_MINI"); - - if (MINMAXFIX) - MINMAXlogical(VU, info, 1, 0); - else - { - - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - - if( _X_Y_Z_W == 8 ) { - if (EEREC_D == EEREC_S) xMIN.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if (EEREC_D == EEREC_T) xMIN.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMIN.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - else if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMIN.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if( EEREC_D == EEREC_S ) { - //ClampUnordered(EEREC_T, EEREC_TEMP, 0); // need for GT4 vu0rec - xMIN.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - else if( EEREC_D == EEREC_T ) { - //ClampUnordered(EEREC_S, EEREC_TEMP, 0); // need for GT4 vu0rec - xMIN.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - else { - xMOVAPS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMIN.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - } -} - -void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info) -{ - if ( _Fd_ == 0 ) return; - //Console.WriteLn ("recVUMI_MINI_iq"); - - if (MINMAXFIX) - MINMAXlogical(VU, info, 1, 1, addr); - else - { - - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - vuFloat3(addr); - - if( _XYZW_SS ) { - if( EEREC_D == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMIN.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_S); - - // have to flip over EEREC_D if computing flags! - //if( (info & PROCESS_VU_UPDATEFLAGS) ) - _vuFlipRegSS(VU, EEREC_D); - } - else if( EEREC_D == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_D); - xMIN.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - _vuFlipRegSS(VU, EEREC_D); - } - else { - if( _X ) { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMIN.SS(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - } - else { - _vuMoveSS(VU, EEREC_TEMP, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); - xMIN.SS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - _vuFlipRegSS(VU, EEREC_D); - } - } - } - else if (_X_Y_Z_W != 0xf) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - xMIN.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if(EEREC_D == EEREC_S) { - xMOVSSZX(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0x00); - xMIN.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - else { - xMOVSSZX(xRegisterSSE(EEREC_D), ptr[(void*)(addr)]); - xSHUF.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0x00); - xMIN.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - } - } -} - -void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info) -{ - if ( _Fd_ == 0 ) return; - //Console.WriteLn ("recVUMI_MINI_xyzw"); - - if (_Fs_ == 0 && _Ft_ == 0) - { - if( _X_Y_Z_W == 0xf ) - { - //If VF0.w is the constant, the result will match VF0, else its all 0's - if(xyzw == 3) xMOVAPS(xRegisterSSE(EEREC_D), ptr[(&VU->VF[0].UL[0])]); - else xXOR.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); - } - else - { - //If VF0.w is the constant, the result will match VF0, else its all 0's - if(xyzw == 3) xMOVAPS(xRegisterSSE(EEREC_TEMP), ptr[(&VU->VF[0].UL[0])]); - else xXOR.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - return; - } - if (MINMAXFIX) - MINMAXlogical(VU, info, 1, 2, 0, xyzw); - else - { - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - - if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { - if( xyzw == 0 ) { - if( EEREC_D == EEREC_S ) xMIN.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - else if( EEREC_D == EEREC_T ) xMIN.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - else { - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMIN.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); - } - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMOVSS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - xMIN.SS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - } - else if (_X_Y_Z_W != 0xf) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMIN.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if (EEREC_D == EEREC_S) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - xMIN.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_TEMP)); - } - else { - _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); - xMIN.PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); - } - } - } -} - -void recVUMI_MINIi(VURegs *VU, int info) { recVUMI_MINI_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MINIx(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 0, info); } -void recVUMI_MINIy(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 1, info); } -void recVUMI_MINIz(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 2, info); } -void recVUMI_MINIw(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// OPMULA -//------------------------------------------------------------------ -void recVUMI_OPMULA( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_OPMULA"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE); - } - - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S )); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0xD2 ); // EEREC_T = WYXZ - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0xC9 ); // EEREC_TEMP = WXZY - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T )); - - VU_MERGE_REGS_CUSTOM(EEREC_ACC, EEREC_TEMP, 14); - - // revert EEREC_T - if( EEREC_T != EEREC_ACC ) - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0xC9); - - recUpdateFlags(VU, EEREC_ACC, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// OPMSUB -//------------------------------------------------------------------ -void recVUMI_OPMSUB( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_OPMSUB"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE); - } - - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0xD2); // EEREC_T = WYXZ - xSHUF.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP), 0xC9); // EEREC_TEMP = WXZY - xMUL.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_T)); - - // negate and add - xXOR.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[4]]); - xADD.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_ACC)); - VU_MERGE_REGS_CUSTOM(EEREC_D, EEREC_TEMP, 14); - - // revert EEREC_T - if( EEREC_T != EEREC_D ) xSHUF.PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T), 0xC9); - - recUpdateFlags(VU, EEREC_D, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// NOP -//------------------------------------------------------------------ -void recVUMI_NOP( VURegs *VU, int info ) -{ - //Console.WriteLn ("recVUMI_NOP"); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// recVUMI_FTOI_Saturate() - Saturates result from FTOI Instructions -//------------------------------------------------------------------ - -// unused, but leaving here for possible reference.. -//static const __aligned16 int rec_const_0x8000000[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; - -void recVUMI_FTOI_Saturate(int rec_s, int rec_t, int rec_tmp1, int rec_tmp2) -{ - //Console.WriteLn ("recVUMI_FTOI_Saturate"); - //Duplicate the xor'd sign bit to the whole value - //FFFF FFFF for positive, 0 for negative - xMOVAPS(xRegisterSSE(rec_tmp1), xRegisterSSE(rec_s)); - xPXOR(xRegisterSSE(rec_tmp1), ptr[&const_clip[4]]); - xPSRA.D(xRegisterSSE(rec_tmp1), 31); - - //Create mask: 0 where !=8000 0000 - xMOVAPS(xRegisterSSE(rec_tmp2), xRegisterSSE(rec_t)); - xPCMP.EQD(xRegisterSSE(rec_tmp2), ptr[&const_clip[4]]); - - //AND the mask w/ the edit values - xAND.PS(xRegisterSSE(rec_tmp1), xRegisterSSE(rec_tmp2)); - - //if v==8000 0000 && positive -> 8000 0000 + FFFF FFFF -> 7FFF FFFF - //if v==8000 0000 && negative -> 8000 0000 + 0 -> 8000 0000 - //if v!=8000 0000 -> v+0 (masked from the and) - - //Add the values as needed - xPADD.D(xRegisterSSE(rec_t), xRegisterSSE(rec_tmp1)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FTOI 0/4/12/15 -//------------------------------------------------------------------ -static __aligned16 float FTIO_Temp1[4] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -static __aligned16 float FTIO_Temp2[4] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -void recVUMI_FTOI0(VURegs *VU, int info) -{ - int t1reg, t2reg; // Temp XMM regs - - if ( _Ft_ == 0 ) return; - - //Console.WriteLn ("recVUMI_FTOI0"); - - if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - xCVTTPS2DQ(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t2reg)); // Backup XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t2reg), ptr[FTIO_Temp1]); // Restore XMM reg - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg (For first temp reg) - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t1reg)); // Backup t1reg XMM reg - - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - xMOVAPS(ptr[FTIO_Temp2], xRegisterSSE(t2reg)); // Backup t2reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t1reg), ptr[FTIO_Temp1]); // Restore t1reg XMM reg - xMOVAPS(xRegisterSSE(t2reg), ptr[FTIO_Temp2]); // Restore t2reg XMM reg - } - - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - if (EEREC_T != EEREC_S) { - xMOVAPS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_S)); - vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - xCVTTPS2DQ(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T)); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t1reg)); // Backup t1reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t1reg), ptr[FTIO_Temp1]); // Restore t1reg XMM reg - } - } - else { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - xCVTTPS2DQ(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) - ; // Find unused reg (For second temp reg) - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t2reg)); // Backup XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t2reg), ptr[FTIO_Temp1]); // Restore XMM reg - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg (For first temp reg) - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t1reg)); // Backup t1reg XMM reg - - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - xMOVAPS(ptr[FTIO_Temp2], xRegisterSSE(t2reg)); // Backup t2reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t1reg), ptr[FTIO_Temp1]); // Restore t1reg XMM reg - xMOVAPS(xRegisterSSE(t2reg), ptr[FTIO_Temp2]); // Restore t2reg XMM reg - } - - xMOVAPS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_TEMP)); - } - } -} - -void recVUMI_FTOIX(VURegs *VU, int addr, int info) -{ - int t1reg, t2reg; // Temp XMM regs - - if ( _Ft_ == 0 ) return; - - //Console.WriteLn ("recVUMI_FTOIX"); - if (_X_Y_Z_W != 0xf) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - xCVTTPS2DQ(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) - ; // Find unused reg (For second temp reg) - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t2reg)); // Backup XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t2reg), ptr[FTIO_Temp1]); // Restore XMM reg - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg (For first temp reg) - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t1reg)); // Backup t1reg XMM reg - - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - xMOVAPS(ptr[FTIO_Temp2], xRegisterSSE(t2reg)); // Backup t2reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t1reg), ptr[FTIO_Temp1]); // Restore t1reg XMM reg - xMOVAPS(xRegisterSSE(t2reg), ptr[FTIO_Temp2]); // Restore t2reg XMM reg - } - - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - if (EEREC_T != EEREC_S) { - xMOVAPS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_T), ptr[(void*)(addr)]); - vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - xCVTTPS2DQ(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_T)); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t1reg)); // Backup t1reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t1reg), ptr[FTIO_Temp1]); // Restore t1reg XMM reg - } - } - else { - xMOVAPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - xCVTTPS2DQ(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_TEMP)); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) - ; // Find unused reg (For second temp reg) - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t2reg)); // Backup XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t2reg), ptr[FTIO_Temp1]); // Restore XMM reg - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg (For first temp reg) - xMOVAPS(ptr[FTIO_Temp1], xRegisterSSE(t1reg)); // Backup t1reg XMM reg - - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - xMOVAPS(ptr[FTIO_Temp2], xRegisterSSE(t2reg)); // Backup t2reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - xMOVAPS(xRegisterSSE(t1reg), ptr[FTIO_Temp1]); // Restore t1reg XMM reg - xMOVAPS(xRegisterSSE(t2reg), ptr[FTIO_Temp2]); // Restore t2reg XMM reg - } - - xMOVAPS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_TEMP)); - } - } -} - -void recVUMI_FTOI4( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int4[0], info); } -void recVUMI_FTOI12( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int12[0], info); } -void recVUMI_FTOI15( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int15[0], info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ITOF 0/4/12/15 -//------------------------------------------------------------------ -void recVUMI_ITOF0( VURegs *VU, int info ) -{ - if ( _Ft_ == 0 ) return; - - //Console.WriteLn ("recVUMI_ITOF0"); - if (_X_Y_Z_W != 0xf) { - xCVTDQ2PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - xmmregs[EEREC_T].mode |= MODE_WRITE; - } - else { - xCVTDQ2PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_S)); - vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities - } -} - -void recVUMI_ITOFX(VURegs *VU, int addr, int info) -{ - if ( _Ft_ == 0 ) return; - - //Console.WriteLn ("recVUMI_ITOFX"); - if (_X_Y_Z_W != 0xf) { - xCVTDQ2PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_TEMP), ptr[(void*)(addr)]); - vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - xmmregs[EEREC_T].mode |= MODE_WRITE; - } - else { - xCVTDQ2PS(xRegisterSSE(EEREC_T), xRegisterSSE(EEREC_S)); - xMUL.PS(xRegisterSSE(EEREC_T), ptr[(void*)(addr)]); - vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities - } -} - -void recVUMI_ITOF4( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float4[0], info); } -void recVUMI_ITOF12( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float12[0], info); } -void recVUMI_ITOF15( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float15[0], info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// CLIP -//------------------------------------------------------------------ -void recVUMI_CLIP(VURegs *VU, int info) -{ - int t1reg = EEREC_D; - int t2reg = EEREC_ACC; - int x86temp1, x86temp2; - - u32 clipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 0); - u32 prevclipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 2); - - if( clipaddr == 0 ) { // battle star has a clip right before fcset - Console.WriteLn("skipping vu clip"); - return; - } - - //Flush the clip flag before processing, incase of double clip commands (GoW) - - if( prevclipaddr != (uptr)&VU->VI[REG_CLIP_FLAG] ) { - xMOV(eax, ptr[(void*)(prevclipaddr)]); - xMOV(ptr[(&VU->VI[REG_CLIP_FLAG])], eax); - } - - assert( clipaddr != 0 ); - assert( t1reg != t2reg && t1reg != EEREC_TEMP && t2reg != EEREC_TEMP ); - - x86temp1 = ALLOCTEMPX86(MODE_8BITREG); - x86temp2 = ALLOCTEMPX86(MODE_8BITREG); - - //if ( (x86temp1 == 0) || (x86temp2 == 0) ) Console.Error("VU CLIP Allocation Error: EAX being allocated!"); - - _freeXMMreg(t1reg); // These should have been freed at allocation in eeVURecompileCode() - _freeXMMreg(t2reg); // but if they've been used since then, then free them. (just doing this incase :p (cottonvibes)) - - if( _Ft_ == 0 ) { - xMOVAPS(xRegisterSSE(EEREC_TEMP), ptr[&s_fones[0]]); // all 1s - xMOVAPS(xRegisterSSE(t1reg), ptr[&s_fones[4]]); - } - else { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, 3); - xAND.PS(xRegisterSSE(EEREC_TEMP), ptr[&const_clip[0]]); - xMOVAPS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_TEMP)); - xOR.PS(xRegisterSSE(t1reg), ptr[&const_clip[4]]); - } - - xMOV(eax, ptr[(void*)(prevclipaddr)]); - - xCMPNLE.PS(xRegisterSSE(t1reg), xRegisterSSE(EEREC_S)); //-w, -z, -y, -x - xCMPLT.PS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(EEREC_S)); //+w, +z, +y, +x - - xSHL(eax, 6); - - xMOVAPS(xRegisterSSE(t2reg), xRegisterSSE(EEREC_TEMP)); //t2 = +w, +z, +y, +x - xUNPCK.LPS(xRegisterSSE(EEREC_TEMP), xRegisterSSE(t1reg)); //EEREC_TEMP = -y,+y,-x,+x - xUNPCK.HPS(xRegisterSSE(t2reg), xRegisterSSE(t1reg)); //t2reg = -w,+w,-z,+z - xMOVMSKPS(xRegister32(x86temp2), xRegisterSSE(EEREC_TEMP)); // -y,+y,-x,+x - xMOVMSKPS(xRegister32(x86temp1), xRegisterSSE(t2reg)); // -w,+w,-z,+z - - xAND(xRegister8(x86temp1), 0x3); - xSHL(xRegister8(x86temp1), 4); - xOR(al, xRegister8(x86temp1)); - xAND(xRegister8(x86temp2), 0xf); - xOR(al, xRegister8(x86temp2)); - xAND(eax, 0xffffff); - - xMOV(ptr[(void*)(clipaddr)], eax); - - if (( !(info & (PROCESS_VU_SUPER|PROCESS_VU_COP2)) ) ) //Instantly update the flag if its called from elsewhere (unlikely, but ok) - xMOV(ptr[(&VU->VI[REG_CLIP_FLAG])], eax); - - _freeX86reg(x86temp1); - _freeX86reg(x86temp2); -} diff --git a/pcsx2/x86/sVU_zerorec.cpp b/pcsx2/x86/sVU_zerorec.cpp deleted file mode 100644 index 3ca7289529..0000000000 --- a/pcsx2/x86/sVU_zerorec.cpp +++ /dev/null @@ -1,4681 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -// Super VU recompiler - author: zerofrog(@gmail.com) - -#include "PrecompiledHeader.h" - -#include -#include -#include - -#include "Utilities/AsciiFile.h" - -#ifndef _WIN32 -#include -#endif - -#include "Common.h" - -#include "GS.h" -#include "Gif.h" -#include "VU.h" -#include "MTVU.h" - -#include "R5900.h" -#include "iR5900.h" -#include "System/RecTypes.h" - -#include "sVU_zerorec.h" -#include "NakedAsm.h" -#include "AppConfig.h" - -// Needed in gcc for find. -#include - -using namespace x86Emitter; - -// temporary externs -extern void iDumpVU0Registers(); -extern void iDumpVU1Registers(); - -// SuperVURec optimization options, uncomment only for debugging purposes -#define SUPERVU_CACHING // vu programs are saved and queried via memcompare (should be no reason to disable this) -#define SUPERVU_WRITEBACKS // don't flush the writebacks after every block -#define SUPERVU_VIBRANCHDELAY // when integers are modified right before a branch that uses the integer, - // the old integer value is used in the branch, fixes kh2 - -#define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on... - -// use x86reg caching (faster) (not really. rather lots slower :p (rama) ) -// ... and buggy too since we disabled EBP. Causes GoW2 to hang. Let's get rid of it, -// sVU is only here to serve as a regression model for Nan/INF behavior anyway. (--air) -//#define SUPERVU_X86CACHING - - -// registers won't be flushed at block boundaries (faster) (nothing noticeable speed-wise, causes SPS in Ratchet and clank (Nneeve) ) -#ifndef PCSX2_DEBUG -//#define SUPERVU_INTERCACHING -#endif - -#define SUPERVU_CHECKCONDITION 0 // has to be 0!! - -static const uint sVU_EXESIZE = _8mb; - -#define _Imm11_ (s32)( (vucode & 0x400) ? (0xfffffc00 | (vucode & 0x3ff)) : (vucode & 0x3ff) ) -#define _UImm11_ (s32)(vucode & 0x7ff) - -#define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ ((VU->code >> 6) & 0x1F) // The sa part of the instruction register -#define _It_ (_Ft_ & 15) -#define _Is_ (_Fs_ & 15) -#define _Id_ (_Fd_ & 15) - -static const u32 QWaitTimes[] = { 6, 12 }; -static const u32 PWaitTimes[] = { 53, 43, 28, 23, 17, 11, 10 }; - -static u32 s_vuInfo; // info passed into rec insts - -static const u32 s_MemSize[2] = {VU0_MEMSIZE, VU1_MEMSIZE}; -//static u8* s_recVUMem = NULL, *s_recVUPtr = NULL; -static RecompiledCodeReserve* s_recVUMem[2] = { NULL, NULL }; -static u8* s_recVUPtr[2] = { NULL, NULL }; - -// tables which are defined at the bottom of this massive file. -extern void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info); -extern void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info); - -#define INST_Q_READ 0x0001 // flush Q -#define INST_P_READ 0x0002 // flush P -#define INST_BRANCH_DELAY 0x0004 -#define INST_CLIP_WRITE 0x0040 // inst writes CLIP in the future -#define INST_STATUS_WRITE 0x0080 -#define INST_MAC_WRITE 0x0100 -#define INST_Q_WRITE 0x0200 -#define INST_CACHE_VI 0x0400 // write old vi value to s_VIBranchDelay - -// Let's tempt fate by defining two different constants with almost identical names -#define INST_DUMMY_ 0x8000 -#define INST_DUMMY 0x83c0 - -#define VFFREE_INVALID0 0x80000000 // (vffree[i]&0xf) is invalid - -//#define FORIT(it, v) for(it = (v).begin(); it != (v).end(); ++(it)) - -#ifdef PCSX2_DEBUG -u32 s_vucount = 0; - -static u32 g_vu1lastrec = 0, skipparent = -1; -static u32 s_svulast = 0, s_vufnheader; -static u32 badaddrs[][2] = {0, 0xffff}; -#endif - -union VURecRegs -{ - struct - { - u16 reg; - u16 type; - }; - u32 id; -}; - -#define SUPERVU_XGKICKDELAY 1 // yes this is needed as default (wipeout) - -class VuBaseBlock; - -struct VuFunctionHeader -{ - struct RANGE - { - RANGE() : start(0), size(0), pmem(NULL) {} - - u16 start, size; - void* pmem; // all the mem - }; - - VuFunctionHeader() : startpc(0xffffffff), pprogfunc(NULL) {} - ~VuFunctionHeader() - { - for (std::vector::iterator it = ranges.begin(); it != ranges.end(); ++it) - { - free(it->pmem); - } - } - - // returns true if the checksum for the current mem is the same as this fn - bool IsSame(void* pmem); - - u32 startpc; - void* pprogfunc; - - std::vector ranges; -}; - -struct VuBlockHeader -{ - VuBaseBlock* pblock; - u32 delay; -}; - -// one vu inst (lower and upper) -class VuInstruction -{ - public: - VuInstruction() - { - memzero(*this); - nParentPc = -1; - vicached = -1; - } - - int nParentPc; // used for syncing with flag writes, -1 for no parent - - _vuopinfo info; - - _VURegsNum regs[2]; // [0] - lower, [1] - upper - u32 livevars[2]; // live variables right before this inst, [0] - inst, [1] - float - u32 addvars[2]; // live variables to add - u32 usedvars[2]; // set if var is used in the future including vars used in this inst - u32 keepvars[2]; - u16 pqcycles; // the number of cycles to stall if function writes to the regs - u16 type; // INST_ - - u32 pClipWrite, pMACWrite, pStatusWrite; // addrs to write the flags - u32 vffree[2]; - s8 vfwrite[2], vfread0[2], vfread1[2], vfacc[2]; - s8 vfflush[2]; // extra flush regs - s8 vicached; // if >= 0, then use the cached integer s_VIBranchDelay - VuInstruction *pPrevInst; - - int SetCachedRegs(int upper, u32 vuxyz); - void Recompile(std::list::iterator& itinst, u32 vuxyz); -}; - -enum BlockType -{ - BLOCKTYPE_EOP = 0x01, // at least one of the children of the block contains eop (or the block itself) - BLOCKTYPE_FUNCTION = 0x02, - BLOCKTYPE_HASEOP = 0x04, // last inst of block is an eop - BLOCKTYPE_MACFLAGS = 0x08, - BLOCKTYPE_ANALYZED = 0x40, - BLOCKTYPE_IGNORE = 0x80, // special for recursive fns - BLOCKTYPE_ANALYZEDPARENT = 0x100 -}; - -// base block used when recompiling -class VuBaseBlock -{ - public: - typedef std::list LISTBLOCKS; - - VuBaseBlock(); - - // returns true if the leads to a EOP (ALL VU blocks must ret true) - void AssignVFRegs(); - void AssignVIRegs(int parent); - - std::list::iterator GetInstIterAtPc(int instpc); - void GetInstsAtPc(int instpc, std::list& listinsts); - - void Recompile(); - - u16 type; // BLOCKTYPE_ - u16 id; - u16 startpc; - u16 endpc; // first inst not in block - void* pcode; // x86 code pointer - void* pendcode; // end of the x86 code pointer - int cycles; - std::list insts; - std::list parents; - LISTBLOCKS blocks; // blocks branches to - u32* pChildJumps[4]; // addrs that need to be filled with the children's start addrs - // if highest bit is set, addr needs to be relational - u32 vuxyz; // corresponding bit is set if reg's xyz channels are used only - u32 vuxy; // corresponding bit is set if reg's xyz channels are used only - - _xmmregs startregs[iREGCNT_XMM], endregs[iREGCNT_XMM]; - int nStartx86, nEndx86; // indices into s_vecRegArray - - int allocX86Regs; - int prevFlagsOutOfBlock; -}; - -struct WRITEBACK -{ - WRITEBACK() : nParentPc(0), cycle(0) //, pStatusWrite(NULL), pMACWrite(NULL) - { - viwrite[0] = viwrite[1] = 0; - viread[0] = viread[1] = 0; - } - - void InitInst(VuInstruction* pinst, int cycle) const - { - u32 write = viwrite[0] | viwrite[1]; - pinst->type = ((write & (1 << REG_CLIP_FLAG)) ? INST_CLIP_WRITE : 0) | - ((write & (1 << REG_MAC_FLAG)) ? INST_MAC_WRITE : 0) | - ((write & (1 << REG_STATUS_FLAG)) ? INST_STATUS_WRITE : 0) | - ((write & (1 << REG_Q)) ? INST_Q_WRITE : 0); - pinst->nParentPc = nParentPc; - pinst->info.cycle = cycle; - for (int i = 0; i < 2; ++i) - { - pinst->regs[i].VIwrite = viwrite[i]; - pinst->regs[i].VIread = viread[i]; - } - } - - static int SortWritebacks(const WRITEBACK& w1, const WRITEBACK& w2) - { - return w1.cycle < w2.cycle; - } - - int nParentPc; - int cycle; - u32 viwrite[2]; - u32 viread[2]; -}; - -struct VUPIPELINES -{ - fmacPipe fmac[8]; - fdivPipe fdiv; - efuPipe efu; - ialuPipe ialu[8]; - std::list< WRITEBACK > listWritebacks; -}; - -VuBaseBlock::VuBaseBlock() -{ - type = 0; - endpc = 0; - cycles = 0; - id = 0; - memzero(pChildJumps); - memzero(startregs); - memzero(endregs); - allocX86Regs = nStartx86 = nEndx86 = -1; - prevFlagsOutOfBlock = 0; - startpc = 0; - vuxy = 0; - vuxyz = 0; - pendcode = nullptr; - pcode = nullptr; -} - -#define SUPERVU_STACKSIZE 0x1000 - -static std::list s_listVUHeaders[2]; -static std::list* s_plistCachedHeaders[2] = {NULL, NULL}; -static VuFunctionHeader** recVUHeaders[2] = { NULL, NULL }; -static VuBlockHeader* recVUBlocks[2] = { NULL, NULL }; -static u8* recVUStack[2] = { NULL, NULL }; -static u8* recVUStackPtr[2] = { NULL, NULL }; - -static std::vector<_x86regs> s_vecRegArray(128); - -static VURegs* VU = NULL; -static std::list s_listBlocks; -static u32 s_vu = 0; -static u32 s_UnconditionalDelay = 0; // 1 if there are two sequential branches and the last is unconditional -static u32 g_nLastBlockExecuted = 0; - -static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex); -static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes); -static void SuperVUInitLiveness(VuBaseBlock* pblock); -static void SuperVULivenessAnalysis(); -static void SuperVUEliminateDeadCode(); -static void SuperVUAssignRegs(); - -//void SuperVUFreeXMMreg(int xmmreg, int xmmtype, int reg); -#define SuperVUFreeXMMreg(...) (void)0 -void SuperVUFreeXMMregs(u32* livevars); - -static u32* SuperVUStaticAlloc(u32 size); -static void SuperVURecompile(); - -// allocate VU resources -static void SuperVUAlloc(int vuindex) -{ - if (s_recVUMem[vuindex]) return; - - wxString mem_name = pxsFmt("SuperVU%u Recompiler Cache", vuindex); - s_recVUMem[vuindex] = new RecompiledCodeReserve( mem_name, 0 ); - s_recVUMem[vuindex]->Reserve( sVU_EXESIZE, vuindex ? HostMemoryMap::sVU1rec : HostMemoryMap::sVU0rec, _256mb ); - s_recVUMem[vuindex]->SetProfilerName(pxsFmt("sVU%urec",vuindex)); - - // upper 4 bits must be zero! - if (!s_recVUMem[vuindex]->IsOk()) - { - safe_delete(s_recVUMem[vuindex]); - throw Exception::VirtualMemoryMapConflict(mem_name) - .SetDiagMsg(pxsFmt( L"SuperVU failed to allocate virtual memory below 256MB." )) - .SetUserMsg(pxE( L"Out of Memory (sorta): The SuperVU recompiler was unable to reserve the specific memory ranges required, and will not be available for use. This is not a critical error, since the sVU rec is obsolete, and you should use microVU instead anyway. :)" - )); - } -} - -void DestroyCachedHeaders(int vuindex, int j) -{ - std::list::iterator it = s_plistCachedHeaders[vuindex][j].begin(); - - while (it != s_plistCachedHeaders[vuindex][j].end()) - { - delete *it; - it++; - } - - s_plistCachedHeaders[vuindex][j].clear(); -} - -void DestroyVUHeaders(int vuindex) -{ - std::list::iterator it = s_listVUHeaders[vuindex].begin(); - - while (it != s_listVUHeaders[vuindex].end()) - { - delete *it; - it++; - } - - s_listVUHeaders[vuindex].clear(); -} - -// destroy VU resources -void SuperVUDestroy(int vuindex) -{ - pxAssertDev(vuindex == 0 || vuindex == 1, "Invalid VU index parameter!"); - - safe_delete_array(recVUHeaders[vuindex]); - safe_delete_array(recVUBlocks[vuindex]); - - if (s_plistCachedHeaders[vuindex] != NULL) - { - for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j) - { - DestroyCachedHeaders(vuindex, j); - } - safe_delete_array(s_plistCachedHeaders[vuindex]); - } - DestroyVUHeaders(vuindex); - - safe_delete(s_recVUMem[vuindex]); - safe_delete_array(recVUStack[vuindex]); -} - -// reset VU -void SuperVUReset(int vuindex) -{ - pxAssertDev(vuindex == 0 || vuindex == 1, "Invalid VU index parameter!"); - -#ifdef PCSX2_DEBUG - s_vucount = 0; -#endif - - DevCon.WriteLn("SuperVU%d: Resetting function and block lists.", vuindex); - - if (recVUHeaders[vuindex] == NULL) - recVUHeaders[vuindex] = new VuFunctionHeader* [s_MemSize[vuindex] / 8]; - if (recVUBlocks[vuindex] == NULL) - recVUBlocks[vuindex] = new VuBlockHeader[s_MemSize[vuindex] / 8]; - if (s_plistCachedHeaders[vuindex] == NULL) - s_plistCachedHeaders[vuindex] = new std::list[s_MemSize[vuindex] / 8]; - - if (recVUHeaders[vuindex]) memset(recVUHeaders[vuindex], 0, sizeof(VuFunctionHeader*) * (s_MemSize[vuindex] / 8)); - if (recVUBlocks[vuindex]) memset(recVUBlocks[vuindex], 0, sizeof(VuBlockHeader) * (s_MemSize[vuindex] / 8)); - - if (s_plistCachedHeaders[vuindex] != NULL) - { - for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j) - { - DestroyCachedHeaders(vuindex, j); - } - } - DestroyVUHeaders(vuindex); - - if (!s_recVUMem[vuindex] || !s_recVUMem[vuindex]->IsOk()) return; - - DevCon.WriteLn("SuperVU%u: Resetting recompiler cache.", vuindex); - - if (!recVUStack[vuindex]) recVUStack[vuindex] = new u8[SUPERVU_STACKSIZE * 4]; - memset(recVUStack[vuindex], 0, SUPERVU_STACKSIZE); - - s_recVUMem[vuindex]->Reset(); - s_recVUPtr[vuindex] = *s_recVUMem[vuindex]; -} - -// clear the block and any joining blocks (size given in bytes) -static void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex) -{ - std::vector::iterator itrange; - std::list::iterator it = s_listVUHeaders[vuindex].begin(); - u32 endpc = startpc + ((size + 7) & ~7); // Ensure size is a multiple of u64 (round up) - while (it != s_listVUHeaders[vuindex].end()) - { - // for every fn, check if it has code in the range - for(itrange = (*it)->ranges.begin(); itrange != (*it)->ranges.end(); itrange++) - { - if (startpc < (u32)itrange->start + itrange->size && itrange->start < endpc) - break; - } - - if (itrange != (*it)->ranges.end()) - { - recVUHeaders[vuindex][(*it)->startpc/8] = NULL; -#ifdef SUPERVU_CACHING - std::list* plist = &s_plistCachedHeaders[vuindex][(*it)->startpc / 8]; - plist->push_back(*it); - if (plist->size() > 30) - { - // list is too big, delete - //Console.Warning("Performance warning: deleting cached VU program!"); - delete plist->front(); - plist->pop_front(); - } - it = s_listVUHeaders[vuindex].erase(it); -#else - delete *it; - it = s_listVUHeaders[vuindex].erase(it); -#endif - } - else ++it; - } -} - -static VuFunctionHeader* s_pFnHeader = NULL; -static VuBaseBlock* s_pCurBlock = NULL; -static VuInstruction* s_pCurInst = NULL; -static u32 s_StatusRead = 0, s_MACRead = 0, s_ClipRead = 0; // read addrs -static u32 s_PrevStatusWrite = 0, s_PrevMACWrite = 0, s_PrevClipWrite = 0, s_PrevIWrite = 0; -static u32 s_WriteToReadQ = 0; - -static u32 s_VIBranchDelay = 0; //Value of register to use in a vi branch delayed situation - - -u32 s_TotalVUCycles; // total cycles since start of program execution - - -u32 SuperVUGetVIAddr(int reg, int read) -{ - pxAssert(s_pCurInst != NULL); - - switch (reg) - { - case REG_STATUS_FLAG: - { - u32 addr = (read == 2) ? s_PrevStatusWrite : (read ? s_StatusRead : s_pCurInst->pStatusWrite); - pxAssert(!read || addr != 0); - return addr; - } - case REG_MAC_FLAG: - { - u32 addr = (read == 2) ? s_PrevMACWrite : (read ? s_MACRead : s_pCurInst->pMACWrite); - return addr; - } - case REG_CLIP_FLAG: - { - u32 addr = (read == 2) ? s_PrevClipWrite : (read ? s_ClipRead : s_pCurInst->pClipWrite); - pxAssert(!read || addr != 0); - return addr; - } - case REG_Q: - return (read || s_WriteToReadQ) ? (uptr)&VU->VI[REG_Q] : (uptr)&VU->q; - case REG_P: - return read ? (uptr)&VU->VI[REG_P] : (uptr)&VU->p; - case REG_I: - return s_PrevIWrite; - } - -#ifdef SUPERVU_VIBRANCHDELAY - if ((read != 0) && (s_pCurInst->regs[0].pipe == VUPIPE_BRANCH) && (s_pCurInst->vicached >= 0) && (s_pCurInst->vicached == reg)) - { - return (uptr)&s_VIBranchDelay; // test for branch delays - } -#endif - - return (uptr)&VU->VI[reg]; -} - -void SuperVUDumpBlock(std::list& blocks, int vuindex) -{ - u32 *mem; - u32 i; - - g_Conf->Folders.Logs.Mkdir(); - AsciiFile eff( - Path::Combine( g_Conf->Folders.Logs, wxsFormat(L"svu%cdump%.4X.txt", s_vu?L'0':L'1', s_pFnHeader->startpc) ), L"w" - ); - - eff.Printf("Format: upper_inst lower_inst\ntype f:vf_live_vars vf_used_vars i:vi_live_vars vi_used_vars inst_cycle pq_inst\n"); - eff.Printf("Type: %.2x - qread, %.2x - pread, %.2x - clip_write, %.2x - status_write\n" - "%.2x - mac_write, %.2x -qflush\n", - INST_Q_READ, INST_P_READ, INST_CLIP_WRITE, INST_STATUS_WRITE, INST_MAC_WRITE, INST_Q_WRITE); - eff.Printf("XMM: Upper: read0 read1 write acc temp; Lower: read0 read1 write acc temp\n\n"); - - std::list::iterator itblock; - std::list::iterator itinst; - VuBaseBlock::LISTBLOCKS::iterator itchild; - - for(itblock = blocks.begin(); itblock != blocks.end(); itblock++) - { - eff.Printf( "block:%c %x-%x; children: ", ((*itblock)->type&BLOCKTYPE_HASEOP) ? '*' : ' ', - (*itblock)->startpc, (*itblock)->endpc - 8); - - for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++) - { - eff.Printf("%x ", (*itchild)->startpc); - } - eff.Printf("; vuxyz = %x, vuxy = %x\n", (*itblock)->vuxyz&(*itblock)->insts.front().usedvars[1], - (*itblock)->vuxy&(*itblock)->insts.front().usedvars[1]); - - itinst = (*itblock)->insts.begin(); - i = (*itblock)->startpc; - while (itinst != (*itblock)->insts.end()) - { - pxAssert(i <= (*itblock)->endpc); - if (itinst->type & INST_DUMMY) - { - if (itinst->nParentPc >= 0 && !(itinst->type&INST_DUMMY_)) - { - // search for the parent - eff.Printf("writeback 0x%x (%x)\n", itinst->type, itinst->nParentPc); - } - } - else - { - mem = (u32*) & VU->Micro[i]; - char* pstr = disVU1MicroUF(mem[1], i + 4); - eff.Printf( "%.4x: %-40s", i, pstr); - if (mem[1] & 0x80000000) eff.Printf( " I=%f(%.8x)\n", *(float*)mem, mem[0]); - else eff.Printf( "%s\n", disVU1MicroLF(mem[0], i)); - i += 8; - } - - ++itinst; - } - - eff.Printf("\n"); - - _x86regs* pregs; - if ((*itblock)->nStartx86 >= 0 || (*itblock)->nEndx86 >= 0) - { - eff.Printf( "X86: AX CX DX BX SP BP SI DI\n"); - } - - if ((*itblock)->nStartx86 >= 0) - { - pregs = &s_vecRegArray[(*itblock)->nStartx86]; - eff.Printf( "STR: "); - for (i = 0; i < iREGCNT_GPR; ++i) - { - if (pregs[i].inuse) - eff.Printf( "%.2d ", pregs[i].reg); - else - eff.Printf( "-1 "); - } - eff.Printf( "\n"); - } - - if ((*itblock)->nEndx86 >= 0) - { - eff.Printf( "END: "); - pregs = &s_vecRegArray[(*itblock)->nEndx86]; - for (i = 0; i < iREGCNT_GPR; ++i) - { - if (pregs[i].inuse) - eff.Printf( "%.2d ", pregs[i].reg); - else - eff.Printf( "-1 "); - } - eff.Printf( "\n"); - } - - itinst = (*itblock)->insts.begin(); - for (i = (*itblock)->startpc; i < (*itblock)->endpc; ++itinst) - { - - if (itinst->type & INST_DUMMY) - { - } - else - { - char str[256]; - sprintf(str, "%.4x:%x f:%.8x_%.8x", i, itinst->type, itinst->livevars[1], itinst->usedvars[1]); - eff.Printf( "%-46s i:%.8x_%.8x c:%d pq:%d\n", str, - itinst->livevars[0], itinst->usedvars[0], (int)itinst->info.cycle, (int)itinst->pqcycles); - - sprintf(str, "XMM r0:%d r1:%d w:%d a:%d t:%x;", - itinst->vfread0[1], itinst->vfread1[1], itinst->vfwrite[1], itinst->vfacc[1], itinst->vffree[1]); - eff.Printf( "%-46s r0:%d r1:%d w:%d a:%d t:%x\n", str, - itinst->vfread0[0], itinst->vfread1[0], itinst->vfwrite[0], itinst->vfacc[0], itinst->vffree[0]); - i += 8; - } - } - -// -#if 0 // __linux__ - - // dump the asm - if ((*itblock)->pcode != NULL) - { - char command[255]; - FILE* fasm = fopen("mydump1", "wb"); - //Console.WriteLn("writing: %x, %x", (*itblock)->startpc, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode); - fwrite((*itblock)->pcode, 1, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode, fasm); - fclose(fasm); - sprintf(command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 > tempdump"); - system(command); - fasm = fopen("tempdump", "r"); - // read all of it and write it to f - fseek(fasm, 0, SEEK_END); - vector vbuffer(ftell(fasm)); - fseek(fasm, 0, SEEK_SET); - fread(&vbuffer[0], vbuffer.size(), 1, fasm); - - fprintf(f, "\n\n"); - fwrite(&vbuffer[0], vbuffer.size(), 1, f); - fclose(fasm); - } -#endif - - eff.Printf("\n---------------\n"); - } -} - -// uncomment to count svu exec time -//#define SUPERVU_COUNT - -// Private methods -void* SuperVUGetProgram(u32 startpc, int vuindex) -{ - pxAssert(startpc < s_MemSize[vuindex]); - pxAssert((startpc % 8) == 0); - pxAssert(recVUHeaders[vuindex] != NULL); - VuFunctionHeader** pheader = &recVUHeaders[vuindex][startpc/8]; - - if (*pheader == NULL) - { - -#ifdef SUPERVU_CACHING - void* pmem = (vuindex & 1) ? VU1.Micro : VU0.Micro; - // check if program exists in cache - std::list::iterator it; - for(it = s_plistCachedHeaders[vuindex][startpc/8].begin(); it != s_plistCachedHeaders[vuindex][startpc/8].end(); it++) - { - if ((*it)->IsSame(pmem)) - { - // found, transfer to regular lists - void* pfn = (*it)->pprogfunc; - recVUHeaders[vuindex][startpc/8] = *it; - s_listVUHeaders[vuindex].push_back(*it); - s_plistCachedHeaders[vuindex][startpc/8].erase(it); - return pfn; - } - } -#endif - - *pheader = SuperVURecompileProgram(startpc, vuindex); - - if (*pheader == NULL) - { - pxAssert(s_TotalVUCycles > 0); - if (vuindex) - VU1.VI[REG_TPC].UL = startpc; - else - VU0.VI[REG_TPC].UL = startpc; - - return (void*)SuperVUEndProgram; - } - - pxAssert((*pheader)->pprogfunc != NULL); - } - //else pxAssert( (*pheader)->IsSame((vuindex&1) ? VU1.Micro : VU0.Micro) ); - - pxAssert((*pheader)->startpc == startpc); - - return (*pheader)->pprogfunc; -} - -bool VuFunctionHeader::IsSame(void* pmem) -{ -#ifdef SUPERVU_CACHING - std::vector::iterator it; - for(it = ranges.begin(); it != ranges.end(); it++) - { - if (memcmp_mmx((u8*)pmem + it->start, it->pmem, it->size)) - return false; - } -#endif - return true; -} - -std::list::iterator VuBaseBlock::GetInstIterAtPc(int instpc) -{ - pxAssert(instpc >= 0); - - int curpc = startpc; - std::list::iterator it; - for (it = insts.begin(); it != insts.end(); ++it) - { - if (it->type & INST_DUMMY) continue; - if (curpc == instpc) break; - curpc += 8; - } - - if (it != insts.end()) return it; - - pxAssert(0); - return insts.begin(); -} - -void VuBaseBlock::GetInstsAtPc(int instpc, std::list& listinsts) -{ - pxAssert(instpc >= 0); - - listinsts.clear(); - - int curpc = startpc; - std::list::iterator it; - for (it = insts.begin(); it != insts.end(); ++it) - { - if (it->type & INST_DUMMY) continue; - if (curpc == instpc) break; - curpc += 8; - } - - if (it != insts.end()) - { - listinsts.push_back(&(*it)); - return; - } - - // look for the pc in other blocks - for (std::list::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock) - { - if (*itblock == this) continue; - - if (instpc >= (*itblock)->startpc && instpc < (*itblock)->endpc) - { - listinsts.push_back(&(*(*itblock)->GetInstIterAtPc(instpc))); - } - } - - pxAssert(!listinsts.empty()); -} - -static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex) -{ - pxAssert(vuindex == 0 || vuindex == 1); - pxAssert(s_recVUPtr[vuindex] != NULL); - //Console.WriteLn("svu%c rec: %x", '0'+vuindex, startpc); - - // if recPtr reached the mem limit reset whole mem - if ((s_recVUPtr[vuindex] < s_recVUMem[vuindex]->GetPtr()) || (s_recVUPtr[vuindex] >= s_recVUMem[vuindex]->GetPtrEnd() - _256kb)) - { - Console.WriteLn("SuperVU%u: Recompiler cache reset...", vuindex); - SuperVUReset(0); - SuperVUReset(1); - if (s_TotalVUCycles > 0) - { - // already executing, so return NULL - return NULL; - } - } - - std::list::iterator itblock; - - s_vu = vuindex; - VU = s_vu ? &VU1 : &VU0; - s_pFnHeader = new VuFunctionHeader(); - s_listVUHeaders[vuindex].push_back(s_pFnHeader); - s_pFnHeader->startpc = startpc; - - memset(recVUBlocks[s_vu], 0, sizeof(VuBlockHeader) * (s_MemSize[s_vu] / 8)); - - // analyze the global graph - s_listBlocks.clear(); - VUPIPELINES pipes; - memzero(pipes.fmac); - memzero(pipes.fdiv); - memzero(pipes.efu); - memzero(pipes.ialu); - SuperVUBuildBlocks(NULL, startpc, pipes); - - // fill parents - VuBaseBlock::LISTBLOCKS::iterator itchild; - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++) - { - (*itchild)->parents.push_back(*itblock); - } - - //(*itblock)->type &= ~(BLOCKTYPE_IGNORE|BLOCKTYPE_ANALYZED); - } - - pxAssert(s_listBlocks.front()->startpc == startpc); - s_listBlocks.front()->type |= BLOCKTYPE_FUNCTION; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - SuperVUInitLiveness(*itblock); - } - - SuperVULivenessAnalysis(); - SuperVUEliminateDeadCode(); - SuperVUAssignRegs(); - -#ifdef PCSX2_DEBUG - if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu); -#endif - - // code generation - xSetPtr(s_recVUPtr[vuindex]); - g_branch = 0; - - SuperVURecompile(); - - s_recVUPtr[vuindex] = xGetPtr(); - - // set the function's range - VuFunctionHeader::RANGE r; - s_pFnHeader->ranges.reserve(s_listBlocks.size()); - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - r.start = (*itblock)->startpc; - r.size = (*itblock)->endpc - (*itblock)->startpc; -#ifdef SUPERVU_CACHING - //memxor_mmx(r.checksum, &VU->Micro[r.start], r.size); - r.pmem = malloc(r.size); - memcpy(r.pmem, &VU->Micro[r.start], r.size); -#endif - s_pFnHeader->ranges.push_back(r); - } - -#if defined(PCSX2_DEBUG) && defined(__linux__) - // dump at the end to capture the actual code - if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu); -#endif - - // destroy - for (std::list::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock) - { - delete *itblock; - } - s_listBlocks.clear(); - - pxAssertDev(s_recVUPtr[vuindex] < s_recVUMem[vuindex]->GetPtrEnd(), "SuperVU recompiler cache exceeded! (possible memory corruption)"); - - return s_pFnHeader; -} - -static int _recbranchAddr(u32 vucode) -{ - s32 bpc = pc + (_Imm11_ << 3); - /* - if ( bpc < 0 ) { - Console.WriteLn("zerorec branch warning: bpc < 0 ( %x ); Using unsigned imm11", bpc); - bpc = pc + (_UImm11_ << 3); - }*/ - bpc &= (s_MemSize[s_vu] - 1); - - return bpc; -} - -// return inst that flushes everything -static VuInstruction SuperVUFlushInst() -{ - VuInstruction inst; - // don't need to read q/p - inst.type = INST_DUMMY_;//|INST_Q_READ|INST_P_READ; - return inst; -} - -void SuperVUAddWritebacks(VuBaseBlock* pblock, const std::list& listWritebacks) -{ -#ifdef SUPERVU_WRITEBACKS - // regardless of repetition, add the pipes (for selfloops) - std::list::const_iterator itwriteback = listWritebacks.begin(); - std::list::iterator itinst = pblock->insts.begin(), itinst2; - - while (itwriteback != listWritebacks.end()) - { - if (itinst != pblock->insts.end() && (itinst->info.cycle < itwriteback->cycle || (itinst->type&INST_DUMMY))) - { - ++itinst; - continue; - } - - itinst2 = pblock->insts.insert(itinst, VuInstruction()); - itwriteback->InitInst(&(*itinst2), vucycle); - ++itwriteback; - } -#endif -} - -#ifdef SUPERVU_VIBRANCHDELAY -static VuInstruction* getDelayInst(VuInstruction* pInst) -{ - // check for the N cycle branch delay - // example of 2 cycles delay (monster house) : - // sqi vi05 - // sqi vi05 - // ibeq vi05, vi03 - // The ibeq should read the vi05 before the first sqi - - //more info: - - // iaddiu vi01, 0, 1 - // ibeq vi01, 0 <- reads vi01 before the iaddiu - - // iaddiu vi01, 0, 1 - // iaddiu vi01, vi01, 1 - // iaddiu vi01, vi01, 1 - // ibeq vi01, 0 <- reads vi01 before the last two iaddiu's (so the value read is 1) - - // ilw vi02, addr - // iaddiu vi01, 0, 1 - // ibeq vi01, vi02 <- reads current values of both vi01 and vi02 because the branch instruction stalls - - int delay = 1; - VuInstruction* pDelayInst = NULL; - VuInstruction* pTargetInst = pInst->pPrevInst; - while (1) - { - if (pTargetInst != NULL - && pTargetInst->info.cycle + delay == pInst->info.cycle - && (pTargetInst->regs[0].pipe == VUPIPE_IALU || pTargetInst->regs[0].pipe == VUPIPE_FMAC) - && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) - && (delay == 1 || ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff)) - && !(pTargetInst->regs[0].VIread&((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_CLIP_FLAG)))) - { - pDelayInst = pTargetInst; - pTargetInst = pTargetInst->pPrevInst; - delay++; - if (delay == 5) //maximum delay is 4 (length of the pipeline) - { - DevCon.WriteLn("supervu: cycle branch delay maximum (4) is reached"); - break; - } - } - else break; - } - if (delay > 1) DevCon.WriteLn("supervu: %d cycle branch delay detected: %x %x", delay - 1, pc, s_pFnHeader->startpc); - return pDelayInst; -} -#endif - -static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes) -{ - // check if block already exists - //Console.WriteLn("startpc %x", startpc); - startpc &= (s_vu ? 0x3fff : 0xfff); - VuBlockHeader* pbh = &recVUBlocks[s_vu][startpc/8]; - - if (pbh->pblock != NULL) - { - - VuBaseBlock* pblock = pbh->pblock; - std::list::iterator itinst; - - if (pblock->startpc == startpc) - { - SuperVUAddWritebacks(pblock, pipes.listWritebacks); - return pblock; - } - - // have to divide the blocks, pnewblock is first block - pxAssert(startpc > pblock->startpc); - pxAssert(startpc < pblock->endpc); - - u32 dummyinst = (startpc - pblock->startpc) >> 3; - - // count inst non-dummy insts - itinst = pblock->insts.begin(); - int cycleoff = 0; - - while (dummyinst > 0) - { - if (itinst->type & INST_DUMMY) - ++itinst; - else - { - cycleoff = itinst->info.cycle; - ++itinst; - --dummyinst; - } - } - - // NOTE: still leaves insts with their writebacks in different blocks - while (itinst->type & INST_DUMMY) - ++itinst; - - // the difference in cycles between dummy insts (naruto utlimate ninja) - int cyclediff = 0; - if (parent == pblock) - cyclediff = itinst->info.cycle - cycleoff; - cycleoff = itinst->info.cycle; - - // new block - VuBaseBlock* pnewblock = new VuBaseBlock(); - s_listBlocks.push_back(pnewblock); - - pnewblock->startpc = startpc; - pnewblock->endpc = pblock->endpc; - pnewblock->cycles = pblock->cycles - cycleoff + cyclediff; - - pnewblock->blocks.splice(pnewblock->blocks.end(), pblock->blocks); - pnewblock->insts.splice(pnewblock->insts.end(), pblock->insts, itinst, pblock->insts.end()); - pnewblock->type = pblock->type; - - // any writebacks in the next 3 cycles also belong to original block -// for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); ) { -// if( (itinst->type & INST_DUMMY) && itinst->nParentPc >= 0 && itinst->nParentPc < (int)startpc ) { -// -// if( !(itinst->type & INST_Q_WRITE) ) -// pblock->insts.push_back(*itinst); -// itinst = pnewblock->insts.erase(itinst); -// continue; -// } -// -// ++itinst; -// } - - pbh = &recVUBlocks[s_vu][startpc/8]; - for (u32 inst = startpc; inst < pblock->endpc; inst += 8) - { - if (pbh->pblock == pblock) - pbh->pblock = pnewblock; - ++pbh; - } - - for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); itinst++) - { - itinst->info.cycle -= cycleoff; - } - - SuperVUAddWritebacks(pnewblock, pipes.listWritebacks); - - // old block - pblock->blocks.push_back(pnewblock); - pblock->endpc = startpc; - pblock->cycles = cycleoff; - pblock->type &= BLOCKTYPE_MACFLAGS; - //pblock->insts.push_back(SuperVUFlushInst()); //don't need - - return pnewblock; - } - - VuBaseBlock* pblock = new VuBaseBlock(); - s_listBlocks.push_back(pblock); - - int i = 0; - g_branch = 0; - pc = startpc; - pblock->startpc = startpc; - - // clear stalls (might be a prob) - memcpy(VU->fmac, pipes.fmac, sizeof(pipes.fmac)); - memcpy(&VU->fdiv, &pipes.fdiv, sizeof(pipes.fdiv)); - memcpy(&VU->efu, &pipes.efu, sizeof(pipes.efu)); - memcpy(VU->ialu, pipes.ialu, sizeof(pipes.ialu)); -// memset(VU->fmac, 0, sizeof(VU->fmac)); -// memset(&VU->fdiv, 0, sizeof(VU->fdiv)); -// memset(&VU->efu, 0, sizeof(VU->efu)); - - vucycle = 0; - - u8 macflags = 0; - - std::list< WRITEBACK > listWritebacks; - std::list< WRITEBACK >::iterator itwriteback; - std::list::iterator itinst; - u32 hasSecondBranch = 0; - u32 needFullStatusFlag = 0; - -#ifdef SUPERVU_WRITEBACKS - listWritebacks = pipes.listWritebacks; -#endif - - // first analysis pass for status flags - while (1) - { - u32* ptr = (u32*) & VU->Micro[pc]; - pc += 8; - int prevbranch = g_branch; - - if (ptr[1] & 0x40000000) - g_branch = 1; - - if (!(ptr[1] & 0x80000000)) // not I - { - switch (ptr[0] >> 25) - { - case 0x24: // jr - case 0x25: // jalr - case 0x20: // B - case 0x21: // BAL - case 0x28: // IBEQ - case 0x2f: // IBGEZ - case 0x2d: // IBGTZ - case 0x2e: // IBLEZ - case 0x2c: // IBLTZ - case 0x29: // IBNE - g_branch = 1; - break; - - case 0x14: // fseq - case 0x17: // fsor - //needFullStatusFlag = 2; - break; - - case 0x16: // fsand - if ((ptr[0]&0xc0)) - { - // sometimes full sticky bits are needed (simple series 2000 - oane chapara) - //Console.WriteLn("needSticky: %x-%x", s_pFnHeader->startpc, startpc); - needFullStatusFlag = 2; - } - break; - } - } - - if (prevbranch) - break; - - if (pc >= s_MemSize[s_vu]) - { - Console.Error("inf vu0 prog %x", startpc); - break; - } - } - - // second full pass - pc = startpc; - g_branch = 0; - VuInstruction* pprevinst = NULL, *pinst = NULL; - - while (1) - { - - if (pc == s_MemSize[s_vu]) - { - g_branch |= 8; - break; - } - - if (!g_branch && pbh->pblock != NULL) - { - pblock->blocks.push_back(pbh->pblock); - break; - } - - int prevbranch = g_branch; - - if (!prevbranch) - { - pbh->pblock = pblock; - } - else pxAssert(prevbranch || pbh->pblock == NULL); - - pblock->insts.push_back(VuInstruction()); - - pprevinst = pinst; - pinst = &pblock->insts.back(); - pinst->pPrevInst = pprevinst; - SuperVUAnalyzeOp(VU, &pinst->info, pinst->regs); - -#ifdef SUPERVU_VIBRANCHDELAY - if (pinst->regs[0].pipe == VUPIPE_BRANCH && pblock->insts.size() > 1) - { - - VuInstruction* pdelayinst = getDelayInst(pinst); - if (pdelayinst) - { - pdelayinst->type |= INST_CACHE_VI; - - // find the correct register - u32 mask = pdelayinst->regs[0].VIwrite & pinst->regs[0].VIread; - for (int i = 0; i < 16; ++i) - { - if (mask & (1 << i)) - { - pdelayinst->vicached = i; - break; - } - } - - pinst->vicached = pdelayinst->vicached; - } - } -#endif - - if (prevbranch) - { - if (pinst->regs[0].pipe == VUPIPE_BRANCH) - hasSecondBranch = 1; - pinst->type |= INST_BRANCH_DELAY; - } - - // check write back - for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end();) - { - if (pinst->info.cycle >= itwriteback->cycle) - { - itinst = pblock->insts.insert(--pblock->insts.end(), VuInstruction()); - itwriteback->InitInst(&(*itinst), pinst->info.cycle); - itwriteback = listWritebacks.erase(itwriteback); - } - else ++itwriteback; - } - - // add new writebacks - WRITEBACK w; - const u32 allflags = (1 << REG_CLIP_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG); - for (int j = 0; j < 2; ++j) w.viwrite[j] = pinst->regs[j].VIwrite & allflags; - - if (pinst->info.macflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_MAC_FLAG); - if (pinst->info.statusflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_STATUS_FLAG); - - if ((pinst->info.macflag | pinst->info.statusflag) & VUOP_READ) - macflags = 1; - if (pinst->regs[0].VIread & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG))) - macflags = 1; - -// if( pinst->regs[1].pipe == VUPIPE_FMAC && (pinst->regs[1].VFwrite==0&&!(pinst->regs[1].VIwrite&(1<regs[0].VIread |= (1<VIwrite |= lregs->VIwrite & (1<info.statusflag&VUOP_WRITE) && !(pinst->regs[0].VIwrite&(1 << REG_STATUS_FLAG))) && needFullStatusFlag) - { - // don't read if first inst - if (needFullStatusFlag == 1) - w.viread[1] |= (1 << REG_STATUS_FLAG); - else --needFullStatusFlag; - } - - for (int j = 0; j < 2; ++j) - { - w.viread[j] |= pinst->regs[j].VIread & allflags; - - if ((pinst->regs[j].VIread&(1 << REG_STATUS_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_STATUS_FLAG))) - { - // don't need the read anymore - pinst->regs[j].VIread &= ~(1 << REG_STATUS_FLAG); - } - if ((pinst->regs[j].VIread&(1 << REG_MAC_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_MAC_FLAG))) - { - // don't need the read anymore - pinst->regs[j].VIread &= ~(1 << REG_MAC_FLAG); - } - - pinst->regs[j].VIwrite &= ~allflags; - } - - if (pinst->info.macflag & VUOP_READ) w.viread[1] |= 1 << REG_MAC_FLAG; - if (pinst->info.statusflag & VUOP_READ) w.viread[1] |= 1 << REG_STATUS_FLAG; - - w.nParentPc = pc - 8; - w.cycle = pinst->info.cycle + 4; - listWritebacks.push_back(w); - } - - if (pinst->info.q&VUOP_READ) pinst->type |= INST_Q_READ; - if (pinst->info.p&VUOP_READ) pinst->type |= INST_P_READ; - - if (pinst->info.q&VUOP_WRITE) - { - pinst->pqcycles = QWaitTimes[pinst->info.pqinst] + 1; - - memset(&w, 0, sizeof(w)); - w.nParentPc = pc - 8; - w.cycle = pinst->info.cycle + pinst->pqcycles; - w.viwrite[0] = 1 << REG_Q; - listWritebacks.push_back(w); - } - if (pinst->info.p&VUOP_WRITE) - pinst->pqcycles = PWaitTimes[pinst->info.pqinst] + 1; - - if (prevbranch) - { - break; - } - - // make sure there is always a branch - // sensible soccer overflows on vu0, so increase the limit... - if ((s_vu == 1 && i >= 0x799) || (s_vu == 0 && i >= 0x201)) - { - Console.Error("VuRec base block doesn't terminate!"); - pxAssert(0); - break; - } - - i++; - pbh++; - } - - if (macflags) - pblock->type |= BLOCKTYPE_MACFLAGS; - - pblock->endpc = pc; - u32 lastpc = pc; - - pblock->cycles = vucycle; - -#ifdef SUPERVU_WRITEBACKS - if (!g_branch || (g_branch&8)) -#endif - { - // flush writebacks - if (!listWritebacks.empty()) - { - listWritebacks.sort(WRITEBACK::SortWritebacks); - for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback) - { - if (itwriteback->viwrite[0] & (1 << REG_Q)) - { - // ignore all Q writebacks - continue; - } - - pblock->insts.push_back(VuInstruction()); - itwriteback->InitInst(&pblock->insts.back(), vucycle); - } - - listWritebacks.clear(); - } - } - - if (!g_branch) return pblock; - - if (g_branch & 8) - { - // what if also a jump? - pblock->type |= BLOCKTYPE_EOP | BLOCKTYPE_HASEOP; - - // add an instruction to flush p and q (if written) - pblock->insts.push_back(SuperVUFlushInst()); - return pblock; - } - - // it is a (cond) branch or a jump - u32 vucode = *(u32*)(VU->Micro + lastpc - 16); - int bpc = _recbranchAddr(vucode) - 8; - - VUPIPELINES newpipes; - memcpy(newpipes.fmac, VU->fmac, sizeof(newpipes.fmac)); - memcpy(&newpipes.fdiv, &VU->fdiv, sizeof(newpipes.fdiv)); - memcpy(&newpipes.efu, &VU->efu, sizeof(newpipes.efu)); - memcpy(newpipes.ialu, VU->ialu, sizeof(newpipes.ialu)); - - for (i = 0; i < 8; ++i) newpipes.fmac[i].sCycle -= vucycle; - newpipes.fdiv.sCycle -= vucycle; - newpipes.efu.sCycle -= vucycle; - for (i = 0; i < 8; ++i) newpipes.ialu[i].sCycle -= vucycle; - - if (!listWritebacks.empty()) - { - // flush all when jumping, send down the pipe when in branching - bool bFlushWritebacks = (vucode >> 25) == 0x24 || (vucode >> 25) == 0x25;//||(vucode>>25)==0x20||(vucode>>25)==0x21; - - listWritebacks.sort(WRITEBACK::SortWritebacks); - for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback) - { - if (itwriteback->viwrite[0] & (1 << REG_Q)) - { - // ignore all Q writebacks - continue; - } - - if (itwriteback->cycle < vucycle || bFlushWritebacks) - { - pblock->insts.push_back(VuInstruction()); - itwriteback->InitInst(&pblock->insts.back(), vucycle); - } - else - { - newpipes.listWritebacks.push_back(*itwriteback); - newpipes.listWritebacks.back().cycle -= vucycle; - } - } - } - - if (!newpipes.listWritebacks.empty()) // other blocks might read the mac flags - pblock->type |= BLOCKTYPE_MACFLAGS; - - u32 firstbranch = vucode >> 25; - switch (firstbranch) - { - case 0x24: // jr - pblock->type |= BLOCKTYPE_EOP; // jump out of procedure, since not returning, set EOP - pblock->insts.push_back(SuperVUFlushInst()); - firstbranch = 0xff; //Non-Conditional Jump - break; - - case 0x25: // jalr - { - // linking, so will return to procedure - pblock->insts.push_back(SuperVUFlushInst()); - - VuBaseBlock* pjumpblock = SuperVUBuildBlocks(pblock, lastpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pxAssert(pblock != NULL); - - pblock->blocks.push_back(pjumpblock); - firstbranch = 0xff; //Non-Conditional Jump - break; - } - case 0x20: // B - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pxAssert(pblock != NULL); - - pblock->blocks.push_back(pbranchblock); - firstbranch = 0xff; //Non-Conditional Jump - break; - } - case 0x21: // BAL - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pxAssert(pblock != NULL); - pblock->blocks.push_back(pbranchblock); - firstbranch = 0xff; //Non-Conditional Jump - break; - } - case 0x28: // IBEQ - case 0x2f: // IBGEZ - case 0x2d: // IBGTZ - case 0x2e: // IBLEZ - case 0x2c: // IBLTZ - case 0x29: // IBNE - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pxAssert(pblock != NULL); - pblock->blocks.push_back(pbranchblock); - - // if has a second branch that is B or BAL, skip this - u32 secondbranch = (*(u32*)(VU->Micro + lastpc - 8)) >> 25; - if (!hasSecondBranch || (secondbranch != 0x21 && secondbranch != 0x20)) - { - pbranchblock = SuperVUBuildBlocks(pblock, lastpc, newpipes); - - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pblock->blocks.push_back(pbranchblock); - } - - break; - } - default: - pxAssert(pblock->blocks.size() == 1); - break; - } - - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - -#ifdef SUPERVU_VIBRANCHDELAY -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -///// NOTE! This could still be a hack for KH2/GoW, but until we know how it properly works, this will do for now./// -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - if (hasSecondBranch && firstbranch != 0xff) //check the previous jump was conditional and there is a second branch - { -#else - if (hasSecondBranch) - { -#endif - - u32 vucode = *(u32*)(VU->Micro + lastpc - 8); - pc = lastpc; - int bpc = _recbranchAddr(vucode); - - switch (vucode >> 25) - { - case 0x24: // jr - Console.Error("svurec bad jr jump!"); - pxAssert(0); - break; - - case 0x25: // jalr - { - Console.Error("svurec bad jalr jump!"); - pxAssert(0); - break; - } - case 0x20: // B - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - - pblock->blocks.push_back(pbranchblock); - break; - } - case 0x21: // BAL - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // replace instead of pushing a new block - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pblock->blocks.push_back(pbranchblock); - break; - } - case 0x28: // IBEQ - case 0x2f: // IBGEZ - case 0x2d: // IBGTZ - case 0x2e: // IBLEZ - case 0x2c: // IBLTZ - case 0x29: // IBNE - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pblock->blocks.push_back(pbranchblock); - - // only add the block if the previous branch doesn't include the next instruction (ie, if a direct jump) - if (firstbranch == 0x24 || firstbranch == 0x25 || firstbranch == 0x20 || firstbranch == 0x21) - { - pbranchblock = SuperVUBuildBlocks(pblock, lastpc, newpipes); - - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pblock->blocks.push_back(pbranchblock); - } - - break; - } - - jNO_DEFAULT; - } - } - - return recVUBlocks[s_vu][startpc/8].pblock; -} - -static void SuperVUInitLiveness(VuBaseBlock* pblock) -{ - std::list::iterator itinst, itnext; - - pxAssert(!pblock->insts.empty()); - - for (itinst = pblock->insts.begin(); itinst != pblock->insts.end(); ++itinst) - { - - if (itinst->type & INST_DUMMY_) - { - itinst->addvars[0] = itinst->addvars[1] = 0xffffffff; - itinst->livevars[0] = itinst->livevars[1] = 0xffffffff; - itinst->keepvars[0] = itinst->keepvars[1] = 0xffffffff; - itinst->usedvars[0] = itinst->usedvars[1] = 0; - } - else - { - itinst->addvars[0] = itinst->regs[0].VIread | itinst->regs[1].VIread; - itinst->addvars[1] = (itinst->regs[0].VFread0 ? (1 << itinst->regs[0].VFread0) : 0) | - (itinst->regs[0].VFread1 ? (1 << itinst->regs[0].VFread1) : 0) | - (itinst->regs[1].VFread0 ? (1 << itinst->regs[1].VFread0) : 0) | - (itinst->regs[1].VFread1 ? (1 << itinst->regs[1].VFread1) : 0); - - // vf0 is not handled by VFread - if (!itinst->regs[0].VFread0 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1; - if (!itinst->regs[1].VFread0 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1; - if (!itinst->regs[0].VFread1 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[0].VFr1xyzw != 0xff) itinst->addvars[1] |= 1; - if (!itinst->regs[1].VFread1 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[1].VFr1xyzw != 0xff) itinst->addvars[1] |= 1; - - - u32 vfwrite = 0; - if (itinst->regs[0].VFwrite != 0) - { - if (itinst->regs[0].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[0].VFwrite; - else vfwrite |= 1 << itinst->regs[0].VFwrite; - } - if (itinst->regs[1].VFwrite != 0) - { - if (itinst->regs[1].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[1].VFwrite; - else vfwrite |= 1 << itinst->regs[1].VFwrite; - } - if ((itinst->regs[1].VIwrite & (1 << REG_ACC_FLAG)) && itinst->regs[1].VFwxyzw != 0xf) - itinst->addvars[1] |= 1 << REG_ACC_FLAG; - - u32 viwrite = (itinst->regs[0].VIwrite | itinst->regs[1].VIwrite); - - itinst->usedvars[0] = itinst->addvars[0] | viwrite; - itinst->usedvars[1] = itinst->addvars[1] | vfwrite; - -// itinst->addvars[0] &= ~viwrite; -// itinst->addvars[1] &= ~vfwrite; - itinst->keepvars[0] = ~viwrite; - itinst->keepvars[1] = ~vfwrite; - } - } - - itinst = --pblock->insts.end(); - while (itinst != pblock->insts.begin()) - { - itnext = itinst; - --itnext; - - itnext->usedvars[0] |= itinst->usedvars[0]; - itnext->usedvars[1] |= itinst->usedvars[1]; - - itinst = itnext; - } -} - -u32 COMPUTE_LIVE(u32 R, u32 K, u32 L) -{ - u32 live = R | ((L) & (K)); - // special process mac and status flags - // only propagate liveness if doesn't write to the flag - if (!(L&(1 << REG_STATUS_FLAG)) && !(K&(1 << REG_STATUS_FLAG))) - live &= ~(1 << REG_STATUS_FLAG); - if (!(L&(1 << REG_MAC_FLAG)) && !(K&(1 << REG_MAC_FLAG))) - live &= ~(1 << REG_MAC_FLAG); - return live;//|(1<::reverse_iterator itblock; - std::list::iterator itinst, itnext; - VuBaseBlock::LISTBLOCKS::iterator itchild; - - u32 livevars[2]; - - do - { - changed = FALSE; - for (itblock = s_listBlocks.rbegin(); itblock != s_listBlocks.rend(); ++itblock) - { - - u32 newlive; - VuBaseBlock* pb = *itblock; - - // the last inst relies on the neighbor's insts - itinst = --pb->insts.end(); - - if (!pb->blocks.empty()) - { - livevars[0] = 0; - livevars[1] = 0; - for (itchild = pb->blocks.begin(); itchild != pb->blocks.end(); ++itchild) - { - VuInstruction& front = (*itchild)->insts.front(); - livevars[0] |= front.livevars[0]; - livevars[1] |= front.livevars[1]; - } - - newlive = COMPUTE_LIVE(itinst->addvars[0], itinst->keepvars[0], livevars[0]); - - // should propagate status flags whose parent insts are not in this block -// if( itinst->nParentPc >= 0 && (itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) ) -// newlive |= livevars[0]&((1<livevars[0] != newlive) - { - changed = TRUE; - itinst->livevars[0] = newlive; - } - - newlive = COMPUTE_LIVE(itinst->addvars[1], itinst->keepvars[1], livevars[1]); - if (itinst->livevars[1] != newlive) - { - changed = TRUE; - itinst->livevars[1] = newlive; - } - } - - while (itinst != pb->insts.begin()) - { - - itnext = itinst; - --itnext; - - newlive = COMPUTE_LIVE(itnext->addvars[0], itnext->keepvars[0], itinst->livevars[0]); - - // should propagate status flags whose parent insts are not in this block -// if( itnext->nParentPc >= 0 && (itnext->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) && !(itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) ) -// newlive |= itinst->livevars[0]&((1<livevars[0] != newlive) - { - changed = TRUE; - itnext->livevars[0] = newlive; - itnext->livevars[1] = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]); - } - else - { - newlive = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]); - if (itnext->livevars[1] != newlive) - { - changed = TRUE; - itnext->livevars[1] = newlive; - } - } - - itinst = itnext; - } - -// if( (livevars[0] | itinst->livevars[0]) != itinst->livevars[0] ) { -// changed = TRUE; -// itinst->livevars[0] |= livevars[0]; -// } -// if( (livevars[1] | itinst->livevars[1]) != itinst->livevars[1] ) { -// changed = TRUE; -// itinst->livevars[1] |= livevars[1]; -// } -// -// while( itinst != pb->insts.begin() ) { -// -// itnext = itinst; --itnext; -// if( (itnext->livevars[0] | (itinst->livevars[0] & itnext->keepvars[0])) != itnext->livevars[0] ) { -// changed = TRUE; -// itnext->livevars[0] |= itinst->livevars[0] & itnext->keepvars[0]; -// itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1]; -// } -// else if( (itnext->livevars[1] | (itinst->livevars[1] & itnext->keepvars[1])) != itnext->livevars[1] ) { -// changed = TRUE; -// itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1]; -// } -// -// itinst = itnext; -// } - } - - } - while (changed); -} - -static void SuperVUEliminateDeadCode() -{ - std::list::iterator itblock; - VuBaseBlock::LISTBLOCKS::iterator itchild; - std::list::iterator itinst, itnext; - std::list listParents; - std::list::iterator itparent; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - -#ifdef PCSX2_DEBUG - u32 startpc = (*itblock)->startpc; - u32 curpc = startpc; -#endif - - itnext = (*itblock)->insts.begin(); - itinst = itnext++; - while (itnext != (*itblock)->insts.end()) - { - if (itinst->type & (INST_CLIP_WRITE | INST_MAC_WRITE | INST_STATUS_WRITE)) - { - u32 live0 = itnext->livevars[0]; - if (itinst->nParentPc >= 0 && itnext->nParentPc >= 0 && itinst->nParentPc != itnext->nParentPc) // superman returns - { - // take the live vars from the next next inst - std::list::iterator itnextnext = itnext; - ++itnextnext; - if (itnextnext != (*itblock)->insts.end()) - { - live0 = itnextnext->livevars[0]; - } - } - - itinst->regs[0].VIwrite &= live0; - itinst->regs[1].VIwrite &= live0; - - u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite; - - (*itblock)->GetInstsAtPc(itinst->nParentPc, listParents); - int removetype = 0; - - for(itparent = listParents.begin(); itparent != listParents.end(); itparent++) - { - VuInstruction* parent = *itparent; - - if (viwrite & (1 << REG_CLIP_FLAG)) - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG)); - } - else - removetype |= INST_CLIP_WRITE; - - if (parent->info.macflag && (itinst->type & INST_MAC_WRITE)) - { - if (!(viwrite&(1 << REG_MAC_FLAG))) - { - //parent->info.macflag = 0; - // parent->regs[0].VIwrite &= ~(1<regs[1].VIwrite &= ~(1<regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG))); -#endif - // if VUPIPE_FMAC and destination is vf00, probably need to keep the mac flag - if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG)))) - { - parent->regs[0].VIwrite |= ((1 << REG_MAC_FLAG)); - parent->regs[1].VIwrite |= ((1 << REG_MAC_FLAG)); - } - else - removetype |= INST_MAC_WRITE; - } - else - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG)); - } - } - else removetype |= INST_MAC_WRITE; - - if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE)) - { - if (!(viwrite&(1 << REG_STATUS_FLAG))) - { - //parent->info.statusflag = 0; - // parent->regs[0].VIwrite &= ~(1<regs[1].VIwrite &= ~(1<regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG))); -#endif - if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG)))) - { - parent->regs[0].VIwrite |= ((1 << REG_STATUS_FLAG)); - parent->regs[1].VIwrite |= ((1 << REG_STATUS_FLAG)); - } - else - removetype |= INST_STATUS_WRITE; - } - else - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG)); - } - } - else removetype |= INST_STATUS_WRITE; - } - - itinst->type &= ~removetype; - if (itinst->type == 0) - { - itnext = (*itblock)->insts.erase(itinst); - itinst = itnext++; - continue; - } - } -#ifdef PCSX2_DEBUG - else - { - curpc += 8; - } -#endif - itinst = itnext; - ++itnext; - } - - if (itinst->type & INST_DUMMY) - { - // last inst with the children - u32 mask = 0; - for (itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); ++itchild) - { - mask |= (*itchild)->insts.front().livevars[0]; - } - itinst->regs[0].VIwrite &= mask; - itinst->regs[1].VIwrite &= mask; - u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite; - - if (itinst->nParentPc >= 0) - { - - (*itblock)->GetInstsAtPc(itinst->nParentPc, listParents); - int removetype = 0; - - for(itparent = listParents.begin(); itparent != listParents.end(); itparent++) - { - VuInstruction* parent = *itparent; - - if (viwrite & (1 << REG_CLIP_FLAG)) - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG)); - } - else removetype |= INST_CLIP_WRITE; - - if (parent->info.macflag && (itinst->type & INST_MAC_WRITE)) - { - if (!(viwrite&(1 << REG_MAC_FLAG))) - { - //parent->info.macflag = 0; -#ifndef SUPERVU_WRITEBACKS - pxAssert(!(parent->regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG))); -#endif - removetype |= INST_MAC_WRITE; - } - else - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG)); - } - } - else removetype |= INST_MAC_WRITE; - - if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE)) - { - if (!(viwrite&(1 << REG_STATUS_FLAG))) - { - //parent->info.statusflag = 0; -#ifndef SUPERVU_WRITEBACKS - pxAssert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG))); -#endif - removetype |= INST_STATUS_WRITE; - } - else - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG)); - } - } - else removetype |= INST_STATUS_WRITE; - } - - itinst->type &= ~removetype; - if (itinst->type == 0) - { - (*itblock)->insts.erase(itinst); - } - } - } - } -} - -void VuBaseBlock::AssignVFRegs() -{ - VuBaseBlock::LISTBLOCKS::iterator itchild; - std::list::iterator itblock; - std::list::iterator itinst, itnext, itinst2; - - // init the start regs - if (type & BLOCKTYPE_ANALYZED) return; // nothing changed - memcpy(xmmregs, startregs, sizeof(xmmregs)); - - if (type & BLOCKTYPE_ANALYZED) - { - u32 i; - // check if changed - for (i = 0; i < iREGCNT_XMM; ++i) - { - if (xmmregs[i].inuse != startregs[i].inuse) - break; - if (xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type)) - break; - } - - if (i == iREGCNT_XMM) return; // nothing changed - } - - u8* oldX86 = x86Ptr; - - for(itinst = insts.begin(); itinst != insts.end(); itinst++) - { - - if (itinst->type & INST_DUMMY) continue; - - // reserve, go from upper to lower - int lastwrite = -1; - - for (int i = 1; i >= 0; --i) - { - _VURegsNum* regs = itinst->regs + i; - - - // redo the counters so that the proper regs are released - for (u32 j = 0; j < iREGCNT_XMM; ++j) - { - if (xmmregs[j].inuse) - { - if (xmmregs[j].type == XMMTYPE_VFREG) - { - int count = 0; - itinst2 = itinst; - - if (i) - { - if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg) - { - itinst2 = insts.end(); - break; - } - else - { - ++count; - ++itinst2; - } - } - - while (itinst2 != insts.end()) - { - if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg || - itinst2->regs[1].VFread0 == xmmregs[j].reg || itinst2->regs[1].VFread1 == xmmregs[j].reg || itinst2->regs[1].VFwrite == xmmregs[j].reg) - break; - - ++count; - ++itinst2; - } - xmmregs[j].counter = 1000 - count; - } - else - { - pxAssert(xmmregs[j].type == XMMTYPE_ACC); - - int count = 0; - itinst2 = itinst; - - if (i) ++itinst2; // acc isn't used in lower insts - - while (itinst2 != insts.end()) - { - pxAssert(!((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite) & (1 << REG_ACC_FLAG))); - - if ((itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) & (1 << REG_ACC_FLAG)) - break; - - ++count; - ++itinst2; - } - - xmmregs[j].counter = 1000 - count; - } - } - } - - if (regs->VFread0) _addNeededVFtoXMMreg(regs->VFread0); - if (regs->VFread1) _addNeededVFtoXMMreg(regs->VFread1); - if (regs->VFwrite) _addNeededVFtoXMMreg(regs->VFwrite); - if (regs->VIread & (1 << REG_ACC_FLAG)) _addNeededACCtoXMMreg(); - if (regs->VIread & (1 << REG_VF0_FLAG)) _addNeededVFtoXMMreg(0); - - // alloc - itinst->vfread0[i] = itinst->vfread1[i] = itinst->vfwrite[i] = itinst->vfacc[i] = -1; - itinst->vfflush[i] = -1; - - if (regs->VFread0) - itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, regs->VFread0, 0); - else if (regs->VIread & (1 << REG_VF0_FLAG)) - itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, 0, 0); - - if (regs->VFread1) - itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, regs->VFread1, 0); - else if ((regs->VIread & (1 << REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff) - itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, 0, 0); - - if (regs->VIread & (1 << REG_ACC_FLAG)) itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0); - - int reusereg = -1; // 0 - VFwrite, 1 - VFAcc - - if (regs->VFwrite) - { - pxAssert(!(regs->VIwrite&(1 << REG_ACC_FLAG))); - - if (regs->VFwxyzw == 0xf) - { - itinst->vfwrite[i] = _checkXMMreg(XMMTYPE_VFREG, regs->VFwrite, 0); - if (itinst->vfwrite[i] < 0) reusereg = 0; - } - else - { - itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0); - } - } - else if (regs->VIwrite & (1 << REG_ACC_FLAG)) - { - - if (regs->VFwxyzw == 0xf) - { - itinst->vfacc[i] = _checkXMMreg(XMMTYPE_ACC, 0, 0); - if (itinst->vfacc[i] < 0) reusereg = 1; - } - else - { - itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0); - } - } - - if (reusereg >= 0) - { - // reuse - itnext = itinst; - itnext++; - - u8 type = reusereg ? XMMTYPE_ACC : XMMTYPE_VFREG; - u8 reg = reusereg ? 0 : regs->VFwrite; - - if (itinst->vfacc[i] >= 0 && lastwrite != itinst->vfacc[i] && - (itnext == insts.end() || ((regs->VIread&(1 << REG_ACC_FLAG)) && (!(itnext->usedvars[0]&(1 << REG_ACC_FLAG)) || !(itnext->livevars[0]&(1 << REG_ACC_FLAG)))))) - { - - pxAssert(reusereg == 0); - if (itnext == insts.end() || (itnext->livevars[0]&(1 << REG_ACC_FLAG))) _freeXMMreg(itinst->vfacc[i]); - xmmregs[itinst->vfacc[i]].inuse = 1; - xmmregs[itinst->vfacc[i]].reg = reg; - xmmregs[itinst->vfacc[i]].type = type; - xmmregs[itinst->vfacc[i]].mode = 0; - itinst->vfwrite[i] = itinst->vfacc[i]; - } - else if (itinst->vfread0[i] >= 0 && lastwrite != itinst->vfread0[i] && - (itnext == insts.end() || (regs->VFread0 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread0)) || !(itnext->livevars[1]&(1 << regs->VFread0)))))) - { - - if (itnext == insts.end() || (itnext->livevars[1]®s->VFread0)) _freeXMMreg(itinst->vfread0[i]); - - xmmregs[itinst->vfread0[i]].inuse = 1; - xmmregs[itinst->vfread0[i]].reg = reg; - xmmregs[itinst->vfread0[i]].type = type; - xmmregs[itinst->vfread0[i]].mode = 0; - - if (reusereg) - itinst->vfacc[i] = itinst->vfread0[i]; - else - itinst->vfwrite[i] = itinst->vfread0[i]; - } - else if (itinst->vfread1[i] >= 0 && lastwrite != itinst->vfread1[i] && - (itnext == insts.end() || (regs->VFread1 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread1)) || !(itnext->livevars[1]&(1 << regs->VFread1)))))) - { - - if (itnext == insts.end() || (itnext->livevars[1]®s->VFread1)) _freeXMMreg(itinst->vfread1[i]); - - xmmregs[itinst->vfread1[i]].inuse = 1; - xmmregs[itinst->vfread1[i]].reg = reg; - xmmregs[itinst->vfread1[i]].type = type; - xmmregs[itinst->vfread1[i]].mode = 0; - if (reusereg) - itinst->vfacc[i] = itinst->vfread1[i]; - else - itinst->vfwrite[i] = itinst->vfread1[i]; - } - else - { - if (reusereg) - itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0); - else - itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0); - } - } - - if (itinst->vfwrite[i] >= 0) lastwrite = itinst->vfwrite[i]; - else if (itinst->vfacc[i] >= 0) lastwrite = itinst->vfacc[i]; - - // always alloc at least 1 temp reg - int free0 = (i || regs->VFwrite || regs->VFread0 || regs->VFread1 || (regs->VIwrite & (1 << REG_ACC_FLAG)) || (regs->VIread & (1 << REG_VF0_FLAG))) - ? _allocTempXMMreg(XMMT_FPS, -1) : -1; - int free1 = 0, free2 = 0; - - if (i == 0 && itinst->vfwrite[1] >= 0 && (itinst->vfread0[0] == itinst->vfwrite[1] || itinst->vfread1[0] == itinst->vfwrite[1])) - { - itinst->vfflush[i] = _allocTempXMMreg(XMMT_FPS, -1); - } - - if (i == 1 && (regs->VIwrite & (1 << REG_CLIP_FLAG))) - { - // CLIP inst, need two extra regs - if (free0 < 0) free0 = _allocTempXMMreg(XMMT_FPS, -1); - - free1 = _allocTempXMMreg(XMMT_FPS, -1); - free2 = _allocTempXMMreg(XMMT_FPS, -1); - _freeXMMreg(free1); - _freeXMMreg(free2); - } - else if (regs->VIwrite & (1 << REG_P)) - { - // EFU inst, need extra reg - free1 = _allocTempXMMreg(XMMT_FPS, -1); - if (free0 == -1) free0 = free1; - _freeXMMreg(free1); - } - - if (itinst->vfflush[i] >= 0) _freeXMMreg(itinst->vfflush[i]); - if (free0 >= 0) _freeXMMreg(free0); - - itinst->vffree[i] = (free0 & 0xf) | (free1 << 8) | (free2 << 16); - if (free0 == -1) itinst->vffree[i] |= VFFREE_INVALID0; - - _clearNeededXMMregs(); - } - } - - pxAssert(x86Ptr == oldX86); - u32 analyzechildren = !(type & BLOCKTYPE_ANALYZED); - type |= BLOCKTYPE_ANALYZED; - - //memset(endregs, 0, sizeof(endregs)); - - if (analyzechildren) - { - for(itchild = blocks.begin(); itchild != blocks.end(); itchild++) - { - (*itchild)->AssignVFRegs(); - } - } -} - -struct MARKOVBLANKET -{ - std::list parents; - std::list children; -}; - -static MARKOVBLANKET s_markov; - -void VuBaseBlock::AssignVIRegs(int parent) -{ - const int maxregs = 6; - - if (parent) - { - if ((type&BLOCKTYPE_ANALYZEDPARENT)) - return; - - type |= BLOCKTYPE_ANALYZEDPARENT; - s_markov.parents.push_back(this); - for (LISTBLOCKS::iterator it = blocks.begin(); it != blocks.end(); ++it) - { - (*it)->AssignVIRegs(0); - } - return; - } - - if ((type&BLOCKTYPE_ANALYZED)) - return; - - // child - pxAssert(allocX86Regs == -1); - allocX86Regs = s_vecRegArray.size(); - s_vecRegArray.resize(allocX86Regs + iREGCNT_GPR); - - _x86regs* pregs = &s_vecRegArray[allocX86Regs]; - memset(pregs, 0, sizeof(_x86regs)*iREGCNT_GPR); - - pxAssert(!parents.empty()); - - std::list::iterator itparent; - u32 usedvars = insts.front().usedvars[0]; - u32 livevars = insts.front().livevars[0]; - - if (!parents.empty()) - { - u32 usedvars2 = 0xffffffff; - - for(itparent = parents.begin(); itparent != parents.end(); itparent++) - { - usedvars2 &= (*itparent)->insts.front().usedvars[0]; - } - - usedvars |= usedvars2; - } - - usedvars &= livevars; - - // currently order doesn't matter - int num = 0; - - if (usedvars) - { - for (int i = 1; i < 16; ++i) - { - if (usedvars & (1 << i)) - { - pregs[num].inuse = 1; - pregs[num].reg = i; - - livevars &= ~(1 << i); - - if (++num >= maxregs) break; - } - } - } - - if (num < maxregs) - { - livevars &= ~usedvars; - livevars &= insts.back().usedvars[0]; - - if (livevars) - { - for (int i = 1; i < 16; ++i) - { - if (livevars & (1 << i)) - { - pregs[num].inuse = 1; - pregs[num].reg = i; - - if (++num >= maxregs) break; - } - } - } - } - - s_markov.children.push_back(this); - type |= BLOCKTYPE_ANALYZED; - - for(itparent = parents.begin(); itparent != parents.end(); itparent++) - { - (*itparent)->AssignVIRegs(1); - } -} - -static void SuperVUAssignRegs() -{ - std::list::iterator itblock, itblock2; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - (*itblock)->type &= ~BLOCKTYPE_ANALYZED; - } - s_listBlocks.front()->AssignVFRegs(); - - // VI assignments, find markov blanket for each node in the graph - // then allocate regs based on the commonly used ones -#ifdef SUPERVU_X86CACHING - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - (*itblock)->type &= ~(BLOCKTYPE_ANALYZED | BLOCKTYPE_ANALYZEDPARENT); - } - s_vecRegArray.resize(0); - u8 usedregs[16]; - - // note: first block always has to start with no alloc regs - bool bfirst = true; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - - if (!((*itblock)->type & BLOCKTYPE_ANALYZED)) - { - - if ((*itblock)->parents.size() == 0) - { - (*itblock)->type |= BLOCKTYPE_ANALYZED; - bfirst = false; - continue; - } - - s_markov.children.clear(); - s_markov.parents.clear(); - (*itblock)->AssignVIRegs(0); - - // assign the regs - int regid = s_vecRegArray.size(); - s_vecRegArray.resize(regid + iREGCNT_GPR); - - _x86regs* mergedx86 = &s_vecRegArray[regid]; - memset(mergedx86, 0, sizeof(_x86regs)*iREGCNT_GPR); - - if (!bfirst) - { - *(u32*)usedregs = *((u32*)usedregs + 1) = *((u32*)usedregs + 2) = *((u32*)usedregs + 3) = 0; - - for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++) - { - pxAssert((*itblock2)->allocX86Regs >= 0); - _x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs]; - for (int i = 0; i < iREGCNT_GPR; ++i) - { - if (pregs[i].inuse && pregs[i].reg < 16) - { - //pxAssert( pregs[i].reg < 16); - usedregs[pregs[i].reg]++; - } - } - } - - int num = 1; - for (int i = 0; i < 16; ++i) - { - if (usedregs[i] == s_markov.children.size()) - { - // use - mergedx86[num].inuse = 1; - mergedx86[num].reg = i; - mergedx86[num].type = (s_vu ? X86TYPE_VU1 : 0) | X86TYPE_VI; - mergedx86[num].mode = MODE_READ; - if (++num >= iREGCNT_GPR) - break; - if (num == ESP) - ++num; - } - } - - for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++) - { - pxAssert((*itblock2)->nStartx86 == -1); - (*itblock2)->nStartx86 = regid; - } - - for(itblock2 = s_markov.parents.begin(); itblock2 != s_markov.parents.end(); itblock2++) - { - pxAssert((*itblock2)->nEndx86 == -1); - (*itblock2)->nEndx86 = regid; - } - } - - bfirst = false; - } - } -#endif -} - -////////////////// -// Recompilation -////////////////// - -// cycles in which the last Q,P regs were finished (written to VU->VI[]) -// the write occurs before the instruction is executed at that cycle -// compare with s_TotalVUCycles -// if less than 0, already flushed -int s_writeQ, s_writeP; - -// declare the saved registers -uptr s_vu1esp, s_callstack; -uptr s_vuebx, s_vuedi, s_vu1esi; - -static int s_recWriteQ, s_recWriteP; // wait times during recompilation -static int s_needFlush; // first bit - Q, second bit - P, third bit - Q has been written, fourth bit - P has been written - -static int s_JumpX86; -static int s_ScheduleXGKICK = 0, s_XGKICKReg = -1; - -void recVUMI_XGKICK_(VURegs *VU); - -void SuperVUCleanupProgram(u32 startpc, int vuindex) -{ -#ifdef SUPERVU_COUNT - QueryPerformanceCounter(&svufinal); - svutime += (u32)(svufinal.QuadPart - svubase.QuadPart); -#endif - - VU = vuindex ? &VU1 : &VU0; - VU->cycle += s_TotalVUCycles; - - //VU cycle stealing hack, 3000 cycle maximum so it doesn't get out of hand - if (s_TotalVUCycles < 3000) - cpuRegs.cycle += s_TotalVUCycles * EmuConfig.Speedhacks.EECycleSkip; - else - cpuRegs.cycle += 3000 * EmuConfig.Speedhacks.EECycleSkip; - - if ((int)s_writeQ > 0) VU->VI[REG_Q] = VU->q; - if ((int)s_writeP > 0) - { - pxAssert(VU == &VU1); - VU1.VI[REG_P] = VU1.p; // only VU1 - } - - //memset(recVUStack, 0, SUPERVU_STACKSIZE * 4); - - // Could clear allocation info to prevent possibly bad data being used in other parts of pcsx2; - // not doing this because it's slow and not needed (rama) - // _initXMMregs(); - // _initX86regs(); -} - -#if defined(_MSC_VER) - -// entry point of all vu programs from emulator calls -__declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex) -{ - // Stackframe setup for the recompiler: - // We rewind the stack 4 bytes, which places the parameters of this function before - // any calls we might make from recompiled code. The return address for this function - // call is subsequently stored in s_callstack. - - __asm - { - mov eax, dword ptr [esp] - mov s_TotalVUCycles, 0 // necessary to be here! - add esp, 4 - mov s_callstack, eax - call SuperVUGetProgram - - // save cpu state - //mov s_vu1ebp, ebp - mov s_vu1esi, esi - mov s_vuedi, edi - mov s_vuebx, ebx - - mov s_vu1esp, esp - and esp, -16 // align stack for GCC compilance - - //stmxcsr s_ssecsr - ldmxcsr g_sseVUMXCSR - - // init vars - mov s_writeQ, 0xffffffff - mov s_writeP, 0xffffffff - - jmp eax - } -} - -// exit point of all vu programs -__declspec(naked) static void SuperVUEndProgram() -{ - __asm - { - // restore cpu state - ldmxcsr g_sseMXCSR - - //mov ebp, s_vu1ebp - mov esi, s_vu1esi - mov edi, s_vuedi - mov ebx, s_vuebx - - mov esp, s_vu1esp // restore from aligned stack - - call SuperVUCleanupProgram - jmp s_callstack // so returns correctly - } -} - -#endif - -// Flushes P/Q regs -void SuperVUFlush(int p, int wait) -{ - u8* pjmp[3] = { nullptr, nullptr, nullptr }; - if (!(s_needFlush&(1 << p))) return; - - int recwait = p ? s_recWriteP : s_recWriteQ; - if (!wait && s_pCurInst->info.cycle < recwait) return; - - if (recwait == 0) - { - // write didn't happen this block - xMOV(eax, ptr[(void*)(p ? (uptr)&s_writeP : (uptr)&s_writeQ)]); - xOR(eax, eax); - pjmp[0] = JS8(0); - - if (s_pCurInst->info.cycle) xSUB(eax, s_pCurInst->info.cycle); - - // if writeQ <= total+offset - if (!wait) // only write back if time is up - { - xCMP(eax, ptr[&s_TotalVUCycles]); - pjmp[1] = JG8(0); - } - else - { - // add (writeQ-total-offset) to s_TotalVUCycles - // necessary? - xCMP(eax, ptr[&s_TotalVUCycles]); - pjmp[2] = JLE8(0); - xMOV(ptr[&s_TotalVUCycles], eax); - x86SetJ8(pjmp[2]); - } - } - else if (wait && s_pCurInst->info.cycle < recwait) - { - xADD(ptr32[&s_TotalVUCycles], recwait); - } - - xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(p ? REG_P : REG_Q, 0))]); - xMOV(ptr32[(u32*)(p ? (uptr)&s_writeP : (uptr)&s_writeQ)], 0x80000000); - xMOV(ptr[(void*)(SuperVUGetVIAddr(p ? REG_P : REG_Q, 1))], eax); - - if (recwait == 0) - { - if (!wait) x86SetJ8(pjmp[1]); - x86SetJ8(pjmp[0]); - } - - if (wait || (!p && recwait == 0 && s_pCurInst->info.cycle >= 12) || (!p && recwait > 0 && s_pCurInst->info.cycle >= recwait)) - s_needFlush &= ~(1 << p); -} - -// executed only once per program -static u32* SuperVUStaticAlloc(u32 size) -{ - pxAssert(recVUStackPtr[s_vu] + size <= recVUStack[s_vu] + SUPERVU_STACKSIZE); - // always zero - if (size == 4) *(u32*)recVUStackPtr[s_vu] = 0; - else memset(recVUStackPtr[s_vu], 0, size); - recVUStackPtr[s_vu] += size; - return (u32*)(recVUStackPtr[s_vu] - size); -} - -static void SuperVURecompile() -{ - // save cpu state - recVUStackPtr[s_vu] = recVUStack[s_vu]; - - _initXMMregs(); - - std::list::iterator itblock; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - (*itblock)->type &= ~BLOCKTYPE_ANALYZED; - } - - s_listBlocks.front()->Recompile(); - - // make sure everything compiled - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - pxAssert(((*itblock)->type & BLOCKTYPE_ANALYZED) && (*itblock)->pcode != NULL); - } - - // link all blocks - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - VuBaseBlock::LISTBLOCKS::iterator itchild; - - pxAssert((*itblock)->blocks.size() <= ArraySize((*itblock)->pChildJumps)); - - int i = 0; - for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++) - { - - if ((u32)(uptr)(*itblock)->pChildJumps[i] == 0xffffffff) - continue; - - if ((*itblock)->pChildJumps[i] == NULL) - { - VuBaseBlock* pchild = *itchild; - - if (pchild->type & BLOCKTYPE_HASEOP) - { - pxAssert(pchild->blocks.empty()); - - xAND(ptr32[&VU0.VI[ REG_VPU_STAT ].UL], s_vu ? ~0x100 : ~0x001); // E flag - //xAND(ptr32[(&VU->GetVifRegs().stat)], ~VIF1_STAT_VEW); - - xMOV(ptr32[(&VU->VI[REG_TPC])], pchild->endpc); - JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5)); - } - // only other case is when there are two branches - else - { - pxAssert((*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH); - } - - continue; - } - - if ((u32)(uptr)(*itblock)->pChildJumps[i] & 0x80000000) - { - // relative - pxAssert((uptr)(*itblock)->pChildJumps[i] <= 0xffffffff); - (*itblock)->pChildJumps[i] = (u32*)((uptr)(*itblock)->pChildJumps[i] & 0x7fffffff); - *(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode - ((uptr)(*itblock)->pChildJumps[i] + 4); - } - else - { - *(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode; - } - - ++i; - } - } - - s_pFnHeader->pprogfunc = s_listBlocks.front()->pcode; -} - -// debug - - -u32 s_saveecx, s_saveedx, s_saveebx, s_saveesi, s_saveedi, s_saveebp; -u32 g_curdebugvu; - -//float vuDouble(u32 f); - -#ifdef PCSX2_DEBUG -static void __fastcall svudispfn( int g_curdebugvu ) -{ - static u32 i; - - if (((vudump&8) && g_curdebugvu) || ((vudump&0x80) && !g_curdebugvu)) //&& g_vu1lastrec != g_vu1last ) { - { - - if (skipparent != g_vu1lastrec) - { - for (i = 0; i < ArraySize(badaddrs); ++i) - { - if (s_svulast == badaddrs[i][1] && g_vu1lastrec == badaddrs[i][0]) - break; - } - - if (i == ArraySize(badaddrs)) - { - //static int curesp; - //__asm mov curesp, esp - //Console.WriteLn("tVU: %x %x %x", s_svulast, s_vucount, s_vufnheader); - if (g_curdebugvu) iDumpVU1Registers(); - else iDumpVU0Registers(); - s_vucount++; - } - } - - g_vu1lastrec = s_svulast; - } -} -#endif - -// frees all regs taking into account the livevars -void SuperVUFreeXMMregs(u32* livevars) -{ - for (u32 i = 0; i < iREGCNT_XMM; ++i) - { - if (xmmregs[i].inuse) - { - // same reg - if ((xmmregs[i].mode & MODE_WRITE)) - { - -#ifdef SUPERVU_INTERCACHING - if (xmmregs[i].type == XMMTYPE_VFREG) - { - if (!(livevars[1] & (1 << xmmregs[i].reg))) continue; - } - else if (xmmregs[i].type == XMMTYPE_ACC) - { - if (!(livevars[0] & (1 << REG_ACC_FLAG))) continue; - } -#endif - - if (xmmregs[i].mode & MODE_VUXYZ) - { - // ALWAYS update - u32 addr = xmmregs[i].type == XMMTYPE_VFREG ? (uptr) & VU->VF[xmmregs[i].reg] : (uptr) & VU->ACC; - - if (xmmregs[i].mode & MODE_VUZ) - { - xMOVH.PS(ptr[(void*)(addr)], xRegisterSSE((x86SSERegType)i)); - xSHUF.PS(xRegisterSSE((x86SSERegType)i), ptr[(void*)(addr)], 0xc4); - } - else - { - xMOVH.PS(xRegisterSSE((x86SSERegType)i), ptr[(void*)(addr + 8)]); - } - - xmmregs[i].mode &= ~MODE_VUXYZ; - } - - _freeXMMreg(i); - } - } - } - - //_freeXMMregs(); -} - -static u32 runCycles = 0; // Cycles to Compare to for early exit -static u32 backupEAX = 0; // Backup EAX (not sure if this is needed) -void SuperVUTestVU0Condition(u32 incstack) -{ - if (s_vu && !SUPERVU_CHECKCONDITION) return; // vu0 only - - // sometimes games spin on vu0, so be careful with - // runCycles value... woody hangs if too high - // Edit: Need to test this again, if anyone ever has a "Woody" game :p - xMOV(ptr[&backupEAX], eax); - xMOV(eax, ptr[&s_TotalVUCycles]); - xCMP(eax, ptr[&runCycles]); - xMOV(eax, ptr[&backupEAX]); - - if (incstack) - { - u8* jptr = JB8(0); - xADD(esp, incstack); - //xCALL((void*)(u32)timeout); - JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5)); - - x86SetJ8(jptr); - } - else JAE32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 6)); -} - -void VuBaseBlock::Recompile() -{ - if (type & BLOCKTYPE_ANALYZED) return; - - x86Align(16); - pcode = x86Ptr; - -#ifdef PCSX2_DEBUG - xMOV(ptr32[&s_vufnheader], s_pFnHeader->startpc); - xMOV(ptr32[(&VU->VI[REG_TPC])], startpc); - xMOV(ptr32[&s_svulast], startpc); - - std::list::iterator itparent; - for (itparent = parents.begin(); itparent != parents.end(); ++itparent) - { - if ((*itparent)->blocks.size() == 1 && (*itparent)->blocks.front()->startpc == startpc && - ((*itparent)->insts.size() < 2 || (----(*itparent)->insts.end())->regs[0].pipe != VUPIPE_BRANCH)) - { - xMOV(ptr32[&skipparent], (*itparent)->startpc); - break; - } - } - - if (itparent == parents.end()) xMOV(ptr32[&skipparent], -1); - - xMOV( ecx, s_vu ); - xCALL( (void*)svudispfn ); -#endif - - s_pCurBlock = this; - s_needFlush = 3; - pc = startpc; - g_branch = 0; - s_recWriteQ = s_recWriteP = 0; - s_XGKICKReg = -1; - s_ScheduleXGKICK = 0; - - s_ClipRead = s_PrevClipWrite = (uptr) & VU->VI[REG_CLIP_FLAG]; - s_StatusRead = s_PrevStatusWrite = (uptr) & VU->VI[REG_STATUS_FLAG]; - s_MACRead = s_PrevMACWrite = (uptr) & VU->VI[REG_MAC_FLAG]; - s_PrevIWrite = (uptr) & VU->VI[REG_I]; - s_JumpX86 = 0; - s_UnconditionalDelay = 0; - - memcpy(xmmregs, startregs, sizeof(xmmregs)); -#ifdef SUPERVU_X86CACHING - if (nStartx86 >= 0) - memcpy(x86regs, &s_vecRegArray[nStartx86], sizeof(x86regs)); - else - _initX86regs(); -#else - _initX86regs(); -#endif - - std::list::iterator itinst; - for(itinst = insts.begin(); itinst != insts.end(); itinst++) - { - s_pCurInst = &(*itinst); - if (s_JumpX86 > 0) - { - if (!x86regs[s_JumpX86].inuse) - { - // load - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_READ); - } - x86regs[s_JumpX86].needed = 1; - } - - if (s_ScheduleXGKICK && s_XGKICKReg > 0) - { - pxAssert(x86regs[s_XGKICKReg].inuse); - x86regs[s_XGKICKReg].needed = 1; - } - itinst->Recompile(itinst, vuxyz); - - if (s_ScheduleXGKICK > 0) - { - if (s_ScheduleXGKICK-- == 1) - { - recVUMI_XGKICK_(VU); - } - } - } - pxAssert(pc == endpc); - pxAssert(s_ScheduleXGKICK == 0); - - // flush flags - if (s_PrevClipWrite != (uptr)&VU->VI[REG_CLIP_FLAG]) - { - xMOV(eax, ptr[(void*)(s_PrevClipWrite)]); - xMOV(ptr[(&VU->VI[REG_CLIP_FLAG])], eax); - } - if (s_PrevStatusWrite != (uptr)&VU->VI[REG_STATUS_FLAG]) - { - xMOV(eax, ptr[(void*)(s_PrevStatusWrite)]); - xMOV(ptr[(&VU->VI[REG_STATUS_FLAG])], eax); - } - if (s_PrevMACWrite != (uptr)&VU->VI[REG_MAC_FLAG]) - { - xMOV(eax, ptr[(void*)(s_PrevMACWrite)]); - xMOV(ptr[(&VU->VI[REG_MAC_FLAG])], eax); - } -// if( s_StatusRead != (uptr)&VU->VI[REG_STATUS_FLAG] ) { -// // only lower 8 bits valid! -// xMOVZX(eax, ptr8[(u8*)(s_StatusRead)]); -// xMOV(ptr[(&VU->VI[REG_STATUS_FLAG])], eax); -// } -// if( s_MACRead != (uptr)&VU->VI[REG_MAC_FLAG] ) { -// // only lower 8 bits valid! -// xMOVZX(eax, ptr8[(u8*)(s_MACRead)]); -// xMOV(ptr[(&VU->VI[REG_MAC_FLAG])], eax); -// } - if (s_PrevIWrite != (uptr)&VU->VI[REG_I]) - { - xMOV(ptr32[(&VU->VI[REG_I])], *(u32*)s_PrevIWrite); // never changes - } - - xADD(ptr32[&s_TotalVUCycles], cycles); - - // compute branches, jumps, eop - if (type & BLOCKTYPE_HASEOP) - { - // end - _freeXMMregs(); - _freeX86regs(); - xAND(ptr32[&VU0.VI[ REG_VPU_STAT ].UL], s_vu ? ~0x100 : ~0x001); // E flag - //xAND(ptr32[(&VU->GetVifRegs().stat)], ~VIF1_STAT_VEW); - - if (!g_branch) xMOV(ptr32[(&VU->VI[REG_TPC])], endpc); - - JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5)); - } - else - { - - u32 livevars[2] = {0}; - - std::list::iterator lastinst = GetInstIterAtPc(endpc - 8); - lastinst++; - - if (lastinst != insts.end()) - { - livevars[0] = lastinst->livevars[0]; - livevars[1] = lastinst->livevars[1]; - } - else - { - // take from children - if (!blocks.empty()) - { - LISTBLOCKS::iterator itchild; - for(itchild = blocks.begin(); itchild != blocks.end(); itchild++) - { - livevars[0] |= (*itchild)->insts.front().livevars[0]; - livevars[1] |= (*itchild)->insts.front().livevars[1]; - } - } - else - { - livevars[0] = ~0; - livevars[1] = ~0; - } - } - - SuperVUFreeXMMregs(livevars); - - // get rid of any writes, otherwise _freeX86regs will write - x86regs[s_JumpX86].mode &= ~MODE_WRITE; - - if (g_branch == 1) - { - if (!x86regs[s_JumpX86].inuse) - { - pxAssert(x86regs[s_JumpX86].type == X86TYPE_VUJUMP); - s_JumpX86 = -1; // notify to jump from g_recWriteback - } - } - - // align VI regs -#ifdef SUPERVU_X86CACHING - if (nEndx86 >= 0) - { - _x86regs* endx86 = &s_vecRegArray[nEndx86]; - for (int i = 0; i < iREGCNT_GPR; ++i) - { - if( i == ESP || i == EBP ) continue; - - if (endx86[i].inuse) - { - - if (s_JumpX86 == i && x86regs[s_JumpX86].inuse) - { - x86regs[s_JumpX86].inuse = 0; - x86regs[eax.GetId()].inuse = 1; - xMOV(eax, xRegister32(s_JumpX86)); - s_JumpX86 = eax.GetId(); - } - - if (x86regs[i].inuse) - { - if (x86regs[i].type == endx86[i].type && x86regs[i].reg == endx86[i].reg) - { - _freeX86reg(i); - // will continue to use it - continue; - } - -#ifdef SUPERVU_INTERCACHING - if (x86regs[i].type == (X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0))) - { - if (livevars[0] & (1 << x86regs[i].reg)) - _freeX86reg(i); - else - x86regs[i].inuse = 0; - } - else -#endif - { - _freeX86reg(i); - } - } - - // realloc - _allocX86reg(i, endx86[i].type, endx86[i].reg, MODE_READ); - if (x86regs[i].mode & MODE_WRITE) - { - _freeX86reg(i); - x86regs[i].inuse = 1; - } - } - else _freeX86reg(i); - } - } - else _freeX86regs(); -#else - _freeX86regs(); -#endif - - // store the last block executed - xMOV(ptr32[&g_nLastBlockExecuted], s_pCurBlock->startpc); - - switch (g_branch) - { - case 1: // branch, esi has new prog - - SuperVUTestVU0Condition(0); - - if (s_JumpX86 == -1) - xJMP(ptr32[&g_recWriteback]); - else - xJMP(xRegister32(s_JumpX86)); - - break; - case 4: // jalr - pChildJumps[0] = (u32*)0xffffffff; - // fall through - - case 0x10: // jump, esi has new vupc - { - _freeXMMregs(); - _freeX86regs(); - - SuperVUTestVU0Condition(8); - - // already onto stack - xCALL((void*)(uptr)SuperVUGetProgram); - xADD(esp, 8); - xJMP(eax); - break; - } - - case 0x13: // jr with uncon branch, uncond branch takes precendence (dropship) - { -// s32 delta = (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) << 3; -// ADD32ItoRmOffset(ESP, delta, 0); - - xADD(esp, 8); // restore - pChildJumps[0] = (u32*)((uptr)JMP32(0) | 0x80000000); - break; - } - case 0: - case 3: // unconditional branch - pChildJumps[s_UnconditionalDelay] = (u32*)((uptr)JMP32(0) | 0x80000000); - break; - - default: - DevCon.Error("Bad branch %x\n", g_branch); - pxAssert(0); - break; - } - } - - pendcode = x86Ptr; - type |= BLOCKTYPE_ANALYZED; - - LISTBLOCKS::iterator itchild; - for(itchild = blocks.begin(); itchild != blocks.end(); itchild++) - { - (*itchild)->Recompile(); - } -} - -#define GET_VUXYZMODE(reg) 0//((vuxyz&(1<<(reg)))?MODE_VUXYZ:0) - -int VuInstruction::SetCachedRegs(int upper, u32 vuxyz) -{ - if (vfread0[upper] >= 0) - { - SuperVUFreeXMMreg(vfread0[upper], XMMTYPE_VFREG, regs[upper].VFread0); - _allocVFtoXMMreg(VU, vfread0[upper], regs[upper].VFread0, MODE_READ | GET_VUXYZMODE(regs[upper].VFread0)); - } - if (vfread1[upper] >= 0) - { - SuperVUFreeXMMreg(vfread1[upper], XMMTYPE_VFREG, regs[upper].VFread1); - _allocVFtoXMMreg(VU, vfread1[upper], regs[upper].VFread1, MODE_READ | GET_VUXYZMODE(regs[upper].VFread1)); - } - if (vfacc[upper] >= 0 && (regs[upper].VIread&(1 << REG_ACC_FLAG))) - { - SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0); - _allocACCtoXMMreg(VU, vfacc[upper], MODE_READ); - } - if (vfwrite[upper] >= 0) - { - pxAssert(regs[upper].VFwrite > 0); - SuperVUFreeXMMreg(vfwrite[upper], XMMTYPE_VFREG, regs[upper].VFwrite); - _allocVFtoXMMreg(VU, vfwrite[upper], regs[upper].VFwrite, - MODE_WRITE | (regs[upper].VFwxyzw != 0xf ? MODE_READ : 0) | GET_VUXYZMODE(regs[upper].VFwrite)); - } - if (vfacc[upper] >= 0 && (regs[upper].VIwrite&(1 << REG_ACC_FLAG))) - { - SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0); - _allocACCtoXMMreg(VU, vfacc[upper], MODE_WRITE | (regs[upper].VFwxyzw != 0xf ? MODE_READ : 0)); - } - - int info = PROCESS_VU_SUPER; - if (vfread0[upper] >= 0) info |= PROCESS_EE_SET_S(vfread0[upper]); - if (vfread1[upper] >= 0) info |= PROCESS_EE_SET_T(vfread1[upper]); - if (vfacc[upper] >= 0) info |= PROCESS_VU_SET_ACC(vfacc[upper]); - if (vfwrite[upper] >= 0) - { - if (regs[upper].VFwrite == _Ft_ && vfread1[upper] < 0) - { - info |= PROCESS_EE_SET_T(vfwrite[upper]); - } - else - { - pxAssert(regs[upper].VFwrite == _Fd_); - info |= PROCESS_EE_SET_D(vfwrite[upper]); - } - } - - if (!(vffree[upper]&VFFREE_INVALID0)) - { - SuperVUFreeXMMreg(vffree[upper]&0xf, XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, vffree[upper]&0xf); - } - info |= PROCESS_VU_SET_TEMP(vffree[upper] & 0xf); - - if (vfflush[upper] >= 0) - { - SuperVUFreeXMMreg(vfflush[upper], XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, vfflush[upper]); - } - - if (upper && (regs[upper].VIwrite & (1 << REG_CLIP_FLAG))) - { - // CLIP inst, need two extra temp registers, put it EEREC_D and EEREC_ACC - pxAssert(vfwrite[upper] == -1); - SuperVUFreeXMMreg((vffree[upper] >> 8)&0xf, XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 8)&0xf); - info |= PROCESS_EE_SET_D((vffree[upper] >> 8) & 0xf); - - SuperVUFreeXMMreg((vffree[upper] >> 16)&0xf, XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 16)&0xf); - info |= PROCESS_EE_SET_ACC((vffree[upper] >> 16) & 0xf); - - _freeXMMreg((vffree[upper] >> 8)&0xf); // don't need anymore - _freeXMMreg((vffree[upper] >> 16)&0xf); // don't need anymore - } - else if (regs[upper].VIwrite & (1 << REG_P)) - { - SuperVUFreeXMMreg((vffree[upper] >> 8)&0xf, XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 8)&0xf); - info |= PROCESS_EE_SET_D((vffree[upper] >> 8) & 0xf); - _freeXMMreg((vffree[upper] >> 8)&0xf); // don't need anymore - } - - if (vfflush[upper] >= 0) _freeXMMreg(vfflush[upper]); - if (!(vffree[upper]&VFFREE_INVALID0)) - _freeXMMreg(vffree[upper]&0xf); // don't need anymore - - if ((regs[0].VIwrite | regs[1].VIwrite) & ((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG))) - info |= PROCESS_VU_UPDATEFLAGS; - - return info; -} - -void VuInstruction::Recompile(std::list::iterator& itinst, u32 vuxyz) -{ - //static PCSX2_ALIGNED16(VECTOR _VF); - //static PCSX2_ALIGNED16(VECTOR _VFc); - u32 *code_ptr; - u8* pjmp; - int vfregstore = 0; - - pxAssert(s_pCurInst == this); - s_WriteToReadQ = 0; - - code_ptr = (u32*) & VU->Micro[ pc ]; - - if (type & INST_Q_READ) - SuperVUFlush(0, (code_ptr[0] == 0x800003bf) || !!(regs[0].VIwrite & (1 << REG_Q))); - if (type & INST_P_READ) - SuperVUFlush(1, (code_ptr[0] == 0x800007bf) || !!(regs[0].VIwrite & (1 << REG_P))); - - if (type & INST_DUMMY) - { - - // find nParentPc - VuInstruction* pparentinst = NULL; - - // if true, will check if parent block was executed before getting the results of the flags (superman returns) - int nParentCheckForExecution = -1; - -// int badaddrs[] = { -// 0x60,0x68,0x70,0x60,0x68,0x70,0x88,0x90,0x98,0x98,0xa8,0xb8,0x88,0x90, -// 0x4a8,0x4a8,0x398,0x3a0,0x3a8,0xa0 -// }; - -#ifdef SUPERVU_PROPAGATEFLAGS - if (nParentPc != -1 && (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc)) - { - -// if( !s_vu ) { -// for(int j = 0; j < ARRAYSIZE(badaddrs); ++j) { -// if( badaddrs[j] == nParentPc ) -// goto NoParent; -// } -// } - - std::list::iterator itblock; - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - if (nParentPc >= (*itblock)->startpc && nParentPc < (*itblock)->endpc) - { - pparentinst = &(*(*itblock)->GetInstIterAtPc(nParentPc)); - //if( !s_vu ) SysPrintf("%x ", nParentPc); - if (find(s_pCurBlock->parents.begin(), s_pCurBlock->parents.end(), *itblock) != s_pCurBlock->parents.end()) - nParentCheckForExecution = (*itblock)->startpc; - break; - } - } - - pxAssert(pparentinst != NULL); - } -#endif - - if (type & INST_CLIP_WRITE) - { - if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) - { - - if (!CHECK_VUCLIPFLAGHACK && pparentinst != NULL) - { - - if (nParentCheckForExecution >= 0) - { - if (pparentinst->pClipWrite == 0) - pparentinst->pClipWrite = (uptr)SuperVUStaticAlloc(4); - - if (s_ClipRead == 0) - s_ClipRead = (uptr) & VU->VI[REG_CLIP_FLAG]; - - xCMP(ptr32[&g_nLastBlockExecuted], nParentCheckForExecution); - u8* jptr = JNE8(0); - xCMP(ptr32[&s_ClipRead], (uptr)&VU->VI[REG_CLIP_FLAG]); - u8* jptr2 = JE8(0); - xMOV(eax, ptr[(void*)(pparentinst->pClipWrite)]); - xMOV(ptr[(void*)(s_ClipRead)], eax); - x86SetJ8(jptr); - x86SetJ8(jptr2); - } - } - else s_ClipRead = (uptr) & VU->VI[REG_CLIP_FLAG]; - } - else - { - s_ClipRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pClipWrite; - if (s_ClipRead == 0) Console.WriteLn("super ClipRead allocation error!"); - } - } - - // before modifying, check if they will ever be read - if (s_pCurBlock->type & BLOCKTYPE_MACFLAGS) - { - - u8 outofblock = 0; - if (type & INST_STATUS_WRITE) - { - - if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) - { - - // reading from out of this block, so already flushed to mem - if (pparentinst != NULL) //&& pparentinst->pStatusWrite != NULL ) { - { - - // might not have processed it yet, so reserve a mem loc - if (pparentinst->pStatusWrite == 0) - { - pparentinst->pStatusWrite = (uptr)SuperVUStaticAlloc(4); - //xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0); - } - -// if( s_pCurBlock->prevFlagsOutOfBlock && s_StatusRead != NULL ) { -// // or instead since don't now which parent we came from -// xMOV(eax, ptr[(void*)(pparentinst->pStatusWrite)]); -// xOR(ptr[(void*)(s_StatusRead)], eax); -// xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0); -// } - - if (nParentCheckForExecution >= 0) - { - - // don't now which parent we came from, so have to check -// uptr tempstatus = (uptr)SuperVUStaticAlloc(4); -// if( s_StatusRead != NULL ) -// xMOV(eax, ptr[(void*)(s_StatusRead)]); -// else -// xMOV(eax, ptr[(&VU->VI[REG_STATUS_FLAG])]); -// s_StatusRead = tempstatus; - - if (s_StatusRead == 0) - s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG]; - - xCMP(ptr32[&g_nLastBlockExecuted], nParentCheckForExecution); - u8* jptr = JNE8(0); - xMOV(eax, ptr[(void*)(pparentinst->pStatusWrite)]); - xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0); - xMOV(ptr[(void*)(s_StatusRead)], eax); - x86SetJ8(jptr); - } - else - { - uptr tempstatus = (uptr)SuperVUStaticAlloc(4); - xMOV(eax, ptr[(void*)(pparentinst->pStatusWrite)]); - xMOV(ptr[(void*)(tempstatus)], eax); - xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0); - s_StatusRead = tempstatus; - } - - outofblock = 2; - } - else - s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG]; - } - else - { - s_StatusRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pStatusWrite; - if (s_StatusRead == 0) Console.WriteLn("super StatusRead allocation error!"); -// if( pc >= (u32)s_pCurBlock->endpc-8 ) { -// // towards the end, so variable might be leaded to another block (silent hill 4) -// uptr tempstatus = (uptr)SuperVUStaticAlloc(4); -// xMOV(eax, ptr[(void*)(s_StatusRead)]); -// xMOV(ptr[(void*)(tempstatus)], eax); -// xMOV(ptr32[(u32*)(s_StatusRead)], 0); -// s_StatusRead = tempstatus; -// } - } - } - if (type & INST_MAC_WRITE) - { - - if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) - { - // reading from out of this block, so already flushed to mem - - if (pparentinst != NULL) //&& pparentinst->pMACWrite != NULL ) { - { - // necessary for (katamari) - // towards the end, so variable might be leaked to another block (silent hill 4) - - // might not have processed it yet, so reserve a mem loc - if (pparentinst->pMACWrite == 0) - { - pparentinst->pMACWrite = (uptr)SuperVUStaticAlloc(4); - //xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0); - } - -// if( s_pCurBlock->prevFlagsOutOfBlock && s_MACRead != NULL ) { -// // or instead since don't now which parent we came from -// xMOV(eax, ptr[(void*)(pparentinst->pMACWrite)]); -// xOR(ptr[(void*)(s_MACRead)], eax); -// xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0); -// } - if (nParentCheckForExecution >= 0) - { - - // don't now which parent we came from, so have to check -// uptr tempmac = (uptr)SuperVUStaticAlloc(4); -// if( s_MACRead != NULL ) -// xMOV(eax, ptr[(void*)(s_MACRead)]); -// else -// xMOV(eax, ptr[(&VU->VI[REG_MAC_FLAG])]); -// s_MACRead = tempmac; - - if (s_MACRead == 0) s_MACRead = (uptr) & VU->VI[REG_MAC_FLAG]; - - xCMP(ptr32[&g_nLastBlockExecuted], nParentCheckForExecution); - u8* jptr = JNE8(0); - xMOV(eax, ptr[(void*)(pparentinst->pMACWrite)]); - xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0); - xMOV(ptr[(void*)(s_MACRead)], eax); - x86SetJ8(jptr); - } - else - { - uptr tempMAC = (uptr)SuperVUStaticAlloc(4); - xMOV(eax, ptr[(void*)(pparentinst->pMACWrite)]); - xMOV(ptr[(void*)(tempMAC)], eax); - xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0); - s_MACRead = tempMAC; - } - - outofblock = 2; - } - else - s_MACRead = (uptr) & VU->VI[REG_MAC_FLAG]; - -// if( pc >= (u32)s_pCurBlock->endpc-8 ) { -// // towards the end, so variable might be leaked to another block (silent hill 4) -// uptr tempMAC = (uptr)SuperVUStaticAlloc(4); -// xMOV(eax, ptr[(void*)(s_MACRead)]); -// xMOV(ptr[(void*)(tempMAC)], eax); -// xMOV(ptr32[(u32*)(s_MACRead)], 0); -// s_MACRead = tempMAC; -// } - } - else - { - s_MACRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pMACWrite; - } - } - - s_pCurBlock->prevFlagsOutOfBlock = outofblock; - } - else if (pparentinst != NULL) - { - // make sure to reset the mac and status flags! (katamari) - if (pparentinst->pStatusWrite) - xMOV(ptr32[(u32*)(pparentinst->pStatusWrite)], 0); - if (pparentinst->pMACWrite) - xMOV(ptr32[(u32*)(pparentinst->pMACWrite)], 0); - } - - pxAssert(s_ClipRead != 0); - pxAssert(s_MACRead != 0); - pxAssert(s_StatusRead != 0); - - return; - } - - s_pCurBlock->prevFlagsOutOfBlock = 0; - - if( IsDebugBuild ) - xMOV(eax, pc); - - pxAssert(!(type & (INST_CLIP_WRITE | INST_STATUS_WRITE | INST_MAC_WRITE))); - pc += 8; - - std::list::const_iterator itinst2; - - if ((regs[0].VIwrite | regs[1].VIwrite) & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG))) - { - if (s_pCurBlock->type & BLOCKTYPE_MACFLAGS) - { - if (pMACWrite == 0) - { - pMACWrite = (uptr)SuperVUStaticAlloc(4); - //xMOV(ptr32[(u32*)(pMACWrite)], 0); - } - if (pStatusWrite == 0) - { - pStatusWrite = (uptr)SuperVUStaticAlloc(4); - //xMOV(ptr32[(u32*)(pStatusWrite)], 0); - } - } - else - { - pxAssert(s_StatusRead == (uptr)&VU->VI[REG_STATUS_FLAG]); - pxAssert(s_MACRead == (uptr)&VU->VI[REG_MAC_FLAG]); - pMACWrite = s_MACRead; - pStatusWrite = s_StatusRead; - } - } - - if ((pClipWrite == 0) && ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_CLIP_FLAG))) - { - pClipWrite = (uptr)SuperVUStaticAlloc(4); - //xMOV(ptr32[(u32*)(pClipWrite)], 0); - } - -#ifdef SUPERVU_X86CACHING - // redo the counters so that the proper regs are released - for (int j = 0; j < iREGCNT_GPR; ++j) - { - if (x86regs[j].inuse && X86_ISVI(x86regs[j].type)) - { - int count = 0; - itinst2 = itinst; - - while (itinst2 != s_pCurBlock->insts.end()) - { - if ((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite | itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) && (1 << x86regs[j].reg)) - break; - - ++count; - ++itinst2; - } - - x86regs[j].counter = 1000 - count; - } - } -#endif - - if (s_vu == 0 && (code_ptr[1] & 0x20000000)) // M flag - { - xOR(ptr8[(u8*)((uptr)&VU->flags)], VUFLAG_MFLAGSET); - } - if (code_ptr[1] & 0x10000000) // D flag - { - xTEST(ptr32[&VU0.VI[REG_FBRST].UL], s_vu ? 0x400 : 0x004); - u8* jptr = JZ8(0); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], s_vu ? 0x200 : 0x002); - xMOV( ecx, s_vu ? INTC_VU1 : INTC_VU0 ); - xCALL( (void*)hwIntcIrq ); - x86SetJ8(jptr); - } - if (code_ptr[1] & 0x08000000) // T flag - { - xTEST(ptr32[&VU0.VI[REG_FBRST].UL], s_vu ? 0x800 : 0x008); - u8* jptr = JZ8(0); - xOR(ptr32[&VU0.VI[REG_VPU_STAT].UL], s_vu ? 0x400 : 0x004); - xMOV( ecx, s_vu ? INTC_VU1 : INTC_VU0 ); - xCALL( (void*)hwIntcIrq ); - x86SetJ8(jptr); - } - - // check upper flags - if (code_ptr[1] & 0x80000000) // I flag - { - - pxAssert(!(regs[0].VIwrite & ((1 << REG_Q) | (1 << REG_P)))); - - VU->code = code_ptr[1]; - s_vuInfo = SetCachedRegs(1, vuxyz); - if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; - if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; - - recVU_UPPER_OPCODE[ VU->code & 0x3f ](VU, s_vuInfo); - - s_PrevIWrite = (uptr)code_ptr; - _clearNeededXMMregs(); - _clearNeededX86regs(); - } - else - { - if (regs[0].VIwrite & (1 << REG_Q)) - { - - // search for all the insts between this inst and writeback - itinst2 = itinst; - ++itinst2; - u32 cacheq = (itinst2 == s_pCurBlock->insts.end()); - u32* codeptr2 = code_ptr + 2; - - while (itinst2 != s_pCurBlock->insts.end()) - { - if (!(itinst2->type & INST_DUMMY) && ((itinst2->regs[0].VIwrite&(1 << REG_Q)) || codeptr2[0] == 0x800003bf)) // waitq, or fdiv inst - { - break; - } - if ((itinst2->type & INST_Q_WRITE) && itinst2->nParentPc == (int)pc - 8) - { - break; - } - if (itinst2->type & INST_Q_READ) - { - cacheq = 1; - break; - } - if (itinst2->type & INST_DUMMY) - { - ++itinst2; - continue; - } - codeptr2 += 2; - ++itinst2; - } - - if (itinst2 == s_pCurBlock->insts.end()) - cacheq = 1; - - int x86temp = -1; - if (cacheq) - x86temp = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); - - // new is written so flush old - // if type & INST_Q_READ, already flushed - if (!(type & INST_Q_READ) && s_recWriteQ == 0) xMOV(eax, ptr[&s_writeQ]); - - if (cacheq) - xMOV(xRegister32(x86temp), ptr[&s_TotalVUCycles]); - - if (!(type & INST_Q_READ)) - { - if (s_recWriteQ == 0) - { - xOR(eax, eax); - pjmp = JS8(0); - xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(REG_Q, 0))]); - xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_Q, 1))], eax); - x86SetJ8(pjmp); - } - else if (s_needFlush & 1) - { - xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(REG_Q, 0))]); - xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_Q, 1))], eax); - s_needFlush &= ~1; - } - } - - // write new Q - if (cacheq) - { - pxAssert(s_pCurInst->pqcycles > 1); - xADD(xRegister32(x86temp), s_pCurInst->info.cycle + s_pCurInst->pqcycles); - xMOV(ptr[&s_writeQ], xRegister32(x86temp)); - s_needFlush |= 1; - } - else - { - // won't be writing back - s_WriteToReadQ = 1; - s_needFlush &= ~1; - xMOV(ptr32[&s_writeQ], 0x80000001); - } - - s_recWriteQ = s_pCurInst->info.cycle + s_pCurInst->pqcycles; - - if (x86temp >= 0) - _freeX86reg(x86temp); - } - - if (regs[0].VIwrite & (1 << REG_P)) - { - int x86temp = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); - - // new is written so flush old - if (!(type & INST_P_READ) && s_recWriteP == 0) - xMOV(eax, ptr[&s_writeP]); - xMOV(xRegister32(x86temp), ptr[&s_TotalVUCycles]); - - if (!(type & INST_P_READ)) - { - if (s_recWriteP == 0) - { - xOR(eax, eax); - pjmp = JS8(0); - xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(REG_P, 0))]); - xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_P, 1))], eax); - x86SetJ8(pjmp); - } - else if (s_needFlush & 2) - { - xMOV(eax, ptr[(void*)(SuperVUGetVIAddr(REG_P, 0))]); - xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_P, 1))], eax); - s_needFlush &= ~2; - } - } - - // write new P - pxAssert(s_pCurInst->pqcycles > 1); - xADD(xRegister32(x86temp), s_pCurInst->info.cycle + s_pCurInst->pqcycles); - xMOV(ptr[&s_writeP], xRegister32(x86temp)); - s_needFlush |= 2; - - s_recWriteP = s_pCurInst->info.cycle + s_pCurInst->pqcycles; - - _freeX86reg(x86temp); - } - - // waitq - if (code_ptr[0] == 0x800003bf) SuperVUFlush(0, 1); - // waitp - if (code_ptr[0] == 0x800007bf) SuperVUFlush(1, 1); - -#ifdef PCSX2_DEVBUILD - if (regs[1].VIread & regs[0].VIwrite & ~((1 << REG_Q) | (1 << REG_P) | (1 << REG_VF0_FLAG) | (1 << REG_ACC_FLAG))) - { - Console.Warning("*PCSX2*: Warning, VI write to the same reg %x in both lower/upper cycle %x", regs[1].VIread & regs[0].VIwrite, s_pCurBlock->startpc); - } -#endif - - u32 modewrite = 0; - if (vfwrite[1] >= 0 && xmmregs[vfwrite[1]].inuse && xmmregs[vfwrite[1]].type == XMMTYPE_VFREG && xmmregs[vfwrite[1]].reg == regs[1].VFwrite) - modewrite = xmmregs[vfwrite[1]].mode & MODE_WRITE; - - VU->code = code_ptr[1]; - s_vuInfo = SetCachedRegs(1, vuxyz); - - if (vfwrite[1] >= 0) - { - pxAssert(regs[1].VFwrite > 0); - - if (vfwrite[0] == vfwrite[1]) - { - //Console.WriteLn("*PCSX2*: Warning, VF write to the same reg in both lower/upper cycle %x", s_pCurBlock->startpc); - } - - if (vfread0[0] == vfwrite[1] || vfread1[0] == vfwrite[1]) - { - pxAssert(regs[0].VFread0 == regs[1].VFwrite || regs[0].VFread1 == regs[1].VFwrite); - pxAssert(vfflush[0] >= 0); - if (modewrite) - { - xMOVAPS(ptr[(&VU->VF[regs[1].VFwrite])], xRegisterSSE((x86SSERegType)vfwrite[1])); - } - vfregstore = 1; - } - } - - if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; - if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; - - recVU_UPPER_OPCODE[ VU->code & 0x3f ](VU, s_vuInfo); - _clearNeededXMMregs(); - _clearNeededX86regs(); - - // necessary because status can be set by both upper and lower - if (regs[1].VIwrite & (1 << REG_STATUS_FLAG)) - { - pxAssert(pStatusWrite != 0); - s_PrevStatusWrite = pStatusWrite; - } - - VU->code = code_ptr[0]; - s_vuInfo = SetCachedRegs(0, vuxyz); - - if (vfregstore) - { - // load - xMOVAPS(xRegisterSSE(vfflush[0]), ptr[(&VU->VF[regs[1].VFwrite])]); - - pxAssert(xmmregs[vfwrite[1]].mode & MODE_WRITE); - - // replace with vfflush - if (_Fs_ == regs[1].VFwrite) - { - s_vuInfo &= ~PROCESS_EE_SET_S(0xf); - s_vuInfo |= PROCESS_EE_SET_S(vfflush[0]); - } - if (_Ft_ == regs[1].VFwrite) - { - s_vuInfo &= ~PROCESS_EE_SET_T(0xf); - s_vuInfo |= PROCESS_EE_SET_T(vfflush[0]); - } - - xmmregs[vfflush[0]].mode |= MODE_NOFLUSH | MODE_WRITE; // so that lower inst doesn't flush - } - - // notify vuinsts that upper inst is a fmac - if (regs[1].pipe == VUPIPE_FMAC) - s_vuInfo |= PROCESS_VU_SET_FMAC(); - - if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; - if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; - -#ifdef SUPERVU_VIBRANCHDELAY - if (type & INST_CACHE_VI) - { - pxAssert(vicached >= 0); - int cachedreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), vicached, MODE_READ); - xMOV(ptr[&s_VIBranchDelay], xRegister32(cachedreg)); - } -#endif - - // check if inst before branch and the write is the same as the read in the branch (wipeout) -// int oldreg=0; -// if( pc == s_pCurBlock->endpc-16 ) { -// itinst2 = itinst; ++itinst2; -// if( itinst2->regs[0].pipe == VUPIPE_BRANCH && (itinst->regs[0].VIwrite&itinst2->regs[0].VIread) ) { -// -// xCALL((void*)(u32)branchfn); -// pxAssert( itinst->regs[0].VIwrite & 0xffff ); -// Console.WriteLn("vi write before branch"); -// for(s_CacheVIReg = 0; s_CacheVIReg < 16; ++s_CacheVIReg) { -// if( itinst->regs[0].VIwrite & (1<endpc-8 && s_CacheVIReg >= 0 ) { -// pxAssert( s_CacheVIX86 > 0 && x86regs[s_CacheVIX86].inuse && x86regs[s_CacheVIX86].reg == s_CacheVIReg && x86regs[s_CacheVIX86].type == X86TYPE_VITEMP ); -// -// oldreg = _allocX86reg(xEmptyReg, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), s_CacheVIReg, MODE_READ); -// x86regs[s_CacheVIX86].needed = 1; -// pxAssert( x86regs[oldreg].mode & MODE_WRITE ); -// -// x86regs[s_CacheVIX86].type = X86TYPE_VI|(s_vu?X86TYPE_VU1:0); -// x86regs[oldreg].type = X86TYPE_VITEMP; -// } - - recVU_LOWER_OPCODE[ VU->code >> 25 ](VU, s_vuInfo); - -// if( pc == s_pCurBlock->endpc-8 && s_CacheVIReg >= 0 ) { -// // revert -// x86regs[s_CacheVIX86].inuse = 0; -// x86regs[oldreg].type = X86TYPE_VI|(s_vu?X86TYPE_VU1:0); -// } - - _clearNeededXMMregs(); - _clearNeededX86regs(); - } - - // clip is always written so ok - if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_CLIP_FLAG)) - { - pxAssert(pClipWrite != 0); - s_PrevClipWrite = pClipWrite; - } - - if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_STATUS_FLAG)) - { - pxAssert(pStatusWrite != 0); - s_PrevStatusWrite = pStatusWrite; - } - - if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_MAC_FLAG)) - { - pxAssert(pMACWrite != 0); - s_PrevMACWrite = pMACWrite; - } -} - -/////////////////////////////////// -// Super VU Recompilation Tables // -/////////////////////////////////// - -void recVUMI_BranchHandle() -{ - int bpc = _recbranchAddr(VU->code); - int curjump = 0; - - if (s_pCurInst->type & INST_BRANCH_DELAY) - { - pxAssert((g_branch&0x17) != 0x10 && (g_branch&0x17) != 4); // no jump handlig for now - - if ((g_branch & 0x7) == 3) - { - // previous was a direct jump - curjump = 1; - } - else if (g_branch & 1) curjump = 2; - } - - pxAssert(s_JumpX86 > 0); - - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) - xMOV(ptr32[(u32*)(SuperVUGetVIAddr(REG_TPC, 0))], bpc); - xMOV(xRegister32(s_JumpX86), 1); // use 1 to disable optimization to XOR - s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr - 1; - - if (!(s_pCurInst->type & INST_BRANCH_DELAY)) - { - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[ 0 ]); - - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) - xMOV(ptr32[(u32*)(SuperVUGetVIAddr(REG_TPC, 0))], pc + 8); - xMOV(xRegister32(s_JumpX86), 1); // use 1 to disable optimization to XOR - s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr - 1; - - x86SetJ8(j8Ptr[ 1 ]); - } - else - x86SetJ8(j8Ptr[ 0 ]); - - g_branch |= 1; -} - -// supervu specific insts -void recVUMI_IBQ_prep() -{ - int isreg, itreg; - - if (_Is_ == 0) - { -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_It_) - { - itreg = -1; - } - else -#endif - { - itreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_, MODE_READ); - } - - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - - if (itreg >= 0) - { - xCMP(xRegister16(itreg), 0); - } - else xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_It_, 1))], 0); - } - else if (_It_ == 0) - { -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - - if (isreg >= 0) - { - xCMP(xRegister16(isreg), 0); - } - else xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0); - - } - else - { - _addNeededX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_It_) - { - itreg = -1; - - if (isreg <= 0) - { - // allocate fsreg - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_) - { - isreg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, MODE_READ | MODE_WRITE); - xMOV(xRegister32(isreg), ptr[(void*)(SuperVUGetVIAddr(_Is_, 1))]); - } - else - isreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - } - else -#endif - { - itreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_, MODE_READ); - } - - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - - if (isreg >= 0) - { - if (itreg >= 0) - { - xCMP(xRegister16(isreg), xRegister16(itreg)); - } - else xCMP(xRegister16(isreg), ptr[(void*)(SuperVUGetVIAddr(_It_, 1))]); - } - else if (itreg >= 0) - { - xCMP(xRegister16(itreg), ptr[(void*)(SuperVUGetVIAddr(_Is_, 1))]); - } - else - { - isreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - xCMP(xRegister16(isreg), ptr[(void*)(SuperVUGetVIAddr(_It_, 1))]); - } - } -} - -void recVUMI_IBEQ(VURegs* vuu, s32 info) -{ - recVUMI_IBQ_prep(); - j8Ptr[ 0 ] = JNE8(0); - recVUMI_BranchHandle(); -} - -void recVUMI_IBGEZ(VURegs* vuu, s32 info) -{ - int isreg; - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - if (isreg >= 0) - { - xTEST(xRegister16(isreg), xRegister16(isreg)); - j8Ptr[ 0 ] = JS8(0); - } - else - { - xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0x0); - j8Ptr[ 0 ] = JL8(0); - } - - recVUMI_BranchHandle(); -} - -void recVUMI_IBGTZ(VURegs* vuu, s32 info) -{ - int isreg; - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - if (isreg >= 0) - { - xCMP(xRegister16(isreg), 0); - j8Ptr[ 0 ] = JLE8(0); - } - else - { - xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0x0); - j8Ptr[ 0 ] = JLE8(0); - } - recVUMI_BranchHandle(); -} - -void recVUMI_IBLEZ(VURegs* vuu, s32 info) -{ - int isreg; - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - if (isreg >= 0) - { - xCMP(xRegister16(isreg), 0); - j8Ptr[ 0 ] = JG8(0); - } - else - { - xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0x0); - j8Ptr[ 0 ] = JG8(0); - } - recVUMI_BranchHandle(); -} - -void recVUMI_IBLTZ(VURegs* vuu, s32 info) -{ - int isreg; - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == (s8)_Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - if (isreg >= 0) - { - xTEST(xRegister16(isreg), xRegister16(isreg)); - j8Ptr[ 0 ] = JNS8(0); - } - else - { - xCMP(ptr16[(u16*)(SuperVUGetVIAddr(_Is_, 1))], 0x0); - j8Ptr[ 0 ] = JGE8(0); - } - recVUMI_BranchHandle(); -} - -void recVUMI_IBNE(VURegs* vuu, s32 info) -{ - recVUMI_IBQ_prep(); - j8Ptr[ 0 ] = JE8(0); - recVUMI_BranchHandle(); -} - -void recVUMI_B(VURegs* vuu, s32 info) -{ - // supervu will take care of the rest - int bpc = _recbranchAddr(VU->code); - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) - xMOV(ptr32[(u32*)(SuperVUGetVIAddr(REG_TPC, 0))], bpc); - - // loops to self, so check condition - if (bpc == s_pCurBlock->startpc && (s_vu == 0 || SUPERVU_CHECKCONDITION)) - { - SuperVUTestVU0Condition(0); - } - - if (s_pCurBlock->blocks.size() > 1) - { - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - xMOV(xRegister32(s_JumpX86), 1); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr - 1; - s_UnconditionalDelay = 1; - } - - g_branch |= 3; -} - -void recVUMI_BAL(VURegs* vuu, s32 info) -{ - int bpc = _recbranchAddr(VU->code); - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) - xMOV(ptr32[(u32*)(SuperVUGetVIAddr(REG_TPC, 0))], bpc); - - // loops to self, so check condition - if (bpc == s_pCurBlock->startpc && (s_vu == 0 || SUPERVU_CHECKCONDITION)) - { - SuperVUTestVU0Condition(0); - } - - if (_It_) - { - _deleteX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_, 2); - xMOV(ptr16[(u16*)(SuperVUGetVIAddr(_It_, 0))], (pc + 8) >> 3); - } - - if (s_pCurBlock->blocks.size() > 1) - { - s_JumpX86 = _allocX86reg(xEmptyReg, X86TYPE_VUJUMP, 0, MODE_WRITE); - xMOV(xRegister32(s_JumpX86), 1); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr - 1; - s_UnconditionalDelay = 1; - } - - g_branch |= 3; -} - -void recVUMI_JR(VURegs* vuu, s32 info) -{ - int isreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - xLEA(eax, ptr[xAddressReg(isreg) * (1<<3)]); - - //Mask the address to something valid - if (vuu == &VU0) - xAND(eax, 0xfff); - else - xAND(eax, 0x3fff); - - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0) xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_TPC, 0))], eax); - - if (!(s_pCurBlock->type & BLOCKTYPE_HASEOP)) - { - xPUSH(s_vu); - xPUSH(eax); - } - g_branch |= 0x10; // 0x08 is reserved -} - -void recVUMI_JALR(VURegs* vuu, s32 info) -{ - _addNeededX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_); - - int isreg = _allocX86reg(xEmptyReg, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - xLEA(eax, ptr[xAddressReg(isreg) * (1<<3)]); - - //Mask the address to something valid - if (vuu == &VU0) - xAND(eax, 0xfff); - else - xAND(eax, 0x3fff); - - if (_It_) - { - _deleteX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_, 2); - xMOV(ptr16[(u16*)(SuperVUGetVIAddr(_It_, 0))], (pc + 8) >> 3); - } - - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0) xMOV(ptr[(void*)(SuperVUGetVIAddr(REG_TPC, 0))], eax); - - if (!(s_pCurBlock->type & BLOCKTYPE_HASEOP)) - { - xPUSH(s_vu); - xPUSH(eax); - } - - g_branch |= 4; -} - -void recVUMI_XGKICK_(VURegs *VU) -{ - pxAssert(s_XGKICKReg > 0 && x86regs[s_XGKICKReg].inuse && x86regs[s_XGKICKReg].type == X86TYPE_VITEMP); - - x86regs[s_XGKICKReg].inuse = 0; // so free doesn't flush - _freeX86regs(); - _freeXMMregs(); - - xMOV(ecx, xRegister32(s_XGKICKReg)); - xCALL((void*)VU1XGKICK_MTGSTransfer); - - s_ScheduleXGKICK = 0; -} - -void recVUMI_XGKICK(VURegs *VU, int info) -{ - if (s_ScheduleXGKICK) { - // second xgkick, so launch the first - recVUMI_XGKICK_(VU); - } - - int isreg = _allocX86reg(ecx, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - _freeX86reg(isreg); // flush - x86regs[isreg].inuse = 1; - x86regs[isreg].type = X86TYPE_VITEMP; - x86regs[isreg].needed = 1; - x86regs[isreg].mode = MODE_WRITE | MODE_READ; - xSHL(xRegister32(isreg), 4); - xAND(xRegister32(isreg), 0x3fff); - s_XGKICKReg = isreg; - - if (!SUPERVU_XGKICKDELAY || pc == s_pCurBlock->endpc) { - recVUMI_XGKICK_(VU); - } - else { - s_ScheduleXGKICK = (CHECK_XGKICKHACK) ? (std::min((u32)4, (s_pCurBlock->endpc-pc)/8)) : 2; - } -} - -void recVU_UPPER_FD_00(VURegs* VU, s32 info); -void recVU_UPPER_FD_01(VURegs* VU, s32 info); -void recVU_UPPER_FD_10(VURegs* VU, s32 info); -void recVU_UPPER_FD_11(VURegs* VU, s32 info); -void recVULowerOP(VURegs* VU, s32 info); -void recVULowerOP_T3_00(VURegs* VU, s32 info); -void recVULowerOP_T3_01(VURegs* VU, s32 info); -void recVULowerOP_T3_10(VURegs* VU, s32 info); -void recVULowerOP_T3_11(VURegs* VU, s32 info); -void recVUunknown(VURegs* VU, s32 info); - -void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info) = -{ - recVUMI_LQ , recVUMI_SQ , recVUunknown , recVUunknown, - recVUMI_ILW , recVUMI_ISW , recVUunknown , recVUunknown, - recVUMI_IADDIU, recVUMI_ISUBIU, recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_FCEQ , recVUMI_FCSET , recVUMI_FCAND, recVUMI_FCOR, /* 0x10 */ - recVUMI_FSEQ , recVUMI_FSSET , recVUMI_FSAND, recVUMI_FSOR, - recVUMI_FMEQ , recVUunknown , recVUMI_FMAND, recVUMI_FMOR, - recVUMI_FCGET , recVUunknown , recVUunknown , recVUunknown, - recVUMI_B , recVUMI_BAL , recVUunknown , recVUunknown, /* 0x20 */ - recVUMI_JR , recVUMI_JALR , recVUunknown , recVUunknown, - recVUMI_IBEQ , recVUMI_IBNE , recVUunknown , recVUunknown, - recVUMI_IBLTZ , recVUMI_IBGTZ , recVUMI_IBLEZ, recVUMI_IBGEZ, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x30 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVULowerOP , recVUunknown , recVUunknown , recVUunknown, /* 0x40*/ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x50 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x60 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x70 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, -}; - -void (*recVULowerOP_T3_00_OPCODE[32])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_MOVE , recVUMI_LQI , recVUMI_DIV , recVUMI_MTIR, - recVUMI_RNEXT , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUMI_MFP , recVUMI_XTOP , recVUMI_XGKICK, - recVUMI_ESADD , recVUMI_EATANxy, recVUMI_ESQRT, recVUMI_ESIN, -}; - -void (*recVULowerOP_T3_01_OPCODE[32])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_MR32 , recVUMI_SQI , recVUMI_SQRT , recVUMI_MFIR, - recVUMI_RGET , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUMI_XITOP, recVUunknown, - recVUMI_ERSADD, recVUMI_EATANxz, recVUMI_ERSQRT, recVUMI_EATAN, -}; - -void (*recVULowerOP_T3_10_OPCODE[32])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUMI_LQD , recVUMI_RSQRT, recVUMI_ILWR, - recVUMI_RINIT , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_ELENG , recVUMI_ESUM , recVUMI_ERCPR, recVUMI_EEXP, -}; - -void (*recVULowerOP_T3_11_OPCODE[32])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUMI_SQD , recVUMI_WAITQ, recVUMI_ISWR, - recVUMI_RXOR , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_ERLENG, recVUunknown , recVUMI_WAITP, recVUunknown, -}; - -void (*recVULowerOP_OPCODE[64])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x20 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_IADD , recVUMI_ISUB , recVUMI_IADDI, recVUunknown, /* 0x30 */ - recVUMI_IAND , recVUMI_IOR , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVULowerOP_T3_00, recVULowerOP_T3_01, recVULowerOP_T3_10, recVULowerOP_T3_11, -}; - -void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info) = -{ - recVUMI_ADDx , recVUMI_ADDy , recVUMI_ADDz , recVUMI_ADDw, - recVUMI_SUBx , recVUMI_SUBy , recVUMI_SUBz , recVUMI_SUBw, - recVUMI_MADDx , recVUMI_MADDy , recVUMI_MADDz , recVUMI_MADDw, - recVUMI_MSUBx , recVUMI_MSUBy , recVUMI_MSUBz , recVUMI_MSUBw, - recVUMI_MAXx , recVUMI_MAXy , recVUMI_MAXz , recVUMI_MAXw, /* 0x10 */ - recVUMI_MINIx , recVUMI_MINIy , recVUMI_MINIz , recVUMI_MINIw, - recVUMI_MULx , recVUMI_MULy , recVUMI_MULz , recVUMI_MULw, - recVUMI_MULq , recVUMI_MAXi , recVUMI_MULi , recVUMI_MINIi, - recVUMI_ADDq , recVUMI_MADDq , recVUMI_ADDi , recVUMI_MADDi, /* 0x20 */ - recVUMI_SUBq , recVUMI_MSUBq , recVUMI_SUBi , recVUMI_MSUBi, - recVUMI_ADD , recVUMI_MADD , recVUMI_MUL , recVUMI_MAX, - recVUMI_SUB , recVUMI_MSUB , recVUMI_OPMSUB, recVUMI_MINI, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x30 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVU_UPPER_FD_00, recVU_UPPER_FD_01, recVU_UPPER_FD_10, recVU_UPPER_FD_11, -}; - -void (*recVU_UPPER_FD_00_TABLE[32])(VURegs* VU, s32 info) = -{ - recVUMI_ADDAx, recVUMI_SUBAx , recVUMI_MADDAx, recVUMI_MSUBAx, - recVUMI_ITOF0, recVUMI_FTOI0, recVUMI_MULAx , recVUMI_MULAq , - recVUMI_ADDAq, recVUMI_SUBAq, recVUMI_ADDA , recVUMI_SUBA , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , -}; - -void (*recVU_UPPER_FD_01_TABLE[32])(VURegs* VU, s32 info) = -{ - recVUMI_ADDAy , recVUMI_SUBAy , recVUMI_MADDAy, recVUMI_MSUBAy, - recVUMI_ITOF4 , recVUMI_FTOI4 , recVUMI_MULAy , recVUMI_ABS , - recVUMI_MADDAq, recVUMI_MSUBAq, recVUMI_MADDA , recVUMI_MSUBA , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , -}; - -void (*recVU_UPPER_FD_10_TABLE[32])(VURegs* VU, s32 info) = -{ - recVUMI_ADDAz , recVUMI_SUBAz , recVUMI_MADDAz, recVUMI_MSUBAz, - recVUMI_ITOF12, recVUMI_FTOI12, recVUMI_MULAz , recVUMI_MULAi , - recVUMI_ADDAi, recVUMI_SUBAi , recVUMI_MULA , recVUMI_OPMULA, - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , -}; - -void (*recVU_UPPER_FD_11_TABLE[32])(VURegs* VU, s32 info) = -{ - recVUMI_ADDAw , recVUMI_SUBAw , recVUMI_MADDAw, recVUMI_MSUBAw, - recVUMI_ITOF15, recVUMI_FTOI15, recVUMI_MULAw , recVUMI_CLIP , - recVUMI_MADDAi, recVUMI_MSUBAi, recVUunknown , recVUMI_NOP , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , -}; - -void recVU_UPPER_FD_00(VURegs* VU, s32 info) -{ - recVU_UPPER_FD_00_TABLE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVU_UPPER_FD_01(VURegs* VU, s32 info) -{ - recVU_UPPER_FD_01_TABLE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVU_UPPER_FD_10(VURegs* VU, s32 info) -{ - recVU_UPPER_FD_10_TABLE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVU_UPPER_FD_11(VURegs* VU, s32 info) -{ - recVU_UPPER_FD_11_TABLE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVULowerOP(VURegs* VU, s32 info) -{ - recVULowerOP_OPCODE[ VU->code & 0x3f ](VU, info); -} - -void recVULowerOP_T3_00(VURegs* VU, s32 info) -{ - recVULowerOP_T3_00_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVULowerOP_T3_01(VURegs* VU, s32 info) -{ - recVULowerOP_T3_01_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVULowerOP_T3_10(VURegs* VU, s32 info) -{ - recVULowerOP_T3_10_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVULowerOP_T3_11(VURegs* VU, s32 info) -{ - recVULowerOP_T3_11_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVUunknown(VURegs* VU, s32 info) -{ - Console.Warning("Unknown SVU micromode opcode called"); -} - -// -------------------------------------------------------------------------------------- -// recSuperVU0 Interface -// -------------------------------------------------------------------------------------- -recSuperVU0::recSuperVU0() -{ - m_Idx = 0; - IsInterpreter = false; -} - -void recSuperVU0::Reserve() -{ - SuperVUAlloc(0); -} - -void recSuperVU0::Shutdown() noexcept -{ - SuperVUDestroy( 0 ); -} - -void recSuperVU0::Reset() -{ - SuperVUReset( 0 ); -} - -void recSuperVU0::Execute(u32 cycles) -{ - if ((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return; - - runCycles = cycles; - VU0.VI[REG_TPC].UL <<= 3; - SuperVUExecuteProgram(VU0.VI[REG_TPC].UL & 0xfff, 0); - VU0.VI[REG_TPC].UL >>= 3; -} - -void recSuperVU0::Clear(u32 Addr, u32 Size) -{ - SuperVUClear(Addr, Size, 0); -} - -uint recSuperVU0::GetCacheReserve() const -{ - return sVU_EXESIZE / _1mb; -} - -void recSuperVU0::SetCacheReserve( uint reserveInMegs ) const -{ - //microVU0.cacheSize = reserveInMegs * _1mb; -} - -// -------------------------------------------------------------------------------------- -// recSuperVU1 Interface -// -------------------------------------------------------------------------------------- -recSuperVU1::recSuperVU1() -{ - m_Idx = 1; - IsInterpreter = false; -} - -void recSuperVU1::Reserve() -{ - SuperVUAlloc(1); -} - -void recSuperVU1::Shutdown() noexcept -{ - vu1Thread.WaitVU(); - SuperVUDestroy( 1 ); -} - -void recSuperVU1::Reset() -{ - vu1Thread.WaitVU(); - SuperVUReset( 1 ); -} - -uint recSuperVU1::GetCacheReserve() const -{ - return sVU_EXESIZE / _1mb; -} - -void recSuperVU1::SetCacheReserve( uint reserveInMegs ) const -{ - //microVU0.cacheSize = reserveInMegs * _1mb; -} - -void recSuperVU1::Execute(u32 cycles) -{ - if ((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0) return; - // [TODO] Debugging pre- and post- hooks? - - VU1.VI[REG_TPC].UL <<= 3; - do { // while loop needed since not always will return finished - SuperVUExecuteProgram(VU1.VI[REG_TPC].UL & VU1_PROGMASK, 1); - } while (VU0.VI[REG_VPU_STAT].UL & 0x100); - VU1.VI[REG_TPC].UL >>= 3; -} - -void recSuperVU1::Clear(u32 Addr, u32 Size) -{ - SuperVUClear(Addr, Size, 1); -} diff --git a/pcsx2/x86/sVU_zerorec.h b/pcsx2/x86/sVU_zerorec.h deleted file mode 100644 index 15a64a8daa..0000000000 --- a/pcsx2/x86/sVU_zerorec.h +++ /dev/null @@ -1,42 +0,0 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2010 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -// Super VU recompiler - author: zerofrog(@gmail.com) - -#pragma once - -#include "sVU_Micro.h" - -//Using assembly code from an external file. -#ifdef __POSIX__ -extern "C" { -#endif -extern void SuperVUExecuteProgram(u32 startpc, int vuindex); -extern void SuperVUEndProgram(); -extern void svudispfntemp(); -#ifdef __POSIX__ -} -#endif - -extern void SuperVUDestroy(int vuindex); -extern void SuperVUReset(int vuindex); - -// read = 0, will write to reg -// read = 1, will read from reg -// read = 2, addr of previously written reg (used for status and clip flags) -extern u32 SuperVUGetVIAddr(int reg, int read); - -// if p == 0, flush q else flush p; if wait is != 0, waits for p/q -extern void SuperVUFlush(int p, int wait);