Merge branch 'recompiler-prof'

This commit is contained in:
Gregory Hainaut 2016-01-10 14:15:42 +01:00
commit 9bcd34349f
28 changed files with 1223 additions and 14 deletions

View File

@ -117,6 +117,7 @@
<ClCompile Include="..\..\src\Utilities\VirtualMemory.cpp" />
<ClCompile Include="..\..\src\Utilities\x86\MemcpyFast.cpp" />
<ClCompile Include="..\..\src\Utilities\PathUtils.cpp" />
<ClCompile Include="..\..\src\Utilities\Perf.cpp" />
<ClCompile Include="..\..\src\Utilities\PrecompiledHeader.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Devel|Win32'">Create</PrecompiledHeader>

View File

@ -41,6 +41,9 @@
<ClCompile Include="..\..\src\Utilities\PathUtils.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Utilities\Perf.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\Utilities\PrecompiledHeader.cpp">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -0,0 +1,57 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2015 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Support for exporting the location of recompiled (JIT) code so that an
// external profiler can attribute samples to a symbol name.
// The real implementation is only built on Linux when ProfileWithPerf is
// defined in Perf.cpp; otherwise the functions are empty stubs.
namespace Perf {
// One mapped code zone: start address, size in bytes and a short name.
struct Info
{
// Start address of the generated x86 code.
uptr m_x86;
// Size of the zone in bytes.
u32 m_size;
// Symbol name (fixed-size buffer, longer names are truncated).
char m_symbol[20];
// The idea is to keep static zones that are set only
// once.
// false: static zone (survives InfoVector::reset).
// true : dynamic zone (dropped by InfoVector::reset).
bool m_dynamic;
// Static zone named exactly `symbol`.
Info(uptr x86, u32 size, const char* symbol);
// Dynamic zone named "<symbol>_0x<pc>".
Info(uptr x86, u32 size, const char* symbol, u32 pc);
// Writes one "address size symbol" line to fp.
void Print(FILE* fp);
};
// Collection of zones belonging to one emulated unit (EE, IOP, VU, ...).
class InfoVector
{
std::vector<Info> m_v;
// Prefix used to build the symbol name of dynamic zones.
char m_prefix[20];
public:
InfoVector(const char* prefix);
// Prints every recorded zone to fp.
void print(FILE* fp);
// Records a static zone (dispatcher, whole recompiler buffer).
void map(uptr x86, u32 size, const char* symbol);
// Records a dynamic zone (a single recompiled block starting at guest address pc).
void map(uptr x86, u32 size, u32 pc);
// Forgets every dynamic zone, static zones are kept.
void reset();
};
// Writes all the zones of any/ee/iop/vu to the map file.
void dump();
// Same as dump(), then resets the four vectors.
void dump_and_reset();
// Global vectors, defined in Perf.cpp.
extern InfoVector any;
extern InfoVector ee;
extern InfoVector iop;
extern InfoVector vu;
}

View File

@ -0,0 +1,61 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2015 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Implement BMI1/BMI2 instruction set
namespace x86Emitter {
// Emitter helper for the VEX-encoded BMI instructions that take their
// operands in the "RVM" order (destination, first source, second source
// which may be a register or a memory operand).
// Instances are aggregate-initialized as { Prefix, MbPrefix, Opcode }.
struct xImplBMI_RVM
{
// Mandatory prefix of the instruction (0, 0x66, 0xF3 or 0xF2).
u8 Prefix;
// Opcode map selector (0x0F, 0x38 or 0x3A).
u8 MbPrefix;
// Opcode byte.
u8 Opcode;
// RVM
// MULX Unsigned multiply without affecting flags, and arbitrary destination registers
// PDEP Parallel bits deposit
// PEXT Parallel bits extract
// ANDN Logical and not ~x & y
void operator()( const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const;
void operator()( const xRegisterInt& to, const xRegisterInt& from1, const xIndirectVoid& from2) const;
// The operand forms below are not implemented yet, they are kept (disabled)
// as a reminder of the remaining BMI instructions.
#if 0
// RMV
// BEXTR Bit field extract (with register) (src >> start) & ((1 << len)-1)[9]
// BZHI Zero high bits starting with specified bit position
// SARX Shift arithmetic right without affecting flags
// SHRX Shift logical right without affecting flags
// SHLX Shift logical left without affecting flags
// FIXME: WARNING same as above but V and M are inverted
//void operator()( const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const;
//void operator()( const xRegisterInt& to, const xIndirectVoid& from1, const xRegisterInt& from2) const;
// VM
// BLSI Extract lowest set isolated bit x & -x
// BLSMSK Get mask up to lowest set bit x ^ (x - 1)
// BLSR Reset lowest set bit x & (x - 1)
void operator()( const xRegisterInt& to, const xRegisterInt& from) const;
void operator()( const xRegisterInt& to, const xIndirectVoid& from) const;
// RMI
//RORX Rotate right logical without affecting flags
void operator()( const xRegisterInt& to, const xRegisterInt& from, u8 imm) const;
void operator()( const xRegisterInt& to, const xIndirectVoid& from, u8 imm) const;
#endif
};
}

View File

@ -124,6 +124,10 @@ namespace x86Emitter
xSETS, xSETNS,
xSETPE, xSETPO;
// ------------------------------------------------------------------------
// Extra BMI instructions (the host CPU must support BMI1/BMI2)
extern const xImplBMI_RVM xMULX, xPDEP, xPEXT, xANDN_S; // Warning xANDN is already used by SSE
//////////////////////////////////////////////////////////////////////////////////////////
// Miscellaneous Instructions
// These are all defined inline or in ix86.cpp.

View File

@ -71,5 +71,73 @@ namespace x86Emitter {
// Convenience overload without a mandatory prefix: forwards to the
// 5-argument xOpWrite0F with a prefix value of 0.
template< typename T1, typename T2 > __emitinline
void xOpWrite0F( u16 opcode, const T1& param1, const T2& param2, u8 imm8 ) { xOpWrite0F( 0, opcode, param1, param2, imm8 ); }
// Emits an instruction encoded with the 2-byte VEX prefix (0xC5).
//   prefix : mandatory prefix of the instruction (0, 0x66, 0xF3 or 0xF2)
//   opcode : opcode byte
//   param1 : operand placed in the ModRM "reg" field
//   param2 : operand encoded (inverted) in the VEX "vvvv" field
//   param3 : operand placed in the ModRM "r/m" field (register or memory)
template< typename T1, typename T2, typename T3 > __emitinline
void xOpWriteC5( u8 prefix, u8 opcode, const T1& param1, const T2& param2, const T3& param3 )
{
	pxAssert( prefix == 0 || prefix == 0x66 || prefix == 0xF3 || prefix == 0xF2 );

	const xRegisterInt& modrm_reg = param1.IsReg() ? param1 : param2;

	// R bit (stored inverted): only meaningful for registers 8-15 in 64-bit mode
#ifdef __x86_64__
	u8 inv_r = modrm_reg.IsExtended() ? 0x00 : 0x80;
#else
	u8 inv_r = 0x80;
#endif

	// L bit: set for 256-bit wide operands
	u8 vec_len = modrm_reg.IsWideSIMD() ? 4 : 0;

	// vvvv field: second operand id, stored inverted
	u8 inv_vvvv = (~param2.GetId() & 0xF) << 3;

	// pp field: compact encoding of the mandatory prefix
	u8 pp;
	switch (prefix) {
		case 0xF2: pp = 3; break;
		case 0xF3: pp = 2; break;
		case 0x66: pp = 1; break;
		default:   pp = 0; break;
	}

	const u8 vex_byte = inv_r | inv_vvvv | vec_len | pp;

	xWrite8( 0xC5 );
	xWrite8( vex_byte );
	xWrite8( opcode );
	EmitSibMagic( param1, param3 );
}
// Emits an instruction encoded with the 3-byte VEX prefix (0xC4).
//   prefix    : mandatory prefix of the instruction (0, 0x66, 0xF3 or 0xF2)
//   mb_prefix : opcode map selector (0x0F, 0x38 or 0x3A)
//   opcode    : opcode byte
//   param1    : operand placed in the ModRM "reg" field
//   param2    : operand encoded (inverted) in the VEX "vvvv" field
//   param3    : operand placed in the ModRM "r/m" field (register or memory)
//   w         : -1 to derive the W bit from the operand size, otherwise the
//               value of the W bit itself
template< typename T1, typename T2, typename T3 > __emitinline
void xOpWriteC4( u8 prefix, u8 mb_prefix, u8 opcode, const T1& param1, const T2& param2, const T3& param3, int w = -1 )
{
	pxAssert( prefix == 0 || prefix == 0x66 || prefix == 0xF3 || prefix == 0xF2 );
	pxAssert( mb_prefix == 0x0F || mb_prefix == 0x38 || mb_prefix == 0x3A );

	const xRegisterInt& modrm_reg = param1.IsReg() ? param1 : param2;

	// R/X/B bits are stored inverted. They only matter in 64-bit mode.
#ifdef __x86_64__
	u8 inv_r = modrm_reg.IsExtended() ? 0x00 : 0x80;
	u8 inv_b = param3.IsExtended() ? 0x00 : 0x20;
	u8 inv_x = 0x40; // likely unused so hardwired to disabled
#else
	u8 inv_r = 0x80;
	u8 inv_b = 0x20;
	u8 inv_x = 0x40;
#endif

	// L bit: set for 256-bit wide operands
	u8 vec_len = modrm_reg.IsWideSIMD() ? 4 : 0;

	// W bit: either autodetected from the operand size or forced by the caller
	u8 vex_w;
	if (w == -1)
		vex_w = (modrm_reg.GetOperandSize() == 8) ? 0x80 : 0;
	else
		vex_w = 0x80 * w;

	// vvvv field: second operand id, stored inverted
	u8 inv_vvvv = (~param2.GetId() & 0xF) << 3;

	// pp field: compact encoding of the mandatory prefix
	u8 pp;
	switch (prefix) {
		case 0xF2: pp = 3; break;
		case 0xF3: pp = 2; break;
		case 0x66: pp = 1; break;
		default:   pp = 0; break;
	}

	// mmmmm field: compact encoding of the opcode map
	u8 map_select;
	switch (mb_prefix) {
		case 0x3A: map_select = 3; break;
		case 0x38: map_select = 2; break;
		default:   map_select = 1; break;
	}

	const u8 vex_byte1 = inv_r | inv_x | inv_b | map_select;
	const u8 vex_byte2 = vex_w | inv_vvvv | vec_len | pp;

	xWrite8( 0xC4 );
	xWrite8( vex_byte1 );
	xWrite8( vex_byte2 );
	xWrite8( opcode );
	EmitSibMagic( param1, param3 );
}
}

View File

@ -90,6 +90,8 @@ public:
u32 hasStreamingSIMD4Extensions2 :1;
u32 hasAVX :1;
u32 hasAVX2 :1;
u32 hasBMI1 :1;
u32 hasBMI2 :1;
u32 hasFMA :1;
// AMD-specific CPU Features

View File

@ -255,12 +255,16 @@ template< typename T > void xWrite( T val );
// True when the id is negative.
bool IsEmpty() const { return Id < 0 ; }
// True when the id is the xRegId_Invalid marker.
bool IsInvalid() const { return Id == xRegId_Invalid; }
bool IsExtended() const { return Id > 7; } // Register 8-15 need an extra bit to be selected
// A register is never a memory operand. IsMem/IsReg let the templated
// emitter helpers tell a register apart from an indirect operand.
bool IsMem() const { return false; }
bool IsReg() const { return true; }
// Returns true if the register is a valid accumulator: Eax, Ax, Al, XMM0.
bool IsAccumulator() const { return Id == 0; }
// returns true if the register is a valid MMX or XMM register.
bool IsSIMD() const { return GetOperandSize() == 8 || GetOperandSize() == 16; }
// Returns true if the operand size is 32 bytes.
bool IsWideSIMD() const { return GetOperandSize() == 32; }
// Registers are compared on their id only.
bool operator==( const xRegisterBase& src ) const { return (Id == src.Id); }
bool operator!=( const xRegisterBase& src ) const { return (Id != src.Id); }
@ -690,6 +694,8 @@ template< typename T > void xWrite( T val );
xIndirectVoid& Add( s32 imm );
// True when Displacement passes the is_s8() check.
bool IsByteSizeDisp() const { return is_s8( Displacement ); }
// An indirect operand is always a memory operand, never a register.
// (counterpart of the IsMem/IsReg pair of the register classes)
bool IsMem() const { return true; }
bool IsReg() const { return false; }
operator xAddressVoid()
{
@ -996,3 +1002,4 @@ template< typename T > void xWrite( T val );
#include "implement/test.h"
#include "implement/jmpcall.h"
#include "implement/bmi.h"

View File

@ -61,6 +61,7 @@ set(UtilitiesSources
Mutex.cpp
PathUtils.cpp
PrecompiledHeader.cpp
Perf.cpp
pxCheckBox.cpp
pxRadioPanel.cpp
pxStaticText.cpp

View File

@ -0,0 +1,140 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2015 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "Perf.h"
//#define ProfileWithPerf
#define MERGE_BLOCK_RESULT
namespace Perf
{
	// Warning: these objects are not thread safe
	InfoVector any("");
	InfoVector ee("EE");
	InfoVector iop("IOP");
	InfoVector vu("VU");

// Perf is only supported on linux
#if defined(__linux__) && defined(ProfileWithPerf)

	////////////////////////////////////////////////////////////////////////////////
	// Implementation of the Info object
	////////////////////////////////////////////////////////////////////////////////

	// Static zone: the symbol is copied (and truncated if it doesn't fit).
	Info::Info(uptr x86, u32 size, const char* symbol) : m_x86(x86), m_size(size), m_dynamic(false)
	{
		strncpy(m_symbol, symbol, sizeof(m_symbol));
		// strncpy doesn't terminate the destination when the source is too long
		m_symbol[sizeof(m_symbol) - 1] = '\0';
	}

	// Dynamic zone: the symbol is built as "<symbol>_0x<pc>".
	Info::Info(uptr x86, u32 size, const char* symbol, u32 pc) : m_x86(x86), m_size(size), m_dynamic(true)
	{
		snprintf(m_symbol, sizeof(m_symbol), "%s_0x%08x", symbol, pc);
	}

	// Writes the zone as a "start size symbol" line (hexadecimal values).
	void Info::Print(FILE* fp)
	{
		// uptr is pointer sized, it must not be printed with a plain %x.
		// unsigned long is as wide as a pointer on linux (the only platform of this code).
		fprintf(fp, "%lx %x %s\n", (unsigned long)m_x86, m_size, m_symbol);
	}

	////////////////////////////////////////////////////////////////////////////////
	// Implementation of the InfoVector object
	////////////////////////////////////////////////////////////////////////////////

	InfoVector::InfoVector(const char* prefix)
	{
		strncpy(m_prefix, prefix, sizeof(m_prefix));
		// Same as above, guarantee the termination of the string
		m_prefix[sizeof(m_prefix) - 1] = '\0';
	}

	void InfoVector::print(FILE* fp)
	{
		for(auto&& it : m_v) it.Print(fp);
	}

	void InfoVector::map(uptr x86, u32 size, const char* symbol)
	{
		// This function is typically used for dispatcher and recompiler.
		// Dispatchers are on a page and must always be kept.
		// Recompilers are much bigger (TODO check VIF) and are only
		// useful when MERGE_BLOCK_RESULT is defined
#ifdef MERGE_BLOCK_RESULT
		m_v.emplace_back(x86, size, symbol);
#else
		if (size < 8 * _1kb) m_v.emplace_back(x86, size, symbol);
#endif
	}

	void InfoVector::map(uptr x86, u32 size, u32 pc)
	{
		// Per-block symbols are useless when the results are merged
#ifndef MERGE_BLOCK_RESULT
		m_v.emplace_back(x86, size, m_prefix, pc);
#endif
	}

	// Drops the dynamic zones, static zones are kept.
	void InfoVector::reset()
	{
		auto dynamic = std::remove_if(m_v.begin(), m_v.end(), [](const Info& i) { return i.m_dynamic; });
		m_v.erase(dynamic, m_v.end());
	}

	////////////////////////////////////////////////////////////////////////////////
	// Global function
	////////////////////////////////////////////////////////////////////////////////

	// Writes all the known zones to /tmp/perf-<pid>.map.
	// Nothing is done if the file can't be created.
	void dump()
	{
		char file[256];
		snprintf(file, sizeof(file), "/tmp/perf-%d.map", getpid());
		FILE* fp = fopen(file, "w");
		if (fp == nullptr)
			return;

		any.print(fp);
		ee.print(fp);
		iop.print(fp);
		vu.print(fp);

		fclose(fp);
	}

	void dump_and_reset()
	{
		dump();

		any.reset();
		ee.reset();
		iop.reset();
		vu.reset();
	}

#else

	////////////////////////////////////////////////////////////////////////////////
	// Dummy implementation
	////////////////////////////////////////////////////////////////////////////////

	InfoVector::InfoVector(const char* prefix) {}
	void InfoVector::map(uptr x86, u32 size, const char* symbol) {}
	void InfoVector::map(uptr x86, u32 size, u32 pc) {}
	void InfoVector::reset() {}

	void dump() {}
	void dump_and_reset() {}

#endif
}

View File

@ -42,6 +42,7 @@ endif(CMAKE_BUILD_TYPE STREQUAL Release)
# variable with all sources of this library
set(x86emitterSources
bmi.cpp
cpudetect.cpp
fpu.cpp
groups.cpp

View File

@ -0,0 +1,32 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2015 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "internal.h"
#include "tools.h"
namespace x86Emitter {
const xImplBMI_RVM xMULX = { 0xF2, 0x38, 0xF6 };
const xImplBMI_RVM xPDEP = { 0xF2, 0x38, 0xF5 };
const xImplBMI_RVM xPEXT = { 0xF3, 0x38, 0xF5 };
const xImplBMI_RVM xANDN_S = { 0x00, 0x38, 0xF2 };
void xImplBMI_RVM::operator()( const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const
{ xOpWriteC4(Prefix, MbPrefix, Opcode, to, from1, from2); }
void xImplBMI_RVM::operator()( const xRegisterInt& to, const xRegisterInt& from1, const xIndirectVoid& from2) const
{ xOpWriteC4(Prefix, MbPrefix, Opcode, to, from1, from2); }
}

View File

@ -278,6 +278,9 @@ void x86capabilities::Identify()
}
}
hasBMI1 = ( SEFlag >> 3 ) & 1;
hasBMI2 = ( SEFlag >> 8 ) & 1;
// Ones only for AMDs:
hasMultimediaExtensionsExt = ( EFlags >> 22 ) & 1; //mmx2
hasAMD64BitArchitecture = ( EFlags >> 29 ) & 1; //64bit cpu

View File

@ -0,0 +1,95 @@
#!/usr/bin/perl
use strict;
use warnings;
# The only parameter is the dump file to annotate.
defined $ARGV[0] or die "failed to get first param: missing input file\n";
# 3-argument open: the file name is never interpreted as a mode or a command.
open(my $in, '<', $ARGV[0]) or die "failed to get first param: $!";
# Name of every 32-bit word of the register structure, indexed by
# (byte offset / 4). Each GPR-like entry is repeated 4 times because it
# covers 16 bytes (4 words).
my @pp_name = (
# GPR
"0", "0", "0", "0",
"at", "at", "at", "at",
"v0", "v0", "v0", "v0",
"v1", "v1", "v1", "v1",
"a0", "a0", "a0", "a0",
"a1", "a1", "a1", "a1",
"a2", "a2", "a2", "a2",
"a3", "a3", "a3", "a3",
"t0", "t0", "t0", "t0",
"t1", "t1", "t1", "t1",
"t2", "t2", "t2", "t2",
"t3", "t3", "t3", "t3",
"t4", "t4", "t4", "t4",
"t5", "t5", "t5", "t5",
"t6", "t6", "t6", "t6",
"t7", "t7", "t7", "t7",
"s0", "s0", "s0", "s0",
"s1", "s1", "s1", "s1",
"s2", "s2", "s2", "s2",
"s3", "s3", "s3", "s3",
"s4", "s4", "s4", "s4",
"s5", "s5", "s5", "s5",
"s6", "s6", "s6", "s6",
"s7", "s7", "s7", "s7",
"t8", "t8", "t8", "t8",
"t9", "t9", "t9", "t9",
"k0", "k0", "k0", "k0",
"k1", "k1", "k1", "k1",
"gp", "gp", "gp", "gp",
"sp", "sp", "sp", "sp",
"s8", "s8", "s8", "s8",
"ra", "ra", "ra", "ra",
"hi", "hi", "hi", "hi",
"lo", "lo", "lo", "lo",
# CP0
"Index" , "Random" , "EntryLo0" , "EntryLo1" ,
"Context" , "PageMask" , "Wired" , "Reserved0" ,
"BadVAddr" , "Count" , "EntryHi" , "Compare" ,
"Status" , "Cause" , "EPC" , "PRid" ,
"Config" , "LLAddr" , "WatchLO" , "WatchHI" ,
"XContext" , "Reserved1" , "Reserved2" , "Debug" ,
"DEPC" , "PerfCnt" , "ErrCtl" , "CacheErr" ,
"TagLo" , "TagHi" , "ErrorEPC" , "DESAVE" ,
# Single-word fields
"sa",
"IsDelaySlot",
"pc",
"code",
"PERF", "PERF", "PERF", "PERF",
"eCycle0" , "eCycle1" , "eCycle2" , "eCycle3" , "eCycle4" , "eCycle5" , "eCycle6" , "eCycle7" ,
"eCycle8" , "eCycle9" , "eCycle10" , "eCycle11" , "eCycle12" , "eCycle13" , "eCycle14" , "eCycle15" ,
"eCycle16" , "eCycle17" , "eCycle18" , "eCycle19" , "eCycle20" , "eCycle21" , "eCycle22" , "eCycle23" ,
"eCycle24" , "eCycle25" , "eCycle26" , "eCycle27" , "eCycle28" , "eCycle29" , "eCycle30" , "eCycle31" ,
"sCycle0" , "sCycle1" , "sCycle2" , "sCycle3" , "sCycle4" , "sCycle5" , "sCycle6" , "sCycle7" ,
"sCycle8" , "sCycle9" , "sCycle10" , "sCycle11" , "sCycle12" , "sCycle13" , "sCycle14" , "sCycle15" ,
"sCycle16" , "sCycle17" , "sCycle18" , "sCycle19" , "sCycle20" , "sCycle21" , "sCycle22" , "sCycle23" ,
"sCycle24" , "sCycle25" , "sCycle26" , "sCycle27" , "sCycle28" , "sCycle29" , "sCycle30" , "sCycle31" ,
"cycle", "interrupt", "branch", "opmode", "tempcycles"
);
# Copy the dump to stdout. Every "ds:0x<address>" memory operand that falls
# inside the register structure is replaced by "&<register name>_B<byte>".
my $line;
# Base address of the register structure. It is only known once the
# "Dump register data" line of the dump has been read.
my $cpu;
while($line = <$in>) {
    if ($line =~ /Dump register data: (0x[0-9a-f]+)/) {
        $cpu = hex($1);
    }
    # Nothing can be translated before the base address is known
    if (defined $cpu && $line =~ /ds:(0x[0-9a-f]+)/) {
        my $mem = hex($1);
        my $offset = $mem - $cpu;
        if ($offset >= 0 && $offset < 980) {
            # Inside the cpuRegisters structure
            # The first 544 bytes are 16-byte registers, the rest are 4-byte fields
            my $byte = ($offset >= 544) ? $offset % 4 : $offset % 16;
            # Index of the 32-bit word (explicit integer division)
            my $dw = int($offset / 4);
            # FIXME B doesn't work for duplicated register
            my $pretty = "&$pp_name[$dw]_B$byte";
            #print "AH $pretty\n";
            $line =~ s/ds:0x[0-9a-f]+/$pretty/;
        }
    }
    print $line;
}
close($in);

View File

@ -197,6 +197,69 @@ void iDumpVU1Registers()
#endif
}
// This function is close to the other iDumpBlock but it doesn't rely too much
// on global variables. Besides, it doesn't print the flag info.
//
// However you could call it anytime to dump any block. And we have both
// x86 and EE disassembly code.
//
//   ee_pc    : address of the first EE instruction of the block
//   ee_size  : size of the EE block in bytes
//   x86_pc   : address of the generated x86 code
//   x86_size : size of the generated x86 code in bytes
//
// The result is written in the Logs folder (R5900dump_<start>:<end>.txt).
// On linux the x86 disassembly is appended by calling objdump.
void iDumpBlock(u32 ee_pc, u32 ee_size, uptr x86_pc, u32 x86_size)
{
	u32 ee_end = ee_pc + ee_size;

	// NOTE(review): x86_pc (uptr) is printed with %x, which assumes a 32-bit build -- confirm
	DbgCon.WriteLn( Color_Gray, "dump block %x:%x (x86:0x%x)", ee_pc, ee_end, x86_pc );

	g_Conf->Folders.Logs.Mkdir();
	wxString dump_filename = Path::Combine( g_Conf->Folders.Logs, wxsFormat(L"R5900dump_%.8X:%.8X.txt", ee_pc, ee_end) );
	AsciiFile eff( dump_filename, L"w" );

	// Print register content to detect the memory access type. Warning: values are taken
	// during the call of this function. They aren't the real values of the block.
	eff.Printf("Dump register data: 0x%x\n", (uptr)&cpuRegs.GPR.r[0].UL[0]);
	for (int reg = 0; reg < 32; reg++) {
		// Only lower 64 bits (enough for address)
		eff.Printf("\t%2s <= 0x%08x_%08x\n", R5900::GPR_REG[reg], cpuRegs.GPR.r[reg].UL[1],cpuRegs.GPR.r[reg].UL[0]);
	}
	eff.Printf("\n");

	if (!symbolMap.GetLabelString(ee_pc).empty())
	{
		eff.Printf( "%s\n", symbolMap.GetLabelString(ee_pc).c_str() );
	}

	// EE disassembly, one instruction (4 bytes) per line
	for ( u32 i = ee_pc; i < ee_end; i += 4 )
	{
		std::string output;
		//TLB Issue disR5900Fasm( output, memRead32( i ), i, false );
		disR5900Fasm( output, psMu32(i), i, false );
		eff.Printf( "0x%.X : %s\n", i, output.c_str() );
	}

	// Didn't find (search) a better solution
	eff.Printf( "\nRaw x86 dump (https://www.onlinedisassembler.com/odaweb/):\n");
	u8* x86 = (u8*)x86_pc;
	for (u32 i = 0; i < x86_size; i++) {
		eff.Printf("%.2X", x86[i]);
	}
	eff.Printf("\n\n");

	eff.Close(); // Close the file so it can be appended by objdump

	// handy but slow solution (system call)
#ifdef __linux__
	wxString obj_filename = Path::Combine(g_Conf->Folders.Logs, wxString(L"objdump_tmp.o"));
	wxFFile objdump(obj_filename , L"wb");
	// Without the temporary file there is nothing to disassemble
	if (objdump.IsOpened()) {
		objdump.Write(x86, x86_size);
		objdump.Close();

		// The address is printed as an unsigned hexadecimal value: a signed decimal
		// would become negative for any code located above 2GB.
		// Paths are quoted so a folder name with a space doesn't break the command.
		std::system(
				wxsFormat("objdump -D -b binary -mi386 --disassembler-options=intel --no-show-raw-insn --adjust-vma=0x%x \"%s\" >> \"%s\"",
					(u32) x86_pc, WX_STR(obj_filename), WX_STR(dump_filename))
				);
	}
#endif
}
// Originally from iR5900-32.cpp
void iDumpBlock( int startpc, u8 * ptr )
{

View File

@ -19,5 +19,6 @@ extern void iDumpRegisters(u32 startpc, u32 temp);
extern void iDumpPsxRegisters(u32 startpc, u32 temp);
extern void iDumpVU0Registers();
extern void iDumpVU1Registers();
extern void iDumpBlock(u32 ee_pc, u32 ee_size, uptr x86_pc, u32 x86_size);
extern void iDumpBlock( int startpc, u8 * ptr );
extern void iIopDumpBlock( int startpc, u8 * ptr );

View File

@ -27,6 +27,7 @@
#include "System/RecTypes.h"
#include "Utilities/MemsetFast.inl"
#include "Utilities/Perf.h"
// --------------------------------------------------------------------------------------
@ -56,6 +57,9 @@ void RecompiledCodeReserve::_registerProfiler()
if (m_profiler_name.IsEmpty() || !IsOk()) return;
ProfilerRegisterSource( m_profiler_name, m_baseptr, GetReserveSizeInBytes() );
m_profiler_registered = true;
// Could potentially be integrated into ProfilerRegisterSource
Perf::any.map((uptr)m_baseptr, GetReserveSizeInBytes(), m_profiler_name.ToUTF8());
}
void RecompiledCodeReserve::_termProfiler()

372
pcsx2/x86/R5900_Profiler.h Normal file
View File

@ -0,0 +1,372 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2015 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Identifier of every EE opcode tracked by the profiler.
// The table follows the layout of the opcode maps; a hole in a map
// is shown as /*,*/ and doesn't get any identifier. The order must stay
// in sync with the eeOpcodeName table.
//
// MOVZ/MOVN are temporarily renamed so the columns stay aligned, hence
// the enumerators are really called MOVZtemp and MOVNtemp.
// Keep my nice alignment please!
#define MOVZ MOVZtemp
#define MOVN MOVNtemp
enum class eeOpcode {
// Core
special , regimm , J , JAL , BEQ , BNE , BLEZ , BGTZ ,
ADDI , ADDIU , SLTI , SLTIU , ANDI , ORI , XORI , LUI ,
cop0 , cop1 , cop2 , /*,*/ BEQL , BNEL , BLEZL , BGTZL ,
DADDI , DADDIU , LDL , LDR , mmi , /*,*/ LQ , SQ ,
LB , LH , LWL , LW , LBU , LHU , LWR , LWU ,
SB , SH , SWL , SW , SDL , SDR , SWR , CACHE ,
/*,*/ LWC1 , /*,*/ PREF , /*,*/ /*,*/ LQC2 , LD ,
/*,*/ SWC1 , /*,*/ /*,*/ /*,*/ /*,*/ SQC2 , SD ,
// Special
SLL , /*,*/ SRL , SRA , SLLV , /*,*/ SRLV , SRAV ,
JR , JALR , MOVZ , MOVN , SYSCALL , BREAK , /*,*/ SYNC ,
MFHI , MTHI , MFLO , MTLO , DSLLV , /*,*/ DSRLV , DSRAV ,
MULT , MULTU , DIV , DIVU , /*,*/ /*,*/ /*,*/ /*,*/
ADD , ADDU , SUB , SUBU , AND , OR , XOR , NOR ,
MFSA , MTSA , SLT , SLTU , DADD , DADDU , DSUB , DSUBU ,
TGE , TGEU , TLT , TLTU , TEQ , /*,*/ TNE , /*,*/
DSLL , /*,*/ DSRL , DSRA , DSLL32 , /*,*/ DSRL32 , DSRA32 ,
// Regimm
BLTZ , BGEZ , BLTZL , BGEZL , /*,*/ /*,*/ /*,*/ /*,*/
TGEI , TGEIU , TLTI , TLTIU , TEQI , /*,*/ TNEI , /*,*/
BLTZAL , BGEZAL , BLTZALL , BGEZALL , /*,*/ /*,*/ /*,*/ /*,*/
MTSAB , MTSAH , /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/
// MMI
MADD , MADDU , /*,*/ /*,*/ PLZCW , /*,*/ /*,*/ /*,*/
MMI0 , MMI2 , /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/
MFHI1 , MTHI1 , MFLO1 , MTLO1 , /*,*/ /*,*/ /*,*/ /*,*/
MULT1 , MULTU1 , DIV1 , DIVU1 , /*,*/ /*,*/ /*,*/ /*,*/
MADD1 , MADDU1 , /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/
MMI1 , MMI3 , /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/
PMFHL , PMTHL , /*,*/ /*,*/ PSLLH , /*,*/ PSRLH , PSRAH ,
/*,*/ /*,*/ /*,*/ /*,*/ PSLLW , /*,*/ PSRLW , PSRAW ,
// MMI0
PADDW , PSUBW , PCGTW , PMAXW ,
PADDH , PSUBH , PCGTH , PMAXH ,
PADDB , PSUBB , PCGTB , /*,*/
/*,*/ /*,*/ /*,*/ /*,*/
PADDSW , PSUBSW , PEXTLW , PPACW ,
PADDSH , PSUBSH , PEXTLH , PPACH ,
PADDSB , PSUBSB , PEXTLB , PPACB ,
/*,*/ /*,*/ PEXT5 , PPAC5 ,
// MMI1
/*,*/ PABSW , PCEQW , PMINW ,
PADSBH , PABSH , PCEQH , PMINH ,
/*,*/ /*,*/ PCEQB , /*,*/
/*,*/ /*,*/ /*,*/ /*,*/
PADDUW , PSUBUW , PEXTUW , /*,*/
PADDUH , PSUBUH , PEXTUH , /*,*/
PADDUB , PSUBUB , PEXTUB , QFSRV ,
/*,*/ /*,*/ /*,*/ /*,*/
// MMI2
PMADDW , /*,*/ PSLLVW , PSRLVW ,
PMSUBW , /*,*/ /*,*/ /*,*/
PMFHI , PMFLO , PINTH , /*,*/
PMULTW , PDIVW , PCPYLD , /*,*/
PMADDH , PHMADH , PAND , PXOR ,
PMSUBH , PHMSBH , /*,*/ /*,*/
/*,*/ /*,*/ PEXEH , PREVH ,
PMULTH , PDIVBW , PEXEW , PROT3W ,
// MMI3
PMADDUW , /*,*/ /*,*/ PSRAVW ,
/*,*/ /*,*/ /*,*/ /*,*/
PMTHI , PMTLO , PINTEH , /*,*/
PMULTUW , PDIVUW , PCPYUD , /*,*/
/*,*/ /*,*/ POR , PNOR ,
/*,*/ /*,*/ /*,*/ /*,*/
/*,*/ /*,*/ PEXCH , PCPYH ,
/*,*/ /*,*/ PEXCW , /*,*/
// ADD COP0/1 ??
// Number of opcodes, must remain the last enumerator
LAST
};
#undef MOVZ
#undef MOVN
// Printable name of every eeOpcode enumerator, indexed by the enumerator
// value. The layout (holes included) must be kept identical to the enum.
// The final "!" entry sits at index eeOpcode::LAST; eeProfiler::Reset
// asserts on it to detect a table that went out of sync.
static const char eeOpcodeName[][16] = {
// "Core"
"special" , "regimm" , "J" , "JAL" , "BEQ" , "BNE" , "BLEZ" , "BGTZ" ,
"ADDI" , "ADDIU" , "SLTI" , "SLTIU" , "ANDI" , "ORI" , "XORI" , "LUI" ,
"cop0" , "cop1" , "cop2" , /* , */ "BEQL" , "BNEL" , "BLEZL" , "BGTZL" ,
"DADDI" , "DADDIU" , "LDL" , "LDR" , "mmi" , /* , */ "LQ" , "SQ" ,
"LB" , "LH" , "LWL" , "LW" , "LBU" , "LHU" , "LWR" , "LWU" ,
"SB" , "SH" , "SWL" , "SW" , "SDL" , "SDR" , "SWR" , "CACHE" ,
/* , */ "LWC1" , /* , */ "PREF" , /* , */ /* , */ "LQC2" , "LD" ,
/* , */ "SWC1" , /* , */ /* , */ /* , */ /* , */ "SQC2" , "SD" ,
// "Special"
"SLL" , /* , */ "SRL" , "SRA" , "SLLV" , /* , */ "SRLV" , "SRAV" ,
"JR" , "JALR" , "MOVZ" , "MOVN" , "SYSCALL" , "BREAK" , /* , */ "SYNC" ,
"MFHI" , "MTHI" , "MFLO" , "MTLO" , "DSLLV" , /* , */ "DSRLV" , "DSRAV" ,
"MULT" , "MULTU" , "DIV" , "DIVU" , /* , */ /* , */ /* , */ /* , */
"ADD" , "ADDU" , "SUB" , "SUBU" , "AND" , "OR" , "XOR" , "NOR" ,
"MFSA" , "MTSA" , "SLT" , "SLTU" , "DADD" , "DADDU" , "DSUB" , "DSUBU" ,
"TGE" , "TGEU" , "TLT" , "TLTU" , "TEQ" , /* , */ "TNE" , /* , */
"DSLL" , /* , */ "DSRL" , "DSRA" , "DSLL32" , /* , */ "DSRL32" , "DSRA32" ,
// "Regimm"
"BLTZ" , "BGEZ" , "BLTZL" , "BGEZL" , /* , */ /* , */ /* , */ /* , */
"TGEI" , "TGEIU" , "TLTI" , "TLTIU" , "TEQI" , /* , */ "TNEI" , /* , */
"BLTZAL" , "BGEZAL" , "BLTZALL" , "BGEZALL" , /* , */ /* , */ /* , */ /* , */
"MTSAB" , "MTSAH" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */
// "MMI"
"MADD" , "MADDU" , /* , */ /* , */ "PLZCW" , /* , */ /* , */ /* , */
"MMI0" , "MMI2" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */
"MFHI1" , "MTHI1" , "MFLO1" , "MTLO1" , /* , */ /* , */ /* , */ /* , */
"MULT1" , "MULTU1" , "DIV1" , "DIVU1" , /* , */ /* , */ /* , */ /* , */
"MADD1" , "MADDU1" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */
"MMI1" , "MMI3" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */
"PMFHL" , "PMTHL" , /* , */ /* , */ "PSLLH" , /* , */ "PSRLH" , "PSRAH" ,
/* , */ /* , */ /* , */ /* , */ "PSLLW" , /* , */ "PSRLW" , "PSRAW" ,
// "MMI0"
"PADDW" , "PSUBW" , "PCGTW" , "PMAXW" ,
"PADDH" , "PSUBH" , "PCGTH" , "PMAXH" ,
"PADDB" , "PSUBB" , "PCGTB" , /* , */
/* , */ /* , */ /* , */ /* , */
"PADDSW" , "PSUBSW" , "PEXTLW" , "PPACW" ,
"PADDSH" , "PSUBSH" , "PEXTLH" , "PPACH" ,
"PADDSB" , "PSUBSB" , "PEXTLB" , "PPACB" ,
/* , */ /* , */ "PEXT5" , "PPAC5" ,
// "MMI1"
/* , */ "PABSW" , "PCEQW" , "PMINW" ,
"PADSBH" , "PABSH" , "PCEQH" , "PMINH" ,
/* , */ /* , */ "PCEQB" , /* , */
/* , */ /* , */ /* , */ /* , */
"PADDUW" , "PSUBUW" , "PEXTUW" , /* , */
"PADDUH" , "PSUBUH" , "PEXTUH" , /* , */
"PADDUB" , "PSUBUB" , "PEXTUB" , "QFSRV" ,
/* , */ /* , */ /* , */ /* , */
// "MMI2"
"PMADDW" , /* , */ "PSLLVW" , "PSRLVW" ,
"PMSUBW" , /* , */ /* , */ /* , */
"PMFHI" , "PMFLO" , "PINTH" , /* , */
"PMULTW" , "PDIVW" , "PCPYLD" , /* , */
"PMADDH" , "PHMADH" , "PAND" , "PXOR" ,
"PMSUBH" , "PHMSBH" , /* , */ /* , */
/* , */ /* , */ "PEXEH" , "PREVH" ,
"PMULTH" , "PDIVBW" , "PEXEW" , "PROT3W" ,
// "MMI3"
"PMADDUW" , /* , */ /* , */ "PSRAVW" ,
/* , */ /* , */ /* , */ /* , */
"PMTHI" , "PMTLO" , "PINTEH" , /* , */
"PMULTUW" , "PDIVUW" , "PCPYUD" , /* , */
/* , */ /* , */ "POR" , "PNOR" ,
/* , */ /* , */ /* , */ /* , */
/* , */ /* , */ "PEXCH" , "PCPYH" ,
/* , */ /* , */ "PEXCW" , /* , */
// Sentinel, located at index eeOpcode::LAST
"!"
};
//#define eeProfileProg
#ifdef eeProfileProg
#include <utility>
#include <algorithm>
using namespace x86Emitter;
struct eeProfiler {
static const u32 memSpace = 1 << 19;
u64 opStats[static_cast<int>(eeOpcode::LAST)];
u32 memStats[memSpace];
u32 memStatsConst[memSpace];
u64 memStatsSlow;
u64 memStatsFast;
u32 memMask;
void Reset() {
memzero(opStats);
memzero(memStats);
memzero(memStatsConst);
memStatsSlow = 0;
memStatsFast = 0;
memMask = 0xF700FFF0;
pxAssert(eeOpcodeName[static_cast<int>(eeOpcode::LAST)][0] == '!');
}
void EmitOp(eeOpcode opcode) {
int op = static_cast<int>(opcode);
xADD(ptr32[&(((u32*)opStats)[op*2+0])], 1);
xADC(ptr32[&(((u32*)opStats)[op*2+1])], 0);
}
double per(u64 part, u64 total) {
return (double) part / (double) total * 100.0;
}
void Print() {
// Compute opcode stat
u64 total = 0;
std::vector< std::pair<u32, u32> > v;
std::vector< std::pair<u32, u32> > vc;
for(int i = 0; i < static_cast<int>(eeOpcode::LAST); i++) {
total += opStats[i];
v.push_back(std::make_pair(opStats[i], i));
}
std::sort (v.begin(), v.end());
std::reverse(v.begin(), v.end());
DevCon.WriteLn("EE Profiler:");
for(u32 i = 0; i < v.size(); i++) {
u64 count = v[i].first;
double stat = (double)count / (double)total * 100.0;
DevCon.WriteLn("%-8s - [%3.4f%%][count=%u]",
eeOpcodeName[v[i].second], stat, (u32)count);
if (stat < 0.01)
break;
}
//DevCon.WriteLn("Total = 0x%x_%x", (u32)(u64)(total>>32),(u32)total);
// Compute memory stat
total = 0;
u64 reg = 0;
u64 gs = 0;
u64 vu = 0;
// FIXME: MAYBE count the scratch pad
for (size_t i = 0; i < memSpace ; i++)
total += memStats[i];
int ou = 32 * _1kb; // user segment (0x10000000)
int ok = 352 * _1kb; // kernel segment (0xB0000000)
for (int i = 0; i < 4 * _1kb; i++) reg += memStats[ou + 0 * _1kb + i] + memStats[ok + 0 * _1kb + i];
for (int i = 0; i < 4 * _1kb; i++) gs += memStats[ou + 4 * _1kb + i] + memStats[ok + 4 * _1kb + i];
for (int i = 0; i < 4 * _1kb; i++) vu += memStats[ou + 8 * _1kb + i] + memStats[ok + 8 * _1kb + i];
u64 ram = total - reg - gs - vu;
double ram_p = per(ram, total);
double reg_p = per(reg, total);
double gs_p = per(gs , total);
double vu_p = per(vu , total);
// Compute const memory stat
u64 total_const = 0;
u64 reg_const = 0;
for (size_t i = 0; i < memSpace ; i++)
total_const += memStatsConst[i];
for (int i = 0; i < 4 * _1kb; i++) reg_const += memStatsConst[ou + i] + memStatsConst[ok + i];
u64 ram_const = total_const - reg_const; // value is slightly wrong but good enough
double ram_const_p = per(ram_const, ram);
double reg_const_p = per(reg_const, reg);
DevCon.WriteLn("\nEE Memory Profiler:");
DevCon.WriteLn("Total = 0x%08x_%08x", (u32)(u64)(total>>32),(u32)total);
DevCon.WriteLn(" RAM = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(ram>>32),(u32)ram, ram_p, ram_const_p);
DevCon.WriteLn(" REG = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(reg>>32),(u32)reg, reg_p, reg_const_p);
DevCon.WriteLn(" GS = 0x%08x_%08x [%3.4f%%]", (u32)(u64)( gs>>32),(u32) gs, gs_p);
DevCon.WriteLn(" VU = 0x%08x_%08x [%3.4f%%]", (u32)(u64) (vu>>32),(u32) vu, vu_p);
u64 total_ram = memStatsSlow + memStatsFast;
DevCon.WriteLn("\n RAM Fast [%3.4f%%] RAM Slow [%3.4f%%]. Total 0x%08x_%08x [%3.4f%%]",
per(memStatsFast, total_ram), per(memStatsSlow, total_ram), (u32)(u64)(total_ram>>32),(u32)total_ram, per(total_ram, total));
v.clear();
vc.clear();
for (int i = 0; i < 4 * _1kb; i++) {
u32 reg_c = memStatsConst[ou + i] + memStatsConst[ok + i];
u32 reg = memStats[ok + i] + memStats[ou + i] - reg_c;
if (reg)
v.push_back(std::make_pair(reg, i * 16));
if (reg_c)
vc.push_back(std::make_pair(reg_c, i * 16));
}
std::sort (v.begin(), v.end());
std::reverse(v.begin(), v.end());
std::sort (vc.begin(), vc.end());
std::reverse(vc.begin(), vc.end());
DevCon.WriteLn("\nEE Reg Profiler:");
for(u32 i = 0; i < v.size(); i++) {
u64 count = v[i].first;
double stat = (double)count / (double)(reg - reg_const) * 100.0;
DevCon.WriteLn("%04x - [%3.4f%%][count=%u]",
v[i].second, stat, (u32)count);
if (stat < 0.01)
break;
}
DevCon.WriteLn("\nEE Const Reg Profiler:");
for(u32 i = 0; i < vc.size(); i++) {
u64 count = vc[i].first;
double stat = (double)count / (double)reg_const * 100.0;
DevCon.WriteLn("%04x - [%3.4f%%][count=%u]",
vc[i].second, stat, (u32)count);
if (stat < 0.01)
break;
}
}
// Warning dirty ebx
void EmitMem() {
// Compact the 4GB virtual address to a 512KB virtual address
if (x86caps.hasBMI2) {
xPEXT(ebx, ecx, ptr[&memMask]);
xADD(ptr32[(ebx*4) + memStats], 1);
}
}
void EmitConstMem(u32 add) {
if (x86caps.hasBMI2) {
u32 a = _pext_u32(add, memMask);
xADD(ptr32[a + memStats], 1);
xADD(ptr32[a + memStatsConst], 1);
}
}
void EmitSlowMem() {
xADD(ptr32[(u32*)&memStatsSlow], 1);
xADC(ptr32[(u32*)&memStatsSlow + 1], 0);
}
void EmitFastMem() {
xADD(ptr32[(u32*)&memStatsFast], 1);
xADC(ptr32[(u32*)&memStatsFast + 1], 0);
}
};
#else
// Profiler disabled (eeProfileProg isn't defined): every function is an
// empty stub, so the calls from the recompiler don't emit any code.
struct eeProfiler {
	__fi void Reset() {}
	__fi void Print() {}

	// Code emitters
	__fi void EmitOp(eeOpcode op) {}
	__fi void EmitMem() {}
	__fi void EmitConstMem(u32 add) {}
	__fi void EmitSlowMem() {}
	__fi void EmitFastMem() {}
};
#endif
namespace EE {
// Profiler object used by the EE recompiler through EE::Profiler.
// This is a declaration only; the object is defined in a source file.
extern eeProfiler Profiler;
}

View File

@ -62,6 +62,8 @@ void recPLZCW()
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PLZCW);
if( GPR_IS_CONST1(_Rs_) ) {
_eeOnWriteReg(_Rd_, 0);
_deleteEEreg(_Rd_, 0);
@ -154,6 +156,8 @@ void recPMFHL()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PMFHL);
int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READLO|XMMINFO_READHI );
int t0reg;
@ -221,6 +225,8 @@ void recPMTHL()
{
if ( _Sa_ != 0 ) return;
EE::Profiler.EmitOp(eeOpcode::PMTHL);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI );
if ( x86caps.hasStreamingSIMD4Extensions ) {
@ -284,6 +290,8 @@ void recPSRLH()
{
if ( !_Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSRLH);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
if( (_Sa_&0xf) == 0 ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -300,6 +308,8 @@ void recPSRLW()
{
if( !_Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSRLW);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
if( _Sa_ == 0 ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -316,6 +326,8 @@ void recPSRAH()
{
if ( !_Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSRAH);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
if( (_Sa_&0xf) == 0 ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -332,6 +344,8 @@ void recPSRAW()
{
if ( !_Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSRAW);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
if( _Sa_ == 0 ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -348,6 +362,8 @@ void recPSLLH()
{
if ( !_Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSLLH);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
if( (_Sa_&0xf) == 0 ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -364,6 +380,8 @@ void recPSLLW()
{
if ( !_Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSLLW);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
if( _Sa_ == 0 ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -434,6 +452,8 @@ void recPMAXW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PMAXW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if ( x86caps.hasStreamingSIMD4Extensions ) {
if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -485,6 +505,8 @@ void recPPACW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PPACW);
int info = eeRecompileCodeXMM( ((_Rs_!=0)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
@ -517,6 +539,8 @@ void recPPACH()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PPACH);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
@ -545,6 +569,8 @@ void recPPACB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PPACB);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
if( _hasFreeXMMreg() ) {
@ -585,6 +611,8 @@ void recPEXT5()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PEXT5);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -621,6 +649,8 @@ void recPPAC5()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PPAC5);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -659,6 +689,8 @@ void recPMAXH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PMAXH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -674,6 +706,8 @@ void recPCGTB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCGTB);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D != EEREC_T ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -694,6 +728,8 @@ void recPCGTH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCGTH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D != EEREC_T ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -715,6 +751,8 @@ void recPCGTW()
//TODO:optimize RS | RT== 0
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCGTW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D != EEREC_T ) {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -735,6 +773,8 @@ void recPADDSB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDSB);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -750,6 +790,8 @@ void recPADDSH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDSH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -766,6 +808,8 @@ void recPADDSW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDSW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -816,6 +860,8 @@ void recPSUBSB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBSB);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) {
@ -837,6 +883,8 @@ void recPSUBSH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBSH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) {
@ -859,6 +907,8 @@ void recPSUBSW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBSW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -914,6 +964,8 @@ void recPADDB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDB);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -929,6 +981,8 @@ void recPADDH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDH);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
if( _Rt_ == 0 ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
@ -953,6 +1007,8 @@ void recPADDW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDW);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
if( _Rt_ == 0 ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
@ -977,6 +1033,8 @@ void recPSUBB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBB);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) {
@ -998,6 +1056,8 @@ void recPSUBH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) {
@ -1019,6 +1079,8 @@ void recPSUBW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) {
@ -1040,6 +1102,8 @@ void recPEXTLW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PEXTLW);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -1066,6 +1130,8 @@ void recPEXTLB()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PEXTLB);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -1092,6 +1158,8 @@ void recPEXTLH()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PEXTLH);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -1153,6 +1221,8 @@ void recPABSW() //needs clamping
{
if( !_Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PABSW);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
@ -1181,6 +1251,8 @@ void recPABSH()
{
if( !_Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PABSH);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
@ -1208,6 +1280,8 @@ void recPMINW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PMINW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if ( x86caps.hasStreamingSIMD4Extensions ) {
if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -1259,6 +1333,8 @@ void recPADSBH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADSBH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
int t0reg;
@ -1298,6 +1374,8 @@ void recPADDUW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDUW);
int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rt_ == 0 ) {
@ -1344,6 +1422,8 @@ void recPSUBUB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBUB);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) {
@ -1365,6 +1445,8 @@ void recPSUBUH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBUH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) {
@ -1386,6 +1468,8 @@ void recPSUBUW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSUBUW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -1432,6 +1516,8 @@ void recPEXTUH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PEXTUH);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -1461,6 +1547,8 @@ void recQFSRV()
if ( !_Rd_ ) return;
//Console.WriteLn("recQFSRV()");
EE::Profiler.EmitOp(eeOpcode::QFSRV);
if (_Rs_ == _Rt_ + 1) {
_flushEEreg(_Rs_);
_flushEEreg(_Rt_);
@ -1486,6 +1574,8 @@ void recPEXTUB()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PEXTUB);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
@ -1514,6 +1604,8 @@ void recPEXTUW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PEXTUW);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -1541,6 +1633,8 @@ void recPMINH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PMINH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -1556,6 +1650,8 @@ void recPCEQB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCEQB);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -1571,6 +1667,8 @@ void recPCEQH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCEQH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -1586,6 +1684,8 @@ void recPCEQW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCEQW);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -1601,6 +1701,8 @@ void recPADDUB()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDUB);
int info = eeRecompileCodeXMM( XMMINFO_READS|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rt_ ) {
if( EEREC_D == EEREC_S ) xPADD.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -1619,6 +1721,8 @@ void recPADDUH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PADDUH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if( EEREC_D == EEREC_T ) xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -1665,6 +1769,8 @@ REC_FUNC_DEL( PROT3W, _Rd_ );
////////////////////////////////////////////////////
void recPMADDW()
{
EE::Profiler.EmitOp(eeOpcode::PMADDW);
if( !x86caps.hasStreamingSIMD4Extensions ) {
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMADDW);
@ -1714,6 +1820,8 @@ void recPSLLVW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSLLVW);
int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
if( _Rt_ == 0 ) {
@ -1780,6 +1888,8 @@ void recPSRLVW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSRLVW);
int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
if( _Rt_ == 0 ) {
@ -1844,6 +1954,8 @@ void recPSRLVW()
////////////////////////////////////////////////////
void recPMSUBW()
{
EE::Profiler.EmitOp(eeOpcode::PMSUBW);
if( !x86caps.hasStreamingSIMD4Extensions ) {
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMSUBW);
@ -1896,6 +2008,8 @@ void recPMSUBW()
////////////////////////////////////////////////////
void recPMULTW()
{
EE::Profiler.EmitOp(eeOpcode::PMULTW);
if( !x86caps.hasStreamingSIMD4Extensions ) {
_deleteEEreg(_Rd_, 0);
recCall(Interp::PMULTW);
@ -1938,6 +2052,8 @@ void recPMULTW()
////////////////////////////////////////////////////
// Recompiler entry for PDIVW. No native code path is generated here.
void recPDIVW()
{
// Record the opcode with the EE profiler before anything else is emitted.
EE::Profiler.EmitOp(eeOpcode::PDIVW);
// NOTE(review): presumably drops any cached copy of Rd before the call below - confirm against _deleteEEreg.
_deleteEEreg(_Rd_, 0);
// Hand the instruction to Interp::PDIVW through recCall.
recCall(Interp::PDIVW);
}
@ -1945,6 +2061,8 @@ void recPDIVW()
////////////////////////////////////////////////////
// Recompiler entry for PDIVBW. No native code path is generated here.
void recPDIVBW()
{
// Record the opcode with the EE profiler before anything else is emitted.
EE::Profiler.EmitOp(eeOpcode::PDIVBW);
// NOTE(review): presumably drops any cached copy of Rd before the call below - confirm against _deleteEEreg.
_deleteEEreg(_Rd_, 0);
// Hand the instruction to Interp::PDIVBW through recCall.
recCall(Interp::PDIVBW); //--
}
@ -1955,6 +2073,8 @@ void recPDIVBW()
//contains the upper multiplication result (before the addition with the lower multiplication result)
void recPHMADH()
{
EE::Profiler.EmitOp(eeOpcode::PHMADH);
int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
@ -1995,6 +2115,8 @@ void recPHMADH()
void recPMSUBH()
{
EE::Profiler.EmitOp(eeOpcode::PMSUBH);
int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2057,6 +2179,8 @@ void recPMSUBH()
//it contains the NOT of the upper multiplication result (before the subtraction of the lower multiplication result)
void recPHMSBH()
{
EE::Profiler.EmitOp(eeOpcode::PHMSBH);
int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2092,6 +2216,8 @@ void recPEXEH()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PEXEH);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xc6);
xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0xc6);
@ -2103,6 +2229,7 @@ void recPREVH()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PREVH);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x1B);
@ -2115,6 +2242,8 @@ void recPINTH()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PINTH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
if( EEREC_D == EEREC_S ) {
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2134,6 +2263,8 @@ void recPEXEW()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PEXEW);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xc6);
_clearNeededXMMregs();
@ -2143,6 +2274,8 @@ void recPROT3W()
{
if (!_Rd_) return;
EE::Profiler.EmitOp(eeOpcode::PROT3W);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xc9);
_clearNeededXMMregs();
@ -2150,6 +2283,8 @@ void recPROT3W()
void recPMULTH()
{
EE::Profiler.EmitOp(eeOpcode::PMULTH);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2187,6 +2322,8 @@ void recPMFHI()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PMFHI);
int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READHI );
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_HI));
_clearNeededXMMregs();
@ -2197,6 +2334,8 @@ void recPMFLO()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PMFLO);
int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READLO );
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO));
_clearNeededXMMregs();
@ -2207,6 +2346,8 @@ void recPAND()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PAND);
int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT );
if( EEREC_D == EEREC_T ) {
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -2226,6 +2367,8 @@ void recPXOR()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PXOR);
int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT );
if( EEREC_D == EEREC_T ) {
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
@ -2245,6 +2388,8 @@ void recPCPYLD()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCPYLD);
int info = eeRecompileCodeXMM( XMMINFO_WRITED|(( _Rs_== 0) ? 0:XMMINFO_READS)|XMMINFO_READT );
if( _Rs_ == 0 ) {
xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -2266,6 +2411,8 @@ void recPCPYLD()
void recPMADDH()
{
EE::Profiler.EmitOp(eeOpcode::PMADDH);
int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI );
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -2353,6 +2500,8 @@ void recPSRAVW()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PSRAVW);
int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
if( _Rt_ == 0 ) {
@ -2423,6 +2572,8 @@ void recPINTEH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PINTEH);
int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED );
int t0reg = -1;
@ -2472,6 +2623,8 @@ void recPINTEH()
////////////////////////////////////////////////////
void recPMULTUW()
{
EE::Profiler.EmitOp(eeOpcode::PMULTUW);
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI );
if( !_Rs_ || !_Rt_ ) {
if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
@ -2518,6 +2671,8 @@ void recPMULTUW()
////////////////////////////////////////////////////
void recPMADDUW()
{
EE::Profiler.EmitOp(eeOpcode::PMADDUW);
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI );
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
@ -2569,6 +2724,8 @@ void recPMADDUW()
////////////////////////////////////////////////////
// Recompiler entry for PDIVUW. No native code path is generated here.
void recPDIVUW()
{
// Record the opcode with the EE profiler before anything else is emitted.
EE::Profiler.EmitOp(eeOpcode::PDIVUW);
// NOTE(review): presumably drops any cached copy of Rd before the call below - confirm against _deleteEEreg.
_deleteEEreg(_Rd_, 0);
// Hand the instruction to Interp::PDIVUW through recCall.
recCall(Interp::PDIVUW);
}
@ -2576,6 +2733,8 @@ void recPDIVUW()
////////////////////////////////////////////////////
void recPEXCW()
{
EE::Profiler.EmitOp(eeOpcode::PEXCW);
if (!_Rd_) return;
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
@ -2586,6 +2745,8 @@ void recPEXCW()
////////////////////////////////////////////////////
void recPEXCH()
{
EE::Profiler.EmitOp(eeOpcode::PEXCH);
if (!_Rd_) return;
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
@ -2599,6 +2760,8 @@ void recPNOR()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PNOR);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
@ -2650,6 +2813,8 @@ void recPNOR()
////////////////////////////////////////////////////
void recPMTHI()
{
EE::Profiler.EmitOp(eeOpcode::PMTHI);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_WRITEHI );
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S));
_clearNeededXMMregs();
@ -2658,6 +2823,8 @@ void recPMTHI()
////////////////////////////////////////////////////
void recPMTLO()
{
EE::Profiler.EmitOp(eeOpcode::PMTLO);
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_WRITELO );
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S));
_clearNeededXMMregs();
@ -2668,6 +2835,8 @@ void recPCPYUD()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCPYUD);
int info = eeRecompileCodeXMM( XMMINFO_READS|(( _Rt_ == 0) ? 0:XMMINFO_READT)|XMMINFO_WRITED );
if( _Rt_ == 0 ) {
@ -2705,6 +2874,8 @@ void recPOR()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::POR);
int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED );
if( _Rs_ == 0 ) {
@ -2738,6 +2909,8 @@ void recPCPYH()
{
if ( ! _Rd_ ) return;
EE::Profiler.EmitOp(eeOpcode::PCPYH);
int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED );
xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0);
xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0);

View File

@ -36,6 +36,7 @@
#include "NakedAsm.h"
#include "AppConfig.h"
#include "Utilities/Perf.h"
using namespace x86Emitter;
@ -361,6 +362,8 @@ static void _DynGen_Dispatchers()
HostSys::MemProtectStatic( iopRecDispatchers, PageAccess_ExecOnly() );
recBlocks.SetJITCompile( iopJITCompile );
Perf::any.map((uptr)&iopRecDispatchers, 4096, "IOP Dispatcher");
}
////////////////////////////////////////////////////
@ -812,6 +815,8 @@ void recResetIOP()
{
DevCon.WriteLn( "iR3000A Recompiler reset." );
Perf::iop.reset();
recAlloc();
recMem->Reset();
@ -868,6 +873,9 @@ static void recShutdown()
safe_free( s_pInstCache );
s_nInstCacheSize = 0;
// FIXME Warning thread unsafe
Perf::dump();
}
static void iopClearRecLUT(BASEBLOCK* base, int count)
@ -1411,6 +1419,8 @@ StartRecomp:
pxAssert(xGetPtr() - recPtr < _64kb);
s_pCurBlockEx->x86size = xGetPtr() - recPtr;
Perf::iop.map(s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc);
recPtr = xGetPtr();
pxAssert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg );

View File

@ -21,6 +21,7 @@
#include "R5900.h"
#include "VU.h"
#include "iCore.h"
#include "R5900_Profiler.h"
extern u32 maxrecmem;
extern u32 pc; // recompiler pc (also used by the SuperVU! .. why? (air))
@ -133,12 +134,14 @@ typedef void (*R5900FNPTR_INFO)(int info);
// Defines rec<fn>(): records opcode <fn> with the EE profiler, then passes the
// four emitters rec<fn>_const, rec<fn>_consts, rec<fn>_constt and rec<fn>_
// together with xmminfo to eeRecompileCode0.
// <fn> must also be a member of eeOpcode because it is pasted into eeOpcode::fn.
#define EERECOMPILE_CODE0(fn, xmminfo) \
void rec##fn(void) \
{ \
EE::Profiler.EmitOp(eeOpcode::fn); \
eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \
}
// Defines rec<fn>(): records opcode <fn> with the EE profiler, then calls the
// helper named by codename with the two emitters rec<fn>_const and rec<fn>_.
// <fn> must also be a member of eeOpcode because it is pasted into eeOpcode::fn.
#define EERECOMPILE_CODEX(codename, fn) \
void rec##fn(void) \
{ \
EE::Profiler.EmitOp(eeOpcode::fn); \
codename(rec##fn##_const, rec##fn##_); \
}

View File

@ -41,6 +41,7 @@
#include "Utilities/MemsetFast.inl"
#include "Utilities/Perf.h"
using namespace x86Emitter;
@ -63,6 +64,7 @@ __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0};
u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException;
eeProfiler EE::Profiler;
////////////////////////////////////////////////////////////////
// Static Private Variables - R5900 Dynarec
@ -573,6 +575,8 @@ static void _DynGen_Dispatchers()
HostSys::MemProtectStatic( eeRecDispatchers, PageAccess_ExecOnly() );
recBlocks.SetJITCompile( JITCompile );
Perf::any.map((uptr)&eeRecDispatchers, 4096, "EE Dispatcher");
}
@ -698,6 +702,10 @@ static bool eeCpuExecuting = false;
////////////////////////////////////////////////////
static void recResetRaw()
{
Perf::ee.reset();
EE::Profiler.Reset();
recAlloc();
if( AtomicExchange( eeRecIsReset, true ) ) return;
@ -741,6 +749,9 @@ static void recShutdown()
safe_aligned_free( recConstBuf );
safe_free( s_pInstCache );
s_nInstCacheSize = 0;
// FIXME Warning thread unsafe
Perf::dump();
}
static void recResetEE()
@ -837,12 +848,19 @@ static void recExecute()
if(m_cpuException) m_cpuException->Rethrow();
if(m_Exception) m_Exception->Rethrow();
// FIXME Warning thread unsafe
Perf::dump();
#endif
EE::Profiler.Print();
}
////////////////////////////////////////////////////
void R5900::Dynarec::OpcodeImpl::recSYSCALL()
{
EE::Profiler.EmitOp(eeOpcode::SYSCALL);
recCall(R5900::Interpreter::OpcodeImpl::SYSCALL);
xCMP(ptr32[&cpuRegs.pc], pc);
@ -858,6 +876,8 @@ void R5900::Dynarec::OpcodeImpl::recSYSCALL()
////////////////////////////////////////////////////
void R5900::Dynarec::OpcodeImpl::recBREAK()
{
EE::Profiler.EmitOp(eeOpcode::BREAK);
recCall(R5900::Interpreter::OpcodeImpl::BREAK);
xCMP(ptr32[&cpuRegs.pc], pc);
@ -2183,6 +2203,14 @@ StartRecomp:
pxAssert(xGetPtr() - recPtr < _64kb);
s_pCurBlockEx->x86size = xGetPtr() - recPtr;
#if 0
// Example: Dump both x86/EE code
if (startpc == 0x456630) {
iDumpBlock(s_pCurBlockEx->startpc, s_pCurBlockEx->size*4, s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size);
}
#endif
Perf::ee.map(s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc);
recPtr = xGetPtr();
pxAssert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg );

View File

@ -403,6 +403,8 @@ EERECOMPILE_CODE0(BNEL, XMMINFO_READS|XMMINFO_READT);
////////////////////////////////////////////////////
void recBLTZAL()
{
EE::Profiler.EmitOp(eeOpcode::BLTZAL);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeOnWriteReg(31, 0);
@ -442,6 +444,8 @@ void recBLTZAL()
////////////////////////////////////////////////////
void recBGEZAL()
{
EE::Profiler.EmitOp(eeOpcode::BGEZAL);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeOnWriteReg(31, 0);
@ -481,6 +485,8 @@ void recBGEZAL()
////////////////////////////////////////////////////
void recBLTZALL()
{
EE::Profiler.EmitOp(eeOpcode::BLTZALL);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeOnWriteReg(31, 0);
@ -515,6 +521,8 @@ void recBLTZALL()
////////////////////////////////////////////////////
void recBGEZALL()
{
EE::Profiler.EmitOp(eeOpcode::BGEZALL);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeOnWriteReg(31, 0);
@ -550,6 +558,8 @@ void recBGEZALL()
//// BLEZ
void recBLEZ()
{
EE::Profiler.EmitOp(eeOpcode::BLEZ);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeFlushAllUnused();
@ -596,6 +606,8 @@ void recBLEZ()
//// BGTZ
void recBGTZ()
{
EE::Profiler.EmitOp(eeOpcode::BGTZ);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeFlushAllUnused();
@ -642,6 +654,8 @@ void recBGTZ()
////////////////////////////////////////////////////
void recBLTZ()
{
EE::Profiler.EmitOp(eeOpcode::BLTZ);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeFlushAllUnused();
@ -675,6 +689,8 @@ void recBLTZ()
////////////////////////////////////////////////////
void recBGEZ()
{
EE::Profiler.EmitOp(eeOpcode::BGEZ);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeFlushAllUnused();
@ -708,6 +724,8 @@ void recBGEZ()
////////////////////////////////////////////////////
void recBLTZL()
{
EE::Profiler.EmitOp(eeOpcode::BLTZL);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeFlushAllUnused();
@ -738,6 +756,8 @@ void recBLTZL()
////////////////////////////////////////////////////
void recBGEZL()
{
EE::Profiler.EmitOp(eeOpcode::BGEZL);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeFlushAllUnused();
@ -775,6 +795,8 @@ void recBGEZL()
////////////////////////////////////////////////////
void recBLEZL()
{
EE::Profiler.EmitOp(eeOpcode::BLEZL);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeFlushAllUnused();
@ -819,6 +841,8 @@ void recBLEZL()
////////////////////////////////////////////////////
void recBGTZL()
{
EE::Profiler.EmitOp(eeOpcode::BGTZL);
u32 branchTo = ((s32)_Imm_ * 4) + pc;
_eeFlushAllUnused();

View File

@ -47,6 +47,8 @@ REC_SYS_DEL(JALR, _Rd_);
////////////////////////////////////////////////////
void recJ()
{
EE::Profiler.EmitOp(eeOpcode::J);
// SET_FPUSTATE;
u32 newpc = (_Target_ << 2) + ( pc & 0xf0000000 );
recompileNextInstruction(1);
@ -59,6 +61,8 @@ void recJ()
////////////////////////////////////////////////////
void recJAL()
{
EE::Profiler.EmitOp(eeOpcode::JAL);
u32 newpc = (_Target_ << 2) + ( pc & 0xf0000000 );
_deleteEEreg(31, 0);
if(EE_CONST_PROP)
@ -88,12 +92,16 @@ void recJAL()
////////////////////////////////////////////////////
// Recompiler entry for JR.
void recJR()
{
// Record the opcode with the EE profiler before the branch is set up.
EE::Profiler.EmitOp(eeOpcode::JR);
// The branch itself is delegated to SetBranchReg with the Rs register index.
SetBranchReg( _Rs_);
}
////////////////////////////////////////////////////
void recJALR()
{
EE::Profiler.EmitOp(eeOpcode::JALR);
int newpc = pc + 4;
_allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
_eeMoveGPRtoR(esi, _Rs_);

View File

@ -238,20 +238,20 @@ void recStore(u32 bits)
//////////////////////////////////////////////////////////////////////////////////////////
//
void recLB() { recLoad32(8,true); }
void recLBU() { recLoad32(8,false); }
void recLH() { recLoad32(16,true); }
void recLHU() { recLoad32(16,false); }
void recLW() { recLoad32(32,true); }
void recLWU() { recLoad32(32,false); }
void recLD() { recLoad64(64,false); }
void recLQ() { recLoad64(128,false); }
// Load wrappers: each one emits the load through recLoad32/recLoad64 with the
// access width in bits, then records its opcode with the EE profiler. Note that
// the profiler hook is emitted after the load code here.
// NOTE(review): the second argument presumably selects sign extension
// (true for LB/LH/LW, false for the U variants and for LD/LQ) - confirm.
void recLB() { recLoad32(8,true); EE::Profiler.EmitOp(eeOpcode::LB);}
void recLBU() { recLoad32(8,false); EE::Profiler.EmitOp(eeOpcode::LBU);}
void recLH() { recLoad32(16,true); EE::Profiler.EmitOp(eeOpcode::LH);}
void recLHU() { recLoad32(16,false); EE::Profiler.EmitOp(eeOpcode::LHU);}
void recLW() { recLoad32(32,true); EE::Profiler.EmitOp(eeOpcode::LW);}
void recLWU() { recLoad32(32,false); EE::Profiler.EmitOp(eeOpcode::LWU);}
void recLD() { recLoad64(64,false); EE::Profiler.EmitOp(eeOpcode::LD);}
void recLQ() { recLoad64(128,false); EE::Profiler.EmitOp(eeOpcode::LQ);}
void recSB() { recStore(8); }
void recSH() { recStore(16); }
void recSW() { recStore(32); }
void recSQ() { recStore(128); }
void recSD() { recStore(64); }
// Store wrappers: each one emits the store through recStore with the access
// width in bits, then records its opcode with the EE profiler. The profiler
// hook is emitted after the store code here.
void recSB() { recStore(8); EE::Profiler.EmitOp(eeOpcode::SB);}
void recSH() { recStore(16); EE::Profiler.EmitOp(eeOpcode::SH);}
void recSW() { recStore(32); EE::Profiler.EmitOp(eeOpcode::SW);}
void recSQ() { recStore(128); EE::Profiler.EmitOp(eeOpcode::SQ);}
void recSD() { recStore(64); EE::Profiler.EmitOp(eeOpcode::SD);}
////////////////////////////////////////////////////
@ -298,6 +298,8 @@ void recLWL()
recCall(LWL);
#endif
EE::Profiler.EmitOp(eeOpcode::LWL);
}
////////////////////////////////////////////////////
@ -347,6 +349,8 @@ void recLWR()
recCall(LWR);
#endif
EE::Profiler.EmitOp(eeOpcode::LWR);
}
////////////////////////////////////////////////////
@ -395,6 +399,8 @@ void recSWL()
_deleteEEreg(_Rt_, 1);
recCall(SWL);
#endif
EE::Profiler.EmitOp(eeOpcode::SWL);
}
////////////////////////////////////////////////////
@ -443,6 +449,8 @@ void recSWR()
_deleteEEreg(_Rt_, 1);
recCall(SWR);
#endif
EE::Profiler.EmitOp(eeOpcode::SWR);
}
////////////////////////////////////////////////////
@ -452,6 +460,8 @@ void recLDL()
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
recCall(LDL);
EE::Profiler.EmitOp(eeOpcode::LDL);
}
////////////////////////////////////////////////////
@ -461,6 +471,8 @@ void recLDR()
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
recCall(LDR);
EE::Profiler.EmitOp(eeOpcode::LDR);
}
////////////////////////////////////////////////////
@ -471,6 +483,8 @@ void recSDL()
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
recCall(SDL);
EE::Profiler.EmitOp(eeOpcode::SDL);
}
////////////////////////////////////////////////////
@ -480,6 +494,8 @@ void recSDR()
_deleteEEreg(_Rs_, 1);
_deleteEEreg(_Rt_, 1);
recCall(SDR);
EE::Profiler.EmitOp(eeOpcode::SDR);
}
//////////////////////////////////////////////////////////////////////////////////////////
@ -511,6 +527,8 @@ void recLWC1()
}
xMOV(ptr32[&fpuRegs.fpr[_Rt_].UL], eax);
EE::Profiler.EmitOp(eeOpcode::LWC1);
}
////////////////////////////////////////////////////
@ -536,6 +554,8 @@ void recSWC1()
vtlb_DynGenWrite(32);
}
EE::Profiler.EmitOp(eeOpcode::SWC1);
}
////////////////////////////////////////////////////
@ -576,6 +596,8 @@ void recLQC2()
vtlb_DynGenRead64(128);
}
EE::Profiler.EmitOp(eeOpcode::LQC2);
}
////////////////////////////////////////////////////
@ -601,6 +623,8 @@ void recSQC2()
vtlb_DynGenWrite(128);
}
EE::Profiler.EmitOp(eeOpcode::SQC2);
}
#endif

View File

@ -84,7 +84,9 @@ void recLUI()
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
}
}
EE::Profiler.EmitOp(eeOpcode::LUI);
}
////////////////////////////////////////////////////
@ -285,21 +287,25 @@ void recMTHILO(int hi)
// Recompiler entry for MFHI: forwards to the shared recMFHILO helper and then
// calls EE::Profiler.EmitOp for this opcode.
void recMFHI()
{
	// 1 presumably selects HI (recMFLO passes 0) -- confirm against recMFHILO.
	const int hiLoSelect = 1;
	recMFHILO(hiLoSelect);

	EE::Profiler.EmitOp(eeOpcode::MFHI);
}
// Recompiler entry for MFLO: forwards to the shared recMFHILO helper and then
// calls EE::Profiler.EmitOp for this opcode.
void recMFLO()
{
	// 0 presumably selects LO (recMFHI passes 1) -- confirm against recMFHILO.
	const int hiLoSelect = 0;
	recMFHILO(hiLoSelect);

	EE::Profiler.EmitOp(eeOpcode::MFLO);
}
// Recompiler entry for MTHI: forwards to the shared recMTHILO helper and then
// calls EE::Profiler.EmitOp for this opcode.
void recMTHI()
{
	// Non-zero `hi` argument; recMTLO passes 0 to the same helper.
	const int hiLoSelect = 1;
	recMTHILO(hiLoSelect);

	EE::Profiler.EmitOp(eeOpcode::MTHI);
}
// Recompiler entry for MTLO: forwards to the shared recMTHILO helper and then
// calls EE::Profiler.EmitOp for this opcode.
void recMTLO()
{
	// Zero `hi` argument; recMTHI passes 1 to the same helper.
	const int hiLoSelect = 0;
	recMTHILO(hiLoSelect);

	EE::Profiler.EmitOp(eeOpcode::MTLO);
}
////////////////////////////////////////////////////
@ -407,21 +413,25 @@ void recMTHILO1(int hi)
// Recompiler entry for MFHI1: forwards to the shared recMFHILO1 helper and
// then calls EE::Profiler.EmitOp for this opcode.
void recMFHI1()
{
	// 1 presumably selects HI1 (recMFLO1 passes 0) -- confirm against recMFHILO1.
	const int hiLoSelect = 1;
	recMFHILO1(hiLoSelect);

	EE::Profiler.EmitOp(eeOpcode::MFHI1);
}
// Recompiler entry for MFLO1: forwards to the shared recMFHILO1 helper and
// then calls EE::Profiler.EmitOp for this opcode.
void recMFLO1()
{
	// 0 presumably selects LO1 (recMFHI1 passes 1) -- confirm against recMFHILO1.
	const int hiLoSelect = 0;
	recMFHILO1(hiLoSelect);

	EE::Profiler.EmitOp(eeOpcode::MFLO1);
}
// Recompiler entry for MTHI1: forwards to the shared recMTHILO1 helper and
// then calls EE::Profiler.EmitOp for this opcode.
void recMTHI1()
{
	// Non-zero `hi` argument; recMTLO1 passes 0 to the same helper.
	const int hiLoSelect = 1;
	recMTHILO1(hiLoSelect);

	EE::Profiler.EmitOp(eeOpcode::MTHI1);
}
// Recompiler entry for MTLO1: forwards to the shared recMTHILO1 helper and
// then calls EE::Profiler.EmitOp for this opcode.
void recMTLO1()
{
	// Zero `hi` argument; recMTHI1 passes 1 to the same helper.
	const int hiLoSelect = 0;
	recMTHILO1(hiLoSelect);

	EE::Profiler.EmitOp(eeOpcode::MTLO1);
}
//// MOVZ

View File

@ -161,6 +161,9 @@ namespace vtlb_private
//
static uptr* DynGen_PrepRegs()
{
// Warning: this dirties ebx (in case someone gets the very bad idea to move this code)
EE::Profiler.EmitMem();
xMOV( eax, ecx );
xSHR( eax, VTLB_PAGE_BITS );
xMOV( eax, ptr[(eax*4) + vtlbdata.vmap] );
@ -370,6 +373,8 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
// recompiler if the TLB is changed.
void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
{
EE::Profiler.EmitConstMem(addr_const);
u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
s32 ppf = addr_const + vmv_ptr;
if( ppf >= 0 )
@ -416,6 +421,8 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
//
void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const )
{
EE::Profiler.EmitConstMem(addr_const);
u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
s32 ppf = addr_const + vmv_ptr;
if( ppf >= 0 )
@ -506,6 +513,8 @@ void vtlb_DynGenWrite(u32 sz)
// recompiler if the TLB is changed.
void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
{
EE::Profiler.EmitConstMem(addr_const);
u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS];
s32 ppf = addr_const + vmv_ptr;
if( ppf >= 0 )

View File

@ -18,6 +18,8 @@
#include "PrecompiledHeader.h"
#include "microVU.h"
#include "Utilities/Perf.h"
//------------------------------------------------------------------
// Micro VU - Main Functions
//------------------------------------------------------------------
@ -73,6 +75,9 @@ void mVUreset(microVU& mVU, bool resetReserve) {
// Restore reserve to uncommitted state
if (resetReserve) mVU.cache_reserve->Reset();
if (mVU.index) Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU1 Dispatcher");
else Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU0 Dispatcher");
x86SetPtr(mVU.dispCache);
mVUdispatcherA(mVU);