diff --git a/common/build/Utilities/utilities.vcxproj b/common/build/Utilities/utilities.vcxproj
index 1ee3d0194f..c52ff622bd 100644
--- a/common/build/Utilities/utilities.vcxproj
+++ b/common/build/Utilities/utilities.vcxproj
@@ -117,6 +117,7 @@
+ Create Create
diff --git a/common/build/Utilities/utilities.vcxproj.filters b/common/build/Utilities/utilities.vcxproj.filters
index 35d467a66d..e28bf1b2fb 100644
--- a/common/build/Utilities/utilities.vcxproj.filters
+++ b/common/build/Utilities/utilities.vcxproj.filters
@@ -41,6 +41,9 @@
 Source Files + + Source Files + Source Files
diff --git a/common/include/Utilities/Perf.h b/common/include/Utilities/Perf.h
new file mode 100644
index 0000000000..f24e897df2
--- /dev/null
+++ b/common/include/Utilities/Perf.h
@@ -0,0 +1,71 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2015 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <cstdio>
+#include <vector>
+
+// Registry of code zones (address, size, symbol) that dump() writes to a map file.
+// Only Linux builds with ProfileWithPerf implement it; other builds get empty stubs.
+namespace Perf {
+
+    // One zone of code: start address, size and symbol name.
+    struct Info
+    {
+        uptr m_x86;
+        u32 m_size;
+        char m_symbol[20];
+        // The idea is to keep static zones that are set only
+        // once.
+        bool m_dynamic;
+
+        // Static zone (m_dynamic is false).
+        Info(uptr x86, u32 size, const char* symbol);
+        // Dynamic zone (m_dynamic is true); pc becomes part of the symbol name.
+        Info(uptr x86, u32 size, const char* symbol, u32 pc);
+        // Writes the zone as one text line to fp.
+        void Print(FILE* fp);
+    };
+
+    // List of zones; prefix is used to name the dynamic ones.
+    class InfoVector
+    {
+        std::vector<Info> m_v;
+        char m_prefix[20];
+
+    public:
+
+        InfoVector(const char* prefix);
+
+        void print(FILE* fp);
+        // Records a static zone named symbol.
+        void map(uptr x86, u32 size, const char* symbol);
+        // Records a dynamic zone named from the prefix and pc.
+        void map(uptr x86, u32 size, u32 pc);
+        // Forgets the dynamic zones and keeps the static ones.
+        void reset();
+
+    };
+
+    // Both write every zone of the four vectors below; dump_and_reset() then calls reset() on each.
+    void dump();
+    void dump_and_reset();
+
+    extern InfoVector any;
+    extern InfoVector ee;
+    extern InfoVector iop;
+    extern InfoVector vu;
+}
diff --git a/common/include/x86emitter/implement/bmi.h b/common/include/x86emitter/implement/bmi.h
new file mode 100644
index 0000000000..52e576b751
--- /dev/null
+++ b/common/include/x86emitter/implement/bmi.h
@@ -0,0 +1,61 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2015 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+// Implement BMI1/BMI2 instruction set
+
+namespace x86Emitter {
+
+    struct xImplBMI_RVM
+    {
+        u8 Prefix; // 0 (none), 0x66, 0xF3 or 0xF2
+        u8 MbPrefix; // 0x0F, 0x38 or 0x3A
+        u8 Opcode;
+
+        // RVM
+        // MULX Unsigned multiply without affecting flags, and arbitrary destination registers
+        // PDEP Parallel bits deposit
+        // PEXT Parallel bits extract
+        // ANDN Logical and not ~x & y
+        void operator()( const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const;
+        void operator()( const xRegisterInt& to, const xRegisterInt& from1, const xIndirectVoid& from2) const;
+
+#if 0
+        // RMV
+        // BEXTR Bit field extract (with register) (src >> start) & ((1 << len)-1)
+        // BZHI Zero high bits starting with specified bit position
+        // SARX Shift arithmetic right without affecting flags
+        // SHRX Shift logical right without affecting flags
+        // SHLX Shift logical left without affecting flags
+        // FIXME: WARNING same as above but V and M are inverted
+        //void operator()( const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const;
+        //void operator()( const xRegisterInt& to, const xIndirectVoid& from1, const xRegisterInt& from2) const;
+
+        // VM
+        // BLSI Extract lowest set isolated bit x & -x
+        // BLSMSK Get mask up to lowest set bit x ^ (x - 1)
+        // BLSR Reset lowest set bit x & (x - 1)
+        void operator()( const xRegisterInt& to, const xRegisterInt& from) const;
+        void operator()( const xRegisterInt& to, const xIndirectVoid& from) const;
+
+        // RMI
+        //RORX Rotate right logical without affecting flags
+        void operator()( const xRegisterInt& to, const xRegisterInt& from, u8 imm) const;
+        void operator()( const xRegisterInt& to, const xIndirectVoid& from, u8 imm) const;
+#endif
+    };
+
+}
diff --git a/common/include/x86emitter/instructions.h b/common/include/x86emitter/instructions.h
index ccc83b3091..8d80012ae9 100644
--- a/common/include/x86emitter/instructions.h
+++ b/common/include/x86emitter/instructions.h
@@ -124,6 +124,10 @@ namespace x86Emitter
         xSETS, xSETNS,
         xSETPE, xSETPO;
 
+    // ------------------------------------------------------------------------
+    // BMI extra instruction requires BMI1/BMI2
+    extern const xImplBMI_RVM xMULX, xPDEP, xPEXT, xANDN_S; // Warning xANDN is already used by SSE
+
     //////////////////////////////////////////////////////////////////////////////////////////
     // Miscellaneous Instructions
     // These are all defined inline or in ix86.cpp.
diff --git a/common/include/x86emitter/internal.h b/common/include/x86emitter/internal.h
index b4122693f7..c8e555ae27 100644
--- a/common/include/x86emitter/internal.h
+++ b/common/include/x86emitter/internal.h
@@ -71,5 +71,73 @@ namespace x86Emitter {
     template< typename T1, typename T2 > __emitinline
     void xOpWrite0F( u16 opcode, const T1& param1, const T2& param2, u8 imm8 ) { xOpWrite0F( 0, opcode, param1, param2, imm8 ); }
 
+    // VEX 2 Bytes Prefix
+    template< typename T1, typename T2, typename T3 > __emitinline
+    void xOpWriteC5( u8 prefix, u8 opcode, const T1& param1, const T2& param2, const T3& param3 )
+    {
+        pxAssert( prefix == 0 || prefix == 0x66 || prefix == 0xF3 || prefix == 0xF2 );
+
+        const xRegisterInt& reg = param1.IsReg() ? param1 : param2;
+
+#ifdef __x86_64__
+        u8 nR = reg.IsExtended() ? 0x00 : 0x80; // bit 7 is cleared for registers 8-15
+#else
+        u8 nR = 0x80;
+#endif
+        u8 L = reg.IsWideSIMD() ? 4 : 0; // bit 2 is set for 32-byte operands
+
+        u8 nv = (~param2.GetId() & 0xF) << 3; // complemented id of param2 in bits 3-6
+
+        u8 p = // bits 0-1 select the prefix
+            prefix == 0xF2 ? 3 :
+            prefix == 0xF3 ? 2 :
+            prefix == 0x66 ? 1 : 0;
+
+        xWrite8( 0xC5 );
+        xWrite8( nR | nv | L | p );
+        xWrite8( opcode );
+        EmitSibMagic( param1, param3 );
+    }
+
+    // VEX 3 Bytes Prefix
+    template< typename T1, typename T2, typename T3 > __emitinline
+    void xOpWriteC4( u8 prefix, u8 mb_prefix, u8 opcode, const T1& param1, const T2& param2, const T3& param3, int w = -1 )
+    {
+        pxAssert( prefix == 0 || prefix == 0x66 || prefix == 0xF3 || prefix == 0xF2 );
+        pxAssert( mb_prefix == 0x0F || mb_prefix == 0x38 || mb_prefix == 0x3A );
+
+        const xRegisterInt& reg = param1.IsReg() ? param1 : param2;
+
+#ifdef __x86_64__
+        u8 nR = reg.IsExtended() ? 0x00 : 0x80;
+        u8 nB = param3.IsExtended() ? 0x00 : 0x20; // NOTE(review): needs param3.IsExtended(); confirm xIndirectVoid provides it
+        u8 nX = 0x40; // likely unused so hardwired to disabled
+#else
+        u8 nR = 0x80;
+        u8 nB = 0x20;
+        u8 nX = 0x40;
+#endif
+        u8 L = reg.IsWideSIMD() ? 4 : 0;
+        u8 W = (w == -1) ? (reg.GetOperandSize() == 8 ? 0x80 : 0) : // autodetect the size
+            0x80 * w; // take directly the W value
+
+        u8 nv = (~param2.GetId() & 0xF) << 3; // complemented id of param2 in bits 3-6
+
+        u8 p = // bits 0-1 of the third byte select the prefix
+            prefix == 0xF2 ? 3 :
+            prefix == 0xF3 ? 2 :
+            prefix == 0x66 ? 1 : 0;
+
+        u8 m = // low bits of the second byte select mb_prefix
+            mb_prefix == 0x3A ? 3 :
+            mb_prefix == 0x38 ? 2 : 1;
+
+        xWrite8( 0xC4 );
+        xWrite8( nR | nX | nB | m );
+        xWrite8( W | nv | L | p );
+        xWrite8( opcode );
+        EmitSibMagic( param1, param3 );
+    }
+
 }
 
diff --git a/common/include/x86emitter/tools.h b/common/include/x86emitter/tools.h
index 0ab33fa76c..53fa79ddcd 100644
--- a/common/include/x86emitter/tools.h
+++ b/common/include/x86emitter/tools.h
@@ -90,6 +90,8 @@ public:
     u32 hasStreamingSIMD4Extensions2 :1;
     u32 hasAVX :1;
     u32 hasAVX2 :1;
+    u32 hasBMI1 :1;
+    u32 hasBMI2 :1;
     u32 hasFMA :1;
 
     // AMD-specific CPU Features
diff --git a/common/include/x86emitter/x86types.h b/common/include/x86emitter/x86types.h
index c836e53c8a..3d00a3fc2a 100644
--- a/common/include/x86emitter/x86types.h
+++ b/common/include/x86emitter/x86types.h
@@ -255,12 +255,16 @@ template< typename T > void xWrite( T val );
 
     bool IsEmpty() const { return Id < 0 ; }
     bool IsInvalid() const { return Id == xRegId_Invalid; }
+    bool IsExtended() const { return Id > 7; } // Register 8-15 need an extra bit to be selected
+    bool IsMem() const { return false; }
+    bool IsReg() const { return true; }
 
     // Returns true if the register is a valid accumulator: Eax, Ax, Al, XMM0.
     bool IsAccumulator() const { return Id == 0; }
 
     // returns true if the register is a valid MMX or XMM register.
bool IsSIMD() const { return GetOperandSize() == 8 || GetOperandSize() == 16; } + bool IsWideSIMD() const { return GetOperandSize() == 32; } bool operator==( const xRegisterBase& src ) const { return (Id == src.Id); } bool operator!=( const xRegisterBase& src ) const { return (Id != src.Id); } @@ -690,6 +694,8 @@ template< typename T > void xWrite( T val ); xIndirectVoid& Add( s32 imm ); bool IsByteSizeDisp() const { return is_s8( Displacement ); } + bool IsMem() const { return true; } + bool IsReg() const { return false; } operator xAddressVoid() { @@ -996,3 +1002,4 @@ template< typename T > void xWrite( T val ); #include "implement/test.h" #include "implement/jmpcall.h" +#include "implement/bmi.h" diff --git a/common/src/Utilities/CMakeLists.txt b/common/src/Utilities/CMakeLists.txt index dea7c7dcc8..0e125e20c1 100644 --- a/common/src/Utilities/CMakeLists.txt +++ b/common/src/Utilities/CMakeLists.txt @@ -61,6 +61,7 @@ set(UtilitiesSources Mutex.cpp PathUtils.cpp PrecompiledHeader.cpp + Perf.cpp pxCheckBox.cpp pxRadioPanel.cpp pxStaticText.cpp diff --git a/common/src/Utilities/Perf.cpp b/common/src/Utilities/Perf.cpp new file mode 100644 index 0000000000..ce6c284ca6 --- /dev/null +++ b/common/src/Utilities/Perf.cpp @@ -0,0 +1,140 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2015 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . 
+ */ + +#include "PrecompiledHeader.h" + +#include "Perf.h" + +//#define ProfileWithPerf +#define MERGE_BLOCK_RESULT + + +namespace Perf +{ + // Warning object aren't thread safe + InfoVector any(""); + InfoVector ee("EE"); + InfoVector iop("IOP"); + InfoVector vu("VU"); + +// Perf is only supported on linux +#if defined(__linux__) && defined(ProfileWithPerf) + + //////////////////////////////////////////////////////////////////////////////// + // Implementation of the Info object + //////////////////////////////////////////////////////////////////////////////// + + Info::Info(uptr x86, u32 size, const char* symbol) : m_x86(x86), m_size(size), m_dynamic(false) + { + strncpy(m_symbol, symbol, sizeof(m_symbol)); + } + + Info::Info(uptr x86, u32 size, const char* symbol, u32 pc) : m_x86(x86), m_size(size), m_dynamic(true) + { + snprintf(m_symbol, sizeof(m_symbol), "%s_0x%08x", symbol, pc); + } + + void Info::Print(FILE* fp) + { + fprintf(fp, "%x %x %s\n", m_x86, m_size, m_symbol); + } + + //////////////////////////////////////////////////////////////////////////////// + // Implementation of the InfoVector object + //////////////////////////////////////////////////////////////////////////////// + + InfoVector::InfoVector(const char* prefix) + { + strncpy(m_prefix, prefix, sizeof(m_prefix)); + } + + void InfoVector::print(FILE* fp) + { + for(auto&& it : m_v) it.Print(fp); + } + + void InfoVector::map(uptr x86, u32 size, const char* symbol) + { + // This function is typically used for dispatcher and recompiler. + // Dispatchers are on a page and must always be kept. 
+ // Recompilers are much bigger (TODO check VIF) and are only + // useful when MERGE_BLOCK_RESULT is defined + +#ifdef MERGE_BLOCK_RESULT + m_v.emplace_back(x86, size, symbol); +#else + if (size < 8 * _1kb) m_v.emplace_back(x86, size, symbol); +#endif + } + + void InfoVector::map(uptr x86, u32 size, u32 pc) + { +#ifndef MERGE_BLOCK_RESULT + m_v.emplace_back(x86, size, m_prefix, pc); +#endif + } + + void InfoVector::reset() + { + auto dynamic = std::remove_if(m_v.begin(), m_v.end(), [](Info i) { return i.m_dynamic; }); + m_v.erase(dynamic, m_v.end()); + } + + //////////////////////////////////////////////////////////////////////////////// + // Global function + //////////////////////////////////////////////////////////////////////////////// + + void dump() + { + char file[256]; + snprintf(file, 250, "/tmp/perf-%d.map", getpid()); + FILE* fp = fopen(file, "w"); + + any.print(fp); + ee.print(fp); + iop.print(fp); + vu.print(fp); + + if (fp) + fclose(fp); + } + + void dump_and_reset() + { + dump(); + + any.reset(); + ee.reset(); + iop.reset(); + vu.reset(); + } + +#else + + //////////////////////////////////////////////////////////////////////////////// + // Dummy implementation + //////////////////////////////////////////////////////////////////////////////// + + InfoVector::InfoVector(const char* prefix) {} + void InfoVector::map(uptr x86, u32 size, const char* symbol) {} + void InfoVector::map(uptr x86, u32 size, u32 pc) {} + void InfoVector::reset() {} + + void dump() {} + void dump_and_reset() {} + +#endif + +} diff --git a/common/src/x86emitter/CMakeLists.txt b/common/src/x86emitter/CMakeLists.txt index 8472947561..5fab97c9b1 100644 --- a/common/src/x86emitter/CMakeLists.txt +++ b/common/src/x86emitter/CMakeLists.txt @@ -42,6 +42,7 @@ endif(CMAKE_BUILD_TYPE STREQUAL Release) # variable with all sources of this library set(x86emitterSources + bmi.cpp cpudetect.cpp fpu.cpp groups.cpp diff --git a/common/src/x86emitter/bmi.cpp b/common/src/x86emitter/bmi.cpp new 
file mode 100644
index 0000000000..cd5ce7db8d
--- /dev/null
+++ b/common/src/x86emitter/bmi.cpp
@@ -0,0 +1,32 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2015 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "PrecompiledHeader.h"
+#include "internal.h"
+#include "tools.h"
+
+namespace x86Emitter {
+
+    const xImplBMI_RVM xMULX = { 0xF2, 0x38, 0xF6 }; // { Prefix, MbPrefix, Opcode }
+    const xImplBMI_RVM xPDEP = { 0xF2, 0x38, 0xF5 };
+    const xImplBMI_RVM xPEXT = { 0xF3, 0x38, 0xF5 };
+    const xImplBMI_RVM xANDN_S = { 0x00, 0x38, 0xF2 };
+
+    void xImplBMI_RVM::operator()( const xRegisterInt& to, const xRegisterInt& from1, const xRegisterInt& from2) const
+    { xOpWriteC4(Prefix, MbPrefix, Opcode, to, from1, from2); } // register form
+    void xImplBMI_RVM::operator()( const xRegisterInt& to, const xRegisterInt& from1, const xIndirectVoid& from2) const
+    { xOpWriteC4(Prefix, MbPrefix, Opcode, to, from1, from2); } // memory form
+
+}
diff --git a/common/src/x86emitter/cpudetect.cpp b/common/src/x86emitter/cpudetect.cpp
index 717ca8972a..2658e9dab9 100644
--- a/common/src/x86emitter/cpudetect.cpp
+++ b/common/src/x86emitter/cpudetect.cpp
@@ -278,6 +278,9 @@ void x86capabilities::Identify()
         }
     }
 
+    hasBMI1 = ( SEFlag >> 3 ) & 1; // NOTE(review): presumably SEFlag holds CPUID leaf 7 EBX - confirm where it is filled
+    hasBMI2 = ( SEFlag >> 8 ) & 1;
+
     // Ones only for AMDs:
     hasMultimediaExtensionsExt = ( EFlags >> 22 ) & 1; //mmx2
     hasAMD64BitArchitecture = ( EFlags >> 29 ) & 1; //64bit cpu
diff --git
a/linux_various/pretty_print_dump.pl b/linux_various/pretty_print_dump.pl
new file mode 100755
index 0000000000..ca24e69911
--- /dev/null
+++ b/linux_various/pretty_print_dump.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+
+# Usage: pretty_print_dump.pl <dump file>
+# Prints the dump with every "ds:0x..." operand that falls inside cpuRegs rewritten as
+# "&<field>_B<byte>". The "Dump register data: 0x..." line gives the base address.
+
+use strict;
+use warnings;
+
+open(my $in, '<', $ARGV[0]) or die "failed to get first param: $!";
+
+my @pp_name = (
+    # GPR
+    "0", "0", "0", "0",
+    "at", "at", "at", "at",
+    "v0", "v0", "v0", "v0",
+    "v1", "v1", "v1", "v1",
+    "a0", "a0", "a0", "a0",
+    "a1", "a1", "a1", "a1",
+    "a2", "a2", "a2", "a2",
+    "a3", "a3", "a3", "a3",
+    "t0", "t0", "t0", "t0",
+    "t1", "t1", "t1", "t1",
+    "t2", "t2", "t2", "t2",
+    "t3", "t3", "t3", "t3",
+    "t4", "t4", "t4", "t4",
+    "t5", "t5", "t5", "t5",
+    "t6", "t6", "t6", "t6",
+    "t7", "t7", "t7", "t7",
+    "s0", "s0", "s0", "s0",
+    "s1", "s1", "s1", "s1",
+    "s2", "s2", "s2", "s2",
+    "s3", "s3", "s3", "s3",
+    "s4", "s4", "s4", "s4",
+    "s5", "s5", "s5", "s5",
+    "s6", "s6", "s6", "s6",
+    "s7", "s7", "s7", "s7",
+    "t8", "t8", "t8", "t8",
+    "t9", "t9", "t9", "t9",
+    "k0", "k0", "k0", "k0",
+    "k1", "k1", "k1", "k1",
+    "gp", "gp", "gp", "gp",
+    "sp", "sp", "sp", "sp",
+    "s8", "s8", "s8", "s8",
+    "ra", "ra", "ra", "ra",
+    "hi", "hi", "hi", "hi",
+    "lo", "lo", "lo", "lo",
+
+    # CP0
+    "Index" , "Random" , "EntryLo0" , "EntryLo1" ,
+    "Context" , "PageMask" , "Wired" , "Reserved0" ,
+    "BadVAddr" , "Count" , "EntryHi" , "Compare" ,
+    "Status" , "Cause" , "EPC" , "PRid" ,
+    "Config" , "LLAddr" , "WatchLO" , "WatchHI" ,
+    "XContext" , "Reserved1" , "Reserved2" , "Debug" ,
+    "DEPC" , "PerfCnt" , "ErrCtl" , "CacheErr" ,
+    "TagLo" , "TagHi" , "ErrorEPC" , "DESAVE" ,
+
+    "sa",
+    "IsDelaySlot",
+    "pc",
+    "code",
+    "PERF", "PERF", "PERF", "PERF",
+
+    "eCycle0" , "eCycle1" , "eCycle2" , "eCycle3" , "eCycle4" , "eCycle5" , "eCycle6" , "eCycle7" ,
+    "eCycle8" , "eCycle9" , "eCycle10" , "eCycle11" , "eCycle12" , "eCycle13" , "eCycle14" , "eCycle15" ,
+    "eCycle16" , "eCycle17" , "eCycle18" , "eCycle19" , "eCycle20" , "eCycle21" , "eCycle22" , "eCycle23" ,
+    "eCycle24" , "eCycle25" , "eCycle26" , "eCycle27" , "eCycle28" , "eCycle29" , "eCycle30" , "eCycle31" ,
+
+    "sCycle0" , "sCycle1" , "sCycle2" , "sCycle3" , "sCycle4" , "sCycle5" , "sCycle6" , "sCycle7" ,
+    "sCycle8" , "sCycle9" , "sCycle10" , "sCycle11" , "sCycle12" , "sCycle13" , "sCycle14" , "sCycle15" ,
+    "sCycle16" , "sCycle17" , "sCycle18" , "sCycle19" , "sCycle20" , "sCycle21" , "sCycle22" , "sCycle23" ,
+    "sCycle24" , "sCycle25" , "sCycle26" , "sCycle27" , "sCycle28" , "sCycle29" , "sCycle30" , "sCycle31" ,
+
+    "cycle", "interrupt", "branch", "opmode", "tempcycles"
+);
+
+my $line;
+my $cpu;
+while($line = <$in>) {
+    if ($line =~ /Dump register data: (0x[0-9a-f]+)/) {
+        $cpu = hex($1);
+    }
+    if (defined($cpu) && $line =~ /ds:(0x[0-9a-f]+)/) {
+        my $mem = hex($1);
+        my $offset = $mem - $cpu;
+        if ($offset >= 0 && $offset < 980) {
+            # Inside the cpuRegisters structure
+            my $byte = ($offset >= 544) ? $offset % 4 : $offset % 16;
+            my $dw = int($offset / 4);
+
+            # FIXME B doesn't work for duplicated register
+            my $pretty = "&$pp_name[$dw]_B$byte";
+            #print "AH $pretty\n";
+            $line =~ s/ds:0x[0-9a-f]+/$pretty/;
+        }
+    }
+    print $line;
+}
diff --git a/pcsx2/Dump.cpp b/pcsx2/Dump.cpp
index 28aff30d98..2b895bc311 100644
--- a/pcsx2/Dump.cpp
+++ b/pcsx2/Dump.cpp
@@ -197,6 +197,80 @@ void iDumpVU1Registers()
 #endif
 }
 
+// This function is close to iDumpBlock but it doesn't rely too much on
+// global variables. Besides, it doesn't print the flag info.
+//
+// However you could call it anytime to dump any block. And we have both
+// x86 and EE disassembly code.
+//
+// ee_pc / ee_size   : start address and size in bytes of the EE block
+// x86_pc / x86_size : start address and size in bytes of the x86 code
+//
+// NOTE(review): the ':' in the dump file name is not a valid file name character on Windows.
+void iDumpBlock(u32 ee_pc, u32 ee_size, uptr x86_pc, u32 x86_size)
+{
+    u32 ee_end = ee_pc + ee_size;
+
+    // x86_pc holds a pointer value (it is cast to u8* below): widen it so the format matches on 64-bit builds
+    DbgCon.WriteLn( Color_Gray, "dump block %x:%x (x86:0x%llx)", ee_pc, ee_end, (unsigned long long)x86_pc );
+
+    g_Conf->Folders.Logs.Mkdir();
+    wxString dump_filename = Path::Combine( g_Conf->Folders.Logs, wxsFormat(L"R5900dump_%.8X:%.8X.txt", ee_pc, ee_end) );
+    AsciiFile eff( dump_filename, L"w" );
+
+    // Print register content to detect the memory access type. Warning: values are taken
+    // during the call of this function. They aren't the real values of the block.
+    // pretty_print_dump.pl reads the address on this line as the base of the register structure.
+    eff.Printf("Dump register data: 0x%llx\n", (unsigned long long)(uptr)&cpuRegs.GPR.r[0].UL[0]);
+    for (int reg = 0; reg < 32; reg++) {
+        // Only the lower 64 bits, UL[1] then UL[0] (enough for address)
+        eff.Printf("\t%2s <= 0x%08x_%08x\n", R5900::GPR_REG[reg], cpuRegs.GPR.r[reg].UL[1],cpuRegs.GPR.r[reg].UL[0]);
+    }
+    eff.Printf("\n");
+
+
+    if (!symbolMap.GetLabelString(ee_pc).empty())
+    {
+        eff.Printf( "%s\n", symbolMap.GetLabelString(ee_pc).c_str() );
+    }
+
+    for ( u32 i = ee_pc; i < ee_end; i += 4 )
+    {
+        std::string output;
+        //TLB Issue disR5900Fasm( output, memRead32( i ), i, false );
+        disR5900Fasm( output, psMu32(i), i, false );
+        eff.Printf( "0x%.X : %s\n", i, output.c_str() );
+    }
+
+    // Didn't find (search) a better solution
+    eff.Printf( "\nRaw x86 dump (https://www.onlinedisassembler.com/odaweb/):\n");
+    u8* x86 = (u8*)x86_pc;
+    for (u32 i = 0; i < x86_size; i++) {
+        eff.Printf("%.2X", x86[i]);
+    }
+    eff.Printf("\n\n");
+
+    eff.Close(); // Close the file so it can be appended by objdump
+
+    // handy but slow solution (system call)
+#ifdef __linux__
+    wxString obj_filename = Path::Combine(g_Conf->Folders.Logs, wxString(L"objdump_tmp.o"));
+    wxFFile objdump(obj_filename , L"wb");
+    if (!objdump.IsOpened())
+        return; // no temporary object file, so there is nothing for objdump to read
+    objdump.Write(x86, x86_size);
+    objdump.Close();
+
+    // The address is passed unsigned and at full width (a signed "%d" of a u32 cast mangles high addresses).
+    // Both paths are quoted so that a log folder containing spaces doesn't split the command.
+    std::system(
+        wxsFormat("objdump -D -b binary -mi386 --disassembler-options=intel --no-show-raw-insn --adjust-vma=0x%llx \"%s\" >> \"%s\"",
+            (unsigned long long)x86_pc, WX_STR(obj_filename), WX_STR(dump_filename))
+    );
+#endif
+}
+
+
 // Originally from iR5900-32.cpp
 void iDumpBlock( int startpc, u8 * ptr )
 {
diff --git a/pcsx2/Dump.h b/pcsx2/Dump.h
index b3a034f260..d0bf40e647 100644
--- a/pcsx2/Dump.h
+++ b/pcsx2/Dump.h
@@ -19,5 +19,6 @@ extern void iDumpRegisters(u32 startpc, u32 temp);
 extern void iDumpPsxRegisters(u32 startpc, u32 temp);
 extern void iDumpVU0Registers();
 extern void iDumpVU1Registers();
+extern void iDumpBlock(u32 ee_pc, u32 ee_size, uptr x86_pc, u32 x86_size);
 extern void iDumpBlock( int startpc, u8 * ptr );
 extern void iIopDumpBlock( int startpc, u8 * ptr );
diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp
index 277312c90c..fff21a7e4a 100644
--- a/pcsx2/System.cpp
+++ b/pcsx2/System.cpp
@@ -27,6 +27,7 @@
 #include "System/RecTypes.h"
 
 #include "Utilities/MemsetFast.inl"
+#include "Utilities/Perf.h"
 
 
 // --------------------------------------------------------------------------------------
@@ -56,6 +57,9 @@ void RecompiledCodeReserve::_registerProfiler()
     if (m_profiler_name.IsEmpty() || !IsOk()) return;
     ProfilerRegisterSource( m_profiler_name, m_baseptr, GetReserveSizeInBytes() );
     m_profiler_registered = true;
+
+    // Could potentially be integrated into ProfilerRegisterSource
+    Perf::any.map((uptr)m_baseptr, GetReserveSizeInBytes(), m_profiler_name.ToUTF8());
 }
 
 void RecompiledCodeReserve::_termProfiler()
diff --git a/pcsx2/x86/R5900_Profiler.h b/pcsx2/x86/R5900_Profiler.h
new file mode 100644
index 0000000000..748ea4dbae
--- /dev/null
+++ b/pcsx2/x86/R5900_Profiler.h
@@ -0,0 +1,372 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2015 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once + +// Keep my nice alignment please! +#define MOVZ MOVZtemp +#define MOVN MOVNtemp + +enum class eeOpcode { + // Core + special , regimm , J , JAL , BEQ , BNE , BLEZ , BGTZ , + ADDI , ADDIU , SLTI , SLTIU , ANDI , ORI , XORI , LUI , + cop0 , cop1 , cop2 , /*,*/ BEQL , BNEL , BLEZL , BGTZL , + DADDI , DADDIU , LDL , LDR , mmi , /*,*/ LQ , SQ , + LB , LH , LWL , LW , LBU , LHU , LWR , LWU , + SB , SH , SWL , SW , SDL , SDR , SWR , CACHE , + /*,*/ LWC1 , /*,*/ PREF , /*,*/ /*,*/ LQC2 , LD , + /*,*/ SWC1 , /*,*/ /*,*/ /*,*/ /*,*/ SQC2 , SD , + + // Special + SLL , /*,*/ SRL , SRA , SLLV , /*,*/ SRLV , SRAV , + JR , JALR , MOVZ , MOVN , SYSCALL , BREAK , /*,*/ SYNC , + MFHI , MTHI , MFLO , MTLO , DSLLV , /*,*/ DSRLV , DSRAV , + MULT , MULTU , DIV , DIVU , /*,*/ /*,*/ /*,*/ /*,*/ + ADD , ADDU , SUB , SUBU , AND , OR , XOR , NOR , + MFSA , MTSA , SLT , SLTU , DADD , DADDU , DSUB , DSUBU , + TGE , TGEU , TLT , TLTU , TEQ , /*,*/ TNE , /*,*/ + DSLL , /*,*/ DSRL , DSRA , DSLL32 , /*,*/ DSRL32 , DSRA32 , + + // Regimm + BLTZ , BGEZ , BLTZL , BGEZL , /*,*/ /*,*/ /*,*/ /*,*/ + TGEI , TGEIU , TLTI , TLTIU , TEQI , /*,*/ TNEI , /*,*/ + BLTZAL , BGEZAL , BLTZALL , BGEZALL , /*,*/ /*,*/ /*,*/ /*,*/ + MTSAB , MTSAH , /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ + + // MMI + MADD , MADDU , /*,*/ /*,*/ PLZCW , /*,*/ /*,*/ /*,*/ + MMI0 , MMI2 , /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ + MFHI1 , MTHI1 , MFLO1 , MTLO1 , /*,*/ /*,*/ /*,*/ /*,*/ + MULT1 , MULTU1 , DIV1 , DIVU1 , /*,*/ /*,*/ /*,*/ /*,*/ + MADD1 , MADDU1 , /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ + MMI1 , MMI3 , /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ /*,*/ + PMFHL , PMTHL , /*,*/ /*,*/ 
PSLLH , /*,*/ PSRLH , PSRAH , + /*,*/ /*,*/ /*,*/ /*,*/ PSLLW , /*,*/ PSRLW , PSRAW , + + // MMI0 + PADDW , PSUBW , PCGTW , PMAXW , + PADDH , PSUBH , PCGTH , PMAXH , + PADDB , PSUBB , PCGTB , /*,*/ + /*,*/ /*,*/ /*,*/ /*,*/ + PADDSW , PSUBSW , PEXTLW , PPACW , + PADDSH , PSUBSH , PEXTLH , PPACH , + PADDSB , PSUBSB , PEXTLB , PPACB , + /*,*/ /*,*/ PEXT5 , PPAC5 , + + // MMI1 + /*,*/ PABSW , PCEQW , PMINW , + PADSBH , PABSH , PCEQH , PMINH , + /*,*/ /*,*/ PCEQB , /*,*/ + /*,*/ /*,*/ /*,*/ /*,*/ + PADDUW , PSUBUW , PEXTUW , /*,*/ + PADDUH , PSUBUH , PEXTUH , /*,*/ + PADDUB , PSUBUB , PEXTUB , QFSRV , + /*,*/ /*,*/ /*,*/ /*,*/ + + // MMI2 + PMADDW , /*,*/ PSLLVW , PSRLVW , + PMSUBW , /*,*/ /*,*/ /*,*/ + PMFHI , PMFLO , PINTH , /*,*/ + PMULTW , PDIVW , PCPYLD , /*,*/ + PMADDH , PHMADH , PAND , PXOR , + PMSUBH , PHMSBH , /*,*/ /*,*/ + /*,*/ /*,*/ PEXEH , PREVH , + PMULTH , PDIVBW , PEXEW , PROT3W , + + // MMI3 + PMADDUW , /*,*/ /*,*/ PSRAVW , + /*,*/ /*,*/ /*,*/ /*,*/ + PMTHI , PMTLO , PINTEH , /*,*/ + PMULTUW , PDIVUW , PCPYUD , /*,*/ + /*,*/ /*,*/ POR , PNOR , + /*,*/ /*,*/ /*,*/ /*,*/ + /*,*/ /*,*/ PEXCH , PCPYH , + /*,*/ /*,*/ PEXCW , /*,*/ + + // ADD COP0/1 ?? 
+ + LAST +}; + +#undef MOVZ +#undef MOVN + +static const char eeOpcodeName[][16] = { + // "Core" + "special" , "regimm" , "J" , "JAL" , "BEQ" , "BNE" , "BLEZ" , "BGTZ" , + "ADDI" , "ADDIU" , "SLTI" , "SLTIU" , "ANDI" , "ORI" , "XORI" , "LUI" , + "cop0" , "cop1" , "cop2" , /* , */ "BEQL" , "BNEL" , "BLEZL" , "BGTZL" , + "DADDI" , "DADDIU" , "LDL" , "LDR" , "mmi" , /* , */ "LQ" , "SQ" , + "LB" , "LH" , "LWL" , "LW" , "LBU" , "LHU" , "LWR" , "LWU" , + "SB" , "SH" , "SWL" , "SW" , "SDL" , "SDR" , "SWR" , "CACHE" , + /* , */ "LWC1" , /* , */ "PREF" , /* , */ /* , */ "LQC2" , "LD" , + /* , */ "SWC1" , /* , */ /* , */ /* , */ /* , */ "SQC2" , "SD" , + + // "Special" + "SLL" , /* , */ "SRL" , "SRA" , "SLLV" , /* , */ "SRLV" , "SRAV" , + "JR" , "JALR" , "MOVZ" , "MOVN" , "SYSCALL" , "BREAK" , /* , */ "SYNC" , + "MFHI" , "MTHI" , "MFLO" , "MTLO" , "DSLLV" , /* , */ "DSRLV" , "DSRAV" , + "MULT" , "MULTU" , "DIV" , "DIVU" , /* , */ /* , */ /* , */ /* , */ + "ADD" , "ADDU" , "SUB" , "SUBU" , "AND" , "OR" , "XOR" , "NOR" , + "MFSA" , "MTSA" , "SLT" , "SLTU" , "DADD" , "DADDU" , "DSUB" , "DSUBU" , + "TGE" , "TGEU" , "TLT" , "TLTU" , "TEQ" , /* , */ "TNE" , /* , */ + "DSLL" , /* , */ "DSRL" , "DSRA" , "DSLL32" , /* , */ "DSRL32" , "DSRA32" , + + // "Regimm" + "BLTZ" , "BGEZ" , "BLTZL" , "BGEZL" , /* , */ /* , */ /* , */ /* , */ + "TGEI" , "TGEIU" , "TLTI" , "TLTIU" , "TEQI" , /* , */ "TNEI" , /* , */ + "BLTZAL" , "BGEZAL" , "BLTZALL" , "BGEZALL" , /* , */ /* , */ /* , */ /* , */ + "MTSAB" , "MTSAH" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ + + // "MMI" + "MADD" , "MADDU" , /* , */ /* , */ "PLZCW" , /* , */ /* , */ /* , */ + "MMI0" , "MMI2" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ + "MFHI1" , "MTHI1" , "MFLO1" , "MTLO1" , /* , */ /* , */ /* , */ /* , */ + "MULT1" , "MULTU1" , "DIV1" , "DIVU1" , /* , */ /* , */ /* , */ /* , */ + "MADD1" , "MADDU1" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ + "MMI1" , "MMI3" , /* , */ /* , */ /* , */ /* , */ /* , */ /* , */ + 
"PMFHL" , "PMTHL" , /* , */ /* , */ "PSLLH" , /* , */ "PSRLH" , "PSRAH" , + /* , */ /* , */ /* , */ /* , */ "PSLLW" , /* , */ "PSRLW" , "PSRAW" , + + // "MMI0" + "PADDW" , "PSUBW" , "PCGTW" , "PMAXW" , + "PADDH" , "PSUBH" , "PCGTH" , "PMAXH" , + "PADDB" , "PSUBB" , "PCGTB" , /* , */ + /* , */ /* , */ /* , */ /* , */ + "PADDSW" , "PSUBSW" , "PEXTLW" , "PPACW" , + "PADDSH" , "PSUBSH" , "PEXTLH" , "PPACH" , + "PADDSB" , "PSUBSB" , "PEXTLB" , "PPACB" , + /* , */ /* , */ "PEXT5" , "PPAC5" , + + // "MMI1" + /* , */ "PABSW" , "PCEQW" , "PMINW" , + "PADSBH" , "PABSH" , "PCEQH" , "PMINH" , + /* , */ /* , */ "PCEQB" , /* , */ + /* , */ /* , */ /* , */ /* , */ + "PADDUW" , "PSUBUW" , "PEXTUW" , /* , */ + "PADDUH" , "PSUBUH" , "PEXTUH" , /* , */ + "PADDUB" , "PSUBUB" , "PEXTUB" , "QFSRV" , + /* , */ /* , */ /* , */ /* , */ + + // "MMI2" + "PMADDW" , /* , */ "PSLLVW" , "PSRLVW" , + "PMSUBW" , /* , */ /* , */ /* , */ + "PMFHI" , "PMFLO" , "PINTH" , /* , */ + "PMULTW" , "PDIVW" , "PCPYLD" , /* , */ + "PMADDH" , "PHMADH" , "PAND" , "PXOR" , + "PMSUBH" , "PHMSBH" , /* , */ /* , */ + /* , */ /* , */ "PEXEH" , "PREVH" , + "PMULTH" , "PDIVBW" , "PEXEW" , "PROT3W" , + + // "MMI3" + "PMADDUW" , /* , */ /* , */ "PSRAVW" , + /* , */ /* , */ /* , */ /* , */ + "PMTHI" , "PMTLO" , "PINTEH" , /* , */ + "PMULTUW" , "PDIVUW" , "PCPYUD" , /* , */ + /* , */ /* , */ "POR" , "PNOR" , + /* , */ /* , */ /* , */ /* , */ + /* , */ /* , */ "PEXCH" , "PCPYH" , + /* , */ /* , */ "PEXCW" , /* , */ + + "!" 
+}; + +//#define eeProfileProg + +#ifdef eeProfileProg +#include +#include + +using namespace x86Emitter; + +struct eeProfiler { + static const u32 memSpace = 1 << 19; + + u64 opStats[static_cast(eeOpcode::LAST)]; + u32 memStats[memSpace]; + u32 memStatsConst[memSpace]; + u64 memStatsSlow; + u64 memStatsFast; + u32 memMask; + + void Reset() { + memzero(opStats); + memzero(memStats); + memzero(memStatsConst); + memStatsSlow = 0; + memStatsFast = 0; + memMask = 0xF700FFF0; + pxAssert(eeOpcodeName[static_cast(eeOpcode::LAST)][0] == '!'); + } + + void EmitOp(eeOpcode opcode) { + int op = static_cast(opcode); + xADD(ptr32[&(((u32*)opStats)[op*2+0])], 1); + xADC(ptr32[&(((u32*)opStats)[op*2+1])], 0); + } + + double per(u64 part, u64 total) { + return (double) part / (double) total * 100.0; + } + + void Print() { + // Compute opcode stat + u64 total = 0; + std::vector< std::pair > v; + std::vector< std::pair > vc; + for(int i = 0; i < static_cast(eeOpcode::LAST); i++) { + total += opStats[i]; + v.push_back(std::make_pair(opStats[i], i)); + } + std::sort (v.begin(), v.end()); + std::reverse(v.begin(), v.end()); + + DevCon.WriteLn("EE Profiler:"); + for(u32 i = 0; i < v.size(); i++) { + u64 count = v[i].first; + double stat = (double)count / (double)total * 100.0; + DevCon.WriteLn("%-8s - [%3.4f%%][count=%u]", + eeOpcodeName[v[i].second], stat, (u32)count); + if (stat < 0.01) + break; + } + //DevCon.WriteLn("Total = 0x%x_%x", (u32)(u64)(total>>32),(u32)total); + + // Compute memory stat + total = 0; + u64 reg = 0; + u64 gs = 0; + u64 vu = 0; + // FIXME: MAYBE count the scratch pad + for (size_t i = 0; i < memSpace ; i++) + total += memStats[i]; + + int ou = 32 * _1kb; // user segment (0x10000000) + int ok = 352 * _1kb; // kernel segment (0xB0000000) + for (int i = 0; i < 4 * _1kb; i++) reg += memStats[ou + 0 * _1kb + i] + memStats[ok + 0 * _1kb + i]; + for (int i = 0; i < 4 * _1kb; i++) gs += memStats[ou + 4 * _1kb + i] + memStats[ok + 4 * _1kb + i]; + for (int i = 0; i < 4 
* _1kb; i++) vu += memStats[ou + 8 * _1kb + i] + memStats[ok + 8 * _1kb + i]; + + + u64 ram = total - reg - gs - vu; + double ram_p = per(ram, total); + double reg_p = per(reg, total); + double gs_p = per(gs , total); + double vu_p = per(vu , total); + + // Compute const memory stat + u64 total_const = 0; + u64 reg_const = 0; + for (size_t i = 0; i < memSpace ; i++) + total_const += memStatsConst[i]; + + for (int i = 0; i < 4 * _1kb; i++) reg_const += memStatsConst[ou + i] + memStatsConst[ok + i]; + u64 ram_const = total_const - reg_const; // value is slightly wrong but good enough + + double ram_const_p = per(ram_const, ram); + double reg_const_p = per(reg_const, reg); + + DevCon.WriteLn("\nEE Memory Profiler:"); + DevCon.WriteLn("Total = 0x%08x_%08x", (u32)(u64)(total>>32),(u32)total); + DevCon.WriteLn(" RAM = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(ram>>32),(u32)ram, ram_p, ram_const_p); + DevCon.WriteLn(" REG = 0x%08x_%08x [%3.4f%%] Const[%3.4f%%]", (u32)(u64)(reg>>32),(u32)reg, reg_p, reg_const_p); + DevCon.WriteLn(" GS = 0x%08x_%08x [%3.4f%%]", (u32)(u64)( gs>>32),(u32) gs, gs_p); + DevCon.WriteLn(" VU = 0x%08x_%08x [%3.4f%%]", (u32)(u64) (vu>>32),(u32) vu, vu_p); + + u64 total_ram = memStatsSlow + memStatsFast; + DevCon.WriteLn("\n RAM Fast [%3.4f%%] RAM Slow [%3.4f%%]. 
Total 0x%08x_%08x [%3.4f%%]", + per(memStatsFast, total_ram), per(memStatsSlow, total_ram), (u32)(u64)(total_ram>>32),(u32)total_ram, per(total_ram, total)); + + v.clear(); + vc.clear(); + for (int i = 0; i < 4 * _1kb; i++) { + u32 reg_c = memStatsConst[ou + i] + memStatsConst[ok + i]; + u32 reg = memStats[ok + i] + memStats[ou + i] - reg_c; + if (reg) + v.push_back(std::make_pair(reg, i * 16)); + if (reg_c) + vc.push_back(std::make_pair(reg_c, i * 16)); + } + std::sort (v.begin(), v.end()); + std::reverse(v.begin(), v.end()); + + std::sort (vc.begin(), vc.end()); + std::reverse(vc.begin(), vc.end()); + + DevCon.WriteLn("\nEE Reg Profiler:"); + for(u32 i = 0; i < v.size(); i++) { + u64 count = v[i].first; + double stat = (double)count / (double)(reg - reg_const) * 100.0; + DevCon.WriteLn("%04x - [%3.4f%%][count=%u]", + v[i].second, stat, (u32)count); + if (stat < 0.01) + break; + } + + DevCon.WriteLn("\nEE Const Reg Profiler:"); + for(u32 i = 0; i < vc.size(); i++) { + u64 count = vc[i].first; + double stat = (double)count / (double)reg_const * 100.0; + DevCon.WriteLn("%04x - [%3.4f%%][count=%u]", + vc[i].second, stat, (u32)count); + if (stat < 0.01) + break; + } + + } + + // Warning dirty ebx + void EmitMem() { + // Compact the 4GB virtual address to a 512KB virtual address + if (x86caps.hasBMI2) { + xPEXT(ebx, ecx, ptr[&memMask]); + xADD(ptr32[(ebx*4) + memStats], 1); + } + } + + void EmitConstMem(u32 add) { + if (x86caps.hasBMI2) { + u32 a = _pext_u32(add, memMask); + xADD(ptr32[a + memStats], 1); + xADD(ptr32[a + memStatsConst], 1); + } + } + + void EmitSlowMem() { + xADD(ptr32[(u32*)&memStatsSlow], 1); + xADC(ptr32[(u32*)&memStatsSlow + 1], 0); + } + + void EmitFastMem() { + xADD(ptr32[(u32*)&memStatsFast], 1); + xADC(ptr32[(u32*)&memStatsFast + 1], 0); + } +}; +#else +struct eeProfiler { + __fi void Reset() {} + __fi void EmitOp(eeOpcode op) {} + __fi void Print() {} + __fi void EmitMem() {} + __fi void EmitConstMem(u32 add) {} + __fi void EmitSlowMem() {} + 
__fi void EmitFastMem() {} +}; +#endif + +namespace EE { + extern eeProfiler Profiler; +} diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 7822ef7e03..eb99ab0f30 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -62,6 +62,8 @@ void recPLZCW() if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PLZCW); + if( GPR_IS_CONST1(_Rs_) ) { _eeOnWriteReg(_Rd_, 0); _deleteEEreg(_Rd_, 0); @@ -154,6 +156,8 @@ void recPMFHL() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PMFHL); + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READLO|XMMINFO_READHI ); int t0reg; @@ -221,6 +225,8 @@ void recPMTHL() { if ( _Sa_ != 0 ) return; + EE::Profiler.EmitOp(eeOpcode::PMTHL); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); if ( x86caps.hasStreamingSIMD4Extensions ) { @@ -284,6 +290,8 @@ void recPSRLH() { if ( !_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSRLH); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); if( (_Sa_&0xf) == 0 ) { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -300,6 +308,8 @@ void recPSRLW() { if( !_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSRLW); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); if( _Sa_ == 0 ) { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -316,6 +326,8 @@ void recPSRAH() { if ( !_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSRAH); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); if( (_Sa_&0xf) == 0 ) { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -332,6 +344,8 @@ void recPSRAW() { if ( !_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSRAW); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); if( _Sa_ == 0 ) { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -348,6 +362,8 @@ void recPSLLH() { if ( !_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSLLH); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); if( (_Sa_&0xf) == 0 ) 
{ xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -364,6 +380,8 @@ void recPSLLW() { if ( !_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSLLW); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); if( _Sa_ == 0 ) { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -434,6 +452,8 @@ void recPMAXW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PMAXW); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if ( x86caps.hasStreamingSIMD4Extensions ) { if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -485,6 +505,8 @@ void recPPACW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PPACW); + int info = eeRecompileCodeXMM( ((_Rs_!=0)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { @@ -517,6 +539,8 @@ void recPPACH() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PPACH); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); @@ -545,6 +569,8 @@ void recPPACB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PPACB); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _hasFreeXMMreg() ) { @@ -585,6 +611,8 @@ void recPEXT5() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PEXT5); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -621,6 +649,8 @@ void recPPAC5() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PPAC5); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -659,6 +689,8 @@ void recPMAXH() { if ( ! 
_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PMAXH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPMAX.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -674,6 +706,8 @@ void recPCGTB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCGTB); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D != EEREC_T ) { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -694,6 +728,8 @@ void recPCGTH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCGTH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D != EEREC_T ) { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -715,6 +751,8 @@ void recPCGTW() //TODO:optimize RS | RT== 0 if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCGTW); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D != EEREC_T ) { xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -735,6 +773,8 @@ void recPADDSB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDSB); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPADD.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -750,6 +790,8 @@ void recPADDSH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDSH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPADD.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -766,6 +808,8 @@ void recPADDSW() { if ( ! 
_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDSW); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -816,6 +860,8 @@ void recPSUBSB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBSB); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) { @@ -837,6 +883,8 @@ void recPSUBSH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBSH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) { @@ -859,6 +907,8 @@ void recPSUBSW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBSW); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -914,6 +964,8 @@ void recPADDB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDB); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPADD.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -929,6 +981,8 @@ void recPADDH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDH); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); @@ -953,6 +1007,8 @@ void recPADDW() { if ( ! 
_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDW); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); @@ -977,6 +1033,8 @@ void recPSUBB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBB); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) { @@ -998,6 +1056,8 @@ void recPSUBH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) { @@ -1019,6 +1079,8 @@ void recPSUBW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBW); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) { @@ -1040,6 +1102,8 @@ void recPEXTLW() { if ( ! 
_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PEXTLW); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -1066,6 +1130,8 @@ void recPEXTLB() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PEXTLB); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -1092,6 +1158,8 @@ void recPEXTLH() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PEXTLH); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -1153,6 +1221,8 @@ void recPABSW() //needs clamping { if( !_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PABSW); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); @@ -1181,6 +1251,8 @@ void recPABSH() { if( !_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PABSH); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); @@ -1208,6 +1280,8 @@ void recPMINW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PMINW); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if ( x86caps.hasStreamingSIMD4Extensions ) { if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -1259,6 +1333,8 @@ void recPADSBH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADSBH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); int t0reg; @@ -1298,6 +1374,8 @@ void recPADDUW() { if ( ! 
_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDUW); + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rt_ == 0 ) { @@ -1344,6 +1422,8 @@ void recPSUBUB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBUB); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) { @@ -1365,6 +1445,8 @@ void recPSUBUH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBUH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) { @@ -1386,6 +1468,8 @@ void recPSUBUW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSUBUW); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1432,6 +1516,8 @@ void recPEXTUH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PEXTUH); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -1461,6 +1547,8 @@ void recQFSRV() if ( !_Rd_ ) return; //Console.WriteLn("recQFSRV()"); + EE::Profiler.EmitOp(eeOpcode::QFSRV); + if (_Rs_ == _Rt_ + 1) { _flushEEreg(_Rs_); _flushEEreg(_Rt_); @@ -1486,6 +1574,8 @@ void recPEXTUB() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PEXTUB); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { @@ -1514,6 +1604,8 @@ void recPEXTUW() { if ( ! 
_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PEXTUW); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -1541,6 +1633,8 @@ void recPMINH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PMINH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPMIN.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -1556,6 +1650,8 @@ void recPCEQB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCEQB); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPCMP.EQB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -1571,6 +1667,8 @@ void recPCEQH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCEQH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPCMP.EQW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -1586,6 +1684,8 @@ void recPCEQW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCEQW); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPCMP.EQD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -1601,6 +1701,8 @@ void recPADDUB() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDUB); + int info = eeRecompileCodeXMM( XMMINFO_READS|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rt_ ) { if( EEREC_D == EEREC_S ) xPADD.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -1619,6 +1721,8 @@ void recPADDUH() { if ( ! 
_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PADDUH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); else if( EEREC_D == EEREC_T ) xPADD.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -1665,6 +1769,8 @@ REC_FUNC_DEL( PROT3W, _Rd_ ); //////////////////////////////////////////////////// void recPMADDW() { + EE::Profiler.EmitOp(eeOpcode::PMADDW); + if( !x86caps.hasStreamingSIMD4Extensions ) { _deleteEEreg(_Rd_, 0); recCall(Interp::PMADDW); @@ -1714,6 +1820,8 @@ void recPSLLVW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSLLVW); + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) { @@ -1780,6 +1888,8 @@ void recPSRLVW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSRLVW); + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) { @@ -1844,6 +1954,8 @@ void recPSRLVW() //////////////////////////////////////////////////// void recPMSUBW() { + EE::Profiler.EmitOp(eeOpcode::PMSUBW); + if( !x86caps.hasStreamingSIMD4Extensions ) { _deleteEEreg(_Rd_, 0); recCall(Interp::PMSUBW); @@ -1896,6 +2008,8 @@ void recPMSUBW() //////////////////////////////////////////////////// void recPMULTW() { + EE::Profiler.EmitOp(eeOpcode::PMULTW); + if( !x86caps.hasStreamingSIMD4Extensions ) { _deleteEEreg(_Rd_, 0); recCall(Interp::PMULTW); @@ -1938,6 +2052,8 @@ void recPMULTW() //////////////////////////////////////////////////// void recPDIVW() { + EE::Profiler.EmitOp(eeOpcode::PDIVW); + _deleteEEreg(_Rd_, 0); recCall(Interp::PDIVW); } @@ -1945,6 +2061,8 @@ void recPDIVW() //////////////////////////////////////////////////// void recPDIVBW() { + EE::Profiler.EmitOp(eeOpcode::PDIVBW); + _deleteEEreg(_Rd_, 0); recCall(Interp::PDIVBW); //-- } @@ -1955,6 +2073,8 @@ void recPDIVBW() //contains the 
upper multiplication result (before the addition with the lower multiplication result) void recPHMADH() { + EE::Profiler.EmitOp(eeOpcode::PHMADH); + int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1995,6 +2115,8 @@ void recPHMADH() void recPMSUBH() { + EE::Profiler.EmitOp(eeOpcode::PMSUBH); + int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2057,6 +2179,8 @@ void recPMSUBH() //it contains the NOT of the upper multiplication result (before the substraction of the lower multiplication result) void recPHMSBH() { + EE::Profiler.EmitOp(eeOpcode::PHMSBH); + int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2092,6 +2216,8 @@ void recPEXEH() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PEXEH); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xc6); xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0xc6); @@ -2103,6 +2229,7 @@ void recPREVH() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PREVH); int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x1B); @@ -2115,6 +2242,8 @@ void recPINTH() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PINTH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2134,6 +2263,8 @@ void recPEXEW() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PEXEW); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); xPSHUF.D(xRegisterSSE(EEREC_D), 
xRegisterSSE(EEREC_T), 0xc6); _clearNeededXMMregs(); @@ -2143,6 +2274,8 @@ void recPROT3W() { if (!_Rd_) return; + EE::Profiler.EmitOp(eeOpcode::PROT3W); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0xc9); _clearNeededXMMregs(); @@ -2150,6 +2283,8 @@ void recPROT3W() void recPMULTH() { + EE::Profiler.EmitOp(eeOpcode::PMULTH); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2187,6 +2322,8 @@ void recPMFHI() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PMFHI); + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READHI ); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_HI)); _clearNeededXMMregs(); @@ -2197,6 +2334,8 @@ void recPMFLO() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PMFLO); + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READLO ); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO)); _clearNeededXMMregs(); @@ -2207,6 +2346,8 @@ void recPAND() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PAND); + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT ); if( EEREC_D == EEREC_T ) { xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -2226,6 +2367,8 @@ void recPXOR() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PXOR); + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT ); if( EEREC_D == EEREC_T ) { xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); @@ -2245,6 +2388,8 @@ void recPCPYLD() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCPYLD); + int info = eeRecompileCodeXMM( XMMINFO_WRITED|(( _Rs_== 0) ? 
0:XMMINFO_READS)|XMMINFO_READT ); if( _Rs_ == 0 ) { xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); @@ -2266,6 +2411,8 @@ void recPCPYLD() void recPMADDH() { + EE::Profiler.EmitOp(eeOpcode::PMADDH); + int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2353,6 +2500,8 @@ void recPSRAVW() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PSRAVW); + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) { @@ -2423,6 +2572,8 @@ void recPINTEH() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PINTEH); + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); int t0reg = -1; @@ -2472,6 +2623,8 @@ void recPINTEH() //////////////////////////////////////////////////// void recPMULTUW() { + EE::Profiler.EmitOp(eeOpcode::PMULTUW); + int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); if( !_Rs_ || !_Rt_ ) { if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D)); @@ -2518,6 +2671,8 @@ void recPMULTUW() //////////////////////////////////////////////////// void recPMADDUW() { + EE::Profiler.EmitOp(eeOpcode::PMADDUW); + int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} @@ -2569,6 +2724,8 @@ void recPMADDUW() //////////////////////////////////////////////////// void recPDIVUW() { + EE::Profiler.EmitOp(eeOpcode::PDIVUW); + _deleteEEreg(_Rd_, 0); 
recCall(Interp::PDIVUW); } @@ -2576,6 +2733,8 @@ void recPDIVUW() //////////////////////////////////////////////////// void recPEXCW() { + EE::Profiler.EmitOp(eeOpcode::PEXCW); + if (!_Rd_) return; int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); @@ -2586,6 +2745,8 @@ void recPEXCW() //////////////////////////////////////////////////// void recPEXCH() { + EE::Profiler.EmitOp(eeOpcode::PEXCH); + if (!_Rd_) return; int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); @@ -2599,6 +2760,8 @@ void recPNOR() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PNOR); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { @@ -2650,6 +2813,8 @@ void recPNOR() //////////////////////////////////////////////////// void recPMTHI() { + EE::Profiler.EmitOp(eeOpcode::PMTHI); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_WRITEHI ); xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S)); _clearNeededXMMregs(); @@ -2658,6 +2823,8 @@ void recPMTHI() //////////////////////////////////////////////////// void recPMTLO() { + EE::Profiler.EmitOp(eeOpcode::PMTLO); + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_WRITELO ); xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S)); _clearNeededXMMregs(); @@ -2668,6 +2835,8 @@ void recPCPYUD() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCPYUD); + int info = eeRecompileCodeXMM( XMMINFO_READS|(( _Rt_ == 0) ? 0:XMMINFO_READT)|XMMINFO_WRITED ); if( _Rt_ == 0 ) { @@ -2705,6 +2874,8 @@ void recPOR() { if ( ! _Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::POR); + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { @@ -2738,6 +2909,8 @@ void recPCPYH() { if ( ! 
_Rd_ ) return; + EE::Profiler.EmitOp(eeOpcode::PCPYH); + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0); xPSHUF.HW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D), 0); diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index e55ce1327a..9bca128013 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -36,6 +36,7 @@ #include "NakedAsm.h" #include "AppConfig.h" +#include "Utilities/Perf.h" using namespace x86Emitter; @@ -361,6 +362,8 @@ static void _DynGen_Dispatchers() HostSys::MemProtectStatic( iopRecDispatchers, PageAccess_ExecOnly() ); recBlocks.SetJITCompile( iopJITCompile ); + + Perf::any.map((uptr)&iopRecDispatchers, 4096, "IOP Dispatcher"); } //////////////////////////////////////////////////// @@ -812,6 +815,8 @@ void recResetIOP() { DevCon.WriteLn( "iR3000A Recompiler reset." ); + Perf::iop.reset(); + recAlloc(); recMem->Reset(); @@ -868,6 +873,9 @@ static void recShutdown() safe_free( s_pInstCache ); s_nInstCacheSize = 0; + + // FIXME Warning thread unsafe + Perf::dump(); } static void iopClearRecLUT(BASEBLOCK* base, int count) @@ -1411,6 +1419,8 @@ StartRecomp: pxAssert(xGetPtr() - recPtr < _64kb); s_pCurBlockEx->x86size = xGetPtr() - recPtr; + Perf::iop.map(s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc); + recPtr = xGetPtr(); pxAssert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg ); diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 9dcdcb9671..28ef0fa136 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -21,6 +21,7 @@ #include "R5900.h" #include "VU.h" #include "iCore.h" +#include "R5900_Profiler.h" extern u32 maxrecmem; extern u32 pc; // recompiler pc (also used by the SuperVU! .. why? 
(air)) @@ -133,12 +134,14 @@ typedef void (*R5900FNPTR_INFO)(int info); #define EERECOMPILE_CODE0(fn, xmminfo) \ void rec##fn(void) \ { \ + EE::Profiler.EmitOp(eeOpcode::fn); \ eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \ } #define EERECOMPILE_CODEX(codename, fn) \ void rec##fn(void) \ { \ + EE::Profiler.EmitOp(eeOpcode::fn); \ codename(rec##fn##_const, rec##fn##_); \ } diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 067b91f69c..0a1da6de6b 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -41,6 +41,7 @@ #include "Utilities/MemsetFast.inl" +#include "Utilities/Perf.h" using namespace x86Emitter; @@ -63,6 +64,7 @@ __aligned16 GPR_reg64 g_cpuConstRegs[32] = {0}; u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0; bool g_cpuFlushedPC, g_cpuFlushedCode, g_recompilingDelaySlot, g_maySignalException; +eeProfiler EE::Profiler; //////////////////////////////////////////////////////////////// // Static Private Variables - R5900 Dynarec @@ -573,6 +575,8 @@ static void _DynGen_Dispatchers() HostSys::MemProtectStatic( eeRecDispatchers, PageAccess_ExecOnly() ); recBlocks.SetJITCompile( JITCompile ); + + Perf::any.map((uptr)&eeRecDispatchers, 4096, "EE Dispatcher"); } @@ -698,6 +702,10 @@ static bool eeCpuExecuting = false; //////////////////////////////////////////////////// static void recResetRaw() { + Perf::ee.reset(); + + EE::Profiler.Reset(); + recAlloc(); if( AtomicExchange( eeRecIsReset, true ) ) return; @@ -741,6 +749,9 @@ static void recShutdown() safe_aligned_free( recConstBuf ); safe_free( s_pInstCache ); s_nInstCacheSize = 0; + + // FIXME Warning thread unsafe + Perf::dump(); } static void recResetEE() @@ -837,12 +848,19 @@ static void recExecute() if(m_cpuException) m_cpuException->Rethrow(); if(m_Exception) m_Exception->Rethrow(); + + // FIXME Warning thread unsafe + Perf::dump(); #endif + + EE::Profiler.Print(); } 
//////////////////////////////////////////////////// void R5900::Dynarec::OpcodeImpl::recSYSCALL() { + EE::Profiler.EmitOp(eeOpcode::SYSCALL); + recCall(R5900::Interpreter::OpcodeImpl::SYSCALL); xCMP(ptr32[&cpuRegs.pc], pc); @@ -858,6 +876,8 @@ void R5900::Dynarec::OpcodeImpl::recSYSCALL() //////////////////////////////////////////////////// void R5900::Dynarec::OpcodeImpl::recBREAK() { + EE::Profiler.EmitOp(eeOpcode::BREAK); + recCall(R5900::Interpreter::OpcodeImpl::BREAK); xCMP(ptr32[&cpuRegs.pc], pc); @@ -2183,6 +2203,14 @@ StartRecomp: pxAssert(xGetPtr() - recPtr < _64kb); s_pCurBlockEx->x86size = xGetPtr() - recPtr; +#if 0 + // Example: Dump both x86/EE code + if (startpc == 0x456630) { + iDumpBlock(s_pCurBlockEx->startpc, s_pCurBlockEx->size*4, s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size); + } +#endif + Perf::ee.map(s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc); + recPtr = xGetPtr(); pxAssert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg ); diff --git a/pcsx2/x86/ix86-32/iR5900Branch.cpp b/pcsx2/x86/ix86-32/iR5900Branch.cpp index 9795fc5aa8..dd8861ea76 100644 --- a/pcsx2/x86/ix86-32/iR5900Branch.cpp +++ b/pcsx2/x86/ix86-32/iR5900Branch.cpp @@ -403,6 +403,8 @@ EERECOMPILE_CODE0(BNEL, XMMINFO_READS|XMMINFO_READT); //////////////////////////////////////////////////// void recBLTZAL() { + EE::Profiler.EmitOp(eeOpcode::BLTZAL); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeOnWriteReg(31, 0); @@ -442,6 +444,8 @@ void recBLTZAL() //////////////////////////////////////////////////// void recBGEZAL() { + EE::Profiler.EmitOp(eeOpcode::BGEZAL); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeOnWriteReg(31, 0); @@ -481,6 +485,8 @@ void recBGEZAL() //////////////////////////////////////////////////// void recBLTZALL() { + EE::Profiler.EmitOp(eeOpcode::BLTZALL); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeOnWriteReg(31, 0); @@ -515,6 +521,8 @@ void recBLTZALL() //////////////////////////////////////////////////// void recBGEZALL() { + 
EE::Profiler.EmitOp(eeOpcode::BGEZALL); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeOnWriteReg(31, 0); @@ -550,6 +558,8 @@ void recBGEZALL() //// BLEZ void recBLEZ() { + EE::Profiler.EmitOp(eeOpcode::BLEZ); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeFlushAllUnused(); @@ -596,6 +606,8 @@ void recBLEZ() //// BGTZ void recBGTZ() { + EE::Profiler.EmitOp(eeOpcode::BGTZ); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeFlushAllUnused(); @@ -642,6 +654,8 @@ void recBGTZ() //////////////////////////////////////////////////// void recBLTZ() { + EE::Profiler.EmitOp(eeOpcode::BLTZ); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeFlushAllUnused(); @@ -675,6 +689,8 @@ void recBLTZ() //////////////////////////////////////////////////// void recBGEZ() { + EE::Profiler.EmitOp(eeOpcode::BGEZ); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeFlushAllUnused(); @@ -708,6 +724,8 @@ void recBGEZ() //////////////////////////////////////////////////// void recBLTZL() { + EE::Profiler.EmitOp(eeOpcode::BLTZL); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeFlushAllUnused(); @@ -738,6 +756,8 @@ void recBLTZL() //////////////////////////////////////////////////// void recBGEZL() { + EE::Profiler.EmitOp(eeOpcode::BGEZL); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeFlushAllUnused(); @@ -775,6 +795,8 @@ void recBGEZL() //////////////////////////////////////////////////// void recBLEZL() { + EE::Profiler.EmitOp(eeOpcode::BLEZL); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeFlushAllUnused(); @@ -819,6 +841,8 @@ void recBLEZL() //////////////////////////////////////////////////// void recBGTZL() { + EE::Profiler.EmitOp(eeOpcode::BGTZL); + u32 branchTo = ((s32)_Imm_ * 4) + pc; _eeFlushAllUnused(); diff --git a/pcsx2/x86/ix86-32/iR5900Jump.cpp b/pcsx2/x86/ix86-32/iR5900Jump.cpp index 1a4cc96976..2fceac01d6 100644 --- a/pcsx2/x86/ix86-32/iR5900Jump.cpp +++ b/pcsx2/x86/ix86-32/iR5900Jump.cpp @@ -47,6 +47,8 @@ REC_SYS_DEL(JALR, _Rd_); //////////////////////////////////////////////////// void recJ() { + 
EE::Profiler.EmitOp(eeOpcode::J); + // SET_FPUSTATE; u32 newpc = (_Target_ << 2) + ( pc & 0xf0000000 ); recompileNextInstruction(1); @@ -59,6 +61,8 @@ void recJ() //////////////////////////////////////////////////// void recJAL() { + EE::Profiler.EmitOp(eeOpcode::JAL); + u32 newpc = (_Target_ << 2) + ( pc & 0xf0000000 ); _deleteEEreg(31, 0); if(EE_CONST_PROP) @@ -88,12 +92,16 @@ void recJAL() //////////////////////////////////////////////////// void recJR() { + EE::Profiler.EmitOp(eeOpcode::JR); + SetBranchReg( _Rs_); } //////////////////////////////////////////////////// void recJALR() { + EE::Profiler.EmitOp(eeOpcode::JALR); + int newpc = pc + 4; _allocX86reg(esi, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); _eeMoveGPRtoR(esi, _Rs_); diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 306e9fe3f5..1c4a7ac4c0 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -238,20 +238,20 @@ void recStore(u32 bits) ////////////////////////////////////////////////////////////////////////////////////////// // -void recLB() { recLoad32(8,true); } -void recLBU() { recLoad32(8,false); } -void recLH() { recLoad32(16,true); } -void recLHU() { recLoad32(16,false); } -void recLW() { recLoad32(32,true); } -void recLWU() { recLoad32(32,false); } -void recLD() { recLoad64(64,false); } -void recLQ() { recLoad64(128,false); } +void recLB() { recLoad32(8,true); EE::Profiler.EmitOp(eeOpcode::LB);} +void recLBU() { recLoad32(8,false); EE::Profiler.EmitOp(eeOpcode::LBU);} +void recLH() { recLoad32(16,true); EE::Profiler.EmitOp(eeOpcode::LH);} +void recLHU() { recLoad32(16,false); EE::Profiler.EmitOp(eeOpcode::LHU);} +void recLW() { recLoad32(32,true); EE::Profiler.EmitOp(eeOpcode::LW);} +void recLWU() { recLoad32(32,false); EE::Profiler.EmitOp(eeOpcode::LWU);} +void recLD() { recLoad64(64,false); EE::Profiler.EmitOp(eeOpcode::LD);} +void recLQ() { recLoad64(128,false); EE::Profiler.EmitOp(eeOpcode::LQ);} -void 
recSB() { recStore(8); } -void recSH() { recStore(16); } -void recSW() { recStore(32); } -void recSQ() { recStore(128); } -void recSD() { recStore(64); } +void recSB() { recStore(8); EE::Profiler.EmitOp(eeOpcode::SB);} +void recSH() { recStore(16); EE::Profiler.EmitOp(eeOpcode::SH);} +void recSW() { recStore(32); EE::Profiler.EmitOp(eeOpcode::SW);} +void recSQ() { recStore(128); EE::Profiler.EmitOp(eeOpcode::SQ);} +void recSD() { recStore(64); EE::Profiler.EmitOp(eeOpcode::SD);} //////////////////////////////////////////////////// @@ -298,6 +298,8 @@ void recLWL() recCall(LWL); #endif + + EE::Profiler.EmitOp(eeOpcode::LWL); } //////////////////////////////////////////////////// @@ -347,6 +349,8 @@ void recLWR() recCall(LWR); #endif + + EE::Profiler.EmitOp(eeOpcode::LWR); } //////////////////////////////////////////////////// @@ -395,6 +399,8 @@ void recSWL() _deleteEEreg(_Rt_, 1); recCall(SWL); #endif + + EE::Profiler.EmitOp(eeOpcode::SWL); } //////////////////////////////////////////////////// @@ -443,6 +449,8 @@ void recSWR() _deleteEEreg(_Rt_, 1); recCall(SWR); #endif + + EE::Profiler.EmitOp(eeOpcode::SWR); } //////////////////////////////////////////////////// @@ -452,6 +460,8 @@ void recLDL() _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); recCall(LDL); + + EE::Profiler.EmitOp(eeOpcode::LDL); } //////////////////////////////////////////////////// @@ -461,6 +471,8 @@ void recLDR() _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); recCall(LDR); + + EE::Profiler.EmitOp(eeOpcode::LDR); } //////////////////////////////////////////////////// @@ -471,6 +483,8 @@ void recSDL() _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); recCall(SDL); + + EE::Profiler.EmitOp(eeOpcode::SDL); } //////////////////////////////////////////////////// @@ -480,6 +494,8 @@ void recSDR() _deleteEEreg(_Rs_, 1); _deleteEEreg(_Rt_, 1); recCall(SDR); + + EE::Profiler.EmitOp(eeOpcode::SDR); } ////////////////////////////////////////////////////////////////////////////////////////// @@ -511,6 +527,8 @@ 
void recLWC1() } xMOV(ptr32[&fpuRegs.fpr[_Rt_].UL], eax); + + EE::Profiler.EmitOp(eeOpcode::LWC1); } //////////////////////////////////////////////////// @@ -536,6 +554,8 @@ void recSWC1() vtlb_DynGenWrite(32); } + + EE::Profiler.EmitOp(eeOpcode::SWC1); } //////////////////////////////////////////////////// @@ -576,6 +596,8 @@ void recLQC2() vtlb_DynGenRead64(128); } + + EE::Profiler.EmitOp(eeOpcode::LQC2); } //////////////////////////////////////////////////// @@ -601,6 +623,8 @@ void recSQC2() vtlb_DynGenWrite(128); } + + EE::Profiler.EmitOp(eeOpcode::SQC2); } #endif diff --git a/pcsx2/x86/ix86-32/iR5900Move.cpp b/pcsx2/x86/ix86-32/iR5900Move.cpp index 023ea9e2d6..97a7bd35e6 100644 --- a/pcsx2/x86/ix86-32/iR5900Move.cpp +++ b/pcsx2/x86/ix86-32/iR5900Move.cpp @@ -84,7 +84,9 @@ void recLUI() xCDQ(); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); - } + } + + EE::Profiler.EmitOp(eeOpcode::LUI); } //////////////////////////////////////////////////// @@ -285,21 +287,25 @@ void recMTHILO(int hi) void recMFHI() { recMFHILO(1); + EE::Profiler.EmitOp(eeOpcode::MFHI); } void recMFLO() { recMFHILO(0); + EE::Profiler.EmitOp(eeOpcode::MFLO); } void recMTHI() { recMTHILO(1); + EE::Profiler.EmitOp(eeOpcode::MTHI); } void recMTLO() { recMTHILO(0); + EE::Profiler.EmitOp(eeOpcode::MTLO); } //////////////////////////////////////////////////// @@ -407,21 +413,25 @@ void recMTHILO1(int hi) void recMFHI1() { recMFHILO1(1); + EE::Profiler.EmitOp(eeOpcode::MFHI1); } void recMFLO1() { recMFHILO1(0); + EE::Profiler.EmitOp(eeOpcode::MFLO1); } void recMTHI1() { recMTHILO1(1); + EE::Profiler.EmitOp(eeOpcode::MTHI1); } void recMTLO1() { recMTHILO1(0); + EE::Profiler.EmitOp(eeOpcode::MTLO1); } //// MOVZ diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index b37eccfe5c..aace596c2c 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -161,6 +161,9 @@ namespace vtlb_private // static uptr* 
DynGen_PrepRegs() { + // Warning dirty ebx (in case someone got the very bad idea to move this code) + EE::Profiler.EmitMem(); + xMOV( eax, ecx ); xSHR( eax, VTLB_PAGE_BITS ); xMOV( eax, ptr[(eax*4) + vtlbdata.vmap] ); @@ -370,6 +373,8 @@ void vtlb_DynGenRead32(u32 bits, bool sign) // recompiler if the TLB is changed. void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) { + EE::Profiler.EmitConstMem(addr_const); + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 ) @@ -416,6 +421,8 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) // void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ) { + EE::Profiler.EmitConstMem(addr_const); + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 ) @@ -506,6 +513,8 @@ void vtlb_DynGenWrite(u32 sz) // recompiler if the TLB is changed. void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) { + EE::Profiler.EmitConstMem(addr_const); + u32 vmv_ptr = vtlbdata.vmap[addr_const>>VTLB_PAGE_BITS]; s32 ppf = addr_const + vmv_ptr; if( ppf >= 0 ) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 8b71c5ca1d..f9e43bbdf2 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -18,6 +18,8 @@ #include "PrecompiledHeader.h" #include "microVU.h" +#include "Utilities/Perf.h" + //------------------------------------------------------------------ // Micro VU - Main Functions //------------------------------------------------------------------ @@ -73,6 +75,9 @@ void mVUreset(microVU& mVU, bool resetReserve) { // Restore reserve to uncommitted state if (resetReserve) mVU.cache_reserve->Reset(); + + if (mVU.index) Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU1 Dispatcher"); + else Perf::any.map((uptr)&mVU.dispCache, mVUdispCacheSize, "mVU0 Dispatcher"); x86SetPtr(mVU.dispCache); mVUdispatcherA(mVU);