Merge pull request #12691 from mitaclaw/jit-profiling-restoration

JitCache: Software Profiling Restoration
This commit is contained in:
Admiral H. Curtiss 2024-04-13 01:35:25 +02:00 committed by GitHub
commit 0c1a76398b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
32 changed files with 283 additions and 302 deletions

View File

@ -385,16 +385,9 @@ public final class NativeLibrary
public static native boolean IsRunningAndUnpaused();
/**
* Enables or disables CPU block profiling
*
* @param enable
* Writes out the JitBlock Cache log dump
*/
public static native void SetProfiling(boolean enable);
/**
* Writes out the block profile results
*/
public static native void WriteProfileResults();
public static native void WriteJitBlockLogDump();
/**
* Native EGL functions not exposed by Java bindings
@ -454,6 +447,14 @@ public final class NativeLibrary
private static native String GetCurrentTitleDescriptionUnchecked();
/**
 * Shows a toast with the given text.
 *
 * The toast is created and shown from a Runnable posted to the main looper, so this
 * method returns without waiting for the toast to appear. The native side looks this
 * method up by name with the signature (Ljava/lang/String;Z)V.
 *
 * @param text        Message to display.
 * @param long_length true selects Toast.LENGTH_LONG, false selects Toast.LENGTH_SHORT.
 */
@Keep
public static void displayToastMsg(final String text, final boolean long_length)
{
  final int duration;
  if (long_length)
  {
    duration = Toast.LENGTH_LONG;
  }
  else
  {
    duration = Toast.LENGTH_SHORT;
  }

  final Handler mainLooperHandler = new Handler(Looper.getMainLooper());
  mainLooperHandler.post(
          () -> Toast.makeText(DolphinApplication.getAppContext(), text, duration).show());
}
@Keep
public static boolean displayAlertMsg(final String caption, final String text,
final boolean yesNo, final boolean isWarning, final boolean nonBlocking)
@ -466,9 +467,7 @@ public final class NativeLibrary
// and are allowed to block. As a fallback, we can use toasts.
if (emulationActivity == null || nonBlocking)
{
new Handler(Looper.getMainLooper()).post(
() -> Toast.makeText(DolphinApplication.getAppContext(), text, Toast.LENGTH_LONG)
.show());
displayToastMsg(text, true);
}
else
{

View File

@ -214,6 +214,12 @@ enum class BooleanSetting(
"JitRegisterCacheOff",
false
),
MAIN_DEBUG_JIT_ENABLE_PROFILING(
Settings.FILE_DOLPHIN,
Settings.SECTION_DEBUG,
"JitEnableProfiling",
false
),
MAIN_EMULATE_SKYLANDER_PORTAL(
Settings.FILE_DOLPHIN,
Settings.SECTION_EMULATED_USB_DEVICES,

View File

@ -1978,6 +1978,26 @@ class SettingsFragmentPresenter(
)
)
sl.add(HeaderSetting(context, R.string.debug_jit_profiling_header, 0))
sl.add(
SwitchSetting(
context,
BooleanSetting.MAIN_DEBUG_JIT_ENABLE_PROFILING,
R.string.debug_jit_enable_block_profiling,
0
)
)
sl.add(
RunRunnable(
context,
R.string.debug_jit_write_block_log_dump,
0,
0,
0,
true
) { NativeLibrary.WriteJitBlockLogDump() }
)
sl.add(HeaderSetting(context, R.string.debug_jit_header, 0))
sl.add(
SwitchSetting(

View File

@ -406,6 +406,9 @@
<string name="debug_fastmem">Disable Fastmem</string>
<string name="debug_fastmem_arena">Disable Fastmem Arena</string>
<string name="debug_large_entry_points_map">Disable Large Entry Points Map</string>
<string name="debug_jit_profiling_header">Jit Profiling</string>
<string name="debug_jit_enable_block_profiling">Enable Jit Block Profiling</string>
<string name="debug_jit_write_block_log_dump">Write Jit Block Log Dump</string>
<string name="debug_jit_header">Jit</string>
<string name="debug_jitoff">Jit Disabled</string>
<string name="debug_jitloadstoreoff">Jit Load Store Disabled</string>

View File

@ -12,6 +12,7 @@ static JavaVM* s_java_vm;
static jclass s_string_class;
static jclass s_native_library_class;
static jmethodID s_display_toast_msg;
static jmethodID s_display_alert_msg;
static jmethodID s_update_touch_pointer;
static jmethodID s_on_title_changed;
@ -146,6 +147,11 @@ jclass GetNativeLibraryClass()
return s_native_library_class;
}
// Returns the cached method ID of NativeLibrary.displayToastMsg(String, boolean).
// The ID is resolved once in JNI_OnLoad and stored in s_display_toast_msg.
jmethodID GetDisplayToastMsg()
{
return s_display_toast_msg;
}
jmethodID GetDisplayAlertMsg()
{
return s_display_alert_msg;
@ -528,6 +534,8 @@ JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved)
const jclass native_library_class = env->FindClass("org/dolphinemu/dolphinemu/NativeLibrary");
s_native_library_class = reinterpret_cast<jclass>(env->NewGlobalRef(native_library_class));
s_display_toast_msg =
env->GetStaticMethodID(s_native_library_class, "displayToastMsg", "(Ljava/lang/String;Z)V");
s_display_alert_msg = env->GetStaticMethodID(s_native_library_class, "displayAlertMsg",
"(Ljava/lang/String;Ljava/lang/String;ZZZ)Z");
s_update_touch_pointer =

View File

@ -12,6 +12,7 @@ JNIEnv* GetEnvForThread();
jclass GetStringClass();
jclass GetNativeLibraryClass();
jmethodID GetDisplayToastMsg();
jmethodID GetDisplayAlertMsg();
jmethodID GetUpdateTouchPointer();
jmethodID GetOnTitleChanged();

View File

@ -6,6 +6,7 @@
#include <android/native_window_jni.h>
#include <cstdio>
#include <cstdlib>
#include <fmt/format.h>
#include <jni.h>
#include <memory>
#include <mutex>
@ -22,6 +23,7 @@
#include "Common/Event.h"
#include "Common/FileUtil.h"
#include "Common/Flag.h"
#include "Common/IOFile.h"
#include "Common/IniFile.h"
#include "Common/Logging/LogManager.h"
#include "Common/MsgHandler.h"
@ -42,7 +44,6 @@
#include "Core/Host.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/Profiler.h"
#include "Core/State.h"
#include "Core/System.h"
@ -404,26 +405,34 @@ JNIEXPORT jint JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_GetMaxLogLev
return static_cast<jint>(Common::Log::MAX_LOGLEVEL);
}
JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_SetProfiling(JNIEnv*, jclass,
jboolean enable)
JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_WriteJitBlockLogDump(
JNIEnv* env, jclass native_library_class)
{
HostThreadLock guard;
auto& system = Core::System::GetInstance();
auto& jit_interface = system.GetJitInterface();
const Core::CPUThreadGuard cpu_guard(system);
jit_interface.ClearCache(cpu_guard);
jit_interface.SetProfilingState(enable ? JitInterface::ProfilingState::Enabled :
JitInterface::ProfilingState::Disabled);
}
JNIEXPORT void JNICALL Java_org_dolphinemu_dolphinemu_NativeLibrary_WriteProfileResults(JNIEnv*,
jclass)
{
HostThreadLock guard;
std::string filename = File::GetUserPath(D_DUMP_IDX) + "Debug/profiler.txt";
File::CreateFullPath(filename);
auto& jit_interface = Core::System::GetInstance().GetJitInterface();
jit_interface.WriteProfileResults(filename);
if (jit_interface.GetCore() == nullptr)
{
env->CallStaticVoidMethod(native_library_class, IDCache::GetDisplayToastMsg(),
ToJString(env, Common::GetStringT("JIT is not active")),
static_cast<jboolean>(false));
return;
}
const std::string filename = fmt::format("{}{}.txt", File::GetUserPath(D_DUMPDEBUG_JITBLOCKS_IDX),
SConfig::GetInstance().GetGameID());
File::IOFile f(filename, "w");
if (!f)
{
env->CallStaticVoidMethod(
native_library_class, IDCache::GetDisplayToastMsg(),
ToJString(env, Common::FmtFormatT("Failed to open \"{0}\" for writing.", filename)),
static_cast<jboolean>(false));
return;
}
jit_interface.JitBlockLogDump(Core::CPUThreadGuard{system}, f.GetHandle());
env->CallStaticVoidMethod(native_library_class, IDCache::GetDisplayToastMsg(),
ToJString(env, Common::FmtFormatT("Wrote to \"{0}\".", filename)),
static_cast<jboolean>(false));
}
// Surface Handling

View File

@ -108,8 +108,6 @@ add_library(common
Network.h
PcapFile.cpp
PcapFile.h
PerformanceCounter.cpp
PerformanceCounter.h
Profiler.cpp
Profiler.h
QoSSession.cpp

View File

@ -77,6 +77,7 @@
#define DUMP_SSL_DIR "SSL"
#define DUMP_DEBUG_DIR "Debug"
#define DUMP_DEBUG_BRANCHWATCH_DIR "BranchWatch"
#define DUMP_DEBUG_JITBLOCKS_DIR "JitBlocks"
#define LOGS_DIR "Logs"
#define MAIL_LOGS_DIR "Mail"
#define SHADERS_DIR "Shaders"

View File

@ -859,6 +859,8 @@ static void RebuildUserDirectories(unsigned int dir_index)
s_user_paths[D_DUMPDEBUG_IDX] = s_user_paths[D_DUMP_IDX] + DUMP_DEBUG_DIR DIR_SEP;
s_user_paths[D_DUMPDEBUG_BRANCHWATCH_IDX] =
s_user_paths[D_DUMPDEBUG_IDX] + DUMP_DEBUG_BRANCHWATCH_DIR DIR_SEP;
s_user_paths[D_DUMPDEBUG_JITBLOCKS_IDX] =
s_user_paths[D_DUMPDEBUG_IDX] + DUMP_DEBUG_JITBLOCKS_DIR DIR_SEP;
s_user_paths[D_LOGS_IDX] = s_user_paths[D_USER_IDX] + LOGS_DIR DIR_SEP;
s_user_paths[D_MAILLOGS_IDX] = s_user_paths[D_LOGS_IDX] + MAIL_LOGS_DIR DIR_SEP;
s_user_paths[D_THEMES_IDX] = s_user_paths[D_USER_IDX] + THEMES_DIR DIR_SEP;
@ -938,6 +940,8 @@ static void RebuildUserDirectories(unsigned int dir_index)
s_user_paths[D_DUMPDEBUG_IDX] = s_user_paths[D_DUMP_IDX] + DUMP_DEBUG_DIR DIR_SEP;
s_user_paths[D_DUMPDEBUG_BRANCHWATCH_IDX] =
s_user_paths[D_DUMPDEBUG_IDX] + DUMP_DEBUG_BRANCHWATCH_DIR DIR_SEP;
s_user_paths[D_DUMPDEBUG_JITBLOCKS_IDX] =
s_user_paths[D_DUMPDEBUG_IDX] + DUMP_DEBUG_JITBLOCKS_DIR DIR_SEP;
s_user_paths[F_MEM1DUMP_IDX] = s_user_paths[D_DUMP_IDX] + MEM1_DUMP;
s_user_paths[F_MEM2DUMP_IDX] = s_user_paths[D_DUMP_IDX] + MEM2_DUMP;
s_user_paths[F_ARAMDUMP_IDX] = s_user_paths[D_DUMP_IDX] + ARAM_DUMP;

View File

@ -54,6 +54,7 @@ enum
D_DUMPSSL_IDX,
D_DUMPDEBUG_IDX,
D_DUMPDEBUG_BRANCHWATCH_IDX,
D_DUMPDEBUG_JITBLOCKS_IDX,
D_LOAD_IDX,
D_LOGS_IDX,
D_MAILLOGS_IDX,

View File

@ -1,47 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#if !defined(_WIN32)
#include "Common/PerformanceCounter.h"
#include <cstdint>
#include <ctime>
#include <unistd.h>
#include "Common/CommonTypes.h"
#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0
#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK > 0
#define DOLPHIN_CLOCK CLOCK_MONOTONIC
#else
#define DOLPHIN_CLOCK CLOCK_REALTIME
#endif
#endif
// Non-Windows stand-in for the Win32 function of the same name.
//
// With POSIX timers available, reads DOLPHIN_CLOCK and writes the reading to *out as a
// single nanosecond count (seconds * 1e9 + nanoseconds). Returns false and leaves *out
// untouched if clock_gettime reports an error.
//
// Without POSIX timers, writes 0 to *out and returns false.
bool QueryPerformanceCounter(u64* out)
{
#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0
  timespec now;
  if (clock_gettime(DOLPHIN_CLOCK, &now) != 0)
    return false;

  const u64 whole_seconds = static_cast<u64>(now.tv_sec);
  const u64 leftover_nanoseconds = static_cast<u64>(now.tv_nsec);
  *out = leftover_nanoseconds + static_cast<u64>(1000000000) * whole_seconds;
  return true;
#else
  *out = 0;
  return false;
#endif
}
// Non-Windows stand-in for the Win32 function of the same name.
//
// Writes the number of counter ticks per second to *out. With POSIX timers the counter
// is expressed in nanoseconds, so the frequency is 1e9 and the call succeeds. Otherwise
// a frequency of 1 is written and false is returned.
bool QueryPerformanceFrequency(u64* out)
{
#if defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0
  constexpr u64 ticks_per_second = 1000000000;
  constexpr bool timers_available = true;
#else
  constexpr u64 ticks_per_second = 1;
  constexpr bool timers_available = false;
#endif

  *out = ticks_per_second;
  return timers_available;
}
#endif

View File

@ -1,16 +0,0 @@
// Copyright 2014 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#if !defined(_WIN32)
#include <cstdint>
#include "Common/CommonTypes.h"
typedef u64 LARGE_INTEGER;
bool QueryPerformanceCounter(u64* out);
bool QueryPerformanceFrequency(u64* lpFrequency);
#endif

View File

@ -523,7 +523,6 @@ add_library(core
PowerPC/PPCSymbolDB.h
PowerPC/PPCTables.cpp
PowerPC/PPCTables.h
PowerPC/Profiler.h
PowerPC/SignatureDB/CSVSignatureDB.cpp
PowerPC/SignatureDB/CSVSignatureDB.h
PowerPC/SignatureDB/DSYSignatureDB.cpp

View File

@ -509,6 +509,8 @@ const Info<bool> MAIN_DEBUG_JIT_SYSTEM_REGISTERS_OFF{
const Info<bool> MAIN_DEBUG_JIT_BRANCH_OFF{{System::Main, "Debug", "JitBranchOff"}, false};
const Info<bool> MAIN_DEBUG_JIT_REGISTER_CACHE_OFF{{System::Main, "Debug", "JitRegisterCacheOff"},
false};
const Info<bool> MAIN_DEBUG_JIT_ENABLE_PROFILING{{System::Main, "Debug", "JitEnableProfiling"},
false};
// Main.BluetoothPassthrough

View File

@ -335,6 +335,7 @@ extern const Info<bool> MAIN_DEBUG_JIT_PAIRED_OFF;
extern const Info<bool> MAIN_DEBUG_JIT_SYSTEM_REGISTERS_OFF;
extern const Info<bool> MAIN_DEBUG_JIT_BRANCH_OFF;
extern const Info<bool> MAIN_DEBUG_JIT_REGISTER_CACHE_OFF;
extern const Info<bool> MAIN_DEBUG_JIT_ENABLE_PROFILING;
// Main.BluetoothPassthrough

View File

@ -316,7 +316,7 @@ void CachedInterpreter::Jit(u32 address)
js.numFloatingPointInst = 0;
js.curBlock = b;
b->normalEntry = GetCodePtr();
b->normalEntry = b->near_begin = GetCodePtr();
for (u32 i = 0; i < code_block.m_num_instructions; i++)
{
@ -382,6 +382,10 @@ void CachedInterpreter::Jit(u32 address)
}
m_code.emplace_back();
b->near_end = GetCodePtr();
b->far_begin = nullptr;
b->far_end = nullptr;
b->codeSize = static_cast<u32>(GetCodePtr() - b->normalEntry);
b->originalSize = code_block.m_num_instructions;

View File

@ -931,6 +931,7 @@ enum CPUEmuFeatureFlags : u32
FEATURE_FLAG_MSR_DR = 1 << 0,
FEATURE_FLAG_MSR_IR = 1 << 1,
FEATURE_FLAG_PERFMON = 1 << 2,
FEATURE_FLAG_END_OF_ENUMERATION,
};
constexpr s32 SignExt16(s16 x)

View File

@ -19,7 +19,6 @@
#include "Common/GekkoDisassembler.h"
#include "Common/IOFile.h"
#include "Common/Logging/Log.h"
#include "Common/PerformanceCounter.h"
#include "Common/StringUtil.h"
#include "Common/Swap.h"
#include "Common/x64ABI.h"
@ -43,7 +42,6 @@
#include "Core/PowerPC/MMU.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/Profiler.h"
#include "Core/System.h"
using namespace Gen;
@ -454,20 +452,11 @@ bool Jit64::Cleanup()
did_something = true;
}
if (jo.profile_blocks)
if (IsProfilingEnabled())
{
ABI_PushRegistersAndAdjustStack({}, 0);
// get end tic
MOV(64, R(ABI_PARAM1), ImmPtr(&js.curBlock->profile_data.ticStop));
ABI_CallFunction(QueryPerformanceCounter);
// tic counter += (end tic - start tic)
MOV(64, R(RSCRATCH2), ImmPtr(&js.curBlock->profile_data));
MOV(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStop)));
SUB(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticStart)));
ADD(64, R(RSCRATCH), MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)));
ADD(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, downcountCounter)),
Imm32(js.downcountAmount));
MOV(64, MDisp(RSCRATCH2, offsetof(JitBlock::ProfileData, ticCounter)), R(RSCRATCH));
ABI_CallFunctionPC(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(),
js.downcountAmount);
ABI_PopRegistersAndAdjustStack({}, 0);
did_something = true;
}
@ -773,7 +762,7 @@ void Jit64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
EnableBlockLink();
EnableOptimization();
if (!jo.profile_blocks)
if (!IsProfilingEnabled())
{
if (m_system.GetCPU().IsStepping())
{
@ -899,15 +888,9 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
}
// Conditionally add profiling code.
if (jo.profile_blocks)
{
// get start tic
MOV(64, R(ABI_PARAM1), ImmPtr(&b->profile_data.ticStart));
int offset = static_cast<int>(offsetof(JitBlock::ProfileData, runCount)) -
static_cast<int>(offsetof(JitBlock::ProfileData, ticStart));
ADD(64, MDisp(ABI_PARAM1, offset), Imm8(1));
ABI_CallFunction(QueryPerformanceCounter);
}
if (IsProfilingEnabled())
ABI_CallFunctionP(&JitBlock::ProfileData::BeginProfiling, b->profile_data.get());
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
// should help logged stack-traces become more accurate
MOV(32, PPCSTATE(pc), Imm32(js.blockStart));

View File

@ -10,7 +10,6 @@
#include "Common/Logging/Log.h"
#include "Common/MathUtil.h"
#include "Common/MsgHandler.h"
#include "Common/PerformanceCounter.h"
#include "Common/StringUtil.h"
#include "Core/ConfigManager.h"
@ -26,7 +25,6 @@
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/Profiler.h"
#include "Core/System.h"
using namespace Arm64Gen;
@ -408,7 +406,11 @@ void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return
ARM64Reg exit_address_after_return_reg)
{
Cleanup();
EndTimeProfile(js.curBlock);
if (IsProfilingEnabled())
{
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(),
js.downcountAmount);
}
DoDownCount();
LK &= m_enable_blr_optimization;
@ -509,7 +511,11 @@ void JitArm64::WriteExit(Arm64Gen::ARM64Reg dest, bool LK, u32 exit_address_afte
MOV(DISPATCHER_PC, dest);
Cleanup();
EndTimeProfile(js.curBlock);
if (IsProfilingEnabled())
{
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(),
js.downcountAmount);
}
DoDownCount();
LK &= m_enable_blr_optimization;
@ -672,7 +678,11 @@ void JitArm64::WriteBLRExit(Arm64Gen::ARM64Reg dest)
MOV(DISPATCHER_PC, dest);
Cleanup();
EndTimeProfile(js.curBlock);
if (IsProfilingEnabled())
{
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(),
js.downcountAmount);
}
// Check if {PPC_PC, feature_flags} matches the current state, then RET to ARM_PC.
LDP(IndexType::Post, ARM64Reg::X2, ARM64Reg::X1, ARM64Reg::SP, 16);
@ -736,7 +746,11 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external, bool always
if (!always_exception)
SetJumpTarget(no_exceptions);
EndTimeProfile(js.curBlock);
if (IsProfilingEnabled())
{
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(),
js.downcountAmount);
}
DoDownCount();
B(dispatcher);
@ -804,44 +818,6 @@ void JitArm64::DumpCode(const u8* start, const u8* end)
WARN_LOG_FMT(DYNA_REC, "Code dump from {} to {}:\n{}", fmt::ptr(start), fmt::ptr(end), output);
}
// Emits the block-entry profiling prologue for `b`: increments the block's runCount and
// records the current counter value as ticStart. Clobbers X0, X1 and X2 in the emitted code.
void JitArm64::BeginTimeProfile(JitBlock* b)
{
// X0 = address of this block's profile data
MOVP2R(ARM64Reg::X0, &b->profile_data);
// X1 = runCount + 1
LDR(IndexType::Unsigned, ARM64Reg::X1, ARM64Reg::X0, offsetof(JitBlock::ProfileData, runCount));
ADD(ARM64Reg::X1, ARM64Reg::X1, 1);
// Fetch the current counter register
CNTVCT(ARM64Reg::X2);
// stores runCount and ticStart
// (a single pair store starting at runCount; this relies on ticStart being the u64
// member declared immediately after runCount in ProfileData)
STP(IndexType::Signed, ARM64Reg::X1, ARM64Reg::X2, ARM64Reg::X0,
offsetof(JitBlock::ProfileData, runCount));
}
// Emits the block-exit profiling epilogue for `b`: adds the ticks elapsed since ticStart
// to ticCounter and adds js.downcountAmount to downcountCounter.
// Emits nothing when jo.profile_blocks is false. Uses X0-X3 in the emitted code.
void JitArm64::EndTimeProfile(JitBlock* b)
{
if (!jo.profile_blocks)
return;
// Fetch the current counter register
CNTVCT(ARM64Reg::X1);
// X0 = address of this block's profile data
MOVP2R(ARM64Reg::X0, &b->profile_data);
// X1 = now - ticStart
LDR(IndexType::Unsigned, ARM64Reg::X2, ARM64Reg::X0, offsetof(JitBlock::ProfileData, ticStart));
SUB(ARM64Reg::X1, ARM64Reg::X1, ARM64Reg::X2);
// loads ticCounter and downcountCounter
// (one pair load starting at ticCounter; relies on downcountCounter being the u64
// member declared immediately after ticCounter in ProfileData)
LDP(IndexType::Signed, ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X0,
offsetof(JitBlock::ProfileData, ticCounter));
// ticCounter += elapsed ticks
ADD(ARM64Reg::X2, ARM64Reg::X2, ARM64Reg::X1);
// downcountCounter += js.downcountAmount
// NOTE(review): the trailing X1 argument is presumably a scratch register for
// materializing the immediate; the elapsed-tick value in X1 is no longer needed here.
ADDI2R(ARM64Reg::X3, ARM64Reg::X3, js.downcountAmount, ARM64Reg::X1);
// stores ticCounter and downcountCounter
STP(IndexType::Signed, ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X0,
offsetof(JitBlock::ProfileData, ticCounter));
}
void JitArm64::Run()
{
ProtectStack();
@ -933,7 +909,7 @@ void JitArm64::Jit(u32 em_address, bool clear_cache_and_retry_on_failure)
SetBlockLinkingEnabled(true);
SetOptimizationEnabled(true);
if (!jo.profile_blocks)
if (!IsProfilingEnabled())
{
if (cpu.IsStepping())
{
@ -1052,11 +1028,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
b->normalEntry = GetWritableCodePtr();
// Conditionally add profiling code.
if (jo.profile_blocks)
{
// get start tic
BeginTimeProfile(b);
}
if (IsProfilingEnabled())
ABI_CallFunction(&JitBlock::ProfileData::BeginProfiling, b->profile_data.get());
if (code_block.m_gqr_used.Count() == 1 &&
js.pairedQuantizeAddresses.find(js.blockStart) == js.pairedQuantizeAddresses.end())
@ -1246,7 +1219,11 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
Cleanup();
EndTimeProfile(js.curBlock);
if (IsProfilingEnabled())
{
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, b->profile_data.get(),
js.downcountAmount);
}
DoDownCount();
B(dispatcher_exit);

View File

@ -307,10 +307,6 @@ protected:
void GenerateQuantizedLoads();
void GenerateQuantizedStores();
// Profiling
void BeginTimeProfile(JitBlock* b);
void EndTimeProfile(JitBlock* b);
void EmitUpdateMembase();
void MSRUpdated(u32 msr);
void MSRUpdated(Arm64Gen::ARM64Reg msr);

View File

@ -57,7 +57,7 @@
// After resetting the stack to the top, we call _resetstkoflw() to restore
// the guard page at the 256kb mark.
const std::array<std::pair<bool JitBase::*, const Config::Info<bool>*>, 22> JitBase::JIT_SETTINGS{{
const std::array<std::pair<bool JitBase::*, const Config::Info<bool>*>, 23> JitBase::JIT_SETTINGS{{
{&JitBase::bJITOff, &Config::MAIN_DEBUG_JIT_OFF},
{&JitBase::bJITLoadStoreOff, &Config::MAIN_DEBUG_JIT_LOAD_STORE_OFF},
{&JitBase::bJITLoadStorelXzOff, &Config::MAIN_DEBUG_JIT_LOAD_STORE_LXZ_OFF},
@ -71,6 +71,7 @@ const std::array<std::pair<bool JitBase::*, const Config::Info<bool>*>, 22> JitB
{&JitBase::bJITSystemRegistersOff, &Config::MAIN_DEBUG_JIT_SYSTEM_REGISTERS_OFF},
{&JitBase::bJITBranchOff, &Config::MAIN_DEBUG_JIT_BRANCH_OFF},
{&JitBase::bJITRegisterCacheOff, &Config::MAIN_DEBUG_JIT_REGISTER_CACHE_OFF},
{&JitBase::m_enable_profiling, &Config::MAIN_DEBUG_JIT_ENABLE_PROFILING},
{&JitBase::m_enable_debugging, &Config::MAIN_ENABLE_DEBUGGING},
{&JitBase::m_enable_branch_following, &Config::MAIN_JIT_FOLLOW_BRANCH},
{&JitBase::m_enable_float_exceptions, &Config::MAIN_FLOAT_EXCEPTIONS},

View File

@ -86,7 +86,6 @@ protected:
bool memcheck;
bool fp_exceptions;
bool div_by_zero_exceptions;
bool profile_blocks;
};
struct JitState
{
@ -149,6 +148,7 @@ protected:
bool bJITSystemRegistersOff = false;
bool bJITBranchOff = false;
bool bJITRegisterCacheOff = false;
bool m_enable_profiling = false;
bool m_enable_debugging = false;
bool m_enable_branch_following = false;
bool m_enable_float_exceptions = false;
@ -163,7 +163,7 @@ protected:
bool m_cleanup_after_stackfault = false;
u8* m_stack_guard = nullptr;
static const std::array<std::pair<bool JitBase::*, const Config::Info<bool>*>, 22> JIT_SETTINGS;
static const std::array<std::pair<bool JitBase::*, const Config::Info<bool>*>, 23> JIT_SETTINGS;
bool DoesConfigNeedRefresh();
void RefreshConfig();
@ -187,6 +187,7 @@ public:
JitBase& operator=(JitBase&&) = delete;
~JitBase() override;
// True when JIT block profiling is enabled. m_enable_profiling is the member paired with
// Config::MAIN_DEBUG_JIT_ENABLE_PROFILING in JIT_SETTINGS.
bool IsProfilingEnabled() const { return m_enable_profiling; }
bool IsDebuggingEnabled() const { return m_enable_debugging; }
static const u8* Dispatch(JitBase& jit);

View File

@ -32,6 +32,18 @@ bool JitBlock::OverlapsPhysicalRange(u32 address, u32 length) const
physical_addresses.lower_bound(address + length);
}
// Block-entry hook, invoked through a call emitted by the JIT at the start of a block.
// Counts one more execution of the block and records the entry time.
// `data` must be non-null: it is dereferenced unconditionally.
void JitBlock::ProfileData::BeginProfiling(ProfileData* data)
{
  ProfileData& profile = *data;
  ++profile.run_count;
  // Sample the clock last so the bookkeeping above is not attributed to the block.
  profile.time_start = Clock::now();
}
// Block-exit hook, invoked through a call emitted by the JIT when a block is left.
// Adds `downcount_amount` to the block's cycle total and adds the time elapsed since the
// matching BeginProfiling call to its time total.
// `data` must be non-null: it is dereferenced unconditionally.
void JitBlock::ProfileData::EndProfiling(ProfileData* data, int downcount_amount)
{
  ProfileData& profile = *data;
  profile.cycles_spent += downcount_amount;

  const Clock::time_point time_stop = Clock::now();
  profile.time_spent += time_stop - profile.time_start;
}
JitBaseBlockCache::JitBaseBlockCache(JitBase& jit) : m_jit{jit}
{
}
@ -98,7 +110,8 @@ JitBlock** JitBaseBlockCache::GetFastBlockMapFallback()
return m_fast_block_map_fallback.data();
}
void JitBaseBlockCache::RunOnBlocks(std::function<void(const JitBlock&)> f)
void JitBaseBlockCache::RunOnBlocks(const Core::CPUThreadGuard&,
std::function<void(const JitBlock&)> f) const
{
for (const auto& e : block_map)
f(e.second);
@ -107,7 +120,7 @@ void JitBaseBlockCache::RunOnBlocks(std::function<void(const JitBlock&)> f)
JitBlock* JitBaseBlockCache::AllocateBlock(u32 em_address)
{
const u32 physical_address = m_jit.m_mmu.JitCache_TranslateAddress(em_address).address;
JitBlock& b = block_map.emplace(physical_address, JitBlock())->second;
JitBlock& b = block_map.emplace(physical_address, m_jit.IsProfilingEnabled())->second;
b.effectiveAddress = em_address;
b.physicalAddress = physical_address;
b.feature_flags = m_jit.m_ppc_state.feature_flags;

View File

@ -5,6 +5,7 @@
#include <array>
#include <bitset>
#include <chrono>
#include <cstring>
#include <functional>
#include <map>
@ -64,6 +65,27 @@ static_assert(std::is_standard_layout_v<JitBlockData>, "JitBlockData must have a
// address.
struct JitBlock : public JitBlockData
{
// Per-block software profiling counters.
// BeginProfiling and EndProfiling are static and take the instance as an explicit pointer;
// the JIT backends emit calls to them with a block's ProfileData address.
struct ProfileData
{
  // Monotonic clock used for all time measurements.
  using Clock = std::chrono::steady_clock;

  // Bumps run_count and records the entry time.
  static void BeginProfiling(ProfileData* data);
  // Adds downcount_amount to cycles_spent and the elapsed time to time_spent.
  static void EndProfiling(ProfileData* data, int downcount_amount);

  // Number of BeginProfiling calls made for this block.
  std::size_t run_count{0};
  // Sum of the downcount amounts passed to EndProfiling.
  u64 cycles_spent{0};
  // Sum of the durations measured between BeginProfiling and EndProfiling.
  Clock::duration time_spent{};

private:
  // Entry time recorded by the most recent BeginProfiling call.
  Clock::time_point time_start;
};
// Creates a block. ProfileData is heap-allocated only when profiling_enabled is true;
// otherwise profile_data stays null, so it must not be dereferenced for blocks created
// with profiling disabled.
explicit JitBlock(bool profiling_enabled)
: profile_data(profiling_enabled ? std::make_unique<ProfileData>() : nullptr)
{
}
bool OverlapsPhysicalRange(u32 address, u32 length) const;
// Information about exits to a known address from this block.
@ -83,15 +105,7 @@ struct JitBlock : public JitBlockData
// This set stores all physical addresses of all occupied instructions.
std::set<u32> physical_addresses;
// Block profiling data, structure is inlined in Jit.cpp
struct ProfileData
{
u64 ticCounter;
u64 downcountCounter;
u64 runCount;
u64 ticStart;
u64 ticStop;
} profile_data = {};
std::unique_ptr<ProfileData> profile_data;
};
typedef void (*CompiledCode)();
@ -146,7 +160,7 @@ public:
// Code Cache
u8** GetEntryPoints();
JitBlock** GetFastBlockMapFallback();
void RunOnBlocks(std::function<void(const JitBlock&)> f);
void RunOnBlocks(const Core::CPUThreadGuard& guard, std::function<void(const JitBlock&)> f) const;
JitBlock* AllocateBlock(u32 em_address);
void FinalizeBlock(JitBlock& block, bool block_link, const std::set<u32>& physical_addresses);

View File

@ -4,22 +4,14 @@
#include "Core/PowerPC/JitInterface.h"
#include <algorithm>
#include <cstdio>
#include <string>
#include <unordered_set>
#ifdef _WIN32
#include <windows.h>
#else
#include "Common/PerformanceCounter.h"
#endif
#include <fmt/format.h>
#include "Common/Assert.h"
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/IOFile.h"
#include "Common/MsgHandler.h"
#include "Core/Core.h"
@ -29,7 +21,6 @@
#include "Core/PowerPC/MMU.h"
#include "Core/PowerPC/PPCSymbolDB.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/PowerPC/Profiler.h"
#include "Core/System.h"
#ifdef _M_X86_64
@ -90,14 +81,6 @@ CPUCoreBase* JitInterface::GetCore() const
return m_jit.get();
}
void JitInterface::SetProfilingState(ProfilingState state)
{
if (!m_jit)
return;
m_jit->jo.profile_blocks = state == ProfilingState::Enabled;
}
void JitInterface::UpdateMembase()
{
if (!m_jit)
@ -123,58 +106,80 @@ void JitInterface::UpdateMembase()
}
}
void JitInterface::WriteProfileResults(const std::string& filename) const
static std::string_view GetDescription(const CPUEmuFeatureFlags flags)
{
Profiler::ProfileStats prof_stats;
GetProfileResults(&prof_stats);
File::IOFile f(filename, "w");
if (!f)
{
PanicAlertFmt("Failed to open {}", filename);
return;
}
f.WriteString("origAddr\tblkName\trunCount\tcost\ttimeCost\tpercent\ttimePercent\tOvAllinBlkTime("
"ms)\tblkCodeSize\n");
for (auto& stat : prof_stats.block_stats)
{
std::string name = m_system.GetPPCSymbolDB().GetDescription(stat.addr);
double percent = 100.0 * (double)stat.cost / (double)prof_stats.cost_sum;
double timePercent = 100.0 * (double)stat.tick_counter / (double)prof_stats.timecost_sum;
f.WriteString(fmt::format("{0:08x}\t{1}\t{2}\t{3}\t{4}\t{5:.2f}\t{6:.2f}\t{7:.2f}\t{8}\n",
stat.addr, name, stat.run_count, stat.cost, stat.tick_counter,
percent, timePercent,
static_cast<double>(stat.tick_counter) * 1000.0 /
static_cast<double>(prof_stats.countsPerSec),
stat.block_size));
}
static constexpr std::array<std::string_view, (FEATURE_FLAG_END_OF_ENUMERATION - 1) << 1>
descriptions = {
"", "DR", "IR", "DR|IR", "PERFMON", "DR|PERFMON", "IR|PERFMON", "DR|IR|PERFMON",
};
return descriptions[flags];
}
void JitInterface::GetProfileResults(Profiler::ProfileStats* prof_stats) const
void JitInterface::JitBlockLogDump(const Core::CPUThreadGuard& guard, std::FILE* file) const
{
// Can't really do this with no m_jit core available
std::fputs(
"ppcFeatureFlags\tppcAddress\tppcSize\thostNearSize\thostFarSize\trunCount\tcyclesSpent"
"\tcyclesAverage\tcyclesPercent\ttimeSpent(ns)\ttimeAverage(ns)\ttimePercent\tsymbol\n",
file);
if (!m_jit)
return;
prof_stats->cost_sum = 0;
prof_stats->timecost_sum = 0;
prof_stats->block_stats.clear();
const Core::CPUThreadGuard guard(m_system);
QueryPerformanceFrequency((LARGE_INTEGER*)&prof_stats->countsPerSec);
m_jit->GetBlockCache()->RunOnBlocks([&prof_stats](const JitBlock& block) {
const auto& data = block.profile_data;
u64 cost = data.downcountCounter;
u64 timecost = data.ticCounter;
// Todo: tweak.
if (data.runCount >= 1)
prof_stats->block_stats.emplace_back(block.effectiveAddress, cost, timecost, data.runCount,
block.codeSize);
prof_stats->cost_sum += cost;
prof_stats->timecost_sum += timecost;
if (m_jit->IsProfilingEnabled())
{
u64 overall_cycles_spent = 0;
JitBlock::ProfileData::Clock::duration overall_time_spent = {};
m_jit->GetBlockCache()->RunOnBlocks(guard, [&](const JitBlock& block) {
overall_cycles_spent += block.profile_data->cycles_spent;
overall_time_spent += block.profile_data->time_spent;
});
m_jit->GetBlockCache()->RunOnBlocks(guard, [&](const JitBlock& block) {
const Common::Symbol* const symbol =
m_jit->m_ppc_symbol_db.GetSymbolFromAddr(block.effectiveAddress);
const JitBlock::ProfileData* const data = block.profile_data.get();
sort(prof_stats->block_stats.begin(), prof_stats->block_stats.end());
const double cycles_percent =
overall_cycles_spent == 0 ? double{} : 100.0 * data->cycles_spent / overall_cycles_spent;
const double time_percent = overall_time_spent == JitBlock::ProfileData::Clock::duration{} ?
double{} :
100.0 * data->time_spent.count() / overall_time_spent.count();
const double cycles_average = data->run_count == 0 ?
double{} :
static_cast<double>(data->cycles_spent) / data->run_count;
const double time_average =
data->run_count == 0 ?
double{} :
std::chrono::duration_cast<std::chrono::duration<double, std::nano>>(data->time_spent)
.count() /
data->run_count;
const std::size_t host_near_code_size = block.near_end - block.near_begin;
const std::size_t host_far_code_size = block.far_end - block.far_begin;
fmt::println(
file, "{}\t{:08x}\t{}\t{}\t{}\t{}\t{}\t{:.6f}\t{:.6f}\t{}\t{:.6f}\t{:.6f}\t\"{}\"",
GetDescription(block.feature_flags), block.effectiveAddress,
block.originalSize * sizeof(UGeckoInstruction), host_near_code_size, host_far_code_size,
data->run_count, data->cycles_spent, cycles_average, cycles_percent,
std::chrono::duration_cast<std::chrono::nanoseconds>(data->time_spent).count(),
time_average, time_percent, symbol ? std::string_view{symbol->name} : "");
});
}
else
{
m_jit->GetBlockCache()->RunOnBlocks(guard, [&](const JitBlock& block) {
const Common::Symbol* const symbol =
m_jit->m_ppc_symbol_db.GetSymbolFromAddr(block.effectiveAddress);
const std::size_t host_near_code_size = block.near_end - block.near_begin;
const std::size_t host_far_code_size = block.far_end - block.far_begin;
fmt::println(file, "{}\t{:08x}\t{}\t{}\t{}\t-\t-\t-\t-\t-\t-\t-\t\"{}\"",
GetDescription(block.feature_flags), block.effectiveAddress,
block.originalSize * sizeof(UGeckoInstruction), host_near_code_size,
host_far_code_size, symbol ? std::string_view{symbol->name} : "");
});
}
}
std::variant<JitInterface::GetHostCodeError, JitInterface::GetHostCodeResult>

View File

@ -3,6 +3,9 @@
#pragma once
#include <cstddef>
#include <cstdio>
#include <functional>
#include <memory>
#include <string>
#include <variant>
@ -24,11 +27,6 @@ namespace PowerPC
enum class CPUCore;
}
namespace Profiler
{
struct ProfileStats;
}
class JitInterface
{
public:
@ -45,11 +43,6 @@ public:
CPUCoreBase* GetCore() const;
// Debugging
enum class ProfilingState
{
Enabled,
Disabled
};
enum class GetHostCodeError
{
NoJitActive,
@ -63,9 +56,7 @@ public:
};
void UpdateMembase();
void SetProfilingState(ProfilingState state);
void WriteProfileResults(const std::string& filename) const;
void GetProfileResults(Profiler::ProfileStats* prof_stats) const;
void JitBlockLogDump(const Core::CPUThreadGuard& guard, std::FILE* file) const;
std::variant<GetHostCodeError, GetHostCodeResult> GetHostCode(u32 address) const;
// Memory Utilities

View File

@ -1,36 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <cstddef>
#include <string>
#include <vector>
#include "Common/CommonTypes.h"
namespace Profiler
{
struct BlockStat
{
BlockStat(u32 _addr, u64 c, u64 ticks, u64 run, u32 size)
: addr(_addr), cost(c), tick_counter(ticks), run_count(run), block_size(size)
{
}
u32 addr;
u64 cost;
u64 tick_counter;
u64 run_count;
u32 block_size;
bool operator<(const BlockStat& other) const { return cost > other.cost; }
};
// Aggregate result of one profiling query: per-block rows plus the totals and timer
// frequency that JitInterface::WriteProfileResults uses to turn raw counters into
// percentages and milliseconds.
struct ProfileStats
{
// One entry per reported block.
std::vector<BlockStat> block_stats;
// Total cost accumulated over the visited blocks.
u64 cost_sum = 0;
// Total timer ticks accumulated over the visited blocks.
u64 timecost_sum = 0;
// Timer ticks per second, as reported by QueryPerformanceFrequency.
u64 countsPerSec = 0;
};
} // namespace Profiler

View File

@ -141,7 +141,6 @@
<ClInclude Include="Common\NandPaths.h" />
<ClInclude Include="Common\Network.h" />
<ClInclude Include="Common\PcapFile.h" />
<ClInclude Include="Common\PerformanceCounter.h" />
<ClInclude Include="Common\Profiler.h" />
<ClInclude Include="Common\QoSSession.h" />
<ClInclude Include="Common\Random.h" />
@ -447,7 +446,6 @@
<ClInclude Include="Core\PowerPC\PPCCache.h" />
<ClInclude Include="Core\PowerPC\PPCSymbolDB.h" />
<ClInclude Include="Core\PowerPC\PPCTables.h" />
<ClInclude Include="Core\PowerPC\Profiler.h" />
<ClInclude Include="Core\PowerPC\SignatureDB\CSVSignatureDB.h" />
<ClInclude Include="Core\PowerPC\SignatureDB\DSYSignatureDB.h" />
<ClInclude Include="Core\PowerPC\SignatureDB\MEGASignatureDB.h" />
@ -824,7 +822,6 @@
<ClCompile Include="Common\NandPaths.cpp" />
<ClCompile Include="Common\Network.cpp" />
<ClCompile Include="Common\PcapFile.cpp" />
<ClCompile Include="Common\PerformanceCounter.cpp" />
<ClCompile Include="Common\Profiler.cpp" />
<ClCompile Include="Common\QoSSession.cpp" />
<ClCompile Include="Common\Random.cpp" />

View File

@ -15,9 +15,12 @@
#include <QMap>
#include <QUrl>
#include <fmt/format.h>
#include "Common/Align.h"
#include "Common/CommonPaths.h"
#include "Common/FileUtil.h"
#include "Common/IOFile.h"
#include "Common/StringUtil.h"
#include "Core/AchievementManager.h"
@ -150,6 +153,7 @@ void MenuBar::OnEmulationStateChanged(Core::State state)
!Core::System::GetInstance().GetMovie().IsPlayingInput());
// JIT
const bool jit_exists = Core::System::GetInstance().GetJitInterface().GetCore() != nullptr;
m_jit_interpreter_core->setEnabled(running);
m_jit_block_linking->setEnabled(!running);
m_jit_disable_cache->setEnabled(!running);
@ -158,6 +162,7 @@ void MenuBar::OnEmulationStateChanged(Core::State state)
m_jit_clear_cache->setEnabled(running);
m_jit_log_coverage->setEnabled(!running);
m_jit_search_instruction->setEnabled(running);
m_jit_write_cache_log_dump->setEnabled(running && jit_exists);
// Symbols
m_symbols->setEnabled(running);
@ -198,6 +203,30 @@ void MenuBar::OnDebugModeToggled(bool enabled)
}
}
void MenuBar::OnWriteJitBlockLogDump()
{
const std::string filename = fmt::format("{}{}.txt", File::GetUserPath(D_DUMPDEBUG_JITBLOCKS_IDX),
SConfig::GetInstance().GetGameID());
File::IOFile f(filename, "w");
if (!f)
{
ModalMessageBox::warning(
this, tr("Error"),
tr("Failed to open \"%1\" for writing.").arg(QString::fromStdString(filename)));
return;
}
auto& system = Core::System::GetInstance();
system.GetJitInterface().JitBlockLogDump(Core::CPUThreadGuard{system}, f.GetHandle());
if (static bool ignore = false; ignore == false)
{
const int button_pressed = ModalMessageBox::information(
this, tr("Success"), tr("Wrote to \"%1\".").arg(QString::fromStdString(filename)),
QMessageBox::Ok | QMessageBox::Ignore);
if (button_pressed == QMessageBox::Ignore)
ignore = true;
}
}
void MenuBar::AddFileMenu()
{
QMenu* file_menu = addMenu(tr("&File"));
@ -892,6 +921,17 @@ void MenuBar::AddJITMenu()
m_jit->addSeparator();
m_jit_profile_blocks = m_jit->addAction(tr("Enable JIT Block Profiling"));
m_jit_profile_blocks->setCheckable(true);
m_jit_profile_blocks->setChecked(Config::Get(Config::MAIN_DEBUG_JIT_ENABLE_PROFILING));
connect(m_jit_profile_blocks, &QAction::toggled, [](bool enabled) {
Config::SetBaseOrCurrent(Config::MAIN_DEBUG_JIT_ENABLE_PROFILING, enabled);
});
m_jit_write_cache_log_dump =
m_jit->addAction(tr("Write JIT Block Log Dump"), this, &MenuBar::OnWriteJitBlockLogDump);
m_jit->addSeparator();
m_jit_off = m_jit->addAction(tr("JIT Off (JIT Core)"));
m_jit_off->setCheckable(true);
m_jit_off->setChecked(Config::Get(Config::MAIN_DEBUG_JIT_OFF));

View File

@ -185,6 +185,7 @@ private:
void OnRecordingStatusChanged(bool recording);
void OnReadOnlyModeChanged(bool read_only);
void OnDebugModeToggled(bool enabled);
void OnWriteJitBlockLogDump();
QString GetSignatureSelector() const;
@ -268,6 +269,8 @@ private:
QAction* m_jit_clear_cache;
QAction* m_jit_log_coverage;
QAction* m_jit_search_instruction;
QAction* m_jit_profile_blocks;
QAction* m_jit_write_cache_log_dump;
QAction* m_jit_off;
QAction* m_jit_loadstore_off;
QAction* m_jit_loadstore_lbzx_off;

View File

@ -76,6 +76,7 @@ static void CreateDumpPath(std::string path)
File::CreateFullPath(File::GetUserPath(D_DUMPTEXTURES_IDX));
File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_IDX));
File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_BRANCHWATCH_IDX));
File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_JITBLOCKS_IDX));
}
static void CreateLoadPath(std::string path)
@ -257,6 +258,7 @@ void CreateDirectories()
File::CreateFullPath(File::GetUserPath(D_DUMPTEXTURES_IDX));
File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_IDX));
File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_BRANCHWATCH_IDX));
File::CreateFullPath(File::GetUserPath(D_DUMPDEBUG_JITBLOCKS_IDX));
File::CreateFullPath(File::GetUserPath(D_GAMESETTINGS_IDX));
File::CreateFullPath(File::GetUserPath(D_GCUSER_IDX));
File::CreateFullPath(File::GetUserPath(D_GCUSER_IDX) + USA_DIR DIR_SEP);