mirror of https://github.com/PCSX2/pcsx2.git
3rdparty: Upgrade xbyak to 6.00
This commit is contained in:
parent
1917f2b98a
commit
791f2a63ac
|
@ -0,0 +1,47 @@
|
|||
|
||||
Copyright (c) 2007 MITSUNARI Shigeo
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
Neither the name of the copyright owner nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
|
||||
す場合に限り、再頒布および使用が許可されます。
|
||||
|
||||
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
|
||||
を含めること。
|
||||
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
|
||||
権表示、本条件一覧、および下記免責条項を含めること。
|
||||
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
|
||||
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
|
||||
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
|
||||
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
|
||||
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
|
||||
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
|
||||
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
|
||||
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
|
||||
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
|
||||
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
|
||||
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
|
||||
一切責任を負わないものとします。
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,36 +1,17 @@
|
|||
/* Copyright (c) 2007 MITSUNARI Shigeo
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright owner nor the names of its contributors may
|
||||
* be used to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef XBYAK_XBYAK_UTIL_H_
|
||||
#define XBYAK_XBYAK_UTIL_H_
|
||||
|
||||
// We want to keep this file similar to the original xbyak
|
||||
// clang-format off
|
||||
#ifdef XBYAK_ONLY_CLASS_CPU
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#ifndef XBYAK_THROW
|
||||
#define XBYAK_THROW(x) ;
|
||||
#define XBYAK_THROW_RET(x, y) return y;
|
||||
#endif
|
||||
#else
|
||||
#include <string.h>
|
||||
|
||||
/**
|
||||
utility class and functions for Xbyak
|
||||
|
@ -39,7 +20,13 @@
|
|||
@note this header is UNDER CONSTRUCTION!
|
||||
*/
|
||||
#include "xbyak.h"
|
||||
#endif // XBYAK_ONLY_CLASS_CPU
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#define XBYAK_INTEL_CPU_SPECIFIC
|
||||
#endif
|
||||
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||
static inline __declspec(naked) void __cpuid(int[4], int)
|
||||
|
@ -78,35 +65,44 @@
|
|||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef XBYAK_USE_VTUNE
|
||||
// -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
|
||||
#include <jitprofiling.h>
|
||||
#ifdef _MSC_VER
|
||||
extern "C" unsigned __int64 __xgetbv(int);
|
||||
#pragma comment(lib, "libittnotify.lib")
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
#define XBYAK_USE_PERF
|
||||
#endif
|
||||
|
||||
namespace Xbyak { namespace util {
|
||||
|
||||
/* GCC uses AVX/SSE4 operation to handle the uint64 type.
|
||||
*
|
||||
* It is quite annoying because the purpose of the code is to test the support
|
||||
* of AVX/SSEn
|
||||
*
|
||||
* So far, we don't need other ISA on i386 so I hacked the code to limit the
|
||||
* type to 32 bits. If we want to support AVX512 we might need to shuffle the
|
||||
* code a bit.
|
||||
*
|
||||
* Extra note: it would be waste to use AVX512 on 32 bits, registers are
|
||||
* limited to 8 instead of 32.
|
||||
*/
|
||||
typedef enum {
|
||||
SmtLevel = 1,
|
||||
CoreLevel = 2
|
||||
} IntelCpuTopologyLevel;
|
||||
|
||||
/**
|
||||
CPU detection class
|
||||
*/
|
||||
class Cpu {
|
||||
#ifdef XBYAK64
|
||||
uint64 type_;
|
||||
#else
|
||||
uint32 type_;
|
||||
#endif
|
||||
uint64_t type_;
|
||||
//system topology
|
||||
bool x2APIC_supported_;
|
||||
static const size_t maxTopologyLevels = 2;
|
||||
unsigned int numCores_[maxTopologyLevels];
|
||||
|
||||
static const unsigned int maxNumberCacheLevels = 10;
|
||||
unsigned int dataCacheSize_[maxNumberCacheLevels];
|
||||
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
|
||||
unsigned int dataCacheLevels_;
|
||||
|
||||
unsigned int get32bitAsBE(const char *x) const
|
||||
{
|
||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||
|
@ -117,7 +113,7 @@ class Cpu {
|
|||
}
|
||||
void setFamily()
|
||||
{
|
||||
unsigned int data[4];
|
||||
unsigned int data[4] = {};
|
||||
getCpuid(1, data);
|
||||
stepping = data[0] & mask(4);
|
||||
model = (data[0] >> 4) & mask(4);
|
||||
|
@ -136,6 +132,96 @@ class Cpu {
|
|||
displayModel = model;
|
||||
}
|
||||
}
|
||||
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
|
||||
{
|
||||
return (val >> base) & ((1u << (end - base)) - 1);
|
||||
}
|
||||
void setNumCores()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
|
||||
unsigned int data[4] = {};
|
||||
|
||||
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
/*
|
||||
if leaf 11 exists(x2APIC is supported),
|
||||
we use it to get the number of smt cores and cores on socket
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
x2APIC_supported_ = true;
|
||||
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
|
||||
getCpuidEx(0xB, i, data);
|
||||
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
||||
if (level == SmtLevel || level == CoreLevel) {
|
||||
numCores_[level - 1] = extractBit(data[1], 0, 15);
|
||||
}
|
||||
}
|
||||
/*
|
||||
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
|
||||
*/
|
||||
numCores_[SmtLevel - 1] = (std::max)(1u, numCores_[SmtLevel - 1]);
|
||||
numCores_[CoreLevel - 1] = (std::max)(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
|
||||
} else {
|
||||
/*
|
||||
Failed to deremine num of cores without x2APIC support.
|
||||
TODO: USE initial APIC ID to determine ncores.
|
||||
*/
|
||||
numCores_[SmtLevel - 1] = 0;
|
||||
numCores_[CoreLevel - 1] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
void setCacheHierarchy()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
const unsigned int NO_CACHE = 0;
|
||||
const unsigned int DATA_CACHE = 1;
|
||||
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||
const unsigned int UNIFIED_CACHE = 3;
|
||||
unsigned int smt_width = 0;
|
||||
unsigned int logical_cores = 0;
|
||||
unsigned int data[4] = {};
|
||||
|
||||
if (x2APIC_supported_) {
|
||||
smt_width = numCores_[0];
|
||||
logical_cores = numCores_[1];
|
||||
}
|
||||
|
||||
/*
|
||||
Assumptions:
|
||||
the first level of data cache is not shared (which is the
|
||||
case for every existing architecture) and use this to
|
||||
determine the SMT width for arch not supporting leaf 11.
|
||||
when leaf 4 reports a number of core less than numCores_
|
||||
on socket reported by leaf 11, then it is a correct number
|
||||
of cores not an upperbound.
|
||||
*/
|
||||
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
|
||||
getCpuidEx(0x4, i, data);
|
||||
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||
if (cacheType == NO_CACHE) break;
|
||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||
}
|
||||
assert(actual_logical_cores != 0);
|
||||
dataCacheSize_[dataCacheLevels_] =
|
||||
(extractBit(data[1], 22, 31) + 1)
|
||||
* (extractBit(data[1], 12, 21) + 1)
|
||||
* (extractBit(data[1], 0, 11) + 1)
|
||||
* (data[2] + 1);
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||
assert(smt_width != 0);
|
||||
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
|
||||
dataCacheLevels_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
int model;
|
||||
int family;
|
||||
|
@ -144,42 +230,76 @@ public:
|
|||
int extFamily;
|
||||
int displayFamily; // family + extFamily
|
||||
int displayModel; // model + extModel
|
||||
|
||||
unsigned int getNumCores(IntelCpuTopologyLevel level) const {
|
||||
if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
|
||||
switch (level) {
|
||||
case SmtLevel: return numCores_[level - 1];
|
||||
case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1];
|
||||
default: XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
|
||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||
{
|
||||
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
||||
return coresSharignDataCache_[i];
|
||||
}
|
||||
unsigned int getDataCacheSize(unsigned int i) const
|
||||
{
|
||||
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
||||
return dataCacheSize_[i];
|
||||
}
|
||||
|
||||
/*
|
||||
data[] = { eax, ebx, ecx, edx }
|
||||
*/
|
||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||
#else
|
||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||
#endif
|
||||
#else
|
||||
(void)eaxIn;
|
||||
(void)data;
|
||||
#endif
|
||||
}
|
||||
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
||||
#else
|
||||
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
||||
#endif
|
||||
#else
|
||||
(void)eaxIn;
|
||||
(void)ecxIn;
|
||||
(void)data;
|
||||
#endif
|
||||
}
|
||||
static inline uint64 getXfeature()
|
||||
static inline uint64_t getXfeature()
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
return __xgetbv(0);
|
||||
return _xgetbv(0);
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
// xgetvb is not support on gcc 4.2
|
||||
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
#ifdef XBYAK64
|
||||
typedef uint64 Type;
|
||||
#else
|
||||
typedef uint32 Type;
|
||||
#endif
|
||||
typedef uint64_t Type;
|
||||
|
||||
static const Type NONE = 0;
|
||||
static const Type tMMX = 1 << 0;
|
||||
static const Type tMMX2 = 1 << 1;
|
||||
|
@ -192,7 +312,6 @@ public:
|
|||
static const Type tSSE42 = 1 << 8;
|
||||
static const Type tPOPCNT = 1 << 9;
|
||||
static const Type tAESNI = 1 << 10;
|
||||
static const Type tSSE5 = 1 << 11;
|
||||
static const Type tOSXSAVE = 1 << 12;
|
||||
static const Type tPCLMULQDQ = 1 << 13;
|
||||
static const Type tAVX = 1 << 14;
|
||||
|
@ -200,7 +319,6 @@ public:
|
|||
|
||||
static const Type t3DN = 1 << 16;
|
||||
static const Type tE3DN = 1 << 17;
|
||||
static const Type tSSE4a = 1 << 18;
|
||||
static const Type tRDTSCP = 1 << 19;
|
||||
static const Type tAVX2 = 1 << 20;
|
||||
static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt
|
||||
|
@ -215,111 +333,184 @@ public:
|
|||
static const Type tADX = 1 << 28; // adcx, adox
|
||||
static const Type tRDSEED = 1 << 29; // rdseed
|
||||
static const Type tSMAP = 1 << 30; // stac
|
||||
#ifdef XBYAK64
|
||||
static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest
|
||||
static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort
|
||||
static const Type tF16C = uint64(1) << 33; // vcvtph2ps, vcvtps2ph
|
||||
static const Type tMOVBE = uint64(1) << 34; // mobve
|
||||
static const Type tAVX512F = uint64(1) << 35;
|
||||
static const Type tAVX512DQ = uint64(1) << 36;
|
||||
static const Type tAVX512IFMA = uint64(1) << 37;
|
||||
static const Type tAVX512PF = uint64(1) << 38;
|
||||
static const Type tAVX512ER = uint64(1) << 39;
|
||||
static const Type tAVX512CD = uint64(1) << 40;
|
||||
static const Type tAVX512BW = uint64(1) << 41;
|
||||
static const Type tAVX512VL = uint64(1) << 42;
|
||||
static const Type tAVX512VBMI = uint64(1) << 43;
|
||||
#endif
|
||||
static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest
|
||||
static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort
|
||||
static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph
|
||||
static const Type tMOVBE = uint64_t(1) << 34; // mobve
|
||||
static const Type tAVX512F = uint64_t(1) << 35;
|
||||
static const Type tAVX512DQ = uint64_t(1) << 36;
|
||||
static const Type tAVX512_IFMA = uint64_t(1) << 37;
|
||||
static const Type tAVX512IFMA = tAVX512_IFMA;
|
||||
static const Type tAVX512PF = uint64_t(1) << 38;
|
||||
static const Type tAVX512ER = uint64_t(1) << 39;
|
||||
static const Type tAVX512CD = uint64_t(1) << 40;
|
||||
static const Type tAVX512BW = uint64_t(1) << 41;
|
||||
static const Type tAVX512VL = uint64_t(1) << 42;
|
||||
static const Type tAVX512_VBMI = uint64_t(1) << 43;
|
||||
static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
|
||||
static const Type tAVX512_4VNNIW = uint64_t(1) << 44;
|
||||
static const Type tAVX512_4FMAPS = uint64_t(1) << 45;
|
||||
static const Type tPREFETCHWT1 = uint64_t(1) << 46;
|
||||
static const Type tPREFETCHW = uint64_t(1) << 47;
|
||||
static const Type tSHA = uint64_t(1) << 48;
|
||||
static const Type tMPX = uint64_t(1) << 49;
|
||||
static const Type tAVX512_VBMI2 = uint64_t(1) << 50;
|
||||
static const Type tGFNI = uint64_t(1) << 51;
|
||||
static const Type tVAES = uint64_t(1) << 52;
|
||||
static const Type tVPCLMULQDQ = uint64_t(1) << 53;
|
||||
static const Type tAVX512_VNNI = uint64_t(1) << 54;
|
||||
static const Type tAVX512_BITALG = uint64_t(1) << 55;
|
||||
static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56;
|
||||
static const Type tAVX512_BF16 = uint64_t(1) << 57;
|
||||
static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58;
|
||||
static const Type tAMX_TILE = uint64_t(1) << 59;
|
||||
static const Type tAMX_INT8 = uint64_t(1) << 60;
|
||||
static const Type tAMX_BF16 = uint64_t(1) << 61;
|
||||
static const Type tAVX_VNNI = uint64_t(1) << 62;
|
||||
static const Type tAVX512_FP16 = uint64_t(1) << 11;
|
||||
// 18, 63
|
||||
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
, x2APIC_supported_(false)
|
||||
, numCores_()
|
||||
, dataCacheSize_()
|
||||
, coresSharignDataCache_()
|
||||
, dataCacheLevels_(0)
|
||||
{
|
||||
unsigned int data[4];
|
||||
unsigned int data[4] = {};
|
||||
const unsigned int& EAX = data[0];
|
||||
const unsigned int& EBX = data[1];
|
||||
const unsigned int& ECX = data[2];
|
||||
const unsigned int& EDX = data[3];
|
||||
getCpuid(0, data);
|
||||
const unsigned int maxNum = data[0];
|
||||
const unsigned int maxNum = EAX;
|
||||
static const char intel[] = "ntel";
|
||||
static const char amd[] = "cAMD";
|
||||
if (data[2] == get32bitAsBE(amd)) {
|
||||
if (ECX == get32bitAsBE(amd)) {
|
||||
type_ |= tAMD;
|
||||
getCpuid(0x80000001, data);
|
||||
if (data[3] & (1U << 31)) type_ |= t3DN;
|
||||
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1U << 30)) type_ |= tE3DN;
|
||||
if (data[3] & (1U << 22)) type_ |= tMMX2;
|
||||
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 31)) {
|
||||
type_ |= t3DN;
|
||||
// 3DNow! implies support for PREFETCHW on AMD
|
||||
type_ |= tPREFETCHW;
|
||||
}
|
||||
if (data[2] == get32bitAsBE(intel)) {
|
||||
type_ |= tINTEL;
|
||||
getCpuid(0x80000001, data);
|
||||
if (data[3] & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (data[2] & (1U << 5)) type_ |= tLZCNT;
|
||||
}
|
||||
getCpuid(1, data);
|
||||
if (data[2] & (1U << 0)) type_ |= tSSE3;
|
||||
if (data[2] & (1U << 9)) type_ |= tSSSE3;
|
||||
if (data[2] & (1U << 19)) type_ |= tSSE41;
|
||||
if (data[2] & (1U << 20)) type_ |= tSSE42;
|
||||
#ifdef XBYAK64
|
||||
if (data[2] & (1U << 22)) type_ |= tMOVBE;
|
||||
if (data[2] & (1U << 29)) type_ |= tF16C;
|
||||
#endif
|
||||
if (data[2] & (1U << 23)) type_ |= tPOPCNT;
|
||||
if (data[2] & (1U << 25)) type_ |= tAESNI;
|
||||
if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (data[2] & (1U << 27)) type_ |= tOSXSAVE;
|
||||
if (data[2] & (1U << 30)) type_ |= tRDRAND;
|
||||
|
||||
if (data[3] & (1U << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1U << 23)) type_ |= tMMX;
|
||||
if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE;
|
||||
if (data[3] & (1U << 26)) type_ |= tSSE2;
|
||||
if (EDX & (1U << 29)) {
|
||||
// Long mode implies support for PREFETCHW on AMD
|
||||
type_ |= tPREFETCHW;
|
||||
}
|
||||
}
|
||||
if (ECX == get32bitAsBE(intel)) {
|
||||
type_ |= tINTEL;
|
||||
}
|
||||
|
||||
// Extended flags information
|
||||
getCpuid(0x80000000, data);
|
||||
if (EAX >= 0x80000001) {
|
||||
getCpuid(0x80000001, data);
|
||||
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
if (EDX & (1U << 30)) type_ |= tE3DN;
|
||||
if (EDX & (1U << 27)) type_ |= tRDTSCP;
|
||||
if (EDX & (1U << 22)) type_ |= tMMX2;
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (ECX & (1U << 5)) type_ |= tLZCNT;
|
||||
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
|
||||
}
|
||||
|
||||
getCpuid(1, data);
|
||||
if (ECX & (1U << 0)) type_ |= tSSE3;
|
||||
if (ECX & (1U << 9)) type_ |= tSSSE3;
|
||||
if (ECX & (1U << 19)) type_ |= tSSE41;
|
||||
if (ECX & (1U << 20)) type_ |= tSSE42;
|
||||
if (ECX & (1U << 22)) type_ |= tMOVBE;
|
||||
if (ECX & (1U << 23)) type_ |= tPOPCNT;
|
||||
if (ECX & (1U << 25)) type_ |= tAESNI;
|
||||
if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
|
||||
if (ECX & (1U << 27)) type_ |= tOSXSAVE;
|
||||
if (ECX & (1U << 30)) type_ |= tRDRAND;
|
||||
if (ECX & (1U << 29)) type_ |= tF16C;
|
||||
|
||||
if (EDX & (1U << 15)) type_ |= tCMOV;
|
||||
if (EDX & (1U << 23)) type_ |= tMMX;
|
||||
if (EDX & (1U << 25)) type_ |= tMMX2 | tSSE;
|
||||
if (EDX & (1U << 26)) type_ |= tSSE2;
|
||||
|
||||
if (type_ & tOSXSAVE) {
|
||||
// check XFEATURE_ENABLED_MASK[2:1] = '11b'
|
||||
uint64 bv = getXfeature();
|
||||
uint64_t bv = getXfeature();
|
||||
if ((bv & 6) == 6) {
|
||||
if (data[2] & (1U << 28)) type_ |= tAVX;
|
||||
if (data[2] & (1U << 12)) type_ |= tFMA;
|
||||
#ifdef XBYAK64
|
||||
if (((bv >> 5) & 7) == 7) {
|
||||
getCpuid(7, data);
|
||||
if (data[1] & (1U << 16)) type_ |= tAVX512F;
|
||||
if (type_ & tAVX512F) {
|
||||
getCpuidEx(7, 0, data);
|
||||
if (data[1] & (1U << 17)) type_ |= tAVX512DQ;
|
||||
if (data[1] & (1U << 21)) type_ |= tAVX512IFMA;
|
||||
if (data[1] & (1U << 26)) type_ |= tAVX512PF;
|
||||
if (data[1] & (1U << 27)) type_ |= tAVX512ER;
|
||||
if (data[1] & (1U << 28)) type_ |= tAVX512CD;
|
||||
if (data[1] & (1U << 30)) type_ |= tAVX512BW;
|
||||
if (data[1] & (1U << 31)) type_ |= tAVX512VL;
|
||||
if (data[2] & (1U << 1)) type_ |= tAVX512VBMI;
|
||||
}
|
||||
}
|
||||
if (ECX & (1U << 28)) type_ |= tAVX;
|
||||
if (ECX & (1U << 12)) type_ |= tFMA;
|
||||
// do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support
|
||||
#if !defined(__APPLE__)
|
||||
if (((bv >> 5) & 7) == 7)
|
||||
#endif
|
||||
{
|
||||
getCpuidEx(7, 0, data);
|
||||
if (EBX & (1U << 16)) type_ |= tAVX512F;
|
||||
if (type_ & tAVX512F) {
|
||||
if (EBX & (1U << 17)) type_ |= tAVX512DQ;
|
||||
if (EBX & (1U << 21)) type_ |= tAVX512_IFMA;
|
||||
if (EBX & (1U << 26)) type_ |= tAVX512PF;
|
||||
if (EBX & (1U << 27)) type_ |= tAVX512ER;
|
||||
if (EBX & (1U << 28)) type_ |= tAVX512CD;
|
||||
if (EBX & (1U << 30)) type_ |= tAVX512BW;
|
||||
if (EBX & (1U << 31)) type_ |= tAVX512VL;
|
||||
if (ECX & (1U << 1)) type_ |= tAVX512_VBMI;
|
||||
if (ECX & (1U << 6)) type_ |= tAVX512_VBMI2;
|
||||
if (ECX & (1U << 8)) type_ |= tGFNI;
|
||||
if (ECX & (1U << 9)) type_ |= tVAES;
|
||||
if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
|
||||
if (ECX & (1U << 11)) type_ |= tAVX512_VNNI;
|
||||
if (ECX & (1U << 12)) type_ |= tAVX512_BITALG;
|
||||
if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
|
||||
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
|
||||
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
|
||||
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
|
||||
if ((type_ & tAVX512BW) && (EDX & (1U << 23))) type_ |= tAVX512_FP16;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxNum >= 7) {
|
||||
getCpuidEx(7, 0, data);
|
||||
if (type_ & tAVX && data[1] & 0x20) type_ |= tAVX2;
|
||||
if (data[1] & (1U << 3)) type_ |= tBMI1;
|
||||
if (data[1] & (1U << 8)) type_ |= tBMI2;
|
||||
if (data[1] & (1U << 9)) type_ |= tENHANCED_REP;
|
||||
if (data[1] & (1U << 18)) type_ |= tRDSEED;
|
||||
if (data[1] & (1U << 19)) type_ |= tADX;
|
||||
if (data[1] & (1U << 20)) type_ |= tSMAP;
|
||||
#ifdef XBYAK64
|
||||
if (data[1] & (1U << 4)) type_ |= tHLE;
|
||||
if (data[1] & (1U << 11)) type_ |= tRTM;
|
||||
#endif
|
||||
const uint32_t maxNumSubLeaves = EAX;
|
||||
if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
|
||||
if (EBX & (1U << 3)) type_ |= tBMI1;
|
||||
if (EBX & (1U << 8)) type_ |= tBMI2;
|
||||
if (EBX & (1U << 9)) type_ |= tENHANCED_REP;
|
||||
if (EBX & (1U << 18)) type_ |= tRDSEED;
|
||||
if (EBX & (1U << 19)) type_ |= tADX;
|
||||
if (EBX & (1U << 20)) type_ |= tSMAP;
|
||||
if (EBX & (1U << 4)) type_ |= tHLE;
|
||||
if (EBX & (1U << 11)) type_ |= tRTM;
|
||||
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||
if (EBX & (1U << 29)) type_ |= tSHA;
|
||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
|
||||
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
|
||||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||
if (maxNumSubLeaves >= 1) {
|
||||
getCpuidEx(7, 1, data);
|
||||
if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
|
||||
if (type_ & tAVX512F) {
|
||||
if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
|
||||
}
|
||||
}
|
||||
}
|
||||
setFamily();
|
||||
setNumCores();
|
||||
setCacheHierarchy();
|
||||
}
|
||||
void putFamily()
|
||||
void putFamily() const
|
||||
{
|
||||
#ifndef XBYAK_ONLY_CLASS_CPU
|
||||
printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
|
||||
family, model, stepping, extFamily, extModel);
|
||||
printf("display:family=%X, model=%X\n", displayFamily, displayModel);
|
||||
#endif
|
||||
}
|
||||
bool has(Type type) const
|
||||
{
|
||||
|
@ -327,16 +518,22 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
#ifndef XBYAK_ONLY_CLASS_CPU
|
||||
class Clock {
|
||||
public:
|
||||
static inline uint64 getRdtsc()
|
||||
static inline uint64_t getRdtsc()
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
Clock()
|
||||
|
@ -354,10 +551,10 @@ public:
|
|||
count_++;
|
||||
}
|
||||
int getCount() const { return count_; }
|
||||
uint64 getClock() const { return clock_; }
|
||||
uint64_t getClock() const { return clock_; }
|
||||
void clear() { count_ = 0; clock_ = 0; }
|
||||
private:
|
||||
uint64 clock_;
|
||||
uint64_t clock_;
|
||||
int count_;
|
||||
};
|
||||
|
||||
|
@ -366,7 +563,7 @@ const int UseRCX = 1 << 6;
|
|||
const int UseRDX = 1 << 7;
|
||||
|
||||
class Pack {
|
||||
static const size_t maxTblNum = 10;
|
||||
static const size_t maxTblNum = 15;
|
||||
const Xbyak::Reg64 *tbl_[maxTblNum];
|
||||
size_t n_;
|
||||
public:
|
||||
|
@ -375,9 +572,14 @@ public:
|
|||
Pack(const Pack& rhs)
|
||||
: n_(rhs.n_)
|
||||
{
|
||||
if (n_ > maxTblNum) throw Error(ERR_INTERNAL);
|
||||
for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
|
||||
}
|
||||
Pack& operator=(const Pack& rhs)
|
||||
{
|
||||
n_ = rhs.n_;
|
||||
for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
|
||||
return *this;
|
||||
}
|
||||
Pack(const Xbyak::Reg64& t0)
|
||||
{ n_ = 1; tbl_[0] = &t0; }
|
||||
Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
|
@ -400,9 +602,9 @@ public:
|
|||
{ n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; }
|
||||
Pack& append(const Xbyak::Reg64& t)
|
||||
{
|
||||
if (n_ == 10) {
|
||||
if (n_ == maxTblNum) {
|
||||
fprintf(stderr, "ERR Pack::can't append\n");
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this)
|
||||
}
|
||||
tbl_[n_++] = &t;
|
||||
return *this;
|
||||
|
@ -411,7 +613,7 @@ public:
|
|||
{
|
||||
if (n > maxTblNum) {
|
||||
fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
XBYAK_THROW(ERR_BAD_PARAMETER)
|
||||
}
|
||||
n_ = n;
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
|
@ -421,8 +623,8 @@ public:
|
|||
const Xbyak::Reg64& operator[](size_t n) const
|
||||
{
|
||||
if (n >= n_) {
|
||||
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax)
|
||||
}
|
||||
return *tbl_[n];
|
||||
}
|
||||
|
@ -435,7 +637,7 @@ public:
|
|||
if (num == size_t(-1)) num = n_ - pos;
|
||||
if (pos + num > n_) {
|
||||
fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, Pack())
|
||||
}
|
||||
Pack pack;
|
||||
pack.n_ = num;
|
||||
|
@ -463,6 +665,7 @@ class StackFrame {
|
|||
static const int rcxPos = 3;
|
||||
static const int rdxPos = 2;
|
||||
#endif
|
||||
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
|
||||
Xbyak::CodeGenerator *code_;
|
||||
int pNum_;
|
||||
int tNum_;
|
||||
|
@ -472,7 +675,7 @@ class StackFrame {
|
|||
int P_;
|
||||
bool makeEpilog_;
|
||||
Xbyak::Reg64 pTbl_[4];
|
||||
Xbyak::Reg64 tTbl_[10];
|
||||
Xbyak::Reg64 tTbl_[maxRegNum];
|
||||
Pack p_;
|
||||
Pack t_;
|
||||
StackFrame(const StackFrame&);
|
||||
|
@ -484,7 +687,7 @@ public:
|
|||
make stack frame
|
||||
@param sf [in] this
|
||||
@param pNum [in] num of function parameter(0 <= pNum <= 4)
|
||||
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
|
||||
@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
|
||||
@param stackSizeByte [in] local stack size
|
||||
@param makeEpilog [in] automatically call close() if true
|
||||
|
||||
|
@ -509,29 +712,19 @@ public:
|
|||
, t(t_)
|
||||
{
|
||||
using namespace Xbyak;
|
||||
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
|
||||
if (pNum < 0 || pNum > 4) XBYAK_THROW(ERR_BAD_PNUM)
|
||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||
if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
|
||||
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
|
||||
const Reg64& _rsp = code->rsp;
|
||||
const AddressFrame& _ptr = code->ptr;
|
||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
P_ = saveNum_ + (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->push(Reg64(tbl[i]));
|
||||
}
|
||||
P_ = (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
P_ *= 8;
|
||||
if (P_ > 0) code->sub(_rsp, P_);
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#endif
|
||||
int pos = 0;
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
|
||||
|
@ -552,36 +745,18 @@ public:
|
|||
{
|
||||
using namespace Xbyak;
|
||||
const Reg64& _rsp = code_->rsp;
|
||||
const AddressFrame& _ptr = code_->ptr;
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#endif
|
||||
if (P_ > 0) code_->add(_rsp, P_);
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
|
||||
}
|
||||
|
||||
if (callRet) code_->ret();
|
||||
}
|
||||
~StackFrame()
|
||||
{
|
||||
if (!makeEpilog_) return;
|
||||
try {
|
||||
close();
|
||||
} catch (std::exception& e) {
|
||||
printf("ERR:StackFrame %s\n", e.what());
|
||||
exit(1);
|
||||
} catch (...) {
|
||||
printf("ERR:StackFrame otherwise\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
private:
|
||||
const int *getOrderTbl() const
|
||||
|
@ -599,7 +774,7 @@ private:
|
|||
}
|
||||
int getRegIdx(int& pos) const
|
||||
{
|
||||
assert(pos < 14);
|
||||
assert(pos < maxRegNum);
|
||||
using namespace Xbyak;
|
||||
const int *tbl = getOrderTbl();
|
||||
int r = tbl[pos++];
|
||||
|
@ -616,5 +791,137 @@ private:
|
|||
};
|
||||
#endif
|
||||
|
||||
} } // end of util
|
||||
class Profiler {
|
||||
int mode_;
|
||||
const char *suffix_;
|
||||
const void *startAddr_;
|
||||
#ifdef XBYAK_USE_PERF
|
||||
FILE *fp_;
|
||||
#endif
|
||||
public:
|
||||
enum {
|
||||
None = 0,
|
||||
Perf = 1,
|
||||
VTune = 2
|
||||
};
|
||||
Profiler()
|
||||
: mode_(None)
|
||||
, suffix_("")
|
||||
, startAddr_(0)
|
||||
#ifdef XBYAK_USE_PERF
|
||||
, fp_(0)
|
||||
#endif
|
||||
{
|
||||
}
|
||||
// append suffix to funcName
|
||||
void setNameSuffix(const char *suffix)
|
||||
{
|
||||
suffix_ = suffix;
|
||||
}
|
||||
void setStartAddr(const void *startAddr)
|
||||
{
|
||||
startAddr_ = startAddr;
|
||||
}
|
||||
void init(int mode)
|
||||
{
|
||||
mode_ = None;
|
||||
switch (mode) {
|
||||
default:
|
||||
case None:
|
||||
return;
|
||||
case Perf:
|
||||
#ifdef XBYAK_USE_PERF
|
||||
close();
|
||||
{
|
||||
const int pid = getpid();
|
||||
char name[128];
|
||||
snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid);
|
||||
fp_ = fopen(name, "a+");
|
||||
if (fp_ == 0) {
|
||||
fprintf(stderr, "can't open %s\n", name);
|
||||
return;
|
||||
}
|
||||
}
|
||||
mode_ = Perf;
|
||||
#endif
|
||||
return;
|
||||
case VTune:
|
||||
#ifdef XBYAK_USE_VTUNE
|
||||
dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling
|
||||
if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) {
|
||||
fprintf(stderr, "VTune profiling is not active\n");
|
||||
return;
|
||||
}
|
||||
mode_ = VTune;
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
}
|
||||
~Profiler()
|
||||
{
|
||||
close();
|
||||
}
|
||||
void close()
|
||||
{
|
||||
#ifdef XBYAK_USE_PERF
|
||||
if (fp_ == 0) return;
|
||||
fclose(fp_);
|
||||
fp_ = 0;
|
||||
#endif
|
||||
}
|
||||
void set(const char *funcName, const void *startAddr, size_t funcSize) const
|
||||
{
|
||||
if (mode_ == None) return;
|
||||
#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE)
|
||||
(void)funcName;
|
||||
(void)startAddr;
|
||||
(void)funcSize;
|
||||
#endif
|
||||
#ifdef XBYAK_USE_PERF
|
||||
if (mode_ == Perf) {
|
||||
if (fp_ == 0) return;
|
||||
fprintf(fp_, "%llx %zx %s%s", (long long)startAddr, funcSize, funcName, suffix_);
|
||||
/*
|
||||
perf does not recognize the function name which is less than 3,
|
||||
so append '_' at the end of the name if necessary
|
||||
*/
|
||||
size_t n = strlen(funcName) + strlen(suffix_);
|
||||
for (size_t i = n; i < 3; i++) {
|
||||
fprintf(fp_, "_");
|
||||
}
|
||||
fprintf(fp_, "\n");
|
||||
fflush(fp_);
|
||||
}
|
||||
#endif
|
||||
#ifdef XBYAK_USE_VTUNE
|
||||
if (mode_ != VTune) return;
|
||||
char className[] = "";
|
||||
char fileName[] = "";
|
||||
iJIT_Method_Load jmethod = {};
|
||||
jmethod.method_id = iJIT_GetNewMethodID();
|
||||
jmethod.class_file_name = className;
|
||||
jmethod.source_file_name = fileName;
|
||||
jmethod.method_load_address = const_cast<void*>(startAddr);
|
||||
jmethod.method_size = funcSize;
|
||||
jmethod.line_number_size = 0;
|
||||
char buf[128];
|
||||
snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_);
|
||||
jmethod.method_name = buf;
|
||||
iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod);
|
||||
#endif
|
||||
}
|
||||
/*
|
||||
for continuous set
|
||||
funcSize = endAddr - <previous set endAddr>
|
||||
*/
|
||||
void set(const char *funcName, const void *endAddr)
|
||||
{
|
||||
set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_);
|
||||
startAddr_ = endAddr;
|
||||
}
|
||||
};
|
||||
#endif // XBYAK_ONLY_CLASS_CPU
|
||||
|
||||
} } // end of util
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue