RandomX: added performance profiler (for developers)
Also optimized Blake2b SSE4.1 code size to avoid code cache pollution.
This commit is contained in:
parent
adf833b60a
commit
a05393727c
19 changed files with 390 additions and 481 deletions
|
@ -222,3 +222,15 @@ if (WITH_KAWPOW)
|
|||
src/base/net/stratum/EthStratumClient.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
# Developer-only profiler: when WITH_PROFILING is enabled, define
# XMRIG_FEATURE_PROFILING and add the profiler sources to the base lists.
if (WITH_PROFILING)
    add_definitions(/DXMRIG_FEATURE_PROFILING)

    list(APPEND HEADERS_BASE src/base/tools/Profiler.h)
    list(APPEND SOURCES_BASE src/base/tools/Profiler.cpp)
endif()
|
||||
|
|
|
@ -101,3 +101,13 @@ const char *xmrig::Tags::opencl()
|
|||
return tag;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_PROFILING
|
||||
const char* xmrig::Tags::profiler()
|
||||
{
|
||||
static const char* tag = CYAN_BG_BOLD(WHITE_BOLD_S " profile ");
|
||||
|
||||
return tag;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -53,6 +53,10 @@ public:
|
|||
# ifdef XMRIG_FEATURE_OPENCL
|
||||
static const char *opencl();
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_PROFILING
|
||||
// Log tag for profiler messages; only declared when XMRIG_FEATURE_PROFILING is defined.
static const char* profiler();
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
|
|
100
src/base/tools/Profiler.cpp
Normal file
100
src/base/tools/Profiler.cpp
Normal file
|
@ -0,0 +1,100 @@
|
|||
/* XMRig
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "base/tools/Profiler.h"
#include "base/io/log/Log.h"
#include "base/io/log/Tags.h"

#include <algorithm>
#include <chrono>
#include <cstring>
#include <sstream>
#include <string>
#include <thread>
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_PROFILING
|
||||
|
||||
|
||||
// Global registry of scope records; slots are filled in by Register().
ProfileScopeData* ProfileScopeData::s_data[MAX_DATA_COUNT] = {};
|
||||
// Number of Register() calls so far. It is incremented atomically on every call,
// so it can grow past MAX_DATA_COUNT once the registry is full.
volatile long ProfileScopeData::s_dataCount = 0;
|
||||
// TSC ticks per second as measured by Init(); stays 0.0 until Init() has run.
double ProfileScopeData::s_tscSpeed = 0.0;
|
||||
|
||||
|
||||
// NOINLINE: compiler-specific "never inline this function" marker.
// Left untouched if something else already defined it; expands to nothing
// on compilers that are neither GNU-compatible nor MSVC.
#if !defined(NOINLINE)
#   if defined(__GNUC__)
#       define NOINLINE __attribute__ ((noinline))
#   elif defined(_MSC_VER)
#       define NOINLINE __declspec(noinline)
#   else
#       define NOINLINE
#   endif
#endif
|
||||
|
||||
|
||||
static std::string get_thread_id()
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << std::this_thread::get_id();
|
||||
|
||||
std::string s = ss.str();
|
||||
if (s.length() > ProfileScopeData::MAX_THREAD_ID_LENGTH) {
|
||||
s.resize(ProfileScopeData::MAX_THREAD_ID_LENGTH);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Adds a scope record to the global registry and stamps it with the id of
 * the calling thread.
 *
 * A slot index is claimed with an atomic increment of s_dataCount. If the
 * index falls outside the registry (MAX_DATA_COUNT entries) the record is
 * silently dropped: it is neither stored nor given a thread id.
 *
 * @param data record to register; must not be null.
 */
NOINLINE void ProfileScopeData::Register(ProfileScopeData* data)
{
    // Atomically claim the next slot; both branches yield the pre-increment value.
#   ifdef _MSC_VER
    const long id = _InterlockedIncrement(&s_dataCount) - 1;
#   else
    const long id = __sync_fetch_and_add(&s_dataCount, 1);
#   endif

    // The unsigned comparison rejects indices past the end of the registry
    // as well as negative values (e.g. after the counter wraps).
    if (static_cast<unsigned long>(id) >= MAX_DATA_COUNT) {
        return;
    }

    // Write the thread id BEFORE the record becomes reachable through s_data,
    // so the record is complete by the time it is published.
    // get_thread_id() caps the string at MAX_THREAD_ID_LENGTH characters, so the
    // copy (including the terminating '\0') fits in m_threadId.
    const std::string threadId = get_thread_id();
    std::memcpy(data->m_threadId, threadId.c_str(), threadId.length() + 1);

    s_data[id] = data;
}
|
||||
|
||||
|
||||
/**
 * Measures the TSC frequency and stores it (ticks per second) in s_tscSpeed.
 *
 * Busy-waits until more than one second of wall time has elapsed, then
 * divides the number of TSC ticks counted by the elapsed time and logs the
 * result in GHz.
 *
 * The elapsed time is taken from std::chrono::steady_clock: it is monotonic,
 * so a system clock adjustment during the measurement window cannot end the
 * loop early or corrupt the result.
 */
NOINLINE void ProfileScopeData::Init()
{
    using Clock = std::chrono::steady_clock;

    const Clock::time_point start = Clock::now();
    const uint64_t count1 = ReadTSC();

    for (;;)
    {
        const Clock::time_point now = Clock::now();
        const uint64_t count2 = ReadTSC();

        // steady_clock never goes backwards, so the elapsed time is non-negative.
        const uint64_t elapsedNs = static_cast<uint64_t>(
            std::chrono::duration_cast<std::chrono::nanoseconds>(now - start).count());

        if (elapsedNs > 1000000000) {
            // ticks * 1e9 / nanoseconds == ticks per second
            s_tscSpeed = (count2 - count1) * 1e9 / elapsedNs;
            LOG_INFO("%s TSC speed = %.3f GHz", xmrig::Tags::profiler(), s_tscSpeed / 1e9);
            return;
        }
    }
}
|
||||
|
||||
|
||||
#endif /* XMRIG_FEATURE_PROFILING */
|
132
src/base/tools/Profiler.h
Normal file
132
src/base/tools/Profiler.h
Normal file
|
@ -0,0 +1,132 @@
|
|||
/* XMRig
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_PROFILER_H
|
||||
#define XMRIG_PROFILER_H
|
||||
|
||||
|
||||
// FORCE_INLINE: strongest available "always inline" hint for the current compiler.
// Left untouched if already defined elsewhere; expands to nothing on unknown compilers.
// The branch order matters: the first matching compiler macro wins.
#if !defined(FORCE_INLINE)
#   if defined(_MSC_VER)
#       define FORCE_INLINE __forceinline
#   elif defined(__GNUC__)
#       define FORCE_INLINE __attribute__((always_inline)) inline
#   elif defined(__clang__)
#       define FORCE_INLINE __inline__
#   else
#       define FORCE_INLINE
#   endif
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_PROFILING
|
||||
|
||||
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
/**
 * Reads the CPU time-stamp counter and returns it as a 64-bit tick count.
 *
 * Uses the __rdtsc() intrinsic on MSVC and the "rdtsc" instruction via inline
 * assembly on every other compiler.
 */
static FORCE_INLINE uint64_t ReadTSC()
{
#   if defined(_MSC_VER)
    return __rdtsc();
#   else
    // rdtsc leaves the low 32 bits in EAX ("=a") and the high 32 bits in EDX ("=d").
    uint32_t lowBits, highBits;
    __asm__ __volatile__("rdtsc" : "=a"(lowBits), "=d"(highBits));

    const uint64_t upper = static_cast<uint64_t>(highBits) << 32;
    return upper | lowBits;
#   endif
}
|
||||
|
||||
|
||||
/**
 * Accumulated timing data for one profiled scope.
 *
 * The struct is kept trivial and at most 32 bytes (both enforced by the
 * static_asserts below). Data members must stay in this order: PROFILE_SCOPE
 * aggregate-initialises a record with only the scope name, which relies on
 * m_name being the first member.
 */
struct ProfileScopeData
{
    enum
    {
        MAX_THREAD_ID_LENGTH = 11,      // characters kept from the textual thread id
        MAX_SAMPLE_COUNT     = 128,
        MAX_DATA_COUNT       = 1024     // capacity of the s_data registry
    };

    const char* m_name;                         // scope name
    uint64_t    m_totalCycles;                  // sum of TSC ticks spent inside the scope
    uint32_t    m_totalSamples;                 // number of times the scope was left
    char        m_threadId[MAX_THREAD_ID_LENGTH + 1];   // NUL-terminated id of the registering thread

    // Measures the TSC frequency and stores it in s_tscSpeed.
    static void Init();

    // Stores `data` in the s_data registry and fills in its m_threadId.
    static void Register(ProfileScopeData* data);

    static ProfileScopeData* s_data[MAX_DATA_COUNT];    // registered records
    static volatile long     s_dataCount;               // number of Register() calls so far
    static double            s_tscSpeed;                // TSC ticks per second
};

static_assert(sizeof(ProfileScopeData) <= 32, "ProfileScopeData struct is too big");
static_assert(std::is_trivial<ProfileScopeData>::value, "ProfileScopeData must be a trivial struct");
|
||||
|
||||
|
||||
class ProfileScope
|
||||
{
|
||||
public:
|
||||
FORCE_INLINE ProfileScope(ProfileScopeData& data)
|
||||
: m_data(data)
|
||||
{
|
||||
if (m_data.m_totalCycles == 0) {
|
||||
ProfileScopeData::Register(&data);
|
||||
}
|
||||
|
||||
m_startCounter = ReadTSC();
|
||||
}
|
||||
|
||||
FORCE_INLINE ~ProfileScope()
|
||||
{
|
||||
m_data.m_totalCycles += ReadTSC() - m_startCounter;
|
||||
++m_data.m_totalSamples;
|
||||
}
|
||||
|
||||
private:
|
||||
ProfileScopeData& m_data;
|
||||
uint64_t m_startCounter;
|
||||
};
|
||||
|
||||
|
||||
// Profiles the rest of the enclosing block under the name `x`.
// Expands to two declarations: a static thread_local ProfileScopeData named x_data,
// aggregate-initialised with the stringified name, and a ProfileScope guard named x
// that adds the elapsed TSC ticks to x_data when it goes out of scope.
// The expansion already ends with ';'.
#define PROFILE_SCOPE(x) static thread_local ProfileScopeData x##_data{#x}; ProfileScope x(x##_data);
|
||||
|
||||
|
||||
#else /* XMRIG_FEATURE_PROFILING */
|
||||
// Profiling is compiled out: PROFILE_SCOPE expands to nothing.
#define PROFILE_SCOPE(x)
|
||||
#endif /* XMRIG_FEATURE_PROFILING */
|
||||
|
||||
|
||||
#include "crypto/randomx/blake2/blake2.h"
|
||||
|
||||
|
||||
struct rx_blake2b_wrapper
|
||||
{
|
||||
FORCE_INLINE static void run(void* out, size_t outlen, const void* in, size_t inlen)
|
||||
{
|
||||
PROFILE_SCOPE(RandomX_Blake2b);
|
||||
rx_blake2b(out, outlen, in, inlen);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif /* XMRIG_PROFILER_H */
|
Loading…
Add table
Add a link
Reference in a new issue