Implemented CUDA config generation.

XMRig 2019-10-26 03:12:55 +07:00
parent 77d5b73724
commit d4a3024996
16 changed files with 208 additions and 42 deletions

src/backend/cuda/CudaBackend.cpp

@@ -97,10 +97,14 @@ public:
            return printDisabled(RED_S " (no devices)");
        }

+       if (!devices.empty()) {
+           return;
+       }
+
        Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") WHITE_BOLD("%u.%u") "/" WHITE_BOLD("%u.%u") BLACK_BOLD("/%s"), "CUDA",
                   runtimeVersion / 1000, runtimeVersion % 100, driverVersion / 1000, driverVersion % 100, CudaLib::pluginVersion());

-       devices = CudaLib::devices();
+       devices = CudaLib::devices(cuda.bfactor(), cuda.bsleep());

        for (const CudaDevice &device : devices) {
            Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") CYAN_BOLD("#%zu") YELLOW(" %s") GREEN_BOLD(" %s ") WHITE_BOLD("%u/%u MHz") " smx:" WHITE_BOLD("%u") " arch:" WHITE_BOLD("%u%u") " mem:" CYAN("%zu/%zu") " MB",
@@ -156,13 +160,13 @@ xmrig::CudaBackend::~CudaBackend()
bool xmrig::CudaBackend::isEnabled() const
{
-   return false;
+   return d_ptr->controller->config()->cuda().isEnabled() && CudaLib::isInitialized() && !d_ptr->devices.empty();
}


bool xmrig::CudaBackend::isEnabled(const Algorithm &algorithm) const
{
-   return false;
+   return !d_ptr->controller->config()->cuda().threads().get(algorithm).isEmpty();
}

src/backend/cuda/CudaConfig.cpp

@@ -25,6 +25,7 @@
#include "backend/cuda/CudaConfig.h"
#include "backend/cuda/CudaConfig_gen.h"
+#include "backend/cuda/wrappers/CudaLib.h"
#include "base/io/json/Json.h"
#include "base/io/log/Log.h"
#include "rapidjson/document.h"
@@ -91,13 +92,26 @@ void xmrig::CudaConfig::generate()
        return;
    }

+   if (!CudaLib::init(loader())) {
+       return;
+   }
+
+   if (!CudaLib::runtimeVersion() || !CudaLib::driverVersion() || !CudaLib::deviceCount()) {
+       return;
+   }
+
+   const auto devices = CudaLib::devices(bfactor(), bsleep());
+   if (devices.empty()) {
+       return;
+   }
+
    size_t count = 0;

-// count += xmrig::generate<Algorithm::CN>(m_threads, devices);
-// count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
-// count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
-// count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
-// count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);
+   count += xmrig::generate<Algorithm::CN>(m_threads, devices);
+   count += xmrig::generate<Algorithm::CN_LITE>(m_threads, devices);
+   count += xmrig::generate<Algorithm::CN_HEAVY>(m_threads, devices);
+   count += xmrig::generate<Algorithm::CN_PICO>(m_threads, devices);
+   count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, devices);

    m_shouldSave = count > 0;
}

src/backend/cuda/CudaConfig.h

@@ -45,6 +45,8 @@ public:
    inline bool isShouldSave() const { return m_shouldSave; }
    inline const String &loader() const { return m_loader; }
    inline const Threads<CudaThreads> &threads() const { return m_threads; }
+   inline int32_t bfactor() const { return m_bfactor; }
+   inline int32_t bsleep() const { return m_bsleep; }

private:
    void generate();
@@ -55,6 +57,14 @@ private:
    std::vector<uint32_t> m_devicesHint;
    String m_loader;
    Threads<CudaThreads> m_threads;
+
+#  ifdef _WIN32
+   int32_t m_bfactor = 6;
+   int32_t m_bsleep = 25;
+#  else
+   int32_t m_bfactor = 0;
+   int32_t m_bsleep = 0;
+#  endif
};

src/backend/cuda/CudaConfig_gen.h

@@ -22,12 +22,13 @@
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef XMRIG_OCLCONFIG_GEN_H
-#define XMRIG_OCLCONFIG_GEN_H
+#ifndef XMRIG_CUDACONFIG_GEN_H
+#define XMRIG_CUDACONFIG_GEN_H

#include "backend/common/Threads.h"
#include "backend/cuda/CudaThreads.h"
+#include "backend/cuda/wrappers/CudaDevice.h"

#include <algorithm>
@@ -36,7 +37,76 @@
namespace xmrig {


+static inline size_t generate(const char *key, Threads<CudaThreads> &threads, const Algorithm &algorithm, const std::vector<CudaDevice> &devices)
+{
+    if (threads.isExist(algorithm) || threads.has(key)) {
+        return 0;
+    }
+
+    return threads.move(key, CudaThreads(devices, algorithm));
+}
+
+
+template<Algorithm::Family FAMILY>
+static inline size_t generate(Threads<CudaThreads> &, const std::vector<CudaDevice> &) { return 0; }
+
+
+template<>
+size_t inline generate<Algorithm::CN>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
+{
+    size_t count = 0;
+
+    count += generate("cn", threads, Algorithm::CN_1, devices);
+    count += generate("cn/2", threads, Algorithm::CN_2, devices);
+
+    if (!threads.isExist(Algorithm::CN_0)) {
+        threads.disable(Algorithm::CN_0);
+        count++;
+    }
+
+#   ifdef XMRIG_ALGO_CN_GPU
+    count += generate("cn/gpu", threads, Algorithm::CN_GPU, devices);
+#   endif
+
+    return count;
+}
+
+
+#ifdef XMRIG_ALGO_CN_LITE
+template<>
+size_t inline generate<Algorithm::CN_LITE>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
+{
+    size_t count = generate("cn-lite", threads, Algorithm::CN_LITE_1, devices);
+
+    if (!threads.isExist(Algorithm::CN_LITE_0)) {
+        threads.disable(Algorithm::CN_LITE_0);
+        ++count;
+    }
+
+    return count;
+}
+#endif
+
+
+#ifdef XMRIG_ALGO_CN_HEAVY
+template<>
+size_t inline generate<Algorithm::CN_HEAVY>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
+{
+    return generate("cn-heavy", threads, Algorithm::CN_HEAVY_0, devices);
+}
+#endif
+
+
+#ifdef XMRIG_ALGO_CN_PICO
+template<>
+size_t inline generate<Algorithm::CN_PICO>(Threads<CudaThreads> &threads, const std::vector<CudaDevice> &devices)
+{
+    return generate("cn-pico", threads, Algorithm::CN_PICO_0, devices);
+}
+#endif
+
+
} /* namespace xmrig */


-#endif /* XMRIG_OCLCONFIG_GEN_H */
+#endif /* XMRIG_CUDACONFIG_GEN_H */
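
For orientation, here is a minimal sketch, not part of the commit, of what one family generator leaves in the threads map; the profile keys come from the specializations above, and the device list would come from CudaLib::devices(bfactor, bsleep) exactly as CudaConfig::generate() does:

    Threads<CudaThreads> threads;
    const auto devices = CudaLib::devices(0, 0);   // 0/0 are the non-Windows bfactor/bsleep defaults
    const size_t count = xmrig::generate<Algorithm::CN>(threads, devices);

    // Assuming threads started empty, it now holds:
    //   "cn"     -> CudaThreads built for Algorithm::CN_1 on every valid device
    //   "cn/2"   -> CudaThreads built for Algorithm::CN_2
    //   cn/0     -> marked disabled via threads.disable(Algorithm::CN_0)
    //   "cn/gpu" -> present only when XMRIG_ALGO_CN_GPU is defined
    // count > 0 whenever anything was added or disabled, which is what drives m_shouldSave.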

src/backend/cuda/CudaThread.cpp

@@ -24,7 +24,7 @@
#include "backend/cuda/CudaThread.h"
+#include "backend/cuda/wrappers/CudaLib.h"
#include "base/io/json/Json.h"
#include "rapidjson/document.h"
@@ -34,6 +34,12 @@
namespace xmrig {

+static const char *kAffinity = "affinity";
+static const char *kBFactor  = "bfactor";
+static const char *kBlocks   = "blocks";
+static const char *kBSleep   = "bsleep";
+static const char *kIndex    = "index";
+static const char *kThreads  = "threads";

} // namespace xmrig
@@ -43,12 +49,35 @@ xmrig::CudaThread::CudaThread(const rapidjson::Value &value)
    if (!value.IsObject()) {
        return;
    }

+   m_index    = Json::getUint(value, kIndex);
+   m_threads  = Json::getInt(value, kThreads);
+   m_blocks   = Json::getInt(value, kBlocks);
+   m_bfactor  = std::min(Json::getUint(value, kBFactor, m_bfactor), 12u);
+   m_bsleep   = Json::getUint(value, kBSleep, m_bsleep);
+   m_affinity = Json::getUint64(value, kAffinity, m_affinity);
+}
+
+
+xmrig::CudaThread::CudaThread(uint32_t index, nvid_ctx *ctx) :
+   m_blocks(CudaLib::deviceInt(ctx, CudaLib::DeviceBlocks)),
+   m_threads(CudaLib::deviceInt(ctx, CudaLib::DeviceThreads)),
+   m_index(index),
+   m_bfactor(CudaLib::deviceUint(ctx, CudaLib::DeviceBFactor)),
+   m_bsleep(CudaLib::deviceUint(ctx, CudaLib::DeviceBSleep))
+{
}


bool xmrig::CudaThread::isEqual(const CudaThread &other) const
{
-   return false;
+   return m_blocks == other.m_blocks &&
+          m_threads == other.m_threads &&
+          m_affinity == other.m_affinity &&
+          m_index == other.m_index &&
+          m_bfactor == other.m_bfactor &&
+          m_bsleep == other.m_bsleep;
}
@@ -59,5 +88,12 @@ rapidjson::Value xmrig::CudaThread::toJSON(rapidjson::Document &doc) const
    Value out(kObjectType);

+   out.AddMember(StringRef(kIndex),    index(), allocator);
+   out.AddMember(StringRef(kThreads),  threads(), allocator);
+   out.AddMember(StringRef(kBlocks),   blocks(), allocator);
+   out.AddMember(StringRef(kBFactor),  bfactor(), allocator);
+   out.AddMember(StringRef(kBSleep),   bsleep(), allocator);
+   out.AddMember(StringRef(kAffinity), affinity(), allocator);
+
    return out;
}
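
Taken together, the new constructor and toJSON() above round-trip a thread entry through exactly these six keys. As a rough illustration (an assumed shape, not output captured from this commit), one auto-generated profile in the "cuda" section of config.json could look like the following, with the blocks/threads values purely illustrative and bfactor/bsleep at their Windows defaults:

    "cn/2": [
        {
            "index": 0,
            "threads": 32,
            "blocks": 40,
            "bfactor": 6,
            "bsleep": 25,
            "affinity": -1
        }
    ]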

src/backend/cuda/CudaThread.h

@@ -26,14 +26,13 @@
#define XMRIG_CUDATHREAD_H

+using nvid_ctx = struct nvid_ctx;

#include "crypto/common/Algorithm.h"
#include "rapidjson/fwd.h"

-#include <bitset>
-#include <vector>

namespace xmrig {
@@ -41,10 +40,16 @@ class CudaThread
{
public:
    CudaThread() = delete;
    CudaThread(const rapidjson::Value &value);
+   CudaThread(uint32_t index, nvid_ctx *ctx);

-   inline bool isValid() const { return false; }
+   inline bool isValid() const { return m_blocks > 0 && m_threads > 0; }
+   inline int32_t bfactor() const { return static_cast<int32_t>(m_bfactor); }
+   inline int32_t blocks() const { return m_blocks; }
+   inline int32_t bsleep() const { return static_cast<int32_t>(m_bsleep); }
+   inline int32_t threads() const { return m_threads; }
+   inline int64_t affinity() const { return m_affinity; }
+   inline uint32_t index() const { return m_index; }

    inline bool operator!=(const CudaThread &other) const { return !isEqual(other); }
    inline bool operator==(const CudaThread &other) const { return isEqual(other); }
@@ -53,6 +58,18 @@ public:
    rapidjson::Value toJSON(rapidjson::Document &doc) const;

private:
+   int32_t m_blocks = 0;
+   int32_t m_threads = 0;
+   int64_t m_affinity = -1;
+   uint32_t m_index = 0;
+
+#  ifdef _WIN32
+   uint32_t m_bfactor = 6;
+   uint32_t m_bsleep = 25;
+#  else
+   uint32_t m_bfactor = 0;
+   uint32_t m_bsleep = 0;
+#  endif
};

src/backend/cuda/CudaThreads.cpp

@@ -44,6 +44,14 @@ xmrig::CudaThreads::CudaThreads(const rapidjson::Value &value)
}


+xmrig::CudaThreads::CudaThreads(const std::vector<CudaDevice> &devices, const Algorithm &algorithm)
+{
+    for (const auto &device : devices) {
+        device.generate(algorithm, *this);
+    }
+}
+
+
bool xmrig::CudaThreads::isEqual(const CudaThreads &other) const
{
    if (isEmpty() && other.isEmpty()) {

src/backend/cuda/CudaThreads.h

@@ -30,6 +30,7 @@
#include "backend/cuda/CudaThread.h"
+#include "backend/cuda/wrappers/CudaDevice.h"


namespace xmrig {
@@ -40,6 +41,7 @@ class CudaThreads
public:
    CudaThreads() = default;
    CudaThreads(const rapidjson::Value &value);
+   CudaThreads(const std::vector<CudaDevice> &devices, const Algorithm &algorithm);

    inline bool isEmpty() const { return m_data.empty(); }
    inline const std::vector<CudaThread> &data() const { return m_data; }

src/backend/cuda/wrappers/CudaDevice.cpp

@@ -34,12 +34,11 @@
#include <algorithm>


-xmrig::CudaDevice::CudaDevice(uint32_t index) :
+xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) :
    m_index(index)
{
-   auto ctx = CudaLib::alloc(index, 0, 0, 0, 0, Algorithm::INVALID);
-   if (CudaLib::deviceInfo(ctx) != 0) {
+   auto ctx = CudaLib::alloc(index, bfactor, bsleep);
+   if (CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID) != 0) {
        CudaLib::release(ctx);

        return;
@@ -105,6 +104,11 @@ uint32_t xmrig::CudaDevice::smx() const
void xmrig::CudaDevice::generate(const Algorithm &algorithm, CudaThreads &threads) const
{
+   if (CudaLib::deviceInfo(m_ctx, -1, -1, algorithm) != 0) {
+       return;
+   }
+
+   threads.add(CudaThread(m_index, m_ctx));
}

src/backend/cuda/wrappers/CudaDevice.h

@@ -46,7 +46,7 @@ public:
    CudaDevice() = delete;
    CudaDevice(const CudaDevice &other) = delete;
    CudaDevice(CudaDevice &&other) noexcept;
-   CudaDevice(uint32_t index);
+   CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep);
    ~CudaDevice();

    size_t freeMemSize() const;

src/backend/cuda/wrappers/CudaLib.cpp

@@ -58,9 +58,9 @@ static const char *kSymbolNotFound = "symbol not found";
static const char *kVersion = "version";


-using alloc_t = nvid_ctx * (*)(size_t, int32_t, int32_t, int32_t, int32_t, int32_t);
+using alloc_t = nvid_ctx * (*)(uint32_t, int32_t, int32_t);
using deviceCount_t = uint32_t (*)();
-using deviceInfo_t = int32_t (*)(nvid_ctx *);
+using deviceInfo_t = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t);
using deviceInt_t = int32_t (*)(nvid_ctx *, CudaLib::DeviceProperty);
using deviceName_t = const char * (*)(nvid_ctx *);
using deviceUint_t = uint32_t (*)(nvid_ctx *, CudaLib::DeviceProperty);
@@ -129,9 +129,9 @@ const char *xmrig::CudaLib::pluginVersion() noexcept
}


-int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx) noexcept
+int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept
{
-   return pDeviceInfo(ctx);
+   return pDeviceInfo(ctx, blocks, threads, algorithm);
}
@@ -141,13 +141,13 @@ int32_t xmrig::CudaLib::deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept
}


-nvid_ctx *xmrig::CudaLib::alloc(size_t id, int blocks, int threads, int bfactor, int bsleep, const Algorithm &algorithm) noexcept
+nvid_ctx *xmrig::CudaLib::alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept
{
-   return pAlloc(id, blocks, threads, bfactor, bsleep, algorithm);
+   return pAlloc(id, bfactor, bsleep);
}


-std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices() noexcept
+std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices(int32_t bfactor, int32_t bsleep) noexcept
{
    const uint32_t count = deviceCount();
    if (!count) {
@@ -158,7 +158,7 @@ std::vector<xmrig::CudaDevice> xmrig::CudaLib::devices() noexcept
    out.reserve(count);

    for (uint32_t i = 0; i < count; ++i) {
-       CudaDevice device(i);
+       CudaDevice device(i, bfactor, bsleep);

        if (device.isValid()) {
            out.emplace_back(std::move(device));
        }

src/backend/cuda/wrappers/CudaLib.h

@@ -72,10 +72,10 @@ public:
    static const char *deviceName(nvid_ctx *ctx) noexcept;
    static const char *pluginVersion() noexcept;
-   static int deviceInfo(nvid_ctx *ctx) noexcept;
+   static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept;
    static int32_t deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept;
-   static nvid_ctx *alloc(size_t id, int blocks, int threads, int bfactor, int bsleep, const Algorithm &algorithm) noexcept;
-   static std::vector<CudaDevice> devices() noexcept;
+   static nvid_ctx *alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept;
+   static std::vector<CudaDevice> devices(int32_t bfactor, int32_t bsleep) noexcept;
    static uint32_t deviceCount() noexcept;
    static uint32_t deviceUint(nvid_ctx *ctx, DeviceProperty property) noexcept;
    static uint32_t driverVersion() noexcept;
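
As a usage note, here is a condensed sketch of the call sequence this reworked wrapper API is built for, mirroring CudaConfig::generate() and CudaDevice::generate() from this commit; the loader name and the algorithm are placeholders, and error handling is trimmed:

    CudaThreads threads;

    // CudaConfig::loader() normally supplies the plugin name; "xmrig-cuda" is only a stand-in here.
    if (CudaLib::init("xmrig-cuda") && CudaLib::runtimeVersion() && CudaLib::driverVersion() && CudaLib::deviceCount()) {
        // bfactor/bsleep: 6/25 are the Windows defaults from CudaConfig, 0/0 elsewhere.
        for (const auto &device : CudaLib::devices(6, 25)) {
            // deviceInfo() is queried internally; each usable device contributes one CudaThread.
            device.generate(Algorithm::CN_2, threads);
        }
    }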

src/backend/opencl/OclBackend.cpp

@@ -319,7 +319,7 @@ void xmrig::OclBackend::setJob(const Job &job)
        return stop();
    }

-   std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices, tag);
+   std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices);
    if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) {
        return;
    }

src/backend/opencl/OclConfig.cpp

@@ -24,6 +24,7 @@
#include "backend/opencl/OclConfig.h"
+#include "backend/common/Tags.h"
#include "backend/opencl/OclConfig_gen.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/json/Json.h"
@@ -113,7 +114,7 @@ rapidjson::Value xmrig::OclConfig::toJSON(rapidjson::Document &doc) const
}


-std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const
+std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices) const
{
    std::vector<OclLaunchData> out;
    const OclThreads &threads = m_threads.get(algorithm);
@@ -126,7 +127,7 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const
    for (const OclThread &thread : threads.data()) {
        if (thread.index() >= devices.size()) {
-           LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), tag, thread.index());
+           LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), ocl_tag(), thread.index());
            continue;
        }

src/backend/opencl/OclConfig.h

@@ -42,7 +42,7 @@ public:
    OclPlatform platform() const;
    rapidjson::Value toJSON(rapidjson::Document &doc) const;
-   std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const;
+   std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices) const;
    void read(const rapidjson::Value &value);

    inline bool isCacheEnabled() const { return m_cache; }

src/backend/opencl/OclThreads.cpp

@@ -46,7 +46,7 @@ xmrig::OclThreads::OclThreads(const rapidjson::Value &value)
xmrig::OclThreads::OclThreads(const std::vector<OclDevice> &devices, const Algorithm &algorithm)
{
-   for (const OclDevice &device : devices) {
+   for (const auto &device : devices) {
        device.generate(algorithm, *this);
    }
}