From 83f437f9793e3aa7b5cd8e3a702ce49aa9a7ab6d Mon Sep 17 00:00:00 2001 From: XMRig Date: Wed, 30 Oct 2019 20:26:21 +0700 Subject: [PATCH] Implemented NvmlLib. --- src/backend/cuda/CudaBackend.cpp | 2 + src/backend/cuda/cuda.cmake | 1 + src/backend/cuda/wrappers/CudaDevice.cpp | 23 ++++++ src/backend/cuda/wrappers/CudaDevice.h | 22 ++++-- src/backend/cuda/wrappers/NvmlHealth.h | 43 ++++++++++ src/backend/cuda/wrappers/NvmlLib.cpp | 99 +++++++++++++++++++++++- src/backend/cuda/wrappers/NvmlLib.h | 6 +- src/backend/cuda/wrappers/nvml_lite.h | 19 ++++- 8 files changed, 203 insertions(+), 12 deletions(-) create mode 100644 src/backend/cuda/wrappers/NvmlHealth.h diff --git a/src/backend/cuda/CudaBackend.cpp b/src/backend/cuda/CudaBackend.cpp index a8cfa688..6abd79b5 100644 --- a/src/backend/cuda/CudaBackend.cpp +++ b/src/backend/cuda/CudaBackend.cpp @@ -163,6 +163,8 @@ public: # ifdef XMRIG_FEATURE_NVML if (cuda.isNvmlEnabled()) { if (NvmlLib::init(cuda.nvmlLoader())) { + NvmlLib::assign(devices); + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") WHITE_BOLD("%s") "/" GREEN_BOLD("%s"), kNvmlLabel, NvmlLib::version(), NvmlLib::driverVersion()); } diff --git a/src/backend/cuda/cuda.cmake b/src/backend/cuda/cuda.cmake index a5dd27b8..58ba3f5a 100644 --- a/src/backend/cuda/cuda.cmake +++ b/src/backend/cuda/cuda.cmake @@ -36,6 +36,7 @@ if (WITH_CUDA) list(APPEND HEADERS_BACKEND_CUDA src/backend/cuda/wrappers/nvml_lite.h + src/backend/cuda/wrappers/NvmlHealth.h src/backend/cuda/wrappers/NvmlLib.h ) diff --git a/src/backend/cuda/wrappers/CudaDevice.cpp b/src/backend/cuda/wrappers/CudaDevice.cpp index 740a063c..efacc800 100644 --- a/src/backend/cuda/wrappers/CudaDevice.cpp +++ b/src/backend/cuda/wrappers/CudaDevice.cpp @@ -30,6 +30,9 @@ #include "crypto/common/Algorithm.h" #include "rapidjson/document.h" +#ifdef XMRIG_FEATURE_NVML +# include "backend/cuda/wrappers/NvmlLib.h" +#endif #include @@ -125,5 +128,25 @@ void xmrig::CudaDevice::toJSON(rapidjson::Value &out, rapidjson::Document &doc) out.AddMember("global_mem", static_cast(globalMemSize()), allocator); out.AddMember("clock", clock(), allocator); out.AddMember("memory_clock", memoryClock(), allocator); + +# ifdef XMRIG_FEATURE_NVML + if (m_nvmlDevice) { + auto data = NvmlLib::health(m_nvmlDevice); + + Value health(kObjectType); + health.AddMember("temperature", data.temperature, allocator); + health.AddMember("power", data.power, allocator); + health.AddMember("clock", data.clock, allocator); + health.AddMember("mem_clock", data.memClock, allocator); + + Value fanSpeed(kArrayType); + for (auto speed : data.fanSpeed) { + fanSpeed.PushBack(speed, allocator); + } + health.AddMember("fan_speed", fanSpeed, allocator); + + out.AddMember("health", health, allocator); + } +# endif } #endif diff --git a/src/backend/cuda/wrappers/CudaDevice.h b/src/backend/cuda/wrappers/CudaDevice.h index 07866c82..8c624c85 100644 --- a/src/backend/cuda/wrappers/CudaDevice.h +++ b/src/backend/cuda/wrappers/CudaDevice.h @@ -30,7 +30,8 @@ #include "base/tools/String.h" -using nvid_ctx = struct nvid_ctx; +using nvid_ctx = struct nvid_ctx; +using nvmlDevice_t = struct nvmlDevice_st *; namespace xmrig { @@ -57,11 +58,16 @@ public: uint32_t smx() const; void generate(const Algorithm &algorithm, CudaThreads &threads) const; - inline bool isValid() const { return m_ctx != nullptr; } - inline const PciTopology &topology() const { return m_topology; } - inline const String &name() const { return m_name; } - inline uint32_t arch() const { return (computeCapability(true) * 10) + computeCapability(false); } - inline uint32_t index() const { return m_index; } + inline bool isValid() const { return m_ctx != nullptr; } + inline const PciTopology &topology() const { return m_topology; } + inline const String &name() const { return m_name; } + inline uint32_t arch() const { return (computeCapability(true) * 10) + computeCapability(false); } + inline uint32_t index() const { return m_index; } + +# ifdef XMRIG_FEATURE_NVML + inline nvmlDevice_t nvmlDevice() const { return m_nvmlDevice; } + inline void setNvmlDevice(nvmlDevice_t device) { m_nvmlDevice = device; } +# endif # ifdef XMRIG_FEATURE_API void toJSON(rapidjson::Value &out, rapidjson::Document &doc) const; @@ -75,6 +81,10 @@ private: nvid_ctx *m_ctx = nullptr; PciTopology m_topology; String m_name; + +# ifdef XMRIG_FEATURE_NVML + nvmlDevice_t m_nvmlDevice = nullptr; +# endif }; diff --git a/src/backend/cuda/wrappers/NvmlHealth.h b/src/backend/cuda/wrappers/NvmlHealth.h new file mode 100644 index 00000000..58c1d3eb --- /dev/null +++ b/src/backend/cuda/wrappers/NvmlHealth.h @@ -0,0 +1,43 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_NVMLHEALTH_H +#define XMRIG_NVMLHEALTH_H + + +#include +#include + + +struct NvmlHealth +{ + std::vector fanSpeed; + uint32_t clock = 0; + uint32_t memClock = 0; + uint32_t power = 0; + uint32_t temperature = 0; +}; + + +#endif /* XMRIG_NVMLHEALTH_H */ diff --git a/src/backend/cuda/wrappers/NvmlLib.cpp b/src/backend/cuda/wrappers/NvmlLib.cpp index 2eb731cc..cb05bdc1 100644 --- a/src/backend/cuda/wrappers/NvmlLib.cpp +++ b/src/backend/cuda/wrappers/NvmlLib.cpp @@ -39,6 +39,14 @@ namespace xmrig { static uv_lib_t nvmlLib; +static const char *kNvmlDeviceGetClockInfo = "nvmlDeviceGetClockInfo"; +static const char *kNvmlDeviceGetCount = "nvmlDeviceGetCount_v2"; +static const char *kNvmlDeviceGetFanSpeed = "nvmlDeviceGetFanSpeed"; +static const char *kNvmlDeviceGetFanSpeed_v2 = "nvmlDeviceGetFanSpeed_v2"; +static const char *kNvmlDeviceGetHandleByIndex = "nvmlDeviceGetHandleByIndex_v2"; +static const char *kNvmlDeviceGetPciInfo = "nvmlDeviceGetPciInfo_v2"; +static const char *kNvmlDeviceGetPowerUsage = "nvmlDeviceGetPowerUsage"; +static const char *kNvmlDeviceGetTemperature = "nvmlDeviceGetTemperature"; static const char *kNvmlInit = "nvmlInit_v2"; static const char *kNvmlShutdown = "nvmlShutdown"; static const char *kNvmlSystemGetDriverVersion = "nvmlSystemGetDriverVersion"; @@ -46,10 +54,18 @@ static const char *kNvmlSystemGetNVMLVersion = "nvmlSyste static const char *kSymbolNotFound = "symbol not found"; -static nvmlReturn_t (*pNvmlInit)() = nullptr; -static nvmlReturn_t (*pNvmlShutdown)() = nullptr; -static nvmlReturn_t (*pNvmlSystemGetDriverVersion)(char *version, unsigned int length) = nullptr; -static nvmlReturn_t (*pNvmlSystemGetNVMLVersion)(char *version, unsigned int length) = nullptr; +static nvmlReturn_t (*pNvmlDeviceGetClockInfo)(nvmlDevice_t device, uint32_t type, uint32_t *clock) = nullptr; +static nvmlReturn_t (*pNvmlDeviceGetCount)(uint32_t *deviceCount) = nullptr; +static nvmlReturn_t (*pNvmlDeviceGetFanSpeed_v2)(nvmlDevice_t device, uint32_t fan, uint32_t *speed) = nullptr; +static nvmlReturn_t (*pNvmlDeviceGetFanSpeed)(nvmlDevice_t device, uint32_t *speed) = nullptr; +static nvmlReturn_t (*pNvmlDeviceGetHandleByIndex)(uint32_t index, nvmlDevice_t *device) = nullptr; +static nvmlReturn_t (*pNvmlDeviceGetPciInfo)(nvmlDevice_t device, nvmlPciInfo_t *pci) = nullptr; +static nvmlReturn_t (*pNvmlDeviceGetPowerUsage)(nvmlDevice_t device, uint32_t *power) = nullptr; +static nvmlReturn_t (*pNvmlDeviceGetTemperature)(nvmlDevice_t device, uint32_t sensorType, uint32_t *temp) = nullptr; +static nvmlReturn_t (*pNvmlInit)() = nullptr; +static nvmlReturn_t (*pNvmlShutdown)() = nullptr; +static nvmlReturn_t (*pNvmlSystemGetDriverVersion)(char *version, uint32_t length) = nullptr; +static nvmlReturn_t (*pNvmlSystemGetNVMLVersion)(char *version, uint32_t length) = nullptr; #define DLSYM(x) if (uv_dlsym(&nvmlLib, k##x, reinterpret_cast(&p##x)) == -1) { throw std::runtime_error(kSymbolNotFound); } @@ -93,6 +109,72 @@ void xmrig::NvmlLib::close() } +bool xmrig::NvmlLib::assign(std::vector &devices) +{ + uint32_t count = 0; + if (pNvmlDeviceGetCount(&count) != NVML_SUCCESS) { + return false; + } + + for (uint32_t i = 0; i < count; i++) { + nvmlDevice_t nvmlDevice; + if (pNvmlDeviceGetHandleByIndex(i, &nvmlDevice) != NVML_SUCCESS) { + continue; + } + + nvmlPciInfo_t pci; + if (pNvmlDeviceGetPciInfo(nvmlDevice, &pci) != NVML_SUCCESS) { + continue; + } + + for (auto &device : devices) { + if (device.topology().bus() == pci.bus && device.topology().device() == pci.device) { + device.setNvmlDevice(nvmlDevice); + } + } + } + + return true; +} + + +NvmlHealth xmrig::NvmlLib::health(nvmlDevice_t device) +{ + if (!device) { + return {}; + } + + NvmlHealth health; + pNvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &health.temperature); + pNvmlDeviceGetPowerUsage(device, &health.power); + pNvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &health.clock); + pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock); + + if (health.power) { + health.power /= 1000; + } + + uint32_t speed = 0; + + if (pNvmlDeviceGetFanSpeed_v2) { + uint32_t i = 0; + + while (pNvmlDeviceGetFanSpeed_v2(device, i, &speed) == NVML_SUCCESS) { + health.fanSpeed.push_back(speed); + ++i; + } + + } + else { + pNvmlDeviceGetFanSpeed(device, &speed); + + health.fanSpeed.push_back(speed); + } + + return health; +} + + bool xmrig::NvmlLib::dlopen() { if (!m_loader.isNull()) { @@ -117,6 +199,13 @@ bool xmrig::NvmlLib::dlopen() bool xmrig::NvmlLib::load() { try { + DLSYM(NvmlDeviceGetClockInfo); + DLSYM(NvmlDeviceGetCount); + DLSYM(NvmlDeviceGetFanSpeed); + DLSYM(NvmlDeviceGetHandleByIndex); + DLSYM(NvmlDeviceGetPciInfo); + DLSYM(NvmlDeviceGetPowerUsage); + DLSYM(NvmlDeviceGetTemperature); DLSYM(NvmlInit); DLSYM(NvmlShutdown); DLSYM(NvmlSystemGetDriverVersion); @@ -125,6 +214,8 @@ bool xmrig::NvmlLib::load() return false; } + uv_dlsym(&nvmlLib, kNvmlDeviceGetFanSpeed_v2, reinterpret_cast(&pNvmlDeviceGetFanSpeed_v2)); + if (pNvmlInit() != NVML_SUCCESS) { return false; } diff --git a/src/backend/cuda/wrappers/NvmlLib.h b/src/backend/cuda/wrappers/NvmlLib.h index 395858f5..85b80d0c 100644 --- a/src/backend/cuda/wrappers/NvmlLib.h +++ b/src/backend/cuda/wrappers/NvmlLib.h @@ -26,7 +26,8 @@ #define XMRIG_NVMLLIB_H -#include "base/tools/String.h" +#include "backend/cuda/wrappers/CudaDevice.h" +#include "backend/cuda/wrappers/NvmlHealth.h" namespace xmrig { @@ -39,6 +40,9 @@ public: static const char *lastError() noexcept; static void close(); + static bool assign(std::vector &devices); + static NvmlHealth health(nvmlDevice_t device); + static inline bool isInitialized() noexcept { return m_initialized; } static inline bool isReady() noexcept { return m_ready; } static inline const char *driverVersion() noexcept { return m_driverVersion; } diff --git a/src/backend/cuda/wrappers/nvml_lite.h b/src/backend/cuda/wrappers/nvml_lite.h index 1de6e657..4472847c 100644 --- a/src/backend/cuda/wrappers/nvml_lite.h +++ b/src/backend/cuda/wrappers/nvml_lite.h @@ -29,10 +29,27 @@ #include -#define NVML_SUCCESS 0 +#define NVML_SUCCESS 0 +#define NVML_TEMPERATURE_GPU 0 +#define NVML_CLOCK_SM 1 +#define NVML_CLOCK_MEM 2 using nvmlReturn_t = uint32_t; +using nvmlDevice_t = struct nvmlDevice_st *; + + +struct nvmlPciInfo_t +{ + char busIdLegacy[16]{}; + unsigned int domain = 0; + unsigned int bus = 0; + unsigned int device = 0; + unsigned int pciDeviceId = 0; + unsigned int pciSubSystemId = 0; + + char busId[32]{}; +}; #endif /* XMRIG_NVML_LITE_H */