diff --git a/CHANGELOG.md b/CHANGELOG.md index c9909de4..68cf87ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,111 +1,25 @@ -# v4.6.1-beta +# v5.0.0 +This version is the first stable unified 3 in 1 GPU+CPU release. OpenCL support is built into the miner and does not require additional external dependencies at compile time. NVIDIA CUDA is available as an external [CUDA plugin](https://github.com/xmrig/xmrig-cuda); for convenience, 3 in 1 downloads with a recent CUDA version are also provided. + +This release is based on the 4.x.x series and includes all features from v4.6.2-beta. The changelog below includes only the most important changes; the [full changelog](doc/CHANGELOG_OLD.md) is available separately. + - [#1272](https://github.com/xmrig/xmrig/pull/1272) Optimized hashrate calculation. -- [#1273](https://github.com/xmrig/xmrig/issues/1273) Fixed crash when use `GET /2/backends` API endpoint with disabled CUDA. - -# v4.6.0-beta - [#1263](https://github.com/xmrig/xmrig/pull/1263) Added new option `dataset_host` for NVIDIA GPUs with less than 4 GB memory (RandomX only). - -# v4.5.0-beta -- Added NVIDIA CUDA support via external [CUDA plugun](https://github.com/xmrig/xmrig-cuda). XMRig now is unified 3 in 1 miner. - -# v4.4.0-beta - [#1068](https://github.com/xmrig/xmrig/pull/1068) Added support for `self-select` stratum protocol extension. -- [#1240](https://github.com/xmrig/xmrig/pull/1240) Sync with the latest RandomX code. -- [#1241](https://github.com/xmrig/xmrig/issues/1241) Fixed regression with colors on old Windows systems. -- [#1243](https://github.com/xmrig/xmrig/pull/1243) Fixed incorrect OpenCL memory size detection in some cases. -- [#1247](https://github.com/xmrig/xmrig/pull/1247) Fixed ARM64 RandomX code alignment. -- [#1248](https://github.com/xmrig/xmrig/pull/1248) Fixed RandomX code cache cleanup on iOS/Darwin. - -# v4.3.1-beta -- Fixed regression in v4.3.0, miner didn't create `cn` mining profile with default config example. 
- -# v4.3.0-beta - [#1227](https://github.com/xmrig/xmrig/pull/1227) Added new algorithm `rx/arq`, RandomX variant for upcoming ArQmA fork. - [#808](https://github.com/xmrig/xmrig/issues/808#issuecomment-539297156) Added experimental support for persistent memory for CPU mining threads. - [#1221](https://github.com/xmrig/xmrig/issues/1221) Improved RandomX dataset memory usage and initialization speed for NUMA machines. - -# v4.2.1-beta -- [#1150](https://github.com/xmrig/xmrig/issues/1150) Fixed build on FreeBSD. - [#1175](https://github.com/xmrig/xmrig/issues/1175) Fixed support for systems where total count of NUMA nodes not equal usable nodes count. -- [#1199](https://github.com/xmrig/xmrig/issues/1199) Fixed excessive memory allocation for OpenCL threads with low intensity. -- [#1212](https://github.com/xmrig/xmrig/issues/1212) Fixed low RandomX performance after fast algorithm switching. - -# v4.2.0-beta -- [#1202](https://github.com/xmrig/xmrig/issues/1202) Fixed algorithm verification in donate strategy. -- Added per pool option `coin` with single possible value `monero` for pools without algorithm negotiation, for upcoming Monero fork. - Added config option `cpu/max-threads-hint` and command line option `--cpu-max-threads-hint`. - -# v4.1.0-beta -- **OpenCL backend disabled by default.**. -- [#1183](https://github.com/xmrig/xmrig/issues/1183) Fixed compatibility with systemd. - [#1185](https://github.com/xmrig/xmrig/pull/1185) Added JIT compiler for RandomX on ARMv8. - Improved API endpoint `GET /2/backends` and added support for this endpoint to [workers.xmrig.info](http://workers.xmrig.info). - Added command line option `--no-cpu` to disable CPU backend. - Added OpenCL specific command line options: `--opencl`, `--opencl-devices`, `--opencl-platform`, `--opencl-loader` and `--opencl-no-cache`. +- Added CUDA specific command line options: `--cuda`, `--cuda-loader` and `--no-nvml`. 
- Removed command line option `--http-enabled`, HTTP API enabled automatically if any other `--http-*` option provided. - -# v4.0.1-beta -- [#1177](https://github.com/xmrig/xmrig/issues/1177) Fixed compatibility with old AMD drivers. -- [#1180](https://github.com/xmrig/xmrig/issues/1180) Fixed possible duplicated shares after algorithm switching. -- Added support for case if not all backend threads successfully started. -- Fixed wrong config file permissions after write (only gcc builds on recent Windows 10 affected). - -# v4.0.0-beta - [#1172](https://github.com/xmrig/xmrig/issues/1172) **Added OpenCL mining backend.** - [#268](https://github.com/xmrig/xmrig-amd/pull/268) [#270](https://github.com/xmrig/xmrig-amd/pull/270) [#271](https://github.com/xmrig/xmrig-amd/pull/271) [#273](https://github.com/xmrig/xmrig-amd/pull/273) [#274](https://github.com/xmrig/xmrig-amd/pull/274) [#1171](https://github.com/xmrig/xmrig/pull/1171) Added RandomX support for OpenCL, thanks [@SChernykh](https://github.com/SChernykh). - Algorithm `cn/wow` removed, as no longer alive. -# v3.2.0 -- Added per pool option `coin` with single possible value `monero` for pools without algorithm negotiation, for upcoming Monero fork. -- [#1183](https://github.com/xmrig/xmrig/issues/1183) Fixed compatibility with systemd. - -# v3.1.3 -- [#1180](https://github.com/xmrig/xmrig/issues/1180) Fixed possible duplicated shares after algorithm switching. -- Fixed wrong config file permissions after write (only gcc builds on recent Windows 10 affected). - -# v3.1.2 -- Many RandomX optimizations and fixes. - - [#1132](https://github.com/xmrig/xmrig/issues/1132) Fixed build on CentOS 7. - - [#1163](https://github.com/xmrig/xmrig/pull/1163) Optimized soft AES code, up to +30% hashrate on CPU without AES support and other optimizations. - - [#1166](https://github.com/xmrig/xmrig/pull/1166) Fixed crash when initialize dataset with big threads count (eg 272). 
- - [#1168](https://github.com/xmrig/xmrig/pull/1168) Optimized loading from scratchpad. -- [#1128](https://github.com/xmrig/xmrig/issues/1128) Fixed CMake 2.8 compatibility. - -# v3.1.1 -- [#1133](https://github.com/xmrig/xmrig/issues/1133) Fixed syslog regression. -- [#1138](https://github.com/xmrig/xmrig/issues/1138) Fixed multiple network bugs. -- [#1141](https://github.com/xmrig/xmrig/issues/1141) Fixed log in background mode. -- [#1142](https://github.com/xmrig/xmrig/pull/1142) RandomX hashrate improved by 0.5-1.5% depending on variant and CPU. -- [#1146](https://github.com/xmrig/xmrig/pull/1146) Fixed race condition in RandomX thread init. -- [#1148](https://github.com/xmrig/xmrig/pull/1148) Fixed, on Linux linker marking entire executable as having an executable stack. -- Fixed, for Argon2 algorithms command line options like `--threads` was ignored. -- Fixed command line options for single pool, free order allowed again. - -# v3.1.0 -- [#1107](https://github.com/xmrig/xmrig/issues/1107#issuecomment-522235892) Added Argon2 algorithm family: `argon2/chukwa` and `argon2/wrkz`. - -# v3.0.0 -- **[#1111](https://github.com/xmrig/xmrig/pull/1111) Added RandomX (`rx/test`) algorithm for testing and benchmarking.** -- **[#1036](https://github.com/xmrig/xmrig/pull/1036) Added RandomWOW (`rx/wow`) algorithm for [Wownero](http://wownero.org/).** -- **[#1050](https://github.com/xmrig/xmrig/pull/1050) Added RandomXL (`rx/loki`) algorithm for [Loki](https://loki.network/).** -- **[#1077](https://github.com/xmrig/xmrig/issues/1077) Added NUMA support via hwloc**. -- **Added flexible [multi algorithm](doc/CPU.md) configuration.** -- **Added unlimited switching between incompatible algorithms, all mining options can be changed in runtime.** -- [#257](https://github.com/xmrig/xmrig-nvidia/pull/257) New logging subsystem, file and syslog now always without colors. -- [#314](https://github.com/xmrig/xmrig-proxy/issues/314) Added donate over proxy feature. 
-- [#1007](https://github.com/xmrig/xmrig/issues/1007) Old HTTP API backend based on libmicrohttpd, replaced to custom HTTP server (libuv + http_parser). -- [#1010](https://github.com/xmrig/xmrig/pull/1010#issuecomment-482632107) Added daemon support (solo mining). -- [#1066](https://github.com/xmrig/xmrig/issues/1066#issuecomment-518080529) Added error message if pool not ready for RandomX. -- [#1105](https://github.com/xmrig/xmrig/issues/1105) Improved auto configuration for `cn-pico` algorithm. -- Added commands `pause` and `resume` via JSON RPC 2.0 API (`POST /json_rpc`). -- Added command line option `--export-topology` for export hwloc topology to a XML file. -- Breaked backward compatibility with previous configs and command line, `variant` option replaced to `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object. -- Options `av`, `safe` and `max-cpu-usage` removed. -- Algorithm `cn/msr` renamed to `cn/fast`. -- Algorithm `cn/xtl` removed. -- API endpoint `GET /1/threads` replaced to `GET /2/backends`. -- Added global uptime and extended connection information in API. -- API now return current algorithm. 
- # Previous versions [doc/CHANGELOG_OLD.md](doc/CHANGELOG_OLD.md) diff --git a/README.md b/README.md index ac978393..cf286f86 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ API: OpenCL backend: --opencl enable OpenCL mining backend - --opencl-devices=N list of OpenCL devices to use + --opencl-devices=N comma separated list of OpenCL devices to use --opencl-platform=N OpenCL platform index or name --opencl-loader=PATH path to OpenCL-ICD-Loader (OpenCL.dll or libOpenCL.so) --opencl-no-cache disable OpenCL cache @@ -83,6 +83,7 @@ OpenCL backend: CUDA backend: --cuda enable CUDA mining backend --cuda-loader=PATH path to CUDA plugin (xmrig-cuda.dll or libxmrig-cuda.so) + --cuda-devices=N comma separated list of CUDA devices to use --no-nvml disable NVML (NVIDIA Management Library) support Logging: diff --git a/doc/CHANGELOG_OLD.md b/doc/CHANGELOG_OLD.md index 58be062b..70059190 100644 --- a/doc/CHANGELOG_OLD.md +++ b/doc/CHANGELOG_OLD.md @@ -1,3 +1,116 @@ +# v4.6.2-beta +- [#1274](https://github.com/xmrig/xmrig/issues/1274) Added `--cuda-devices` command line option. +- [#1277](https://github.com/xmrig/xmrig/pull/1277) Fixed function names for clang on Apple. + +# v4.6.1-beta +- [#1272](https://github.com/xmrig/xmrig/pull/1272) Optimized hashrate calculation. +- [#1273](https://github.com/xmrig/xmrig/issues/1273) Fixed crash when use `GET /2/backends` API endpoint with disabled CUDA. + +# v4.6.0-beta +- [#1263](https://github.com/xmrig/xmrig/pull/1263) Added new option `dataset_host` for NVIDIA GPUs with less than 4 GB memory (RandomX only). + +# v4.5.0-beta +- Added NVIDIA CUDA support via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda). XMRig now is unified 3 in 1 miner. + +# v4.4.0-beta +- [#1068](https://github.com/xmrig/xmrig/pull/1068) Added support for `self-select` stratum protocol extension. +- [#1240](https://github.com/xmrig/xmrig/pull/1240) Sync with the latest RandomX code. 
+- [#1241](https://github.com/xmrig/xmrig/issues/1241) Fixed regression with colors on old Windows systems. +- [#1243](https://github.com/xmrig/xmrig/pull/1243) Fixed incorrect OpenCL memory size detection in some cases. +- [#1247](https://github.com/xmrig/xmrig/pull/1247) Fixed ARM64 RandomX code alignment. +- [#1248](https://github.com/xmrig/xmrig/pull/1248) Fixed RandomX code cache cleanup on iOS/Darwin. + +# v4.3.1-beta +- Fixed regression in v4.3.0, miner didn't create `cn` mining profile with default config example. + +# v4.3.0-beta +- [#1227](https://github.com/xmrig/xmrig/pull/1227) Added new algorithm `rx/arq`, RandomX variant for upcoming ArQmA fork. +- [#808](https://github.com/xmrig/xmrig/issues/808#issuecomment-539297156) Added experimental support for persistent memory for CPU mining threads. +- [#1221](https://github.com/xmrig/xmrig/issues/1221) Improved RandomX dataset memory usage and initialization speed for NUMA machines. + +# v4.2.1-beta +- [#1150](https://github.com/xmrig/xmrig/issues/1150) Fixed build on FreeBSD. +- [#1175](https://github.com/xmrig/xmrig/issues/1175) Fixed support for systems where total count of NUMA nodes not equal usable nodes count. +- [#1199](https://github.com/xmrig/xmrig/issues/1199) Fixed excessive memory allocation for OpenCL threads with low intensity. +- [#1212](https://github.com/xmrig/xmrig/issues/1212) Fixed low RandomX performance after fast algorithm switching. + +# v4.2.0-beta +- [#1202](https://github.com/xmrig/xmrig/issues/1202) Fixed algorithm verification in donate strategy. +- Added per pool option `coin` with single possible value `monero` for pools without algorithm negotiation, for upcoming Monero fork. +- Added config option `cpu/max-threads-hint` and command line option `--cpu-max-threads-hint`. + +# v4.1.0-beta +- **OpenCL backend disabled by default.**. +- [#1183](https://github.com/xmrig/xmrig/issues/1183) Fixed compatibility with systemd. 
+- [#1185](https://github.com/xmrig/xmrig/pull/1185) Added JIT compiler for RandomX on ARMv8. +- Improved API endpoint `GET /2/backends` and added support for this endpoint to [workers.xmrig.info](http://workers.xmrig.info). +- Added command line option `--no-cpu` to disable CPU backend. +- Added OpenCL specific command line options: `--opencl`, `--opencl-devices`, `--opencl-platform`, `--opencl-loader` and `--opencl-no-cache`. +- Removed command line option `--http-enabled`, HTTP API enabled automatically if any other `--http-*` option provided. + +# v4.0.1-beta +- [#1177](https://github.com/xmrig/xmrig/issues/1177) Fixed compatibility with old AMD drivers. +- [#1180](https://github.com/xmrig/xmrig/issues/1180) Fixed possible duplicated shares after algorithm switching. +- Added support for case if not all backend threads successfully started. +- Fixed wrong config file permissions after write (only gcc builds on recent Windows 10 affected). + +# v4.0.0-beta +- [#1172](https://github.com/xmrig/xmrig/issues/1172) **Added OpenCL mining backend.** + - [#268](https://github.com/xmrig/xmrig-amd/pull/268) [#270](https://github.com/xmrig/xmrig-amd/pull/270) [#271](https://github.com/xmrig/xmrig-amd/pull/271) [#273](https://github.com/xmrig/xmrig-amd/pull/273) [#274](https://github.com/xmrig/xmrig-amd/pull/274) [#1171](https://github.com/xmrig/xmrig/pull/1171) Added RandomX support for OpenCL, thanks [@SChernykh](https://github.com/SChernykh). +- Algorithm `cn/wow` removed, as no longer alive. + +# v3.2.0 +- Added per pool option `coin` with single possible value `monero` for pools without algorithm negotiation, for upcoming Monero fork. +- [#1183](https://github.com/xmrig/xmrig/issues/1183) Fixed compatibility with systemd. + +# v3.1.3 +- [#1180](https://github.com/xmrig/xmrig/issues/1180) Fixed possible duplicated shares after algorithm switching. +- Fixed wrong config file permissions after write (only gcc builds on recent Windows 10 affected). 
+ +# v3.1.2 +- Many RandomX optimizations and fixes. + - [#1132](https://github.com/xmrig/xmrig/issues/1132) Fixed build on CentOS 7. + - [#1163](https://github.com/xmrig/xmrig/pull/1163) Optimized soft AES code, up to +30% hashrate on CPU without AES support and other optimizations. + - [#1166](https://github.com/xmrig/xmrig/pull/1166) Fixed crash when initialize dataset with big threads count (eg 272). + - [#1168](https://github.com/xmrig/xmrig/pull/1168) Optimized loading from scratchpad. +- [#1128](https://github.com/xmrig/xmrig/issues/1128) Fixed CMake 2.8 compatibility. + +# v3.1.1 +- [#1133](https://github.com/xmrig/xmrig/issues/1133) Fixed syslog regression. +- [#1138](https://github.com/xmrig/xmrig/issues/1138) Fixed multiple network bugs. +- [#1141](https://github.com/xmrig/xmrig/issues/1141) Fixed log in background mode. +- [#1142](https://github.com/xmrig/xmrig/pull/1142) RandomX hashrate improved by 0.5-1.5% depending on variant and CPU. +- [#1146](https://github.com/xmrig/xmrig/pull/1146) Fixed race condition in RandomX thread init. +- [#1148](https://github.com/xmrig/xmrig/pull/1148) Fixed, on Linux linker marking entire executable as having an executable stack. +- Fixed, for Argon2 algorithms command line options like `--threads` was ignored. +- Fixed command line options for single pool, free order allowed again. + +# v3.1.0 +- [#1107](https://github.com/xmrig/xmrig/issues/1107#issuecomment-522235892) Added Argon2 algorithm family: `argon2/chukwa` and `argon2/wrkz`. + +# v3.0.0 +- **[#1111](https://github.com/xmrig/xmrig/pull/1111) Added RandomX (`rx/test`) algorithm for testing and benchmarking.** +- **[#1036](https://github.com/xmrig/xmrig/pull/1036) Added RandomWOW (`rx/wow`) algorithm for [Wownero](http://wownero.org/).** +- **[#1050](https://github.com/xmrig/xmrig/pull/1050) Added RandomXL (`rx/loki`) algorithm for [Loki](https://loki.network/).** +- **[#1077](https://github.com/xmrig/xmrig/issues/1077) Added NUMA support via hwloc**. 
+- **Added flexible [multi algorithm](doc/CPU.md) configuration.** +- **Added unlimited switching between incompatible algorithms, all mining options can be changed at runtime.** +- [#257](https://github.com/xmrig/xmrig-nvidia/pull/257) New logging subsystem, file and syslog now always without colors. +- [#314](https://github.com/xmrig/xmrig-proxy/issues/314) Added donate over proxy feature. +- [#1007](https://github.com/xmrig/xmrig/issues/1007) Old HTTP API backend based on libmicrohttpd replaced with a custom HTTP server (libuv + http_parser). +- [#1010](https://github.com/xmrig/xmrig/pull/1010#issuecomment-482632107) Added daemon support (solo mining). +- [#1066](https://github.com/xmrig/xmrig/issues/1066#issuecomment-518080529) Added error message if pool not ready for RandomX. +- [#1105](https://github.com/xmrig/xmrig/issues/1105) Improved auto configuration for `cn-pico` algorithm. +- Added commands `pause` and `resume` via JSON RPC 2.0 API (`POST /json_rpc`). +- Added command line option `--export-topology` to export the hwloc topology to an XML file. +- Broke backward compatibility with previous configs and command line, `variant` option replaced with `algo`, global option `algo` removed, all CPU related settings moved to `cpu` object. +- Options `av`, `safe` and `max-cpu-usage` removed. +- Algorithm `cn/msr` renamed to `cn/fast`. +- Algorithm `cn/xtl` removed. +- API endpoint `GET /1/threads` replaced with `GET /2/backends`. +- Added global uptime and extended connection information in API. +- API now returns current algorithm. + # v2.99.6-beta - Added commands `pause` and `resume` via JSON RPC 2.0 API (`POST /json_rpc`). - Fixed autoconfig regression (since 2.99.5), mostly `rx/wow` was affected by this bug. 
diff --git a/src/backend/cuda/CudaBackend.cpp b/src/backend/cuda/CudaBackend.cpp index 812ee270..b351df75 100644 --- a/src/backend/cuda/CudaBackend.cpp +++ b/src/backend/cuda/CudaBackend.cpp @@ -155,11 +155,14 @@ public: return; } + devices = CudaLib::devices(cuda.bfactor(), cuda.bsleep(), cuda.devicesHint()); + if (devices.empty()) { + return printDisabled(kLabel, RED_S " (no devices)"); + } + Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") WHITE_BOLD("%s") "/" WHITE_BOLD("%s") BLACK_BOLD("/%s"), kLabel, CudaLib::version(runtimeVersion).c_str(), CudaLib::version(driverVersion).c_str(), CudaLib::pluginVersion()); - devices = CudaLib::devices(cuda.bfactor(), cuda.bsleep()); - # ifdef XMRIG_FEATURE_NVML if (cuda.isNvmlEnabled()) { if (NvmlLib::init(cuda.nvmlLoader())) { @@ -172,7 +175,7 @@ public: ); } else { - printDisabled(kLabel, RED_S " (failed to load NVML)"); + printDisabled(kNvmlLabel, RED_S " (failed to load NVML)"); } } else { diff --git a/src/backend/cuda/CudaConfig.cpp b/src/backend/cuda/CudaConfig.cpp index 49a28d11..8f26c14c 100644 --- a/src/backend/cuda/CudaConfig.cpp +++ b/src/backend/cuda/CudaConfig.cpp @@ -78,6 +78,16 @@ rapidjson::Value xmrig::CudaConfig::toJSON(rapidjson::Document &doc) const std::vector xmrig::CudaConfig::get(const Miner *miner, const Algorithm &algorithm, const std::vector &devices) const { + auto deviceIndex = [&devices](uint32_t index) -> int { + for (uint32_t i = 0; i < devices.size(); ++i) { + if (devices[i].index() == index) { + return i; + } + } + + return -1; + }; + std::vector out; const auto &threads = m_threads.get(algorithm); @@ -85,15 +95,16 @@ std::vector xmrig::CudaConfig::get(const Miner *miner, co return out; } - out.reserve(threads.count() * 2); + out.reserve(threads.count()); for (const auto &thread : threads.data()) { - if (thread.index() >= devices.size()) { + const int index = deviceIndex(thread.index()); + if (index == -1) { LOG_INFO("%s" YELLOW(" skip non-existing device with index ") YELLOW_BOLD("%u"), 
cuda_tag(), thread.index()); continue; } - out.emplace_back(miner, algorithm, thread, devices[thread.index()]); + out.emplace_back(miner, algorithm, thread, devices[static_cast(index)]); } return out; @@ -153,7 +164,7 @@ void xmrig::CudaConfig::generate() return; } - const auto devices = CudaLib::devices(bfactor(), bsleep()); + const auto devices = CudaLib::devices(bfactor(), bsleep(), m_devicesHint); if (devices.empty()) { return; } diff --git a/src/backend/cuda/CudaConfig.h b/src/backend/cuda/CudaConfig.h index 77be3dd4..3f3957e6 100644 --- a/src/backend/cuda/CudaConfig.h +++ b/src/backend/cuda/CudaConfig.h @@ -43,16 +43,17 @@ public: std::vector get(const Miner *miner, const Algorithm &algorithm, const std::vector &devices) const; void read(const rapidjson::Value &value); - inline bool isEnabled() const { return m_enabled; } - inline bool isShouldSave() const { return m_shouldSave; } - inline const String &loader() const { return m_loader; } - inline const Threads &threads() const { return m_threads; } - inline int32_t bfactor() const { return m_bfactor; } - inline int32_t bsleep() const { return m_bsleep; } + inline bool isEnabled() const { return m_enabled; } + inline bool isShouldSave() const { return m_shouldSave; } + inline const std::vector &devicesHint() const { return m_devicesHint; } + inline const String &loader() const { return m_loader; } + inline const Threads &threads() const { return m_threads; } + inline int32_t bfactor() const { return m_bfactor; } + inline int32_t bsleep() const { return m_bsleep; } # ifdef XMRIG_FEATURE_NVML - inline bool isNvmlEnabled() const { return m_nvml; } - inline const String &nvmlLoader() const { return m_nvmlLoader; } + inline bool isNvmlEnabled() const { return m_nvml; } + inline const String &nvmlLoader() const { return m_nvmlLoader; } # endif private: diff --git a/src/backend/cuda/wrappers/CudaLib.cpp b/src/backend/cuda/wrappers/CudaLib.cpp index 8d22fefb..37924457 100644 --- 
a/src/backend/cuda/wrappers/CudaLib.cpp +++ b/src/backend/cuda/wrappers/CudaLib.cpp @@ -209,7 +209,7 @@ std::string xmrig::CudaLib::version(uint32_t version) } -std::vector xmrig::CudaLib::devices(int32_t bfactor, int32_t bsleep) noexcept +std::vector xmrig::CudaLib::devices(int32_t bfactor, int32_t bsleep, const std::vector &hints) noexcept { const uint32_t count = deviceCount(); if (!count) { @@ -219,10 +219,24 @@ std::vector xmrig::CudaLib::devices(int32_t bfactor, int32_t std::vector out; out.reserve(count); - for (uint32_t i = 0; i < count; ++i) { - CudaDevice device(i, bfactor, bsleep); - if (device.isValid()) { - out.emplace_back(std::move(device)); + if (hints.empty()) { + for (uint32_t i = 0; i < count; ++i) { + CudaDevice device(i, bfactor, bsleep); + if (device.isValid()) { + out.emplace_back(std::move(device)); + } + } + } + else { + for (const uint32_t i : hints) { + if (i >= count) { + continue; + } + + CudaDevice device(i, bfactor, bsleep); + if (device.isValid()) { + out.emplace_back(std::move(device)); + } } } diff --git a/src/backend/cuda/wrappers/CudaLib.h b/src/backend/cuda/wrappers/CudaLib.h index f18ed350..4874112f 100644 --- a/src/backend/cuda/wrappers/CudaLib.h +++ b/src/backend/cuda/wrappers/CudaLib.h @@ -85,7 +85,7 @@ public: static int32_t deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept; static nvid_ctx *alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept; static std::string version(uint32_t version); - static std::vector devices(int32_t bfactor, int32_t bsleep) noexcept; + static std::vector devices(int32_t bfactor, int32_t bsleep, const std::vector &hints) noexcept; static uint32_t deviceCount() noexcept; static uint32_t deviceUint(nvid_ctx *ctx, DeviceProperty property) noexcept; static uint32_t driverVersion() noexcept; diff --git a/src/base/io/json/Json.cpp b/src/base/io/json/Json.cpp index 03d4c65a..9a578293 100644 --- a/src/base/io/json/Json.cpp +++ b/src/base/io/json/Json.cpp @@ -50,7 +50,7 @@ bool 
xmrig::Json::getBool(const rapidjson::Value &obj, const char *key, bool def } -const char *xmrig::Json::getString(const rapidjson::Value &obj, const char *key, const char *defaultValue) +const char *xmrig::Json::getString(const rapidjson::Value &obj, const char *key, const char *defaultValue) { assert(obj.IsObject()); diff --git a/src/core/config/ConfigTransform.cpp b/src/core/config/ConfigTransform.cpp index ffd3fbd8..4ece6186 100644 --- a/src/core/config/ConfigTransform.cpp +++ b/src/core/config/ConfigTransform.cpp @@ -187,10 +187,14 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const # ifdef XMRIG_FEATURE_CUDA case IConfig::CudaKey: /* --cuda */ - return set(doc, kCuda, "enabled", true); + return set(doc, kCuda, kEnabled, true); case IConfig::CudaLoaderKey: /* --cuda-loader */ return set(doc, kCuda, "loader", arg); + + case IConfig::CudaDevicesKey: /* --cuda-devices */ + set(doc, kCuda, kEnabled, true); + return set(doc, kCuda, "devices-hint", arg); # endif # ifdef XMRIG_FEATURE_NVML diff --git a/src/core/config/Config_platform.h b/src/core/config/Config_platform.h index e0df3d68..05b39952 100644 --- a/src/core/config/Config_platform.h +++ b/src/core/config/Config_platform.h @@ -109,6 +109,7 @@ static const option options[] = { # ifdef XMRIG_FEATURE_CUDA { "cuda", 0, nullptr, IConfig::CudaKey }, { "cuda-loader", 1, nullptr, IConfig::CudaLoaderKey }, + { "cuda-devices", 1, nullptr, IConfig::CudaDevicesKey }, # endif # ifdef XMRIG_FEATURE_NVML { "no-nvml", 0, nullptr, IConfig::NvmlKey }, diff --git a/src/core/config/usage.h b/src/core/config/usage.h index d9ef2316..bcc3abd1 100644 --- a/src/core/config/usage.h +++ b/src/core/config/usage.h @@ -101,7 +101,7 @@ static inline const std::string &usage() # ifdef XMRIG_FEATURE_OPENCL u += "\nOpenCL backend:\n"; u += " --opencl enable OpenCL mining backend\n"; - u += " --opencl-devices=N list of OpenCL devices to use\n"; + u += " --opencl-devices=N comma separated list of OpenCL devices to 
use\n"; u += " --opencl-platform=N OpenCL platform index or name\n"; u += " --opencl-loader=PATH path to OpenCL-ICD-Loader (OpenCL.dll or libOpenCL.so)\n"; u += " --opencl-no-cache disable OpenCL cache\n"; @@ -112,6 +112,7 @@ static inline const std::string &usage() u += "\nCUDA backend:\n"; u += " --cuda enable CUDA mining backend\n"; u += " --cuda-loader=PATH path to CUDA plugin (xmrig-cuda.dll or libxmrig-cuda.so)\n"; + u += " --cuda-devices=N comma separated list of CUDA devices to use\n"; # endif # ifdef XMRIG_FEATURE_NVML u += " --no-nvml disable NVML (NVIDIA Management Library) support\n"; diff --git a/src/crypto/randomx/jit_compiler_a64_static.S b/src/crypto/randomx/jit_compiler_a64_static.S index 13fd5c57..37c044c8 100644 --- a/src/crypto/randomx/jit_compiler_a64_static.S +++ b/src/crypto/randomx/jit_compiler_a64_static.S @@ -25,26 +25,32 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#if defined(__APPLE__) +#define DECL(x) _##x +#else +#define DECL(x) x +#endif + .arch armv8-a .text - .global randomx_program_aarch64 - .global randomx_program_aarch64_main_loop - .global randomx_program_aarch64_vm_instructions - .global randomx_program_aarch64_imul_rcp_literals_end - .global randomx_program_aarch64_vm_instructions_end - .global randomx_program_aarch64_cacheline_align_mask1 - .global randomx_program_aarch64_cacheline_align_mask2 - .global randomx_program_aarch64_update_spMix1 - .global randomx_program_aarch64_vm_instructions_end_light - .global randomx_program_aarch64_light_cacheline_align_mask - .global randomx_program_aarch64_light_dataset_offset - .global randomx_init_dataset_aarch64 - .global randomx_init_dataset_aarch64_end - .global randomx_calc_dataset_item_aarch64 - .global randomx_calc_dataset_item_aarch64_prefetch - .global randomx_calc_dataset_item_aarch64_mix - .global randomx_calc_dataset_item_aarch64_store_result - .global randomx_calc_dataset_item_aarch64_end + .global DECL(randomx_program_aarch64) + .global DECL(randomx_program_aarch64_main_loop) + .global DECL(randomx_program_aarch64_vm_instructions) + .global DECL(randomx_program_aarch64_imul_rcp_literals_end) + .global DECL(randomx_program_aarch64_vm_instructions_end) + .global DECL(randomx_program_aarch64_cacheline_align_mask1) + .global DECL(randomx_program_aarch64_cacheline_align_mask2) + .global DECL(randomx_program_aarch64_update_spMix1) + .global DECL(randomx_program_aarch64_vm_instructions_end_light) + .global DECL(randomx_program_aarch64_light_cacheline_align_mask) + .global DECL(randomx_program_aarch64_light_dataset_offset) + .global DECL(randomx_init_dataset_aarch64) + .global DECL(randomx_init_dataset_aarch64_end) + .global DECL(randomx_calc_dataset_item_aarch64) + .global DECL(randomx_calc_dataset_item_aarch64_prefetch) + .global DECL(randomx_calc_dataset_item_aarch64_mix) + .global DECL(randomx_calc_dataset_item_aarch64_store_result) + .global 
DECL(randomx_calc_dataset_item_aarch64_end) # Register allocation @@ -99,7 +105,7 @@ # v31 -> scale mask = 0x81f000000000000081f0000000000000 .balign 4 -randomx_program_aarch64: +DECL(randomx_program_aarch64): # Save callee-saved registers sub sp, sp, 192 stp x16, x17, [sp] @@ -187,7 +193,7 @@ randomx_program_aarch64: ldr q14, literal_v14 ldr q15, literal_v15 -randomx_program_aarch64_main_loop: +DECL(randomx_program_aarch64_main_loop): # spAddr0 = spMix1 & ScratchpadL3Mask64; # spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64; lsr x18, x10, 32 @@ -260,7 +266,7 @@ randomx_program_aarch64_main_loop: orr v23.16b, v23.16b, v30.16b # Execute VM instructions -randomx_program_aarch64_vm_instructions: +DECL(randomx_program_aarch64_vm_instructions): # 16 KB buffer for generated instructions .fill 4096,4,0 @@ -278,7 +284,7 @@ literal_x27: .fill 1,8,0 literal_x28: .fill 1,8,0 literal_x29: .fill 1,8,0 literal_x30: .fill 1,8,0 -randomx_program_aarch64_imul_rcp_literals_end: +DECL(randomx_program_aarch64_imul_rcp_literals_end): literal_v0: .fill 2,8,0 literal_v1: .fill 2,8,0 @@ -297,14 +303,14 @@ literal_v13: .fill 2,8,0 literal_v14: .fill 2,8,0 literal_v15: .fill 2,8,0 -randomx_program_aarch64_vm_instructions_end: +DECL(randomx_program_aarch64_vm_instructions_end): # mx ^= r[readReg2] ^ r[readReg3]; eor x9, x9, x18 # Calculate dataset pointer for dataset prefetch mov w18, w9 -randomx_program_aarch64_cacheline_align_mask1: +DECL(randomx_program_aarch64_cacheline_align_mask1): # Actual mask will be inserted by JIT compiler and x18, x18, 1 add x18, x18, x1 @@ -317,12 +323,12 @@ randomx_program_aarch64_cacheline_align_mask1: # Calculate dataset pointer for dataset read mov w10, w9 -randomx_program_aarch64_cacheline_align_mask2: +DECL(randomx_program_aarch64_cacheline_align_mask2): # Actual mask will be inserted by JIT compiler and x10, x10, 1 add x10, x10, x1 -randomx_program_aarch64_xor_with_dataset_line: +DECL(randomx_program_aarch64_xor_with_dataset_line): # xor integer registers 
with dataset data ldp x18, x19, [x10] eor x4, x4, x18 @@ -337,7 +343,7 @@ randomx_program_aarch64_xor_with_dataset_line: eor x14, x14, x18 eor x15, x15, x19 -randomx_program_aarch64_update_spMix1: +DECL(randomx_program_aarch64_update_spMix1): # JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1" eor x10, x0, x0 @@ -358,7 +364,7 @@ randomx_program_aarch64_update_spMix1: stp q18, q19, [x16, 32] subs x3, x3, 1 - bne randomx_program_aarch64_main_loop + bne DECL(randomx_program_aarch64_main_loop) # Restore x0 ldr x0, [sp], 16 @@ -392,7 +398,7 @@ randomx_program_aarch64_update_spMix1: ret -randomx_program_aarch64_vm_instructions_end_light: +DECL(randomx_program_aarch64_vm_instructions_end_light): sub sp, sp, 96 stp x0, x1, [sp, 64] stp x2, x30, [sp, 80] @@ -409,26 +415,26 @@ randomx_program_aarch64_vm_instructions_end_light: # x1 -> pointer to output mov x1, sp -randomx_program_aarch64_light_cacheline_align_mask: +DECL(randomx_program_aarch64_light_cacheline_align_mask): # Actual mask will be inserted by JIT compiler and w2, w9, 1 # x2 -> item number lsr x2, x2, 6 -randomx_program_aarch64_light_dataset_offset: +DECL(randomx_program_aarch64_light_dataset_offset): # Apply dataset offset (filled in by JIT compiler) add x2, x2, 0 add x2, x2, 0 - bl randomx_calc_dataset_item_aarch64 + bl DECL(randomx_calc_dataset_item_aarch64) mov x10, sp ldp x0, x1, [sp, 64] ldp x2, x30, [sp, 80] add sp, sp, 96 - b randomx_program_aarch64_xor_with_dataset_line + b DECL(randomx_program_aarch64_xor_with_dataset_line) @@ -439,26 +445,26 @@ randomx_program_aarch64_light_dataset_offset: # x2 -> start item # x3 -> end item -randomx_init_dataset_aarch64: +DECL(randomx_init_dataset_aarch64): # Save x30 (return address) str x30, [sp, -16]! 
# Load pointer to cache memory ldr x0, [x0] -randomx_init_dataset_aarch64_main_loop: - bl randomx_calc_dataset_item_aarch64 +DECL(randomx_init_dataset_aarch64_main_loop): + bl DECL(randomx_calc_dataset_item_aarch64) add x1, x1, 64 add x2, x2, 1 cmp x2, x3 - bne randomx_init_dataset_aarch64_main_loop + bne DECL(randomx_init_dataset_aarch64_main_loop) # Restore x30 (return address) ldr x30, [sp], 16 ret -randomx_init_dataset_aarch64_end: +DECL(randomx_init_dataset_aarch64_end): # Input parameters # @@ -476,7 +482,7 @@ randomx_init_dataset_aarch64_end: # x12 -> temporary # x13 -> temporary -randomx_calc_dataset_item_aarch64: +DECL(randomx_calc_dataset_item_aarch64): sub sp, sp, 112 stp x0, x1, [sp] stp x2, x3, [sp, 16] @@ -523,7 +529,7 @@ randomx_calc_dataset_item_aarch64: ldr x12, superscalarAdd7 eor x7, x0, x12 - b randomx_calc_dataset_item_aarch64_prefetch + b DECL(randomx_calc_dataset_item_aarch64_prefetch) superscalarMul0: .quad 6364136223846793005 superscalarAdd1: .quad 9298411001130361340 @@ -536,7 +542,7 @@ superscalarAdd7: .quad 9549104520008361294 # Prefetch -> SuperScalar hash -> Mix will be repeated N times -randomx_calc_dataset_item_aarch64_prefetch: +DECL(randomx_calc_dataset_item_aarch64_prefetch): # Actual mask will be inserted by JIT compiler and x11, x10, 1 add x11, x8, x11, lsl 6 @@ -544,7 +550,7 @@ randomx_calc_dataset_item_aarch64_prefetch: # Generated SuperScalar hash program goes here -randomx_calc_dataset_item_aarch64_mix: +DECL(randomx_calc_dataset_item_aarch64_mix): ldp x12, x13, [x11] eor x0, x0, x12 eor x1, x1, x13 @@ -558,7 +564,7 @@ randomx_calc_dataset_item_aarch64_mix: eor x6, x6, x12 eor x7, x7, x13 -randomx_calc_dataset_item_aarch64_store_result: +DECL(randomx_calc_dataset_item_aarch64_store_result): stp x0, x1, [x9] stp x2, x3, [x9, 16] stp x4, x5, [x9, 32] @@ -575,4 +581,4 @@ randomx_calc_dataset_item_aarch64_store_result: ret -randomx_calc_dataset_item_aarch64_end: +DECL(randomx_calc_dataset_item_aarch64_end): diff --git 
a/src/version.h b/src/version.h index ac6d4eec..287a92bd 100644 --- a/src/version.h +++ b/src/version.h @@ -28,15 +28,15 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "4.6.1-beta-mo1" +#define APP_VERSION "5.0.0-mo1" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" #define APP_KIND "miner" -#define APP_VER_MAJOR 4 -#define APP_VER_MINOR 6 -#define APP_VER_PATCH 1 +#define APP_VER_MAJOR 5 +#define APP_VER_MINOR 0 +#define APP_VER_PATCH 0 #ifdef _MSC_VER # if (_MSC_VER >= 1920)