diff --git a/CHANGELOG.md b/CHANGELOG.md index feba3b05..5e908ca5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +# v4.6.0-beta +- [#1263](https://github.com/xmrig/xmrig/pull/1263) Added new option `dataset_host` for NVIDIA GPUs with less than 4 GB memory (RandomX only). + # v4.5.0-beta - Added NVIDIA CUDA support via external [CUDA plugun](https://github.com/xmrig/xmrig-cuda). XMRig now is unified 3 in 1 miner. diff --git a/src/backend/cuda/CudaThread.cpp b/src/backend/cuda/CudaThread.cpp index 3100b662..d98989ab 100644 --- a/src/backend/cuda/CudaThread.cpp +++ b/src/backend/cuda/CudaThread.cpp @@ -40,6 +40,7 @@ static const char *kBlocks = "blocks"; static const char *kBSleep = "bsleep"; static const char *kIndex = "index"; static const char *kThreads = "threads"; +static const char *kDatasetHost = "dataset_host"; } // namespace xmrig @@ -56,11 +57,19 @@ xmrig::CudaThread::CudaThread(const rapidjson::Value &value) m_bfactor = std::min(Json::getUint(value, kBFactor, m_bfactor), 12u); m_bsleep = Json::getUint(value, kBSleep, m_bsleep); m_affinity = Json::getUint64(value, kAffinity, m_affinity); + + if (Json::getValue(value, kDatasetHost).IsInt()) { + m_datasetHost = Json::getInt(value, kDatasetHost, m_datasetHost) != 0; + } + else { + m_datasetHost = Json::getBool(value, kDatasetHost); + } } xmrig::CudaThread::CudaThread(uint32_t index, nvid_ctx *ctx) : m_blocks(CudaLib::deviceInt(ctx, CudaLib::DeviceBlocks)), + m_datasetHost(CudaLib::deviceInt(ctx, CudaLib::DeviceDatasetHost)), m_threads(CudaLib::deviceInt(ctx, CudaLib::DeviceThreads)), m_index(index), m_bfactor(CudaLib::deviceUint(ctx, CudaLib::DeviceBFactor)), @@ -72,12 +81,13 @@ xmrig::CudaThread::CudaThread(uint32_t index, nvid_ctx *ctx) : bool xmrig::CudaThread::isEqual(const CudaThread &other) const { - return m_blocks == other.m_blocks && - m_threads == other.m_threads && - m_affinity == other.m_affinity && - m_index == other.m_index && - m_bfactor == other.m_bfactor && - m_bsleep == 
other.m_bsleep; + return m_blocks == other.m_blocks && + m_threads == other.m_threads && + m_affinity == other.m_affinity && + m_index == other.m_index && + m_bfactor == other.m_bfactor && + m_bsleep == other.m_bsleep && + m_datasetHost == other.m_datasetHost; } @@ -95,5 +105,9 @@ rapidjson::Value xmrig::CudaThread::toJSON(rapidjson::Document &doc) const out.AddMember(StringRef(kBSleep), bsleep(), allocator); out.AddMember(StringRef(kAffinity), affinity(), allocator); + if (m_datasetHost >= 0) { + out.AddMember(StringRef(kDatasetHost), m_datasetHost > 0, allocator); + } + return out; } diff --git a/src/backend/cuda/CudaThread.h b/src/backend/cuda/CudaThread.h index f6523d95..8943ac22 100644 --- a/src/backend/cuda/CudaThread.h +++ b/src/backend/cuda/CudaThread.h @@ -47,6 +47,7 @@ public: inline int32_t bfactor() const { return static_cast<int32_t>(m_bfactor); } inline int32_t blocks() const { return m_blocks; } inline int32_t bsleep() const { return static_cast<int32_t>(m_bsleep); } + inline int32_t datasetHost() const { return m_datasetHost; } inline int32_t threads() const { return m_threads; } inline int64_t affinity() const { return m_affinity; } inline uint32_t index() const { return m_index; } @@ -58,10 +59,11 @@ public: rapidjson::Value toJSON(rapidjson::Document &doc) const; private: - int32_t m_blocks = 0; - int32_t m_threads = 0; - int64_t m_affinity = -1; - uint32_t m_index = 0; + int32_t m_blocks = 0; + int32_t m_datasetHost = -1; + int32_t m_threads = 0; + int64_t m_affinity = -1; + uint32_t m_index = 0; # ifdef _WIN32 uint32_t m_bfactor = 6; diff --git a/src/backend/cuda/runners/CudaBaseRunner.cpp b/src/backend/cuda/runners/CudaBaseRunner.cpp index 032d50c9..757f91de 100644 --- a/src/backend/cuda/runners/CudaBaseRunner.cpp +++ b/src/backend/cuda/runners/CudaBaseRunner.cpp @@ -47,7 +47,7 @@ xmrig::CudaBaseRunner::~CudaBaseRunner() bool xmrig::CudaBaseRunner::init() { m_ctx = CudaLib::alloc(m_data.thread.index(), m_data.thread.bfactor(), m_data.thread.bsleep()); - if 
(CudaLib::deviceInfo(m_ctx, m_data.thread.blocks(), m_data.thread.threads(), m_data.algorithm) != 0) { + if (CudaLib::deviceInfo(m_ctx, m_data.thread.blocks(), m_data.thread.threads(), m_data.algorithm, m_data.thread.datasetHost()) != 0) { return false; } diff --git a/src/backend/cuda/runners/CudaRxRunner.cpp b/src/backend/cuda/runners/CudaRxRunner.cpp index 83bf21ff..20603e76 100644 --- a/src/backend/cuda/runners/CudaRxRunner.cpp +++ b/src/backend/cuda/runners/CudaRxRunner.cpp @@ -31,7 +31,8 @@ #include "crypto/rx/RxDataset.h" -xmrig::CudaRxRunner::CudaRxRunner(size_t index, const CudaLaunchData &data) : CudaBaseRunner(index, data) +xmrig::CudaRxRunner::CudaRxRunner(size_t index, const CudaLaunchData &data) : CudaBaseRunner(index, data), + m_datasetHost(data.thread.datasetHost() > 0) { m_intensity = m_data.thread.threads() * m_data.thread.blocks(); const size_t scratchpads_size = m_intensity * m_data.algorithm.l3(); @@ -59,7 +60,7 @@ bool xmrig::CudaRxRunner::set(const Job &job, uint8_t *blob) } auto dataset = Rx::dataset(job, 0); - m_ready = callWrapper(CudaLib::rxPrepare(m_ctx, dataset->raw(), dataset->size(false), m_intensity)); + m_ready = callWrapper(CudaLib::rxPrepare(m_ctx, dataset->raw(), dataset->size(false), m_datasetHost, m_intensity)); return m_ready; } diff --git a/src/backend/cuda/runners/CudaRxRunner.h b/src/backend/cuda/runners/CudaRxRunner.h index 06ed1b90..448400bc 100644 --- a/src/backend/cuda/runners/CudaRxRunner.h +++ b/src/backend/cuda/runners/CudaRxRunner.h @@ -44,8 +44,9 @@ protected: bool set(const Job &job, uint8_t *blob) override; private: - bool m_ready = false; - size_t m_intensity = 0; + bool m_ready = false; + const bool m_datasetHost = false; + size_t m_intensity = 0; }; diff --git a/src/backend/cuda/wrappers/CudaLib.cpp b/src/backend/cuda/wrappers/CudaLib.cpp index 7264d67d..8d22fefb 100644 --- a/src/backend/cuda/wrappers/CudaLib.cpp +++ b/src/backend/cuda/wrappers/CudaLib.cpp @@ -67,7 +67,7 @@ static const char *kVersion = 
"version"; using alloc_t = nvid_ctx * (*)(uint32_t, int32_t, int32_t); using cnHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint64_t, uint32_t *, uint32_t *); using deviceCount_t = uint32_t (*)(); -using deviceInfo_t = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t); +using deviceInfo_t = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t, int32_t); using deviceInit_t = bool (*)(nvid_ctx *); using deviceInt_t = int32_t (*)(nvid_ctx *, CudaLib::DeviceProperty); using deviceName_t = const char * (*)(nvid_ctx *); @@ -78,7 +78,7 @@ using lastError_t = const char * (*)(nvid_ using pluginVersion_t = const char * (*)(); using release_t = void (*)(nvid_ctx *); using rxHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *); -using rxPrepare_t = bool (*)(nvid_ctx *, const void *, size_t, uint32_t); +using rxPrepare_t = bool (*)(nvid_ctx *, const void *, size_t, bool, uint32_t); using setJob_t = bool (*)(nvid_ctx *, const void *, size_t, int32_t); using version_t = uint32_t (*)(Version); @@ -155,9 +155,9 @@ bool xmrig::CudaLib::rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, } -bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, uint32_t batchSize) noexcept +bool xmrig::CudaLib::rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept { - return pRxPrepare(ctx, dataset, datasetSize, batchSize); + return pRxPrepare(ctx, dataset, datasetSize, dataset_host, batchSize); } @@ -185,9 +185,9 @@ const char *xmrig::CudaLib::pluginVersion() noexcept } -int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept +int xmrig::CudaLib::deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host) noexcept { - return pDeviceInfo(ctx, blocks, threads, algorithm); + return pDeviceInfo(ctx, blocks, threads, algorithm, dataset_host); } @@ -272,7 +272,7 @@ bool 
xmrig::CudaLib::load() return false; } - if (pVersion(ApiVersion) != 1u) { + if (pVersion(ApiVersion) != 2u) { return false; } diff --git a/src/backend/cuda/wrappers/CudaLib.h b/src/backend/cuda/wrappers/CudaLib.h index 7fb1c1eb..10ef24fe 100644 --- a/src/backend/cuda/wrappers/CudaLib.h +++ b/src/backend/cuda/wrappers/CudaLib.h @@ -61,7 +61,8 @@ public: DeviceMemoryFree, DevicePciBusID, DevicePciDeviceID, - DevicePciDomainID + DevicePciDomainID, + DeviceDatasetHost, }; static bool init(const char *fileName = nullptr); @@ -74,12 +75,12 @@ public: static bool cnHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t height, uint64_t target, uint32_t *rescount, uint32_t *resnonce); static bool deviceInit(nvid_ctx *ctx) noexcept; static bool rxHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept; - static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, uint32_t batchSize) noexcept; + static bool rxPrepare(nvid_ctx *ctx, const void *dataset, size_t datasetSize, bool dataset_host, uint32_t batchSize) noexcept; static bool setJob(nvid_ctx *ctx, const void *data, size_t size, const Algorithm &algorithm) noexcept; static const char *deviceName(nvid_ctx *ctx) noexcept; static const char *lastError(nvid_ctx *ctx) noexcept; static const char *pluginVersion() noexcept; - static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm) noexcept; + static int deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host = -1) noexcept; static int32_t deviceInt(nvid_ctx *ctx, DeviceProperty property) noexcept; static nvid_ctx *alloc(uint32_t id, int32_t bfactor, int32_t bsleep) noexcept; static std::string version(uint32_t version);