diff --git a/src/backend/cuda/CudaBackend.cpp b/src/backend/cuda/CudaBackend.cpp index b6878408..25a77336 100644 --- a/src/backend/cuda/CudaBackend.cpp +++ b/src/backend/cuda/CudaBackend.cpp @@ -46,6 +46,11 @@ #include "rapidjson/document.h" +#ifdef XMRIG_ALGO_ASTROBWT +# include "backend/cuda/runners/CudaAstroBWTRunner.h" +#endif + + #ifdef XMRIG_FEATURE_API # include "base/api/interfaces/IApiRequest.h" #endif @@ -212,6 +217,14 @@ public: Log::print(WHITE_BOLD("| # | GPU | BUS ID | I | T | B | BF | BS | MEM | NAME")); + size_t algo_l3 = algo.l3(); + +# ifdef XMRIG_ALGO_ASTROBWT + if (algo.family() == Algorithm::ASTROBWT) { + algo_l3 = CudaAstroBWTRunner::BWT_DATA_STRIDE * 17 + 1024; + } +# endif + size_t i = 0; for (const auto &data : threads) { Log::print("|" CYAN_BOLD("%3zu") " |" CYAN_BOLD("%4u") " |" YELLOW(" %7s") " |" CYAN_BOLD("%5d") " |" CYAN_BOLD("%4d") " |" @@ -224,7 +237,7 @@ public: data.thread.blocks(), data.thread.bfactor(), data.thread.bsleep(), - (data.thread.threads() * data.thread.blocks()) * algo.l3() / oneMiB, + (data.thread.threads() * data.thread.blocks()) * algo_l3 / oneMiB, data.device.name().data() ); diff --git a/src/backend/cuda/CudaConfig.cpp b/src/backend/cuda/CudaConfig.cpp index dbad4220..618aefa9 100644 --- a/src/backend/cuda/CudaConfig.cpp +++ b/src/backend/cuda/CudaConfig.cpp @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -180,6 +180,7 @@ void xmrig::CudaConfig::generate() count += xmrig::generate(m_threads, devices); count += xmrig::generate(m_threads, devices); count += xmrig::generate(m_threads, devices); + count += xmrig::generate(m_threads, devices); generated = true; m_shouldSave = count > 0; diff --git a/src/backend/cuda/CudaConfig.h b/src/backend/cuda/CudaConfig.h index 3f3957e6..1c2f2e03 100644 --- a/src/backend/cuda/CudaConfig.h +++ b/src/backend/cuda/CudaConfig.h @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/backend/cuda/CudaConfig_gen.h b/src/backend/cuda/CudaConfig_gen.h index 7db982db..a5a03110 100644 --- a/src/backend/cuda/CudaConfig_gen.h +++ b/src/backend/cuda/CudaConfig_gen.h @@ -136,6 +136,15 @@ size_t inline generate(Threads &threads, const #endif +#ifdef XMRIG_ALGO_ASTROBWT +template<> +size_t inline generate(Threads &threads, const std::vector &devices) +{ + return generate("astrobwt", threads, Algorithm::ASTROBWT_DERO, devices); +} +#endif + + } /* namespace xmrig */ diff --git a/src/backend/cuda/CudaWorker.cpp b/src/backend/cuda/CudaWorker.cpp index 6823a919..35acfc77 100644 --- a/src/backend/cuda/CudaWorker.cpp +++ b/src/backend/cuda/CudaWorker.cpp @@ -39,6 +39,11 @@ #endif +#ifdef XMRIG_ALGO_ASTROBWT +# include "backend/cuda/runners/CudaAstroBWTRunner.h" +#endif + + #include #include @@ -73,6 +78,12 @@ xmrig::CudaWorker::CudaWorker(size_t id, const CudaLaunchData &data) : case Algorithm::ARGON2: break; + case Algorithm::ASTROBWT: +# ifdef XMRIG_ALGO_ASTROBWT + m_runner = new CudaAstroBWTRunner(id, data); +# endif + break; + default: m_runner = new CudaCnRunner(id, data); break; @@ -104,7 +115,7 @@ bool xmrig::CudaWorker::selfTest() size_t xmrig::CudaWorker::intensity() const { - return m_runner ? m_runner->intensity() : 0; + return m_runner ? m_runner->roundSize() : 0; } @@ -173,7 +184,7 @@ void xmrig::CudaWorker::storeStats() return; } - m_count += intensity(); + m_count += m_runner ? m_runner->processedHashes() : 0; Worker::storeStats(); } diff --git a/src/backend/cuda/cuda.cmake b/src/backend/cuda/cuda.cmake index 8a913aaf..e01d84c0 100644 --- a/src/backend/cuda/cuda.cmake +++ b/src/backend/cuda/cuda.cmake @@ -47,6 +47,11 @@ if (WITH_CUDA) list(APPEND HEADERS_BACKEND_CUDA src/backend/cuda/runners/CudaRxRunner.h) list(APPEND SOURCES_BACKEND_CUDA src/backend/cuda/runners/CudaRxRunner.cpp) endif() + + if (WITH_ASTROBWT) + list(APPEND HEADERS_BACKEND_CUDA src/backend/cuda/runners/CudaAstroBWTRunner.h) + list(APPEND SOURCES_BACKEND_CUDA src/backend/cuda/runners/CudaAstroBWTRunner.cpp) + endif() else() remove_definitions(/DXMRIG_FEATURE_CUDA) remove_definitions(/DXMRIG_FEATURE_NVML) diff --git a/src/backend/cuda/interfaces/ICudaRunner.h b/src/backend/cuda/interfaces/ICudaRunner.h index b5772c89..25bf5af0 100644 --- a/src/backend/cuda/interfaces/ICudaRunner.h +++ b/src/backend/cuda/interfaces/ICudaRunner.h @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -46,22 +46,12 @@ public: ICudaRunner() = default; virtual ~ICudaRunner() = default; -// virtual cl_context ctx() const = 0; -// virtual const Algorithm &algorithm() const = 0; -// virtual const char *buildOptions() const = 0; -// virtual const char *deviceKey() const = 0; -// virtual const char *source() const = 0; -// virtual const OclLaunchData &data() const = 0; - virtual size_t intensity() const = 0; -// virtual size_t threadId() const = 0; -// virtual uint32_t deviceIndex() const = 0; -// virtual void build() = 0; - virtual bool init() = 0; + virtual size_t intensity() const = 0; + virtual size_t roundSize() const = 0; + virtual size_t processedHashes() const = 0; + virtual bool init() = 0; virtual bool run(uint32_t startNonce, uint32_t *rescount, uint32_t *resnonce) = 0; - virtual bool set(const Job &job, uint8_t *blob) = 0; - -protected: -// virtual size_t bufferSize() const = 0; + virtual bool set(const Job &job, uint8_t *blob) = 0; }; diff --git a/src/backend/cuda/runners/CudaAstroBWTRunner.cpp b/src/backend/cuda/runners/CudaAstroBWTRunner.cpp new file mode 100644 index 00000000..af3f4d77 --- /dev/null +++ b/src/backend/cuda/runners/CudaAstroBWTRunner.cpp @@ -0,0 +1,81 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +#include "backend/cuda/runners/CudaAstroBWTRunner.h" +#include "backend/cuda/CudaLaunchData.h" +#include "backend/cuda/wrappers/CudaLib.h" +#include "base/net/stratum/Job.h" +#include "crypto/rx/Rx.h" +#include "crypto/rx/RxDataset.h" + + +namespace xmrig { + + + constexpr uint32_t CudaAstroBWTRunner::BWT_DATA_STRIDE; + + +} // namespace xmrig + + +xmrig::CudaAstroBWTRunner::CudaAstroBWTRunner(size_t index, const CudaLaunchData &data) : + CudaBaseRunner(index, data) +{ + m_intensity = m_data.thread.threads() * m_data.thread.blocks(); + m_intensity -= m_intensity % 32; +} + + +bool xmrig::CudaAstroBWTRunner::run(uint32_t startNonce, uint32_t *rescount, uint32_t *resnonce) +{ + return callWrapper(CudaLib::astroBWTHash(m_ctx, startNonce, m_target, rescount, resnonce)); +} + + +bool xmrig::CudaAstroBWTRunner::set(const Job &job, uint8_t *blob) +{ + if (!CudaBaseRunner::set(job, blob)) { + return false; + } + + return callWrapper(CudaLib::astroBWTPrepare(m_ctx, static_cast(m_intensity))); +} + +size_t xmrig::CudaAstroBWTRunner::roundSize() const +{ + constexpr uint32_t STAGE1_SIZE = 147253; + constexpr uint32_t STAGE1_DATA_STRIDE = (STAGE1_SIZE + 256 + 255) & ~255U; + + const uint32_t BATCH2_SIZE = m_intensity; + const uint32_t BWT_ALLOCATION_SIZE = BATCH2_SIZE * BWT_DATA_STRIDE; + const uint32_t BATCH1_SIZE = (BWT_ALLOCATION_SIZE / STAGE1_DATA_STRIDE) & ~255U; + + return BATCH1_SIZE; +} + +size_t xmrig::CudaAstroBWTRunner::processedHashes() const +{ + return CudaLib::deviceInt(m_ctx, CudaLib::DeviceAstroBWTProcessedHashes); +} diff --git a/src/backend/cuda/runners/CudaAstroBWTRunner.h b/src/backend/cuda/runners/CudaAstroBWTRunner.h new file mode 100644 index 00000000..94ea451f --- /dev/null +++ b/src/backend/cuda/runners/CudaAstroBWTRunner.h @@ -0,0 +1,60 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef XMRIG_CUDAASTROBWTRUNNER_H +#define XMRIG_CUDAASTROBWTRUNNER_H + + +#include "backend/cuda/runners/CudaBaseRunner.h" + + +namespace xmrig { + + +class CudaAstroBWTRunner : public CudaBaseRunner +{ +public: + static constexpr uint32_t BWT_DATA_MAX_SIZE = 560 * 1024 - 256; + static constexpr uint32_t BWT_DATA_STRIDE = (BWT_DATA_MAX_SIZE + 256 + 255) & ~255U; + + CudaAstroBWTRunner(size_t index, const CudaLaunchData &data); + +protected: + inline size_t intensity() const override { return m_intensity; } + inline size_t roundSize() const override; + inline size_t processedHashes() const override; + + bool run(uint32_t startNonce, uint32_t *rescount, uint32_t *resnonce) override; + bool set(const Job &job, uint8_t *blob) override; + +private: + bool m_ready = false; + size_t m_intensity = 0; +}; + + +} /* namespace xmrig */ + + +#endif // XMRIG_CUDAASTROBWTRUNNER_H diff --git a/src/backend/cuda/runners/CudaBaseRunner.h b/src/backend/cuda/runners/CudaBaseRunner.h index c0e1aef0..c9590b4e 100644 --- a/src/backend/cuda/runners/CudaBaseRunner.h +++ b/src/backend/cuda/runners/CudaBaseRunner.h @@ -50,6 +50,8 @@ protected: bool init() override; bool set(const Job &job, uint8_t *blob) override; size_t intensity() const override; + size_t roundSize() const override { return intensity(); } + size_t processedHashes() const override { return intensity(); } protected: bool callWrapper(bool result) const; diff --git a/src/backend/cuda/wrappers/CudaLib.cpp b/src/backend/cuda/wrappers/CudaLib.cpp index e6eb2757..db1ff904 100644 --- a/src/backend/cuda/wrappers/CudaLib.cpp +++ b/src/backend/cuda/wrappers/CudaLib.cpp @@ -5,8 +5,8 @@ * Copyright 2014-2016 Wolf9466 * Copyright 2016 Jay D Dee * Copyright 2017-2018 XMR-Stak , - * Copyright 2018-2019 SChernykh - * Copyright 2016-2019 XMRig , + * Copyright 2018-2020 SChernykh + * Copyright 2016-2020 XMRig , * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -47,6 +47,8 @@ static uv_lib_t cudaLib; static const char *kAlloc = "alloc"; +static const char *kAstroBWTHash = "astroBWTHash"; +static const char *kAstroBWTPrepare = "astroBWTPrepare"; static const char *kCnHash = "cnHash"; static const char *kDeviceCount = "deviceCount"; static const char *kDeviceInfo = "deviceInfo"; @@ -69,6 +71,8 @@ static const char *kVersion = "version"; using alloc_t = nvid_ctx * (*)(uint32_t, int32_t, int32_t); +using astroBWTHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint32_t *, uint32_t *); +using astroBWTPrepare_t = bool (*)(nvid_ctx *, uint32_t); using cnHash_t = bool (*)(nvid_ctx *, uint32_t, uint64_t, uint64_t, uint32_t *, uint32_t *); using deviceCount_t = uint32_t (*)(); using deviceInfo_t = int32_t (*)(nvid_ctx *, int32_t, int32_t, int32_t, int32_t); @@ -90,6 +94,8 @@ using version_t = uint32_t (*)(Version); static alloc_t pAlloc = nullptr; +static astroBWTHash_t pAstroBWTHash = nullptr; +static astroBWTPrepare_t pAstroBWTPrepare = nullptr; static cnHash_t pCnHash = nullptr; static deviceCount_t pDeviceCount = nullptr; static deviceInfo_t pDeviceInfo = nullptr; @@ -145,6 +151,18 @@ void xmrig::CudaLib::close() } +bool xmrig::CudaLib::astroBWTHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept +{ + return pAstroBWTHash(ctx, startNonce, target, rescount, resnonce); +} + + +bool xmrig::CudaLib::astroBWTPrepare(nvid_ctx *ctx, uint32_t batchSize) noexcept +{ + return pAstroBWTPrepare(ctx, batchSize); +} + + bool xmrig::CudaLib::cnHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t height, uint64_t target, uint32_t *rescount, uint32_t *resnonce) { return pCnHash(ctx, startNonce, height, target, rescount, resnonce); @@ -305,7 +323,7 @@ bool xmrig::CudaLib::load() return false; } - if (pVersion(ApiVersion) != 2u) { + if (pVersion(ApiVersion) != 3u) { return false; } @@ -327,6 +345,8 @@ bool xmrig::CudaLib::load() DLSYM(Release); DLSYM(RxHash); DLSYM(RxPrepare); + DLSYM(AstroBWTHash); + DLSYM(AstroBWTPrepare); DLSYM(Version); if (!pDeviceInfo_v2) { diff --git a/src/backend/cuda/wrappers/CudaLib.h b/src/backend/cuda/wrappers/CudaLib.h index b48e720c..cd24c94b 100644 --- a/src/backend/cuda/wrappers/CudaLib.h +++ b/src/backend/cuda/wrappers/CudaLib.h @@ -62,6 +62,7 @@ public: DevicePciDeviceID, DevicePciDomainID, DeviceDatasetHost, + DeviceAstroBWTProcessedHashes, }; static bool init(const char *fileName = nullptr); @@ -72,6 +73,8 @@ public: static inline bool isReady() noexcept { return m_ready; } static inline const String &loader() { return m_loader; } + static bool astroBWTHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce) noexcept; + static bool astroBWTPrepare(nvid_ctx *ctx, uint32_t batchSize) noexcept; static bool cnHash(nvid_ctx *ctx, uint32_t startNonce, uint64_t height, uint64_t target, uint32_t *rescount, uint32_t *resnonce); static bool deviceInfo(nvid_ctx *ctx, int32_t blocks, int32_t threads, const Algorithm &algorithm, int32_t dataset_host = -1) noexcept; static bool deviceInit(nvid_ctx *ctx) noexcept; diff --git a/src/backend/opencl/runners/OclAstroBWTRunner.cpp b/src/backend/opencl/runners/OclAstroBWTRunner.cpp index ed28387d..71d3637e 100644 --- a/src/backend/opencl/runners/OclAstroBWTRunner.cpp +++ b/src/backend/opencl/runners/OclAstroBWTRunner.cpp @@ -70,7 +70,7 @@ xmrig::OclAstroBWTRunner::OclAstroBWTRunner(size_t index, const OclLaunchData &d m_options += " -DBWT_GROUP_SIZE=" + std::to_string(m_workgroup_size); m_bwt_allocation_size = static_cast(m_intensity) * BWT_DATA_STRIDE; - m_batch_size1 = static_cast(m_bwt_allocation_size / STAGE1_DATA_STRIDE) & ~255U; + m_batch_size1 = static_cast(m_bwt_allocation_size / STAGE1_DATA_STRIDE + 255U) & ~255U; m_bwt_data_sizes_host = new uint32_t[m_batch_size1]; } diff --git a/src/backend/opencl/runners/OclAstroBWTRunner.h b/src/backend/opencl/runners/OclAstroBWTRunner.h index f16e0001..5b337e9c 100644 --- a/src/backend/opencl/runners/OclAstroBWTRunner.h +++ b/src/backend/opencl/runners/OclAstroBWTRunner.h @@ -51,7 +51,7 @@ public: OclAstroBWTRunner(size_t index, const OclLaunchData &data); ~OclAstroBWTRunner() override; - inline uint32_t roundSize() const override { return m_batch_size1; } + inline uint32_t roundSize() const override { return static_cast(m_batch_size1); } // ~0.5% of all hashes are invalid inline uint32_t processedHashes() const override { return static_cast(m_processedHashes * 0.995); } @@ -83,7 +83,7 @@ private: uint32_t m_workgroup_size = 0; uint64_t m_bwt_allocation_size = 0; uint64_t m_batch_size1 = 0; - uint32_t m_processedHashes = 0; + uint32_t m_processedHashes = 0; uint32_t* m_bwt_data_sizes_host = nullptr; };