Preparation for #1.4.0 (#30)

- Fixed CPU affinity on Windows for NUMA and CPUs with lot of cores
- Implemented per thread configurable Multihash mode (double, triple, quadruple, quintuple)
- Rebased from XMRig 2.4.4
This commit is contained in:
Ben Gräf 2018-01-19 19:42:06 +01:00 committed by GitHub
parent 990bf8d963
commit cf868666d4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
41 changed files with 2575 additions and 1104 deletions

3
.gitignore vendored
View file

@ -1,3 +1,5 @@
/test/CMakeFiles
/test/*/CMakeFiles
/build
/CMakeLists.txt.user
/.idea
@ -7,3 +9,4 @@
CMakeCache.txt
cmake_install.cmake
Makefile
cmake-build-release/

View file

@ -1,3 +1,7 @@
# 1.4.0
- Fixed CPU affinity on Windows for NUMA and CPUs with a lot of cores
- Implemented per thread configurable Multihash mode (double, triple, quadruple, quintuple)
- Rebased from XMRig 2.4.4
# v1.3.2
- Added starting xmrigDaemon without a config file, with only the CCServer/auth token needed as params #14
- Dashboard now uses servertime for calculation to avoid clock drifts and false offline detection

View file

@ -1,8 +1,9 @@
cmake_minimum_required(VERSION 2.8)
project(xmrig)
#set(CMAKE_BUILD_TYPE Debug)
set(CMAKE_BUILD_TYPE Release)
# Fall back to a Release build when no build type was requested and the
# generator is single-config (multi-config generators set
# CMAKE_CONFIGURATION_TYPES and pick the configuration at build time).
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE "Release"
        CACHE STRING
        "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
        FORCE)
endif()
option(WITH_LIBCPUID "Use Libcpuid" ON)
option(WITH_AEON "CryptoNight-Lite support" ON)
@ -13,57 +14,6 @@ option(WITH_CC_SERVER "CC Server" ON)
include (CheckIncludeFile)
include (cmake/cpu.cmake)
set(HEADERS
src/3rdparty/align.h
src/api/Api.h
src/api/ApiState.h
src/api/NetworkState.h
src/App.h
src/interfaces/IClientListener.h
src/interfaces/IJobResultListener.h
src/interfaces/ILogBackend.h
src/interfaces/IStrategy.h
src/interfaces/IStrategyListener.h
src/interfaces/IWorker.h
src/net/Client.h
src/net/Job.h
src/net/JobId.h
src/net/JobResult.h
src/net/Network.h
src/net/strategies/DonateStrategy.h
src/net/strategies/FailoverStrategy.h
src/net/strategies/SinglePoolStrategy.h
src/net/SubmitResult.h
src/version.h
src/workers/DoubleWorker.h
src/workers/Handle.h
src/workers/Hashrate.h
src/workers/SingleWorker.h
src/workers/Worker.h
src/workers/Workers.h
)
set(HEADERS_CRYPTO
src/crypto/c_blake256.h
src/crypto/c_groestl.h
src/crypto/c_jh.h
src/crypto/c_keccak.h
src/crypto/c_skein.h
src/crypto/CryptoNight.h
src/crypto/CryptoNight_test.h
src/crypto/groestl_tables.h
src/crypto/hash.h
src/crypto/skein_port.h
src/crypto/soft_aes.h
)
if (XMRIG_ARM)
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/CryptoNight_arm.h)
else()
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/CryptoNight_x86.h)
endif()
set(SOURCES
src/api/Api.cpp
src/api/ApiState.cpp
@ -77,10 +27,9 @@ set(SOURCES
src/net/strategies/SinglePoolStrategy.cpp
src/net/SubmitResult.cpp
src/Summary.cpp
src/workers/DoubleWorker.cpp
src/workers/MultiWorker.cpp
src/workers/Handle.cpp
src/workers/Hashrate.cpp
src/workers/SingleWorker.cpp
src/workers/Worker.cpp
src/workers/Workers.cpp
src/xmrig.cpp
@ -95,20 +44,6 @@ set(SOURCES_CRYPTO
src/crypto/CryptoNight.cpp
)
set(HEADERS_COMMON
src/interfaces/IConsoleListener.h
src/Console.h
src/Mem.h
src/Cpu.h
src/net/Url.h
src/Options.h
src/log/ConsoleLog.h
src/log/FileLog.h
src/log/Log.h
src/Platform.h
src/Summary.h
)
set(SOURCES_COMMON
src/Console.cpp
src/Mem.cpp
@ -219,11 +154,6 @@ if (WITH_CC_SERVER)
# The switch that disables this feature is WITH_CC_SERVER (see the option()
# declaration and the enclosing if()), so the hint must name that option.
message(FATAL_ERROR "microhttpd NOT found: use `-DWITH_CC_SERVER=OFF` to build without CC Server support")
endif()
set(HEADERS_CC_SERVER
src/cc/CCServer.h
src/cc/Service.h
src/cc/Httpd.h)
set(SOURCES_CC_SERVER
src/cc/CCServer.cpp
src/cc/Service.cpp
@ -234,18 +164,11 @@ if (WITH_CC_SERVER)
endif()
if (WITH_CC_CLIENT)
set(HEADERS_CC_CLIENT
src/cc/CCClient.h)
set(SOURCES_CC_CLIENT
src/cc/CCClient.cpp)
endif()
if (WITH_CC_SERVER OR WITH_CC_CLIENT)
set(HEADERS_CC_COMMON
src/cc/ControlCommand.h
src/cc/ClientStatus.h)
set(SOURCES_CC_COMMON
src/cc/ControlCommand.cpp
src/cc/ClientStatus.cpp)
@ -257,13 +180,31 @@ include_directories(src)
include_directories(src/3rdparty)
include_directories(${UV_INCLUDE_DIR})
add_executable(xmrigMiner ${HEADERS} ${SOURCES} ${HEADERS_COMMON} ${SOURCES_COMMON} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${SOURCES_CC_COMMON} ${HEADERS_CC_COMMON} ${SOURCES_CC_CLIENT} ${HEADERS_CC_CLIENT})
target_link_libraries(xmrigMiner ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
# Sources that are used by more than one executable are compiled once into
# static libraries and linked into each target that needs them.
add_library(xmrig_common STATIC ${SOURCES_COMMON})
add_library(xmrig_os_dependencies STATIC ${SOURCES_OS} ${SOURCES_SYSLOG})
add_library(xmrig_cpuid STATIC ${SOURCES_CPUID})
# The CC helper sources are only built when the CC server or the CC client
# is enabled.
if (WITH_CC_SERVER OR WITH_CC_CLIENT)
add_library(xmrig_cc_common STATIC ${SOURCES_CC_COMMON})
endif (WITH_CC_SERVER OR WITH_CC_CLIENT)
# Miner executable: miner and crypto sources plus the optional CC client
# sources, linked against the static libraries defined above.
add_executable(xmrigMiner ${SOURCES} ${SOURCES_CRYPTO} ${HTTPD_SOURCES} ${SOURCES_CC_CLIENT} res/app.rc)
target_link_libraries(xmrigMiner xmrig_common xmrig_cpuid xmrig_os_dependencies
${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
# xmrig_cc_common is only linked into the miner when the CC client is enabled.
if (WITH_CC_CLIENT)
target_link_libraries(xmrigMiner xmrig_cc_common)
endif (WITH_CC_CLIENT)
add_executable(xmrigDaemon src/cc/XMRigd.cpp res/app.rc)
if (WITH_CC_SERVER AND MHD_FOUND)
add_executable(xmrigCCServer ${HEADERS_COMMON} ${SOURCES_COMMON} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_SYSLOG} ${SOURCES_CC_COMMON} ${HEADERS_CC_COMMON} ${SOURCES_CC_SERVER} ${HEADERS_CC_SERVER})
target_link_libraries(xmrigCCServer ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
add_executable(xmrigCCServer ${SOURCES_CC_SERVER} res/app.rc)
target_link_libraries(xmrigCCServer
xmrig_common xmrig_cpuid xmrig_os_dependencies xmrig_cc_common
${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
set_target_properties(xmrigCCServer PROPERTIES COMPILE_FLAGS "-DXMRIG_CC_SERVER ${SHARED_FLAGS}")
endif()
add_subdirectory(test EXCLUDE_FROM_ALL)

View file

@ -106,12 +106,13 @@ xmrigDaemon -o pool.minemonero.pro:5555 -u YOUR_WALLET -p x -k --cc-url=IP_OF_CC
-O, --userpass=U:P username:password pair for mining server
-u, --user=USERNAME username for mining server
-p, --pass=PASSWORD password for mining server
-t, --threads=N number of miner threads
-v, --av=N algorithm variation, 0 auto select
-t, --threads=N number of miner threads (0 enables automatic selection of optimal number of threads, default: 0)
-m, --multihash-factor=N number of hash blocks per thread to process at a time (0 enables automatic selection of optimal number of hash blocks, default: 0)
-A, --aesni=N selection of AES-NI mode (0 auto, 1 on, 2 off, default: 0)
-k, --keepalive send keepalived for prevent timeout (need pool support)
-r, --retries=N number of times to retry before switch to backup server (default: 5)
-R, --retry-pause=N time to pause between retries (default: 5)
--doublehash-thread-mask for av=2/4 only, limits doublehash to given threads (mask), (default: all threads)
--multihash-thread-mask for multihash-factor > 1 only, limits multihash to given threads (mask), (default: all threads)
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1
--cpu-priority set process priority (0 idle, 2 normal to 5 highest)
--no-huge-pages disable huge pages support
@ -135,33 +136,51 @@ xmrigDaemon -o pool.minemonero.pro:5555 -u YOUR_WALLET -p x -k --cc-url=IP_OF_CC
-l, --log-file=FILE log all output to a file
-h, --help display this help and exit
-V, --version output version information and exit
-v, --av=N DEPRECATED - algorithm variation, 0 auto select
--doublehash-thread-mask DEPRECATED - same as multihash-thread-mask
```
You can also use configuration via a config file, default **[config.json](https://github.com/Bendr0id/xmrigCC/wiki/Config-XMRigDaemon)**. You can load multiple config files and combine them with command line options.
## Algorithm variations
* `--av=1` For CPUs with hardware AES.
* `--av=2` Lower power mode (double hash) of `1`.
* `--av=3` Software AES implementation.
* `--av=4` Lower power mode (double hash) of `3`.
## Multihash (multihash-factor)
With this option it is possible to increase the number of hashblocks calculated by a single thread in each round.
Selecting multihash-factors greater than 1 increases the L3 cache demands relative to the multihash-factor.
E.g. at multihash-factor 2, each Cryptonight thread requires 4MB and each Cryptonight-lite thread requires 2 MB of L3 cache.
With multihash-factor 3, they need 6MB or 3MB respectively.
## Doublehash thread Mask (only for low power mode (av=2 and av=4))
With this option you can limit doublehash to the given threads (mask). This can significantly improve your hashrate by using unused l3 cache. The default is to run av2/av4 mode on all threads.
Setting multihash-factor to 0 will allow automatic detection of the optimal value.
Xmrig will then try to utilize as much of the L3 cache as possible for the selected number of threads.
If the threads parameter has been set to auto, Xmrig will detect the optimal number of threads first.
After that it finds the greatest possible multihash-factor.
### Multihash for low power operation
Depending on the CPU and its L3 cache, it can make sense to replace multiple single-hash threads with a smaller number of multi-hash counterparts.
This change might come at the price of a minor drop in effective hash-rate, yet it will also reduce heat production and power consumption of the used CPU.
### Multihash for optimal CPU exploitation
In certain environments (e.g. vServer) the system running xmrig can have access to relatively large amounts of L3 cache, but may have access to only a few CPU cores.
In such cases, running xmrig with higher multihash-factors can lead to improvements.
## Multihash thread Mask (only for multihash-factor > 1)
With this option you can limit multihash to the given threads (mask).
This can significantly improve your hashrate by using otherwise unused L3 cache.
The default is to run the configured multihash-factor on all threads.
```
{
...
"av":2,
"doublehash-thread-mask":"0x5", // in binary -> 0101
"multihash-factor":2,
"multihash-thread-mask":"0x5", // in binary -> 0101
"threads": 4,
...
}
```
This will limit doublehash mode (av=2,av=4) to thread 0 and thread 2, thread 1 and thread 3 will run in single hashmode (av=1,av=3).
This will limit multihash mode (multihash-factor = 2) to thread 0 and thread 2, thread 1 and thread 3 will run in single hashmode.
## Common Issues
@ -193,13 +212,18 @@ This will limit doublehash mode (av=2,av=4) to thread 0 and thread 2, thread 1 a
### CPU mining performance
Please note performance is highly dependent on system load. The numbers above are obtained on an idle system. Tasks heavily using a processor cache, such as video playback, can greatly degrade hashrate. Optimal number of threads depends on the size of the L3 cache of a processor, 1 thread requires 2 MB (Cryptonight) or 1MB (Cryptonigh-Lite) of cache.
Please note performance is highly dependent on system load.
The numbers above are obtained on an idle system.
Tasks heavily using a processor cache, such as video playback, can greatly degrade hashrate.
The optimal number of threads depends on the size of the processor's L3 cache: 1 thread requires 2 MB (Cryptonight) or 1 MB (Cryptonight-Lite) of cache.
### Maximum performance checklist
* Idle operating system.
* Do not exceed optimal thread count.
* Use modern CPUs with AES-NI instruction set.
* Try setting up optimal CPU affinity.
* Try decreasing number of threads while increasing multihash-factor.
Allocate unused cores and L3 cache with the help of multihash-thread-mask.
* Enable fast memory (Large/Huge pages).
## Donations

View file

@ -1,12 +0,0 @@
{
"background": false,
"colors": true,
"log-file": null,
"syslog": false,
"cc-server": {
"port": 3344, // port the CC Server will listens on
"access-token": "mySecret", // access token for CC Clients
"user": "admin", // admin user for access CC Dashboard
"pass": "pass" // admin pass for access CC Dashboard
}
}

View file

@ -386,7 +386,7 @@
var clientId = $('#minerEditor').find('.form-group')["0"].dataset.value;
var clientConfig = $('#config').val();
setClientConfig(clientId, clientConfig, false);
setClientConfig(clientId, clientConfig);
});
$('#multiMinerEditorReplace').click(function(event) {
@ -406,7 +406,7 @@
jsonClientConfig = JSON.stringify(jsonClientConfig,undefined, 2);
jsonClientConfig = jsonClientConfig.replace(new RegExp(search.trim(), 'g'), replacement.trim()).trim();
setClientConfig(clientId, jsonClientConfig, true);
setClientConfig(clientId, jsonClientConfig);
},
error: function (data) {
setError('<strong>Unable to fetch ' + clientId + '_config.json</strong> - Please make sure that you pulled the config before!');
@ -471,7 +471,7 @@
tooltip += (row.client_status.hugepages_enabled ? "enabled" : "disabled");
tooltip += '\n';
tooltip += "Used Threads: " + row.client_status.current_threads;
tooltip += (row.client_status.double_hash_mode ? " [double hash mode]" :"");
tooltip += (row.client_status.hash_factor > 1 ? " [" + row.client_status.hash_factor + "x multi hash mode]" :"");
tooltip += '\n';
tooltip += "Client IP: " + row.client_status.external_ip;
tooltip += '\n';
@ -531,26 +531,18 @@
}, 10000);
}
function setClientConfig(clientId, clientConfig, isMultiConfigEdit) {
function setClientConfig(clientId, clientConfig) {
$.ajax({
url: "/admin/setClientConfig?clientId=" + clientId,
type: 'POST',
dataType: "text",
data: clientConfig,
success: function(data){
if (isMultiConfigEdit) {
setSuccess('<strong>Successfully updated configs for the selected miners</strong> - You need push the config to the miners to apply the config.');
} else {
setSuccess('<strong>Successfully updated config for: ' + clientId + '</strong> - You need push the config to the miner to apply the config.');
}
},
error: function (data) {
if (isMultiConfigEdit) {
setError('<strong>Failed to update configs for the selected miners</strong> \nError:' + JSON.stringify(data,undefined, 2));
} else {
setError('<strong>Failed to update config for: ' + clientId + '</strong> \nError: ' + JSON.stringify(data,undefined, 2));
}
}
});
}
</script>

View file

@ -67,6 +67,7 @@ App::App(int argc, char **argv) :
m_self = this;
Cpu::init();
m_options = Options::parse(argc, argv);
if (!m_options) {
return;
@ -137,12 +138,13 @@ int App::start()
background();
if (!CryptoNight::init(m_options->algo(), m_options->algoVariant())) {
if (!CryptoNight::init(m_options->algo(), m_options->aesni())) {
LOG_ERR("\"%s\" hash self-test failed.", m_options->algoName());
return EINVAL;
}
Mem::allocate(m_options);
Summary::print();
# ifndef XMRIG_NO_API

View file

@ -5,6 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
* Copyright 2018 Sebastian Stolzenberg <https://github.com/sebastianstolzenberg>
*
*
* This program is free software: you can redistribute it and/or modify
@ -22,60 +23,116 @@
*/
#include <cmath>
#include <cstring>
#include <algorithm>
#include <memory>
#include <libcpuid.h>
#include <math.h>
#include <string.h>
#include <iostream>
#include "Cpu.h"
#include "CpuImpl.h"
bool Cpu::m_l2_exclusive = false;
char Cpu::m_brand[64] = { 0 };
int Cpu::m_flags = 0;
int Cpu::m_l2_cache = 0;
int Cpu::m_l3_cache = 0;
int Cpu::m_sockets = 1;
int Cpu::m_totalCores = 0;
int Cpu::m_totalThreads = 0;
int Cpu::optimalThreadsCount(int algo, bool doubleHash, int maxCpuUsage)
CpuImpl& CpuImpl::instance()
{
if (m_totalThreads == 1) {
return 1;
static CpuImpl cpu;
return cpu;
}
// Constructs the CPU information holder with neutral defaults: no flags,
// zero cache sizes, zero cores/threads, a single socket and an empty brand
// string. The constructor itself performs no hardware detection.
CpuImpl::CpuImpl()
: m_l2_exclusive(false)
, m_brand{ 0 }
, m_flags(0)
, m_l2_cache(0)
, m_l3_cache(0)
, m_sockets(1)
, m_totalCores(0)
, m_totalThreads(0)
{
}
/**
 * Derives the number of worker threads and the multihash factor from the
 * available cache, or validates manually chosen values in safe mode.
 *
 * @param threadsCount in/out: requested thread count, 0 requests automatic
 *                     selection. Always >= 1 on return unless the early
 *                     return for fully manual settings is taken.
 * @param hashFactor   in/out: requested hash blocks per thread, 0 requests
 *                     automatic selection. Capped at MAX_NUM_HASH_BLOCKS.
 * @param algo         selected algorithm; determines the cache demand of a
 *                     single hash block.
 * @param maxCpuUsage  percentage of the logical CPUs that may be used when
 *                     the thread count is selected automatically.
 * @param safeMode     when true, manually chosen values are reduced so that
 *                     threads * hashFactor fits into the available cache.
 */
void CpuImpl::optimizeParameters(size_t& threadsCount, size_t& hashFactor,
                                 Options::Algo algo, size_t maxCpuUsage, bool safeMode)
{
    // limits hashfactor to maximum possible value defined by compiler flag
    hashFactor = std::min(hashFactor, static_cast<size_t>(MAX_NUM_HASH_BLOCKS));

    if (!safeMode && threadsCount > 0 && hashFactor > 0)
    {
        // all parameters have been set manually and safe mode is off ... no optimization necessary
        return;
    }

    size_t cache = availableCache();

    // Cache demand of one hash block, in the same unit as availableCache()
    // (presumably KB -- TODO confirm against the platform init code).
    size_t algoBlockSize;
    switch (algo) {
        case Options::ALGO_CRYPTONIGHT_LITE:
            algoBlockSize = 1024;
            break;
        case Options::ALGO_CRYPTONIGHT:
        default:
            algoBlockSize = 2048;
            break;
    }

    // Total number of hash blocks that fit into the cache (at least one).
    size_t maximumReasonableFactor = std::max(cache / algoBlockSize, static_cast<size_t>(1ul));
    size_t maximumReasonableThreadCount = std::min(maximumReasonableFactor, m_totalThreads);
    size_t maximumReasonableHashFactor = std::min(maximumReasonableFactor, static_cast<size_t>(MAX_NUM_HASH_BLOCKS));

    if (safeMode && threadsCount > maximumReasonableThreadCount) {
        threadsCount = maximumReasonableThreadCount;
    }

    // Resolve an automatic (or clamped-to-zero) thread count BEFORE anything
    // divides by threadsCount; otherwise safe mode combined with automatic
    // thread selection would divide by zero.
    if (threadsCount == 0) {
        if (hashFactor == 0) {
            threadsCount = maximumReasonableThreadCount;
        }
        else {
            threadsCount = std::min(maximumReasonableThreadCount,
                                    maximumReasonableFactor / hashFactor);
        }

        if (maxCpuUsage < 100)
        {
            threadsCount = std::min(threadsCount, m_totalThreads * maxCpuUsage / 100);
        }

        threadsCount = std::max(threadsCount, static_cast<size_t>(1));
    }

    // threadsCount is guaranteed to be >= 1 from here on.
    if (safeMode && hashFactor > maximumReasonableFactor / threadsCount) {
        hashFactor = std::min(maximumReasonableFactor / threadsCount, maximumReasonableHashFactor);
        hashFactor = std::max(hashFactor, static_cast<size_t>(1));
    }

    if (hashFactor == 0) {
        hashFactor = std::min(maximumReasonableHashFactor, maximumReasonableFactor / threadsCount);
        hashFactor = std::max(hashFactor, static_cast<size_t>(1));
    }
}
// Returns true when the Cpu::AES bit is set in the detected feature flags.
bool CpuImpl::hasAES()
{
return (m_flags & Cpu::AES) != 0;
}
// Returns true when the Cpu::X86_64 bit is set in the detected feature flags.
bool CpuImpl::isX64()
{
return (m_flags & Cpu::X86_64) != 0;
}
size_t CpuImpl::availableCache()
{
size_t cache = 0;
if (m_l3_cache) {
cache = m_l2_exclusive ? (m_l2_cache + m_l3_cache) : m_l3_cache;
}
else {
cache = m_l2_cache;
}
int count = 0;
const int size = (algo ? 1024 : 2048) * (doubleHash ? 2 : 1);
if (cache) {
count = cache / size;
}
else {
count = m_totalThreads / 2;
}
if (count > m_totalThreads) {
count = m_totalThreads;
}
if (((float) count / m_totalThreads * 100) > maxCpuUsage) {
count = (int) ceil((float) m_totalThreads * (maxCpuUsage / 100.0));
}
return count < 1 ? 1 : count;
return cache;
}
void Cpu::initCommon()
void CpuImpl::initCommon()
{
struct cpu_raw_data_t raw = { 0 };
struct cpu_id_t data = { 0 };
@ -105,14 +162,75 @@ void Cpu::initCommon()
}
# if defined(__x86_64__) || defined(_M_AMD64)
m_flags |= X86_64;
m_flags |= Cpu::X86_64;
# endif
if (data.flags[CPU_FEATURE_AES]) {
m_flags |= AES;
m_flags |= Cpu::AES;
}
if (data.flags[CPU_FEATURE_BMI2]) {
m_flags |= BMI2;
m_flags |= Cpu::BMI2;
}
}
// ---------------------------------------------------------------------------
// Static facade: every Cpu:: function below forwards to the single CpuImpl
// object returned by CpuImpl::instance().
// ---------------------------------------------------------------------------

// Runs the platform specific CPU detection on the shared CpuImpl instance.
void Cpu::init()
{
CpuImpl::instance().init();
}
// Forwards to CpuImpl::optimizeParameters(); threadsCount and hashFactor are
// in/out parameters that may be modified by the call.
void Cpu::optimizeParameters(size_t& threadsCount, size_t& hashFactor, Options::Algo algo,
size_t maxCpuUsage, bool safeMode)
{
CpuImpl::instance().optimizeParameters(threadsCount, hashFactor, algo, maxCpuUsage, safeMode);
}
// Forwards the affinity request (id and CPU mask) to the platform implementation.
void Cpu::setAffinity(int id, uint64_t mask)
{
CpuImpl::instance().setAffinity(id, mask);
}
// True when the detected CPU flags contain the AES bit.
bool Cpu::hasAES()
{
return CpuImpl::instance().hasAES();
}
// True when the detected CPU flags contain the X86_64 bit.
bool Cpu::isX64()
{
return CpuImpl::instance().isX64();
}
// Brand string buffer held by the CpuImpl instance.
const char* Cpu::brand()
{
return CpuImpl::instance().brand();
}
// Total number of cores as stored by the CpuImpl instance.
size_t Cpu::cores()
{
return CpuImpl::instance().cores();
}
// L2 cache size as stored by the CpuImpl instance.
size_t Cpu::l2()
{
return CpuImpl::instance().l2();
}
// L3 cache size as stored by the CpuImpl instance.
size_t Cpu::l3()
{
return CpuImpl::instance().l3();
}
// Number of sockets as stored by the CpuImpl instance.
size_t Cpu::sockets()
{
return CpuImpl::instance().sockets();
}
// Total number of threads as stored by the CpuImpl instance.
size_t Cpu::threads()
{
return CpuImpl::instance().threads();
}
// Cache size usable for hashing, as computed by CpuImpl::availableCache().
size_t Cpu::availableCache()
{
return CpuImpl::instance().availableCache();
}

View file

@ -24,9 +24,9 @@
#ifndef __CPU_H__
#define __CPU_H__
#include <cstdint>
#include <stdint.h>
#include "Options.h"
class Cpu
{
@ -37,30 +37,22 @@ public:
BMI2 = 4
};
static int optimalThreadsCount(int algo, bool doubleHash, int maxCpuUsage);
static void init();
static void optimizeParameters(size_t& threadsCount, size_t& hashFactor, Options::Algo algo,
size_t maxCpuUsage, bool safeMode);
static void setAffinity(int id, uint64_t mask);
static inline bool hasAES() { return (m_flags & AES) != 0; }
static inline bool isX64() { return (m_flags & X86_64) != 0; }
static inline const char *brand() { return m_brand; }
static inline int cores() { return m_totalCores; }
static inline int l2() { return m_l2_cache; }
static inline int l3() { return m_l3_cache; }
static inline int sockets() { return m_sockets; }
static inline int threads() { return m_totalThreads; }
private:
static void initCommon();
static bool m_l2_exclusive;
static char m_brand[64];
static int m_flags;
static int m_l2_cache;
static int m_l3_cache;
static int m_sockets;
static int m_totalCores;
static int m_totalThreads;
static bool hasAES();
static bool isX64();
static const char *brand();
static size_t l2();
static size_t l3();
static size_t cores();
static size_t sockets();
static size_t threads();
static size_t availableCache();
};

View file

@ -5,7 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
* Copyright 2018 Sebastian Stolzenberg <https://github.com/sebastianstolzenberg>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -21,38 +21,47 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __DOUBLEWORKER_H__
#define __DOUBLEWORKER_H__
#ifndef __CPU_IMPL_H__
#define __CPU_IMPL_H__
#include "align.h"
#include "net/Job.h"
#include "net/JobResult.h"
#include "workers/Worker.h"
#include <cstdint>
#include <vector>
#include "Options.h"
class Handle;
class DoubleWorker : public Worker
class CpuImpl
{
public:
DoubleWorker(Handle *handle);
~DoubleWorker();
static CpuImpl& instance();
CpuImpl();
void init();
void start() override;
void optimizeParameters(size_t& threadsCount, size_t& hashFactor, Options::Algo algo,
size_t maxCpuUsage, bool safeMode);
void setAffinity(int id, uint64_t mask);
bool hasAES();
bool isX64();
const char *brand() { return m_brand; }
size_t l2() { return m_l2_cache; }
size_t l3() { return m_l3_cache; }
size_t cores() { return m_totalCores; }
size_t sockets() { return m_sockets; }
size_t threads() { return m_totalThreads; }
size_t availableCache();
private:
bool resume(const Job &job);
void consumeJob();
void save(const Job &job);
void initCommon();
class State;
uint8_t m_hash[64];
State *m_state;
State *m_pausedState;
bool m_l2_exclusive;
char m_brand[64];
int m_flags;
size_t m_l2_cache;
size_t m_l3_cache;
size_t m_sockets;
size_t m_totalCores;
size_t m_totalThreads;
};
#endif /* __SINGLEWORKER_H__ */
#endif /* __CPU_IMPL_H__ */

View file

@ -27,23 +27,18 @@
#include "Cpu.h"
char Cpu::m_brand[64] = { 0 };
int Cpu::m_flags = 0;
int Cpu::m_l2_cache = 0;
int Cpu::m_l3_cache = 0;
int Cpu::m_sockets = 1;
int Cpu::m_totalCores = 0;
int Cpu::m_totalThreads = 0;
int Cpu::optimalThreadsCount(int algo, bool doubleHash, int maxCpuUsage)
// Stub initialisation used when no CPU detection backend is available:
// resets every detected property to its neutral default.
void CpuImpl::init()
{
    // A built-in array cannot be assigned from a braced list
    // (`m_brand = {0};` does not compile), so clear the buffer explicitly.
    memset(m_brand, 0, sizeof(m_brand));

    m_flags        = 0;
    m_l2_cache     = 0;
    m_l3_cache     = 0;
    m_sockets      = 1;
    m_totalCores   = 0;
    m_totalThreads = 0;
}
void Cpu::initCommon()
void CpuImpl::initCommon()
{
memcpy(m_brand, "Unknown", 7);

View file

@ -30,7 +30,7 @@
#include "Cpu.h"
void Cpu::init()
void CpuImpl::init()
{
# ifdef XMRIG_NO_LIBCPUID
m_totalThreads = sysconf(_SC_NPROCESSORS_CONF);
@ -40,6 +40,6 @@ void Cpu::init()
}
void Cpu::setAffinity(int id, uint64_t mask)
void CpuImpl::setAffinity(int id, uint64_t mask)
{
}

View file

@ -108,13 +108,6 @@ int Cpu::m_totalCores = 0;
int Cpu::m_totalThreads = 0;
int Cpu::optimalThreadsCount(int algo, bool doubleHash, int maxCpuUsage)
{
int count = m_totalThreads / 2;
return count < 1 ? 1 : count;
}
void Cpu::initCommon()
{
cpu_brand_string(m_brand);

View file

@ -36,7 +36,7 @@
#include <string.h>
#include "Cpu.h"
#include "CpuImpl.h"
#ifdef __FreeBSD__
@ -44,7 +44,7 @@ typedef cpuset_t cpu_set_t;
#endif
void Cpu::init()
void CpuImpl::init()
{
# ifdef XMRIG_NO_LIBCPUID
m_totalThreads = sysconf(_SC_NPROCESSORS_CONF);
@ -54,12 +54,12 @@ void Cpu::init()
}
void Cpu::setAffinity(int id, uint64_t mask)
void CpuImpl::setAffinity(int id, uint64_t mask)
{
cpu_set_t set;
CPU_ZERO(&set);
for (int i = 0; i < m_totalThreads; i++) {
for (int i = 0; i < threads(); i++) {
if (mask & (1UL << i)) {
CPU_SET(i, &set);
}

View file

@ -5,6 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
* Copyright 2018 BenDroid <ben@graef.in>
*
*
* This program is free software: you can redistribute it and/or modify
@ -25,10 +26,10 @@
#include <windows.h>
#include "Cpu.h"
#include "CpuImpl.h"
#include "Mem.h"
void Cpu::init()
void CpuImpl::init()
{
# ifdef XMRIG_NO_LIBCPUID
SYSTEM_INFO sysinfo;
@ -41,12 +42,24 @@ void Cpu::init()
}
void Cpu::setAffinity(int id, uint64_t mask)
void CpuImpl::setAffinity(int id, uint64_t mask)
{
if (id == -1) {
SetProcessAffinityMask(GetCurrentProcess(), mask);
} else {
Mem::ThreadBitSet threadAffinityMask = Mem::ThreadBitSet(mask);
int threadCount = 0;
for (int i = 0; i < m_totalThreads; i++) {
if (threadAffinityMask.test(i)) {
if (threadCount == id) {
SetThreadAffinityMask(GetCurrentThread(), 1ULL << i);
break;
}
threadCount++;
}
}
else {
SetThreadAffinityMask(GetCurrentThread(), mask);
}
}

View file

@ -27,16 +27,15 @@
#include "crypto/CryptoNight.h"
#include "Mem.h"
#include "Options.h"
bool Mem::m_doubleHash = false;
int Mem::m_algo = 0;
int Mem::m_flags = 0;
int Mem::m_threads = 0;
size_t Mem::m_hashFactor = 1;
size_t Mem::m_threads = 0;
size_t Mem::m_memorySize = 0;
uint8_t *Mem::m_memory = nullptr;
int64_t Mem::m_doubleHashThreadMask = -1L;
Mem::ThreadBitSet Mem::m_multiHashThreadMask = Mem::ThreadBitSet(-1L);
cryptonight_ctx *Mem::create(int threadId)
{
@ -45,7 +44,7 @@ cryptonight_ctx *Mem::create(int threadId)
size_t offset = 0;
for (int i=0; i < threadId; i++) {
offset += sizeof(cryptonight_ctx);
offset += isDoubleHash(i) ? scratchPadSize*2 : scratchPadSize;
offset += scratchPadSize * getThreadHashFactor(i);
}
auto* ctx = reinterpret_cast<cryptonight_ctx *>(&m_memory[offset]);

View file

@ -5,6 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
* Copyright 2018 Sebastian Stolzenberg <https://github.com/sebastianstolzenberg>
* Copyright 2018 BenDroid <ben@graef.in>
*
*
* This program is free software: you can redistribute it and/or modify
@ -25,8 +27,9 @@
#define __MEM_H__
#include <stddef.h>
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include <bitset>
#include "align.h"
#include "Options.h"
@ -37,6 +40,7 @@ struct cryptonight_ctx;
class Mem
{
public:
typedef std::bitset<64> ThreadBitSet;
enum Flags {
HugepagesAvailable = 1,
HugepagesEnabled = 2,
@ -47,18 +51,23 @@ public:
static cryptonight_ctx *create(int threadId);
static void release();
static inline bool isDoubleHash(int threadId) { return m_doubleHash && (m_doubleHashThreadMask == -1L || ((m_doubleHashThreadMask >> threadId) & 1)); }
static inline size_t hashFactor() { return m_hashFactor; }
// Returns the number of hash blocks the given worker thread processes per
// round: the configured hash factor when the thread is selected by the
// multihash thread mask (or the mask selects all threads), otherwise 1.
// Thread ids that the mask cannot represent (negative, or >= the mask width)
// are treated as "not selected" instead of letting std::bitset::test() throw
// std::out_of_range.
static inline size_t getThreadHashFactor(int threadId)
{
    if (m_multiHashThreadMask.all()) {
        return m_hashFactor;
    }

    const bool representable = threadId >= 0 &&
                               static_cast<size_t>(threadId) < m_multiHashThreadMask.size();

    return (representable && m_multiHashThreadMask.test(static_cast<size_t>(threadId))) ? m_hashFactor : 1;
}
static inline bool isHugepagesAvailable() { return (m_flags & HugepagesAvailable) != 0; }
static inline bool isHugepagesEnabled() { return (m_flags & HugepagesEnabled) != 0; }
static inline int flags() { return m_flags; }
static inline int threads() { return m_threads; }
static inline size_t threads() { return m_threads; }
private:
static bool m_doubleHash;
static size_t m_hashFactor;
static size_t m_threads;
static int m_algo;
static int m_flags;
static int m_threads;
static int64_t m_doubleHashThreadMask;
static ThreadBitSet m_multiHashThreadMask;
static size_t m_memorySize;
VAR_ALIGN(16, static uint8_t *m_memory);
};

View file

@ -22,7 +22,7 @@
*/
#include <stdlib.h>
#include <cstdlib>
#include <sys/mman.h>
@ -36,26 +36,20 @@
#include "crypto/CryptoNight.h"
#include "log/Log.h"
#include "Mem.h"
#include "Options.h"
bool Mem::allocate(const Options* options)
{
m_algo = options->algo();
m_threads = options->threads();
m_doubleHash = options->doubleHash();
m_doubleHashThreadMask = options->doubleHashThreadMask();
m_hashFactor = options->hashFactor();
m_multiHashThreadMask = Mem::ThreadBitSet(options->multiHashThreadMask());
m_memorySize = 0;
size_t scratchPadSize = m_algo == Options::ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE;
for (int i=0; i < m_threads; i++) {
m_memorySize += sizeof(cryptonight_ctx);
if (isDoubleHash(i)) {
m_memorySize += scratchPadSize*2;
} else {
m_memorySize += scratchPadSize;
}
m_memorySize += scratchPadSize * getThreadHashFactor(i);
}
if (!options->hugePages()) {
@ -70,7 +64,7 @@ bool Mem::allocate(const Options* options)
# elif defined(__FreeBSD__)
m_memory = static_cast<uint8_t*>(mmap(0, m_memorySize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0));
# else
m_memory = static_cast<uint8_t*>(mmap(0, m_memorySize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0));
m_memory = static_cast<uint8_t*>(mmap(nullptr, m_memorySize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, 0, 0));
# endif
if (m_memory == MAP_FAILED) {
m_memory = static_cast<uint8_t*>(_mm_malloc(m_memorySize, 16));

View file

@ -148,19 +148,14 @@ bool Mem::allocate(const Options* options)
{
m_algo = options->algo();
m_threads = options->threads();
m_doubleHash = options->doubleHash();
m_doubleHashThreadMask = options->doubleHashThreadMask();
m_hashFactor = options->hashFactor();
m_multiHashThreadMask = Mem::ThreadBitSet(options->multiHashThreadMask());
m_memorySize = 0;
size_t scratchPadSize = m_algo == Options::ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE;
for (int i=0; i < m_threads; i++) {
m_memorySize += sizeof(cryptonight_ctx);
if (isDoubleHash(i)) {
m_memorySize += scratchPadSize*2;
} else {
m_memorySize += scratchPadSize;
}
m_memorySize += scratchPadSize * getThreadHashFactor(i);
}
m_memorySize = m_memorySize - (m_memorySize % MEMORY) + MEMORY;

View file

@ -23,10 +23,9 @@
*/
#include <string.h>
#include <cstring>
#include <uv.h>
#ifdef _MSC_VER
# include "getopt/getopt.h"
#else
@ -75,7 +74,7 @@ Options:\n"
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
-R, --retry-pause=N time to pause between retries (default: 5)\n\
--doublehash-thread-mask for av=2/4 only, limits doublehash to given threads (mask), (default: all threads)\n\
--multihash-thread-mask for av=2/4 only, limits multihash to given threads (mask), (default: all threads)\n\
--cpu-affinity set process affinity to CPU core(s), mask 0x3 for cores 0 and 1\n\
--cpu-priority set process priority (0 idle, 2 normal to 5 highest)\n\
--no-huge-pages disable huge pages support\n\
@ -127,6 +126,8 @@ static char const short_options[] = "a:c:khBp:Px:r:R:s:t:T:o:u:O:v:Vl:S";
static struct option const options[] = {
{ "algo", 1, nullptr, 'a' },
{ "av", 1, nullptr, 'v' },
{ "aesni", 1, nullptr, 'A' },
{ "multihash-factor", 1, nullptr, 'm' },
{ "background", 0, nullptr, 'B' },
{ "config", 1, nullptr, 'c' },
{ "cpu-affinity", 1, nullptr, 1020 },
@ -165,13 +166,16 @@ static struct option const options[] = {
{ "cc-custom-dashboard", 1, nullptr, 4010 },
{ "daemonized", 0, nullptr, 4011 },
{ "doublehash-thread-mask", 1, nullptr, 4013 },
{ 0, 0, 0, 0 }
{ "multihash-thread-mask", 1, nullptr, 4013 },
{ nullptr, 0, nullptr, 0 }
};
static struct option const config_options[] = {
{ "algo", 1, nullptr, 'a' },
{ "av", 1, nullptr, 'v' },
{ "aesni", 1, nullptr, 'A' },
{ "multihash-factor", 1, nullptr, 'm' },
{ "background", 0, nullptr, 'B' },
{ "colors", 0, nullptr, 2000 },
{ "cpu-affinity", 1, nullptr, 1020 },
@ -188,7 +192,8 @@ static struct option const config_options[] = {
{ "threads", 1, nullptr, 't' },
{ "user-agent", 1, nullptr, 1008 },
{ "doublehash-thread-mask", 1, nullptr, 4013 },
{ 0, 0, 0, 0 }
{ "multihash-thread-mask", 1, nullptr, 4013 },
{ nullptr, 0, nullptr, 0 }
};
@ -199,7 +204,7 @@ static struct option const pool_options[] = {
{ "userpass", 1, nullptr, 'O' },
{ "keepalive", 0, nullptr ,'k' },
{ "nicehash", 0, nullptr, 1006 },
{ 0, 0, 0, 0 }
{ nullptr, 0, nullptr, 0 }
};
@ -207,7 +212,7 @@ static struct option const api_options[] = {
{ "port", 1, nullptr, 4000 },
{ "access-token", 1, nullptr, 4001 },
{ "worker-id", 1, nullptr, 4002 },
{ 0, 0, 0, 0 }
{ nullptr, 0, nullptr, 0 }
};
@ -216,7 +221,7 @@ static struct option const cc_client_options[] = {
{ "access-token", 1, nullptr, 4004 },
{ "worker-id", 1, nullptr, 4005 },
{ "update-interval-s", 1, nullptr, 4012 },
{ 0, 0, 0, 0 }
{ nullptr, 0, nullptr, 0 }
};
static struct option const cc_server_options[] = {
@ -226,7 +231,7 @@ static struct option const cc_server_options[] = {
{ "pass", 1, nullptr, 4008 },
{ "client-config-folder", 1, nullptr, 4009 },
{ "custom-dashboard", 1, nullptr, 4010 },
{ 0, 0, 0, 0 }
{ nullptr, 0, nullptr, 0 }
};
static const char *algo_names[] = {
@ -239,7 +244,7 @@ static const char *algo_names[] = {
Options *Options::parse(int argc, char **argv)
{
Options *options = new Options(argc, argv);
auto options = new Options(argc, argv);
if (options->isReady()) {
m_self = options;
return m_self;
@ -259,7 +264,6 @@ const char *Options::algoName() const
Options::Options(int argc, char **argv) :
m_background(false),
m_colors(true),
m_doubleHash(false),
m_hugePages(true),
m_ready(false),
m_safe(false),
@ -277,8 +281,10 @@ Options::Options(int argc, char **argv) :
m_ccAdminPass(nullptr),
m_ccClientConfigFolder(nullptr),
m_ccCustomDashboard(nullptr),
m_algo(0),
m_algoVariant(0),
m_algo(ALGO_CRYPTONIGHT),
m_algoVariant(AV0_AUTO),
m_aesni(AESNI_AUTO),
m_hashFactor(0),
m_apiPort(0),
m_donateLevel(kDonateLevel),
m_maxCpuUsage(75),
@ -290,14 +296,14 @@ Options::Options(int argc, char **argv) :
m_ccUpdateInterval(10),
m_ccPort(0),
m_affinity(-1L),
m_doubleHashThreadMask(-1L)
m_multiHashThreadMask(-1L)
{
m_pools.push_back(new Url());
int key;
while (1) {
key = getopt_long(argc, argv, short_options, options, NULL);
while (true) {
key = getopt_long(argc, argv, short_options, options, nullptr);
if (key < 0) {
break;
}
@ -337,22 +343,9 @@ Options::Options(int argc, char **argv) :
fprintf(stderr, "Neither pool nor CCServer URL supplied. Exiting.\n");
return;
}
m_algoVariant = getAlgoVariant();
if (m_algoVariant == AV2_AESNI_DOUBLE || m_algoVariant == AV4_SOFT_AES_DOUBLE) {
m_doubleHash = true;
}
#endif
if (!m_threads) {
m_threads = Cpu::optimalThreadsCount(m_algo, m_doubleHash, m_maxCpuUsage);
}
else if (m_safe) {
const int count = Cpu::optimalThreadsCount(m_algo, m_doubleHash, m_maxCpuUsage);
if (m_threads > count) {
m_threads = count;
}
}
optimizeAlgorithmConfiguration();
for (Url *url : m_pools) {
url->applyExceptions();
@ -407,7 +400,7 @@ bool Options::parseArg(int key, const char *arg)
case 'o': /* --url */
if (m_pools.size() > 1 || m_pools[0]->isValid()) {
Url *url = new Url(arg);
auto url = new Url(arg);
if (url->isValid()) {
m_pools.push_back(url);
}
@ -494,6 +487,8 @@ bool Options::parseArg(int key, const char *arg)
case 'r': /* --retries */
case 'R': /* --retry-pause */
case 'v': /* --av */
case 'A': /* --aesni */
case 'm': /* --multihash-factor */
case 1003: /* --donate-level */
case 1004: /* --max-cpu-usage */
case 1007: /* --print-time */
@ -541,7 +536,7 @@ bool Options::parseArg(int key, const char *arg)
return parseArg(key, p ? strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10));
}
case 4013: { /* --doublehash-thread-mask */
case 4013: { /* --multihash-thread-mask */
const char *p = strstr(arg, "0x");
return parseArg(key, p ? strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10));
}
@ -596,7 +591,23 @@ bool Options::parseArg(int key, uint64_t arg)
return false;
}
m_algoVariant = (int) arg;
m_algoVariant = static_cast<AlgoVariant>(arg);
break;
case 'A': /* --aesni */
if (arg < AESNI_AUTO || arg > AESNI_OFF) {
showUsage(1);
return false;
}
m_aesni = static_cast<AesNi>(arg);
break;
case 'm': /* --multihash-factor */
if (arg > MAX_NUM_HASH_BLOCKS) {
showUsage(1);
return false;
}
m_hashFactor = arg;
break;
case 1003: /* --donate-level */
@ -658,9 +669,9 @@ bool Options::parseArg(int key, uint64_t arg)
m_ccUpdateInterval = (int) arg;
break;
case 4013: /* --doublehash-thread-mask */
case 4013: /* --multihash-thread-mask */
if (arg) {
m_doubleHashThreadMask = arg;
m_multiHashThreadMask = arg;
}
break;
default:
@ -737,8 +748,8 @@ void Options::parseConfig(const char *fileName)
return;
}
for (size_t i = 0; i < ARRAY_SIZE(config_options); i++) {
parseJSON(&config_options[i], doc);
for (auto option : config_options) {
parseJSON(&option, doc);
}
const rapidjson::Value &pools = doc["pools"];
@ -748,30 +759,30 @@ void Options::parseConfig(const char *fileName)
continue;
}
for (size_t i = 0; i < ARRAY_SIZE(pool_options); i++) {
parseJSON(&pool_options[i], value);
for (auto option : pool_options) {
parseJSON(&option, value);
}
}
}
const rapidjson::Value &api = doc["api"];
if (api.IsObject()) {
for (size_t i = 0; i < ARRAY_SIZE(api_options); i++) {
parseJSON(&api_options[i], api);
for (auto api_option : api_options) {
parseJSON(&api_option, api);
}
}
const rapidjson::Value &ccClient = doc["cc-client"];
if (ccClient.IsObject()) {
for (size_t i = 0; i < ARRAY_SIZE(cc_client_options); i++) {
parseJSON(&cc_client_options[i], ccClient);
for (auto cc_client_option : cc_client_options) {
parseJSON(&cc_client_option, ccClient);
}
}
const rapidjson::Value &ccServer = doc["cc-server"];
if (ccServer.IsObject()) {
for (size_t i = 0; i < ARRAY_SIZE(cc_server_options); i++) {
parseJSON(&cc_server_options[i], ccServer);
for (auto cc_server_option : cc_server_options) {
parseJSON(&cc_server_option, ccServer);
}
}
}
@ -848,7 +859,7 @@ bool Options::setAlgo(const char *algo)
{
for (size_t i = 0; i < ARRAY_SIZE(algo_names); i++) {
if (algo_names[i] && !strcmp(algo, algo_names[i])) {
m_algo = (int) i;
m_algo = static_cast<Algo>(i);
break;
}
@ -868,42 +879,51 @@ bool Options::setAlgo(const char *algo)
return true;
}
int Options::getAlgoVariant() const
void Options::optimizeAlgorithmConfiguration()
{
# ifndef XMRIG_NO_AEON
if (m_algo == ALGO_CRYPTONIGHT_LITE) {
return getAlgoVariantLite();
// backwards compatibility for configs still setting algo variant (av)
// av overrides multi-hash and aesni when they are either not set or when they are set to auto
if (m_algoVariant != AV0_AUTO) {
size_t hashFactor = m_hashFactor;
AesNi aesni = m_aesni;
switch (m_algoVariant) {
case AV1_AESNI:
hashFactor = 1;
aesni = AESNI_ON;
break;
case AV2_AESNI_DOUBLE:
hashFactor = 2;
aesni = AESNI_ON;
break;
case AV3_SOFT_AES:
hashFactor = 1;
aesni = AESNI_OFF;
break;
case AV4_SOFT_AES_DOUBLE:
hashFactor = 2;
aesni = AESNI_OFF;
break;
case AV0_AUTO:
default:
// no change
break;
}
if (m_hashFactor == 0) {
m_hashFactor = hashFactor;
}
if (m_aesni == AESNI_AUTO) {
m_aesni = aesni;
}
# endif
if (m_algoVariant <= AV0_AUTO || m_algoVariant >= AV_MAX) {
return Cpu::hasAES() ? AV1_AESNI : AV3_SOFT_AES;
}
if (m_safe && !Cpu::hasAES() && m_algoVariant <= AV2_AESNI_DOUBLE) {
return m_algoVariant + 2;
AesNi aesniFromCpu = Cpu::hasAES() ? AESNI_ON : AESNI_OFF;
if (m_aesni == AESNI_AUTO || m_safe) {
m_aesni = aesniFromCpu;
}
return m_algoVariant;
Cpu::optimizeParameters(m_threads, m_hashFactor, m_algo, m_maxCpuUsage, m_safe);
}
#ifndef XMRIG_NO_AEON
int Options::getAlgoVariantLite() const
{
if (m_algoVariant <= AV0_AUTO || m_algoVariant >= AV_MAX) {
return Cpu::hasAES() ? AV2_AESNI_DOUBLE : AV4_SOFT_AES_DOUBLE;
}
if (m_safe && !Cpu::hasAES() && m_algoVariant <= AV2_AESNI_DOUBLE) {
return m_algoVariant + 2;
}
return m_algoVariant;
}
#endif
bool Options::parseCCUrl(const char* url)
{
free(m_ccHost);

View file

@ -25,14 +25,15 @@
#ifndef __OPTIONS_H__
#define __OPTIONS_H__
#ifndef MAX_NUM_HASH_BLOCKS
#define MAX_NUM_HASH_BLOCKS 5
#endif
#include <stdint.h>
#include <cstdint>
#include <vector>
#include "rapidjson/fwd.h"
class Url;
struct option;
@ -54,12 +55,17 @@ public:
AV_MAX
};
// AES-NI selection mode, stored in m_aesni and set through the "aesni" option.
// The numeric values are the ones accepted on the command line / in the config file.
enum AesNi {
AESNI_AUTO, // 0: not chosen explicitly; resolved later from Cpu::hasAES()
AESNI_ON, // 1: use the AES-NI code path
AESNI_OFF // 2: use the software AES code path
};
static inline Options* i() { return m_self; }
static Options *parse(int argc, char **argv);
inline bool background() const { return m_background; }
inline bool colors() const { return m_colors; }
inline bool doubleHash() const { return m_doubleHash; }
inline bool hugePages() const { return m_hugePages; }
inline bool syslog() const { return m_syslog; }
inline bool daemonized() const { return m_daemonized; }
@ -76,19 +82,20 @@ public:
inline const char *ccClientConfigFolder() const { return m_ccClientConfigFolder; }
inline const char *ccCustomDashboard() const { return m_ccCustomDashboard == nullptr ? "index.html" : m_ccCustomDashboard; }
inline const std::vector<Url*> &pools() const { return m_pools; }
inline int algo() const { return m_algo; }
inline int algoVariant() const { return m_algoVariant; }
inline Algo algo() const { return m_algo; }
inline bool aesni() const { return m_aesni == AESNI_ON; }
inline size_t hashFactor() const { return m_hashFactor; }
inline int apiPort() const { return m_apiPort; }
inline int donateLevel() const { return m_donateLevel; }
inline int printTime() const { return m_printTime; }
inline int priority() const { return m_priority; }
inline int retries() const { return m_retries; }
inline int retryPause() const { return m_retryPause; }
inline int threads() const { return m_threads; }
inline size_t threads() const { return m_threads; }
inline int ccUpdateInterval() const { return m_ccUpdateInterval; }
inline int ccPort() const { return m_ccPort; }
inline int64_t affinity() const { return m_affinity; }
inline int64_t doubleHashThreadMask() const { return m_doubleHashThreadMask; }
inline int64_t multiHashThreadMask() const { return m_multiHashThreadMask; }
inline void setColors(bool colors) { m_colors = colors; }
inline static void release() { delete m_self; }
@ -118,15 +125,10 @@ private:
bool setAlgo(const char *algo);
int getAlgoVariant() const;
# ifndef XMRIG_NO_AEON
int getAlgoVariantLite() const;
# endif
void optimizeAlgorithmConfiguration();
bool m_background;
bool m_colors;
bool m_doubleHash;
bool m_hugePages;
bool m_ready;
bool m_safe;
@ -144,20 +146,22 @@ private:
char *m_ccAdminPass;
char *m_ccClientConfigFolder;
char *m_ccCustomDashboard;
int m_algo;
int m_algoVariant;
Algo m_algo;
AlgoVariant m_algoVariant;
AesNi m_aesni;
size_t m_hashFactor;
int m_apiPort;
int m_donateLevel;
int m_maxCpuUsage;
size_t m_maxCpuUsage;
int m_printTime;
int m_priority;
int m_retries;
int m_retryPause;
int m_threads;
size_t m_threads;
int m_ccUpdateInterval;
int m_ccPort;
int64_t m_affinity;
int64_t m_doubleHashThreadMask;
int64_t m_multiHashThreadMask;
std::vector<Url*> m_pools;
};

View file

@ -92,29 +92,29 @@ static void print_cpu()
static void print_threads()
{
char dhtMaskBuf[256];
if (Options::i()->doubleHash() && Options::i()->doubleHashThreadMask() != -1L) {
if (Options::i()->hashFactor() > 1 && Options::i()->multiHashThreadMask() != -1L) {
std::string singleThreads;
std::string doubleThreads;
std::string multiThreads;
auto addThread = [](std::string& threads, int id) {
if (!threads.empty()) {
threads.append(", ");
}
threads.append(std::to_string(id));
};
for (int i=0; i < Options::i()->threads(); i++) {
if (Mem::isDoubleHash(i)) {
if (!doubleThreads.empty()) {
doubleThreads.append(", ");
if (Mem::getThreadHashFactor(i) > 1) {
addThread(multiThreads, i);
}
doubleThreads.append(std::to_string(i));
} else {
if (!singleThreads.empty()) {
singleThreads.append(" ");
}
singleThreads.append(std::to_string(i));
else {
addThread(singleThreads, i);
}
}
snprintf(dhtMaskBuf, 256, ", doubleHashThreadMask=0x%" PRIX64 " [single threads: %s; double threads: %s]",
Options::i()->doubleHashThreadMask(), singleThreads.c_str(), doubleThreads.c_str());
snprintf(dhtMaskBuf, 256, ", multiHashThreadMask=0x%" PRIX64 " [single threads: %s; multihash threads: %s]",
Options::i()->multiHashThreadMask(), singleThreads.c_str(), multiThreads.c_str());
}
else {
dhtMaskBuf[0] = '\0';
@ -128,10 +128,13 @@ static void print_threads()
affBuf[0] = '\0';
}
Log::i()->text(Options::i()->colors() ? "\x1B[01;32m * \x1B[01;37mTHREADS: \x1B[01;36m%d\x1B[01;37m, %s, av=%d, %sdonate=%d%%\x1B[01;37m%s%s" : " * THREADS: %d, %s, av=%d, %sdonate=%d%%%s%s",
// threads() and hashFactor() both return size_t, so they are printed with %zu.
// The second (no-colors) format string must not contain any ANSI escape sequences.
Log::i()->text(Options::i()->colors() ?
    "\x1B[01;32m * \x1B[01;37mTHREADS: \x1B[01;36m%zu\x1B[01;37m, %s, aes=%d, hf=%zu, %sdonate=%d%%\x1B[01;37m%s%s" :
    " * THREADS: %zu, %s, aes=%d, hf=%zu, %sdonate=%d%%%s%s",
Options::i()->threads(),
Options::i()->algoName(),
Options::i()->algoVariant(),
Options::i()->aesni(),
Options::i()->hashFactor(),
Options::i()->colors() && Options::i()->donateLevel() == 0 ? "\x1B[01;31m" : "",
Options::i()->donateLevel(),
affBuf,

View file

@ -79,7 +79,7 @@ CCClient::CCClient(Options* options, uv_async_t* async)
m_clientStatus.setHugepagesEnabled(Mem::isHugepagesEnabled());
m_clientStatus.setHugepages(Mem::isHugepagesAvailable());
m_clientStatus.setDoubleHashMode(m_options->doubleHash());
m_clientStatus.setHashFactor(Mem::hashFactor());
m_clientStatus.setVersion(Version::string());
m_clientStatus.setCpuBrand(Cpu::brand());

View file

@ -35,9 +35,9 @@ ClientStatus::ClientStatus()
: m_currentStatus(Status::PAUSED),
m_hasHugepages(false),
m_isHugepagesEnabled(false),
m_isDoubleHashMode(false),
m_isCpuX64(false),
m_hasCpuAES(false),
m_hashFactor(1),
m_hashrateShort(0),
m_hashrateMedium(0),
m_hashrateLong(0),
@ -147,14 +147,14 @@ void ClientStatus::setHugepagesEnabled(bool hugepagesEnabled)
m_isHugepagesEnabled = hugepagesEnabled;
}
bool ClientStatus::isDoubleHashMode() const
int ClientStatus::getHashFactor() const
{
return m_isDoubleHashMode;
return m_hashFactor;
}
void ClientStatus::setDoubleHashMode(bool isDoubleHashMode)
void ClientStatus::setHashFactor(int hashFactor)
{
m_isDoubleHashMode = isDoubleHashMode;
m_hashFactor = hashFactor;
}
bool ClientStatus::isCpuX64() const
@ -365,8 +365,8 @@ bool ClientStatus::parseFromJson(const rapidjson::Document& document)
m_isHugepagesEnabled = clientStatus["hugepages_enabled"].GetBool();
}
if (clientStatus.HasMember("double_hash_mode")) {
m_isDoubleHashMode = clientStatus["double_hash_mode"].GetBool();
if (clientStatus.HasMember("hash_factor")) {
m_hashFactor = clientStatus["hash_factor"].GetInt();
}
if (clientStatus.HasMember("cpu_is_x64")) {
@ -459,7 +459,7 @@ rapidjson::Value ClientStatus::toJson(rapidjson::MemoryPoolAllocator<rapidjson::
clientStatus.AddMember("hugepages_available", m_hasHugepages, allocator);
clientStatus.AddMember("hugepages_enabled", m_isHugepagesEnabled, allocator);
clientStatus.AddMember("double_hash_mode", m_isDoubleHashMode, allocator);
clientStatus.AddMember("hash_factor", m_hashFactor, allocator);
clientStatus.AddMember("cpu_is_x64", m_isCpuX64, allocator);
clientStatus.AddMember("cpu_has_aes", m_hasCpuAES, allocator);

View file

@ -85,8 +85,8 @@ public:
bool isHugepagesEnabled() const;
void setHugepagesEnabled(bool hugepagesEnabled);
bool isDoubleHashMode() const;
void setDoubleHashMode(bool isDoubleHashMode);
int getHashFactor() const;
void setHashFactor(int hashFactor);
bool isCpuX64() const;
void setCpuX64(bool isCpuX64);
@ -161,10 +161,10 @@ private:
bool m_hasHugepages;
bool m_isHugepagesEnabled;
bool m_isDoubleHashMode;
bool m_isCpuX64;
bool m_hasCpuAES;
int m_hashFactor;
double m_hashrateShort;
double m_hashrateMedium;
double m_hashrateLong;

View file

@ -1,7 +1,14 @@
{
"algo": "cryptonight", // cryptonight (default) or cryptonight-lite
"av": 0, // algorithm variation, 0 auto select
"doublehash-thread-mask" : null, // for av=2/4 only, limits doublehash to given threads (mask), mask "0x3" means run doublehash on thread 0 and 1 only (default: all threads)
"av": null, // DEPRECATED: algorithm variation, (0 auto,
// 1 -> (aesni=1, multihash-factor=1),
// 2 -> (aesni=1, multihash-factor=2),
// 3 -> (aesni=2, multihash-factor=1),
// 4 -> (aesni=2, multihash-factor=2))
"aesni": 0, // selection of AES-NI mode (0 auto, 1 on, 2 off)
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
"background": false, // true to run the miner in the background (Windows only, for *nix please use screen/tmux or systemd service instead)
"colors": true, // false to disable colored output
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
@ -14,7 +21,6 @@
"retry-pause": 5, // time to pause between retries
"safe": false, // true to safely adjust threads and av settings for the current CPU
"syslog": false, // use system log for output messages
"threads": null, // number of miner threads
"pools": [
{
"url": "", // URL of mining server

View file

@ -5,6 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
* Copyright 2018 Sebastian Stolzenberg <https://github.com/sebastianstolzenberg>
* Copyright 2018 BenDroid <ben@graef.in>
*
*
* This program is free software: you can redistribute it and/or modify
@ -21,7 +23,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "crypto/CryptoNight.h"
#if defined(XMRIG_ARM)
@ -31,129 +32,105 @@
#endif
#include "crypto/CryptoNight_test.h"
#include "net/Job.h"
#include "net/JobResult.h"
#include "Options.h"
void (*cryptonight_hash_ctx_s)(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = nullptr;
void (*cryptonight_hash_ctx_d)(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = nullptr;
static void cryptonight_av1_aesni(const void *input, size_t size, void *output, struct cryptonight_ctx *ctx) {
template <size_t NUM_HASH_BLOCKS>
static void cryptonight_aesni(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
# if !defined(XMRIG_ARMv7)
cryptonight_hash<0x80000, MEMORY, 0x1FFFF0, false>(input, size, output, ctx);
CryptoNightMultiHash<0x80000, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, ctx);
# endif
}
template <size_t NUM_HASH_BLOCKS>
static void cryptonight_softaes(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
CryptoNightMultiHash<0x80000, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, ctx);
}
static void cryptonight_av2_aesni_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
template <size_t NUM_HASH_BLOCKS>
static void cryptonight_lite_aesni(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
# if !defined(XMRIG_ARMv7)
cryptonight_double_hash<0x80000, MEMORY, 0x1FFFF0, false>(input, size, output, ctx);
CryptoNightMultiHash<0x40000, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, ctx);
# endif
}
static void cryptonight_av3_softaes(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_hash<0x80000, MEMORY, 0x1FFFF0, true>(input, size, output, ctx);
template <size_t NUM_HASH_BLOCKS>
static void cryptonight_lite_softaes(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
CryptoNightMultiHash<0x40000, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, ctx);
}
void (*cryptonight_hash_ctx[MAX_NUM_HASH_BLOCKS])(const void *input, size_t size, void *output, cryptonight_ctx *ctx);
static void cryptonight_av4_softaes_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_double_hash<0x80000, MEMORY, 0x1FFFF0, true>(input, size, output, ctx);
// Registers the hash routine for HASH_FACTOR in cryptonight_hash_ctx and then
// recurses at compile time to register every smaller factor as well.
// Factor N is stored at index N - 1. An algo value that is neither
// ALGO_CRYPTONIGHT nor ALGO_CRYPTONIGHT_LITE leaves the slot untouched.
template <size_t HASH_FACTOR>
void setCryptoNightHashMethods(Options::Algo algo, bool aesni)
{
    auto& slot = cryptonight_hash_ctx[HASH_FACTOR - 1];

    if (algo == Options::ALGO_CRYPTONIGHT) {
        if (aesni) {
            slot = cryptonight_aesni<HASH_FACTOR>;
        } else {
            slot = cryptonight_softaes<HASH_FACTOR>;
        }
    } else if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
        if (aesni) {
            slot = cryptonight_lite_aesni<HASH_FACTOR>;
        } else {
            slot = cryptonight_lite_softaes<HASH_FACTOR>;
        }
    }

    // Continue with the next lower factor; the <0> specialization ends the recursion.
    setCryptoNightHashMethods<HASH_FACTOR - 1>(algo, aesni);
}
static void cryptonight_lite_av1_aesni(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
# if !defined(XMRIG_ARMv7)
cryptonight_hash<0x40000, MEMORY_LITE, 0xFFFF0, false>(input, size, output, ctx);
#endif
}
static void cryptonight_lite_av2_aesni_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
# if !defined(XMRIG_ARMv7)
cryptonight_double_hash<0x40000, MEMORY_LITE, 0xFFFF0, false>(input, size, output, ctx);
# endif
}
static void cryptonight_lite_av3_softaes(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_hash<0x40000, MEMORY_LITE, 0xFFFF0, true>(input, size, output, ctx);
}
static void cryptonight_lite_av4_softaes_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
cryptonight_double_hash<0x40000, MEMORY_LITE, 0xFFFF0, true>(input, size, output, ctx);
}
void (*cryptonight_variations[8])(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = {
cryptonight_av1_aesni,
cryptonight_av2_aesni_double,
cryptonight_av3_softaes,
cryptonight_av4_softaes_double,
cryptonight_lite_av1_aesni,
cryptonight_lite_av2_aesni_double,
cryptonight_lite_av3_softaes,
cryptonight_lite_av4_softaes_double
// Base case of the compile-time recursion in setCryptoNightHashMethods:
// the generic template stores factor N at index N - 1 and then instantiates N - 1,
// so factor 0 has no slot to fill and simply stops the recursion.
template <>
void setCryptoNightHashMethods<0>(Options::Algo algo, bool aesni)
{
// template recursion abort
};
void CryptoNight::hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx* ctx)
bool CryptoNight::init(int algo, bool aesni)
{
cryptonight_hash_ctx_s(input, size, output, ctx);
}
void CryptoNight::hashDouble(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx* ctx)
{
cryptonight_hash_ctx_d(input, size, output, ctx);
}
bool CryptoNight::init(int algo, int variant)
{
if (variant < 1 || variant > 4)
{
return false;
}
int index = 0;
if (algo == Options::ALGO_CRYPTONIGHT_LITE) {
index += 4;
}
if (variant == 3 || variant == 4)
{
index += 2;
}
cryptonight_hash_ctx_s = cryptonight_variations[index];
cryptonight_hash_ctx_d = cryptonight_variations[index+1];
setCryptoNightHashMethods<MAX_NUM_HASH_BLOCKS>(static_cast<Options::Algo>(algo), aesni);
return selfTest(algo);
}
// Runs the hash routine registered for the given multihash factor.
// The factor is used as a 1-based index into cryptonight_hash_ctx (slot factor - 1),
// which has MAX_NUM_HASH_BLOCKS entries; no range check is performed here.
void CryptoNight::hash(size_t factor, const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx* ctx)
{
    const auto hashFn = cryptonight_hash_ctx[factor - 1];
    hashFn(input, size, output, ctx);
}
bool CryptoNight::selfTest(int algo)
{
if (cryptonight_hash_ctx_s == nullptr || cryptonight_hash_ctx_d == nullptr) {
// All five slots (indices 0..4, i.e. hash factors 1..5) are invoked below,
// so each of them must be set. Index 1 was previously skipped (index 2 was checked twice).
if (cryptonight_hash_ctx[0] == nullptr || cryptonight_hash_ctx[1] == nullptr ||
    cryptonight_hash_ctx[2] == nullptr || cryptonight_hash_ctx[3] == nullptr ||
    cryptonight_hash_ctx[4] == nullptr) {
return false;
}
char output[64];
char output[160];
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16);
cryptonight_hash_ctx_s(test_input, 76, output, ctx);
auto ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 6, 16);
cryptonight_hash_ctx[0](test_input, 76, output, ctx);
bool resultSingle = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 32) == 0;
cryptonight_hash_ctx_d(test_input, 76, output, ctx);
cryptonight_hash_ctx[1](test_input, 76, output, ctx);
bool resultDouble = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 64) == 0;
cryptonight_hash_ctx[2](test_input, 76, output, ctx);
bool resultTriple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 96) == 0;
cryptonight_hash_ctx[3](test_input, 76, output, ctx);
bool resultQuadruple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 128) == 0;
cryptonight_hash_ctx[4](test_input, 76, output, ctx);
bool resultQuintuple = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 160) == 0;
_mm_free(ctx->memory);
_mm_free(ctx);
bool resultDouble = memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, 64) == 0;
return resultSingle && resultDouble;
return resultSingle && resultDouble && resultTriple && resultQuadruple && resultQuintuple;
}

View file

@ -25,20 +25,17 @@
#define __CRYPTONIGHT_H__
#include <stddef.h>
#include <stdint.h>
#include <cstddef>
#include <cstdint>
#include "align.h"
#include "Options.h"
#define MEMORY 2097152 /* 2 MiB */
#define MEMORY_LITE 1048576 /* 1 MiB */
struct cryptonight_ctx {
VAR_ALIGN(16, uint8_t state0[200]);
VAR_ALIGN(16, uint8_t state1[200]);
VAR_ALIGN(16, uint8_t state[MAX_NUM_HASH_BLOCKS][208]); // 208 instead of 200 to maintain aligned to 16 byte boundaries
VAR_ALIGN(16, uint8_t* memory);
};
@ -46,16 +43,16 @@ struct cryptonight_ctx {
class Job;
class JobResult;
class CryptoNight
{
public:
static void hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx* ctx);
static bool init(int algo, int variant);
static void hashDouble(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx* ctx);
static bool init(int algo, bool aesni);
static void hash(size_t factor, const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx* ctx);
private:
static bool selfTest(int algo);
};
#endif /* __CRYPTONIGHT_H__ */

View file

@ -6,6 +6,8 @@
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016 Imran Yusuff <https://github.com/imranyusuff>
* Copyright 2016-2017 XMRig <support@xmrig.com>
* Copyright 2018 Sebastian Stolzenberg <https://github.com/sebastianstolzenberg>
* Copyright 2018 BenDroid <ben@graef.in>
*
*
* This program is free software: you can redistribute it and/or modify
@ -47,27 +49,32 @@ extern "C"
}
static inline void do_blake_hash(const void* input, size_t len, char* output) {
static inline void do_blake_hash(const void* input, size_t len, char* output)
{
blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len);
}
static inline void do_groestl_hash(const void* input, size_t len, char* output) {
static inline void do_groestl_hash(const void* input, size_t len, char* output)
{
groestl(static_cast<const uint8_t*>(input), len * 8, reinterpret_cast<uint8_t*>(output));
}
static inline void do_jh_hash(const void* input, size_t len, char* output) {
static inline void do_jh_hash(const void* input, size_t len, char* output)
{
jh_hash(32 * 8, static_cast<const uint8_t*>(input), 8 * len, reinterpret_cast<uint8_t*>(output));
}
static inline void do_skein_hash(const void* input, size_t len, char* output) {
static inline void do_skein_hash(const void* input, size_t len, char* output)
{
xmr_skein(static_cast<const uint8_t*>(input), reinterpret_cast<uint8_t*>(output));
}
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
void
(* const extra_hashes[4])(const void*, size_t, char*) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b)
@ -94,7 +101,9 @@ static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
return (uint64_t) r;
}
#else
static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi)
{
// multiplier = ab = a * 2^32 + b
// multiplicand = cd = c * 2^32 + d
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
@ -118,6 +127,7 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
return product_lo;
}
#endif
@ -165,7 +175,9 @@ static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2)
template<bool SOFT_AES>
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
static inline void
aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5,
__m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
@ -191,7 +203,9 @@ static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, _
template<bool SOFT_AES>
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
static inline void
aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6,
__m128i* x7)
{
if (SOFT_AES) {
*x0 = soft_aesenc(*x0, key);
@ -205,21 +219,21 @@ static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2,
}
# ifndef XMRIG_ARMv7
else {
*x0 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x0), key));
*x1 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x1), key));
*x2 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x2), key));
*x3 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x3), key));
*x4 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x4), key));
*x5 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x5), key));
*x6 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x6), key));
*x7 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x7), key));
*x0 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x0), key));
*x1 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x1), key));
*x2 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x2), key));
*x3 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x3), key));
*x4 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x4), key));
*x5 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x5), key));
*x6 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x6), key));
*x7 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x7), key));
}
# endif
}
template<size_t MEM, bool SOFT_AES>
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
@ -259,8 +273,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
xin5 ^= k9;
xin6 ^= k9;
xin7 ^= k9;
}
else {
} else {
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
}
@ -277,7 +290,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
template<size_t MEM, bool SOFT_AES>
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
@ -293,8 +306,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
{
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
@ -327,8 +339,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
xout5 ^= k9;
xout6 ^= k9;
xout7 ^= k9;
}
else {
} else {
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
}
}
@ -343,104 +354,195 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
_mm_store_si128(output + 11, xout7);
}
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
inline void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
// n-Loop version. Seems to be a little bit slower than the hardcoded one.
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES, size_t NUM_HASH_BLOCKS>
class CryptoNightMultiHash
{
keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state0, 200);
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
const uint8_t* l[NUM_HASH_BLOCKS];
uint64_t* h[NUM_HASH_BLOCKS];
uint64_t al[NUM_HASH_BLOCKS];
uint64_t ah[NUM_HASH_BLOCKS];
__m128i bx[NUM_HASH_BLOCKS];
uint64_t idx[NUM_HASH_BLOCKS];
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size,
ctx->state[hashBlock], 200);
}
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
l[hashBlock] = ctx->memory + hashBlock * MEM;
h[hashBlock] = reinterpret_cast<uint64_t*>(ctx->state[hashBlock]);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
uint64_t idx0 = h0[0] ^ h0[4];
al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
bx[hashBlock] =
_mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
}
for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
__m128i cx;
cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
if (SOFT_AES) {
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
cx = soft_aesenc(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
} else {
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
}
else {
_mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
_mm_xor_si128(bx[hashBlock], cx));
idx[hashBlock] = EXTRACT64(cx);
bx[hashBlock] = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
lo = __umul128(idx[hashBlock], cl, &hi);
al[hashBlock] += hi;
ah[hashBlock] += lo;
((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
ah[hashBlock] ^= ch;
al[hashBlock] ^= cl;
idx[hashBlock] = al[hashBlock];
}
}
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
keccakf(h[hashBlock], 24);
extra_hashes[ctx->state[hashBlock][0] & 3](ctx->state[hashBlock], 200,
static_cast<char*>(output) + hashBlock * 32);
}
}
};
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 1>
{
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
const uint8_t* l;
uint64_t* h;
uint64_t al;
uint64_t ah;
__m128i bx;
uint64_t idx;
keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state[0], 200);
l = ctx->memory;
h = reinterpret_cast<uint64_t*>(ctx->state[0]);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h, (__m128i*) l);
al = h[0] ^ h[4];
ah = h[1] ^ h[5];
bx = _mm_set_epi64x(h[3] ^ h[7], h[2] ^ h[6]);
idx = h[0] ^ h[4];
for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
if (SOFT_AES) {
cx = soft_aesenc(cx, _mm_set_epi64x(ah, al));
} else {
# ifndef XMRIG_ARMv7
cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah, al);
# endif
}
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
_mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx));
idx = EXTRACT64(cx);
bx = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
cl = ((uint64_t*) &l[idx & MASK])[0];
ch = ((uint64_t*) &l[idx & MASK])[1];
lo = __umul128(idx, cl, &hi);
al0 += hi;
ah0 += lo;
al += hi;
ah += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
((uint64_t*) &l[idx & MASK])[0] = al;
((uint64_t*) &l[idx & MASK])[1] = ah;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
ah ^= ch;
al ^= cl;
idx = al;
}
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
}
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l, (__m128i*) h);
keccakf(h, 24);
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
}
};
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
inline void cryptonight_double_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 2>
{
keccak((const uint8_t *) input, (int) size, ctx->state0, 200);
keccak((const uint8_t *) input + size, (int) size, ctx->state1, 200);
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state[1]);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
uint64_t al0 = h0[0] ^h0[4];
uint64_t al1 = h1[0] ^h1[4];
uint64_t ah0 = h0[1] ^h0[5];
uint64_t ah1 = h1[1] ^h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4];
for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
}
else {
} else {
# ifndef XMRIG_ARMv7
cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
# endif
}
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
@ -484,8 +586,491 @@ inline void cryptonight_double_hash(const void *__restrict__ input, size_t size,
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, static_cast<char*>(output) + 32);
}
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, static_cast<char*>(output) + 32);
}
};
// Triple-hash specialization: runs three CryptoNight computations in one call, with the
// per-lane statements (lanes 0, 1, 2) written out by hand rather than looped.
//
// hash() parameters:
//   input  - three messages of `size` bytes each, stored back to back
//   size   - length in bytes of ONE message (lane i is read at input + i * size)
//   output - receives the three results at byte offsets 0, 32 and 64
//   ctx    - supplies state[0..2] and the scratchpad; lane i uses ctx->memory + i * MEM
//            (presumably ctx->memory must hold at least 3 * MEM bytes - confirm at the allocator)
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 3>
{
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
// Absorb each message into its own Keccak state.
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200);
// Per-lane scratchpad base pointers. They are declared const but are written
// through C-style casts further down.
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM;
// The same state buffers viewed as 64-bit words.
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state[1]);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx->state[2]);
// Fill each lane's scratchpad from its state.
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h2, (__m128i*) l2);
// Loop registers: (al, ah) is the 128-bit value used as the AES round key and
// accumulator, bx is the previous AES result, idx selects the scratchpad slot.
uint64_t al0 = h0[0] ^h0[4];
uint64_t al1 = h1[0] ^h1[4];
uint64_t al2 = h2[0] ^h2[4];
uint64_t ah0 = h0[1] ^h0[5];
uint64_t ah1 = h1[1] ^h1[5];
uint64_t ah2 = h2[1] ^h2[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4];
for (size_t i = 0; i < ITERATIONS; i++) {
// Step 1: load the 16-byte slot addressed by idx & MASK in each lane.
__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
__m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
// Step 2: one AES round on each slot, keyed with (ah, al).
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
} else {
// Hardware path: AESE with an all-zero key, then AESMC, then XOR with the key.
// NOTE(review): when XMRIG_ARMv7 is defined this branch compiles to nothing, so cx
// stays the raw slot value - presumably SOFT_AES is always true on ARMv7; confirm.
# ifndef XMRIG_ARMv7
cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
# endif
}
// Step 3: write bx ^ cx back to the slot that was just read.
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
// Step 4: the AES result picks the next slot and becomes the next bx.
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2);
bx0 = cx0;
bx1 = cx1;
bx2 = cx2;
// Step 5, once per lane: read the two 64-bit words (cl, ch) of the new slot,
// multiply idx by cl (__umul128 presumably returns the low half and stores the
// high half in hi - it is defined elsewhere in this file), add the product halves
// into (al, ah), store (al, ah) to the slot, then XOR the old slot words into
// (al, ah) and take al as the next index. hi/lo/cl/ch are reused by every lane.
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
// Lane 1.
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
// Lane 2.
cl = ((uint64_t*) &l2[idx2 & MASK])[0];
ch = ((uint64_t*) &l2[idx2 & MASK])[1];
lo = __umul128(idx2, cl, &hi);
al2 += hi;
ah2 += lo;
((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
ah2 ^= ch;
al2 ^= cl;
idx2 = al2;
}
// Fold each scratchpad back into its state, then permute the state.
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l2, (__m128i*) h2);
keccakf(h0, 24);
keccakf(h1, 24);
keccakf(h2, 24);
// The low two bits of state[i][0] select the finalizer from extra_hashes; each
// lane's result goes to its own 32-byte stride of output.
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, static_cast<char*>(output) + 32);
extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, static_cast<char*>(output) + 64);
}
};
// Quadruple-hash specialization: runs four CryptoNight computations in one call, with
// the per-lane statements (lanes 0..3) written out by hand rather than looped.
//
// hash() parameters:
//   input  - four messages of `size` bytes each, stored back to back
//   size   - length in bytes of ONE message (lane i is read at input + i * size)
//   output - receives the four results at byte offsets 0, 32, 64 and 96
//   ctx    - supplies state[0..3] and the scratchpad; lane i uses ctx->memory + i * MEM
//            (presumably ctx->memory must hold at least 4 * MEM bytes - confirm at the allocator)
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 4>
{
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
// Absorb each message into its own Keccak state.
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200);
keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200);
// Per-lane scratchpad base pointers. They are declared const but are written
// through C-style casts further down.
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM;
const uint8_t* l3 = ctx->memory + 3 * MEM;
// The same state buffers viewed as 64-bit words.
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state[1]);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx->state[2]);
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx->state[3]);
// Fill each lane's scratchpad from its state.
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h2, (__m128i*) l2);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h3, (__m128i*) l3);
// Loop registers: (al, ah) is the 128-bit value used as the AES round key and
// accumulator, bx is the previous AES result, idx selects the scratchpad slot.
uint64_t al0 = h0[0] ^h0[4];
uint64_t al1 = h1[0] ^h1[4];
uint64_t al2 = h2[0] ^h2[4];
uint64_t al3 = h3[0] ^h3[4];
uint64_t ah0 = h0[1] ^h0[5];
uint64_t ah1 = h1[1] ^h1[5];
uint64_t ah2 = h2[1] ^h2[5];
uint64_t ah3 = h3[1] ^h3[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4];
uint64_t idx3 = h3[0] ^h3[4];
for (size_t i = 0; i < ITERATIONS; i++) {
// Step 1: load the 16-byte slot addressed by idx & MASK in each lane.
__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
__m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
__m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
// Step 2: one AES round on each slot, keyed with (ah, al).
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
} else {
// Hardware path: AESE with an all-zero key, then AESMC, then XOR with the key.
// NOTE(review): when XMRIG_ARMv7 is defined this branch compiles to nothing, so cx
// stays the raw slot value - presumably SOFT_AES is always true on ARMv7; confirm.
# ifndef XMRIG_ARMv7
cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
cx3 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx3, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah3, al3);
# endif
}
// Step 3: write bx ^ cx back to the slot that was just read.
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
_mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3));
// Step 4: the AES result picks the next slot and becomes the next bx.
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2);
idx3 = EXTRACT64(cx3);
bx0 = cx0;
bx1 = cx1;
bx2 = cx2;
bx3 = cx3;
// Step 5, once per lane: read the two 64-bit words (cl, ch) of the new slot,
// multiply idx by cl (__umul128 presumably returns the low half and stores the
// high half in hi - it is defined elsewhere in this file), add the product halves
// into (al, ah), store (al, ah) to the slot, then XOR the old slot words into
// (al, ah) and take al as the next index. hi/lo/cl/ch are reused by every lane.
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
// Lane 1.
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
// Lane 2.
cl = ((uint64_t*) &l2[idx2 & MASK])[0];
ch = ((uint64_t*) &l2[idx2 & MASK])[1];
lo = __umul128(idx2, cl, &hi);
al2 += hi;
ah2 += lo;
((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
ah2 ^= ch;
al2 ^= cl;
idx2 = al2;
// Lane 3.
cl = ((uint64_t*) &l3[idx3 & MASK])[0];
ch = ((uint64_t*) &l3[idx3 & MASK])[1];
lo = __umul128(idx3, cl, &hi);
al3 += hi;
ah3 += lo;
((uint64_t*) &l3[idx3 & MASK])[0] = al3;
((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
ah3 ^= ch;
al3 ^= cl;
idx3 = al3;
}
// Fold each scratchpad back into its state, then permute the state.
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l2, (__m128i*) h2);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l3, (__m128i*) h3);
keccakf(h0, 24);
keccakf(h1, 24);
keccakf(h2, 24);
keccakf(h3, 24);
// The low two bits of state[i][0] select the finalizer from extra_hashes; each
// lane's result goes to its own 32-byte stride of output.
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, static_cast<char*>(output) + 32);
extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, static_cast<char*>(output) + 64);
extra_hashes[ctx->state[3][0] & 3](ctx->state[3], 200, static_cast<char*>(output) + 96);
}
};
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 5>
{
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200);
keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200);
keccak((const uint8_t*) input + 4 * size, (int) size, ctx->state[4], 200);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM;
const uint8_t* l3 = ctx->memory + 3 * MEM;
const uint8_t* l4 = ctx->memory + 4 * MEM;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state[1]);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx->state[2]);
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx->state[3]);
uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx->state[4]);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h2, (__m128i*) l2);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h3, (__m128i*) l3);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h4, (__m128i*) l4);
uint64_t al0 = h0[0] ^h0[4];
uint64_t al1 = h1[0] ^h1[4];
uint64_t al2 = h2[0] ^h2[4];
uint64_t al3 = h3[0] ^h3[4];
uint64_t al4 = h4[0] ^h4[4];
uint64_t ah0 = h0[1] ^h0[5];
uint64_t ah1 = h1[1] ^h1[5];
uint64_t ah2 = h2[1] ^h2[5];
uint64_t ah3 = h3[1] ^h3[5];
uint64_t ah4 = h4[1] ^h4[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
__m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4];
uint64_t idx3 = h3[0] ^h3[4];
uint64_t idx4 = h4[0] ^h4[4];
for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
__m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
__m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
__m128i cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
cx4 = soft_aesenc(cx4, _mm_set_epi64x(ah4, al4));
} else {
# ifndef XMRIG_ARMv7
cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
cx3 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx3, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah3, al3);
cx4 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx4, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah4, al4);
# endif;
}
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
_mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3));
_mm_store_si128((__m128i*) &l4[idx4 & MASK], _mm_xor_si128(bx4, cx4));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2);
idx3 = EXTRACT64(cx3);
idx4 = EXTRACT64(cx4);
bx0 = cx0;
bx1 = cx1;
bx2 = cx2;
bx3 = cx3;
bx4 = cx4;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
cl = ((uint64_t*) &l2[idx2 & MASK])[0];
ch = ((uint64_t*) &l2[idx2 & MASK])[1];
lo = __umul128(idx2, cl, &hi);
al2 += hi;
ah2 += lo;
((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
ah2 ^= ch;
al2 ^= cl;
idx2 = al2;
cl = ((uint64_t*) &l3[idx3 & MASK])[0];
ch = ((uint64_t*) &l3[idx3 & MASK])[1];
lo = __umul128(idx3, cl, &hi);
al3 += hi;
ah3 += lo;
((uint64_t*) &l3[idx3 & MASK])[0] = al3;
((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
ah3 ^= ch;
al3 ^= cl;
idx3 = al3;
cl = ((uint64_t*) &l4[idx4 & MASK])[0];
ch = ((uint64_t*) &l4[idx4 & MASK])[1];
lo = __umul128(idx4, cl, &hi);
al4 += hi;
ah4 += lo;
((uint64_t*) &l4[idx4 & MASK])[0] = al4;
((uint64_t*) &l4[idx4 & MASK])[1] = ah4;
ah4 ^= ch;
al4 ^= cl;
idx4 = al4;
}
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l2, (__m128i*) h2);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l3, (__m128i*) h3);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l4, (__m128i*) h4);
keccakf(h0, 24);
keccakf(h1, 24);
keccakf(h2, 24);
keccakf(h3, 24);
keccakf(h4, 24);
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, static_cast<char*>(output) + 32);
extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, static_cast<char*>(output) + 64);
extra_hashes[ctx->state[3][0] & 3](ctx->state[3], 200, static_cast<char*>(output) + 96);
extra_hashes[ctx->state[4][0] & 3](ctx->state[4], 200, static_cast<char*>(output) + 128);
}
};
#endif /* __CRYPTONIGHT_ARM_H__ */

View file

@ -25,7 +25,27 @@
#define __CRYPTONIGHT_TEST_H__
const static uint8_t test_input[152] = {
const static uint8_t test_input[456] = {
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
@ -39,7 +59,15 @@ const static uint8_t test_input[152] = {
};
const static uint8_t test_output0[64] = {
const static uint8_t test_output0[192] = {
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
@ -47,11 +75,19 @@ const static uint8_t test_output0[64] = {
};
const static uint8_t test_output1[64] = {
const static uint8_t test_output1[192] = {
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88
};

View file

@ -5,6 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
* Copyright 2018 Sebastian Stolzenberg <https://github.com/sebastianstolzenberg>
* Copyright 2018 BenDroid <ben@graef.in>
*
*
* This program is free software: you can redistribute it and/or modify
@ -47,42 +49,47 @@ extern "C"
}
static inline void do_blake_hash(const void* input, size_t len, char* output) {
static inline void do_blake_hash(const void* input, size_t len, char* output)
{
blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len);
}
static inline void do_groestl_hash(const void* input, size_t len, char* output) {
static inline void do_groestl_hash(const void* input, size_t len, char* output)
{
groestl(static_cast<const uint8_t*>(input), len * 8, reinterpret_cast<uint8_t*>(output));
}
static inline void do_jh_hash(const void* input, size_t len, char* output) {
static inline void do_jh_hash(const void* input, size_t len, char* output)
{
jh_hash(32 * 8, static_cast<const uint8_t*>(input), 8 * len, reinterpret_cast<uint8_t*>(output));
}
static inline void do_skein_hash(const void* input, size_t len, char* output) {
static inline void do_skein_hash(const void* input, size_t len, char* output)
{
xmr_skein(static_cast<const uint8_t*>(input), reinterpret_cast<uint8_t*>(output));
}
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
void (* const extra_hashes[4])(const void*, size_t, char*) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
#if defined(__x86_64__) || defined(_M_AMD64)
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
# ifdef __GNUC__
// Full 64 x 64 -> 128-bit unsigned multiplication built on the compiler's 128-bit
// integer type. Returns the low 64 bits of a * b and writes the high 64 bits to *hi.
static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
{
    const unsigned __int128 product = static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b);

    *hi = static_cast<uint64_t>(product >> 64);
    return static_cast<uint64_t>(product);
}
# else
#define __umul128 _umul128
#define __umul128 _umul128
# endif
#elif defined(__i386__) || defined(_M_IX86)
# define HI32(X) \
@ -164,7 +171,9 @@ static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2)
template<bool SOFT_AES>
static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
static inline void
aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5,
__m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
{
__m128i xout0 = _mm_load_si128(memory);
__m128i xout2 = _mm_load_si128(memory + 1);
@ -190,7 +199,9 @@ static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, _
template<bool SOFT_AES>
static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
static inline void
aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6,
__m128i* x7)
{
if (SOFT_AES) {
*x0 = soft_aesenc(*x0, key);
@ -201,8 +212,7 @@ static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2,
*x5 = soft_aesenc(*x5, key);
*x6 = soft_aesenc(*x6, key);
*x7 = soft_aesenc(*x7, key);
}
else {
} else {
*x0 = _mm_aesenc_si128(*x0, key);
*x1 = _mm_aesenc_si128(*x1, key);
*x2 = _mm_aesenc_si128(*x2, key);
@ -216,7 +226,7 @@ static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2,
template<size_t MEM, bool SOFT_AES>
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
{
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
@ -257,7 +267,7 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
template<size_t MEM, bool SOFT_AES>
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
{
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
@ -273,8 +283,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
{
for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
@ -306,101 +315,191 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
_mm_store_si128(output + 11, xout7);
}
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
inline void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
// n-loop version. Seems to be a little bit slower than the hardcoded one.
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES, size_t NUM_HASH_BLOCKS>
class CryptoNightMultiHash
{
keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state0, 200);
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
const uint8_t* l[NUM_HASH_BLOCKS];
uint64_t* h[NUM_HASH_BLOCKS];
uint64_t al[NUM_HASH_BLOCKS];
uint64_t ah[NUM_HASH_BLOCKS];
__m128i bx[NUM_HASH_BLOCKS];
uint64_t idx[NUM_HASH_BLOCKS];
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
keccak(static_cast<const uint8_t*>(input) + hashBlock * size, (int) size,
ctx->state[hashBlock], 200);
}
const uint8_t* l0 = ctx->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
l[hashBlock] = ctx->memory + hashBlock * MEM;
h[hashBlock] = reinterpret_cast<uint64_t*>(ctx->state[hashBlock]);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t ah0 = h0[1] ^ h0[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h[hashBlock], (__m128i*) l[hashBlock]);
uint64_t idx0 = h0[0] ^ h0[4];
al[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
ah[hashBlock] = h[hashBlock][1] ^ h[hashBlock][5];
bx[hashBlock] =
_mm_set_epi64x(h[hashBlock][3] ^ h[hashBlock][7], h[hashBlock][2] ^ h[hashBlock][6]);
idx[hashBlock] = h[hashBlock][0] ^ h[hashBlock][4];
}
for (size_t i = 0; i < ITERATIONS; i++) {
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
__m128i cx;
cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
if (SOFT_AES) {
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
}
else {
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
cx = soft_aesenc(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
} else {
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
}
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
idx0 = EXTRACT64(cx);
bx0 = cx;
_mm_store_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK],
_mm_xor_si128(bx[hashBlock], cx));
idx[hashBlock] = EXTRACT64(cx);
bx[hashBlock] = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
cl = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0];
ch = ((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1];
lo = __umul128(idx[hashBlock], cl, &hi);
al0 += hi;
ah0 += lo;
al[hashBlock] += hi;
ah[hashBlock] += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[0] = al[hashBlock];
((uint64_t*) &l[hashBlock][idx[hashBlock] & MASK])[1] = ah[hashBlock];
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
ah[hashBlock] ^= ch;
al[hashBlock] ^= cl;
idx[hashBlock] = al[hashBlock];
}
}
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
keccakf(h0, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
}
for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l[hashBlock], (__m128i*) h[hashBlock]);
keccakf(h[hashBlock], 24);
extra_hashes[ctx->state[hashBlock][0] & 3](ctx->state[hashBlock], 200,
static_cast<char*>(output) + hashBlock * 32);
}
}
};
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
inline void cryptonight_double_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 1>
{
keccak((const uint8_t *) input, (int) size, ctx->state0, 200);
keccak((const uint8_t *) input + size, (int) size, ctx->state1, 200);
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
const uint8_t* l;
uint64_t* h;
uint64_t al;
uint64_t ah;
__m128i bx;
uint64_t idx;
keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state[0], 200);
l = ctx->memory;
h = reinterpret_cast<uint64_t*>(ctx->state[0]);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h, (__m128i*) l);
al = h[0] ^ h[4];
ah = h[1] ^ h[5];
bx = _mm_set_epi64x(h[3] ^ h[7], h[2] ^ h[6]);
idx = h[0] ^ h[4];
for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
if (SOFT_AES) {
cx = soft_aesenc(cx, _mm_set_epi64x(ah, al));
} else {
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah, al));
}
_mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx));
idx = EXTRACT64(cx);
bx = cx;
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*) &l[idx & MASK])[0];
ch = ((uint64_t*) &l[idx & MASK])[1];
lo = __umul128(idx, cl, &hi);
al += hi;
ah += lo;
((uint64_t*) &l[idx & MASK])[0] = al;
((uint64_t*) &l[idx & MASK])[1] = ah;
ah ^= ch;
al ^= cl;
idx = al;
}
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l, (__m128i*) h);
keccakf(h, 24);
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
}
};
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 2>
{
public:
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state[1]);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
uint64_t al0 = h0[0] ^h0[4];
uint64_t al1 = h1[0] ^h1[4];
uint64_t ah0 = h0[1] ^h0[5];
uint64_t ah1 = h1[1] ^h1[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
uint64_t idx0 = h0[0] ^ h0[4];
uint64_t idx1 = h1[0] ^ h1[4];
uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4];
for (size_t i = 0; i < ITERATIONS; i++) {
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
}
else {
} else {
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
}
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
@ -444,8 +543,485 @@ inline void cryptonight_double_hash(const void *__restrict__ input, size_t size,
keccakf(h0, 24);
keccakf(h1, 24);
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, static_cast<char*>(output) + 32);
}
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, static_cast<char*>(output) + 32);
}
};
// Specialization of CryptoNightMultiHash for NUM_HASH_BLOCKS == 3.
// The three hash lanes are written out by hand (suffixes 0, 1, 2) rather than
// iterated over with a hash-block index.
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 3>
{
public:
// input:  three blobs of `size` bytes each, read at offsets 0, size and 2 * size.
// output: one result per lane, written at byte offsets 0, 32 and 64.
// ctx:    ctx->state[0..2] hold the per-lane state; ctx->memory is addressed
//         at offsets 0, MEM and 2 * MEM as the per-lane scratchpads.
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
// Absorb each input blob into its own state.
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200);
// Per-lane scratchpad base pointers. They are declared const but are written
// to below through (__m128i*) / (uint64_t*) casts.
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM;
// 64-bit views of the per-lane states.
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state[1]);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx->state[2]);
// Fill each scratchpad from its state.
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h2, (__m128i*) l2);
// Initial loop registers: (al, ah) and bx are derived from state words 0..7.
uint64_t al0 = h0[0] ^h0[4];
uint64_t al1 = h1[0] ^h1[4];
uint64_t al2 = h2[0] ^h2[4];
uint64_t ah0 = h0[1] ^h0[5];
uint64_t ah1 = h1[1] ^h1[5];
uint64_t ah2 = h2[1] ^h2[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
// The first scratchpad index equals the initial al value.
uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4];
for (size_t i = 0; i < ITERATIONS; i++) {
// Step 1: load the 16-byte scratchpad entry selected by idx & MASK.
__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
__m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
// Step 2: one AES round keyed with (ah, al); SOFT_AES selects the software
// routine instead of the _mm_aesenc_si128 intrinsic.
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
} else {
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
}
// Step 3: write bx ^ cx back to the entry that was just read.
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
// Step 4: the next index comes from cx; cx becomes the bx of the next iteration.
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2);
bx0 = cx0;
bx1 = cx1;
bx2 = cx2;
// Step 5 (per lane): multiply idx by the first 64-bit word of the newly
// selected entry, add the product halves to (al, ah), store (al, ah) into
// that entry, then xor the entry's previous words back into (al, ah).
// hi/lo/cl/ch are temporaries shared by all lanes.
uint64_t hi, lo, cl, ch;
// Lane 0.
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
// Lane 1.
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
// Lane 2.
cl = ((uint64_t*) &l2[idx2 & MASK])[0];
ch = ((uint64_t*) &l2[idx2 & MASK])[1];
lo = __umul128(idx2, cl, &hi);
al2 += hi;
ah2 += lo;
((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
ah2 ^= ch;
al2 ^= cl;
idx2 = al2;
}
// Fold each scratchpad back into its state, then permute the state.
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l2, (__m128i*) h2);
keccakf(h0, 24);
keccakf(h1, 24);
keccakf(h2, 24);
// The finalizer is picked from extra_hashes by the low two bits (index 0..3)
// of the first state element of each lane.
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, static_cast<char*>(output) + 32);
extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, static_cast<char*>(output) + 64);
}
};
// Specialization of CryptoNightMultiHash for NUM_HASH_BLOCKS == 4.
// The four hash lanes are written out by hand (suffixes 0..3) rather than
// iterated over with a hash-block index.
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 4>
{
public:
// input:  four blobs of `size` bytes each, read at offsets 0, size, 2 * size and 3 * size.
// output: one result per lane, written at byte offsets 0, 32, 64 and 96.
// ctx:    ctx->state[0..3] hold the per-lane state; ctx->memory is addressed
//         at offsets 0, MEM, 2 * MEM and 3 * MEM as the per-lane scratchpads.
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
// Absorb each input blob into its own state.
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200);
keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200);
// Per-lane scratchpad base pointers. They are declared const but are written
// to below through (__m128i*) / (uint64_t*) casts.
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM;
const uint8_t* l3 = ctx->memory + 3 * MEM;
// 64-bit views of the per-lane states.
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state[1]);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx->state[2]);
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx->state[3]);
// Fill each scratchpad from its state.
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h2, (__m128i*) l2);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h3, (__m128i*) l3);
// Initial loop registers: (al, ah) and bx are derived from state words 0..7.
uint64_t al0 = h0[0] ^h0[4];
uint64_t al1 = h1[0] ^h1[4];
uint64_t al2 = h2[0] ^h2[4];
uint64_t al3 = h3[0] ^h3[4];
uint64_t ah0 = h0[1] ^h0[5];
uint64_t ah1 = h1[1] ^h1[5];
uint64_t ah2 = h2[1] ^h2[5];
uint64_t ah3 = h3[1] ^h3[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
// The first scratchpad index equals the initial al value.
uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4];
uint64_t idx3 = h3[0] ^h3[4];
for (size_t i = 0; i < ITERATIONS; i++) {
// Step 1: load the 16-byte scratchpad entry selected by idx & MASK.
__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
__m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
__m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
// Step 2: one AES round keyed with (ah, al); SOFT_AES selects the software
// routine instead of the _mm_aesenc_si128 intrinsic.
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
} else {
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
cx3 = _mm_aesenc_si128(cx3, _mm_set_epi64x(ah3, al3));
}
// Step 3: write bx ^ cx back to the entry that was just read.
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
_mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3));
// Step 4: the next index comes from cx; cx becomes the bx of the next iteration.
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2);
idx3 = EXTRACT64(cx3);
bx0 = cx0;
bx1 = cx1;
bx2 = cx2;
bx3 = cx3;
// Step 5 (per lane): multiply idx by the first 64-bit word of the newly
// selected entry, add the product halves to (al, ah), store (al, ah) into
// that entry, then xor the entry's previous words back into (al, ah).
// hi/lo/cl/ch are temporaries shared by all lanes.
uint64_t hi, lo, cl, ch;
// Lane 0.
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
// Lane 1.
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
// Lane 2.
cl = ((uint64_t*) &l2[idx2 & MASK])[0];
ch = ((uint64_t*) &l2[idx2 & MASK])[1];
lo = __umul128(idx2, cl, &hi);
al2 += hi;
ah2 += lo;
((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
ah2 ^= ch;
al2 ^= cl;
idx2 = al2;
// Lane 3.
cl = ((uint64_t*) &l3[idx3 & MASK])[0];
ch = ((uint64_t*) &l3[idx3 & MASK])[1];
lo = __umul128(idx3, cl, &hi);
al3 += hi;
ah3 += lo;
((uint64_t*) &l3[idx3 & MASK])[0] = al3;
((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
ah3 ^= ch;
al3 ^= cl;
idx3 = al3;
}
// Fold each scratchpad back into its state, then permute the state.
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l2, (__m128i*) h2);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l3, (__m128i*) h3);
keccakf(h0, 24);
keccakf(h1, 24);
keccakf(h2, 24);
keccakf(h3, 24);
// The finalizer is picked from extra_hashes by the low two bits (index 0..3)
// of the first state element of each lane.
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, static_cast<char*>(output) + 32);
extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, static_cast<char*>(output) + 64);
extra_hashes[ctx->state[3][0] & 3](ctx->state[3], 200, static_cast<char*>(output) + 96);
}
};
// Specialization of CryptoNightMultiHash for NUM_HASH_BLOCKS == 5.
// The five hash lanes are written out by hand (suffixes 0..4) rather than
// iterated over with a hash-block index.
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
class CryptoNightMultiHash<ITERATIONS, MEM, MASK, SOFT_AES, 5>
{
public:
// input:  five blobs of `size` bytes each, read at offsets 0, size, ..., 4 * size.
// output: one result per lane, written at byte offsets 0, 32, 64, 96 and 128.
// ctx:    ctx->state[0..4] hold the per-lane state; ctx->memory is addressed
//         at offsets 0, MEM, ..., 4 * MEM as the per-lane scratchpads.
inline static void hash(const void* __restrict__ input,
size_t size,
void* __restrict__ output,
cryptonight_ctx* __restrict__ ctx)
{
// Absorb each input blob into its own state.
keccak((const uint8_t*) input, (int) size, ctx->state[0], 200);
keccak((const uint8_t*) input + size, (int) size, ctx->state[1], 200);
keccak((const uint8_t*) input + 2 * size, (int) size, ctx->state[2], 200);
keccak((const uint8_t*) input + 3 * size, (int) size, ctx->state[3], 200);
keccak((const uint8_t*) input + 4 * size, (int) size, ctx->state[4], 200);
// Per-lane scratchpad base pointers. They are declared const but are written
// to below through (__m128i*) / (uint64_t*) casts.
const uint8_t* l0 = ctx->memory;
const uint8_t* l1 = ctx->memory + MEM;
const uint8_t* l2 = ctx->memory + 2 * MEM;
const uint8_t* l3 = ctx->memory + 3 * MEM;
const uint8_t* l4 = ctx->memory + 4 * MEM;
// 64-bit views of the per-lane states.
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state[0]);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state[1]);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx->state[2]);
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx->state[3]);
uint64_t* h4 = reinterpret_cast<uint64_t*>(ctx->state[4]);
// Fill each scratchpad from its state.
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h2, (__m128i*) l2);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h3, (__m128i*) l3);
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h4, (__m128i*) l4);
// Initial loop registers: (al, ah) and bx are derived from state words 0..7.
uint64_t al0 = h0[0] ^h0[4];
uint64_t al1 = h1[0] ^h1[4];
uint64_t al2 = h2[0] ^h2[4];
uint64_t al3 = h3[0] ^h3[4];
uint64_t al4 = h4[0] ^h4[4];
uint64_t ah0 = h0[1] ^h0[5];
uint64_t ah1 = h1[1] ^h1[5];
uint64_t ah2 = h2[1] ^h2[5];
uint64_t ah3 = h3[1] ^h3[5];
uint64_t ah4 = h4[1] ^h4[5];
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
__m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
// The first scratchpad index equals the initial al value.
uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4];
uint64_t idx3 = h3[0] ^h3[4];
uint64_t idx4 = h4[0] ^h4[4];
for (size_t i = 0; i < ITERATIONS; i++) {
// Step 1: load the 16-byte scratchpad entry selected by idx & MASK.
__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
__m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
__m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
__m128i cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
// Step 2: one AES round keyed with (ah, al); SOFT_AES selects the software
// routine instead of the _mm_aesenc_si128 intrinsic.
if (SOFT_AES) {
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
cx4 = soft_aesenc(cx4, _mm_set_epi64x(ah4, al4));
} else {
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
cx3 = _mm_aesenc_si128(cx3, _mm_set_epi64x(ah3, al3));
cx4 = _mm_aesenc_si128(cx4, _mm_set_epi64x(ah4, al4));
}
// Step 3: write bx ^ cx back to the entry that was just read.
_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
_mm_store_si128((__m128i*) &l2[idx2 & MASK], _mm_xor_si128(bx2, cx2));
_mm_store_si128((__m128i*) &l3[idx3 & MASK], _mm_xor_si128(bx3, cx3));
_mm_store_si128((__m128i*) &l4[idx4 & MASK], _mm_xor_si128(bx4, cx4));
// Step 4: the next index comes from cx; cx becomes the bx of the next iteration.
idx0 = EXTRACT64(cx0);
idx1 = EXTRACT64(cx1);
idx2 = EXTRACT64(cx2);
idx3 = EXTRACT64(cx3);
idx4 = EXTRACT64(cx4);
bx0 = cx0;
bx1 = cx1;
bx2 = cx2;
bx3 = cx3;
bx4 = cx4;
// Step 5 (per lane): multiply idx by the first 64-bit word of the newly
// selected entry, add the product halves to (al, ah), store (al, ah) into
// that entry, then xor the entry's previous words back into (al, ah).
// hi/lo/cl/ch are temporaries shared by all lanes.
uint64_t hi, lo, cl, ch;
// Lane 0.
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*) &l0[idx0 & MASK])[0] = al0;
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
ah0 ^= ch;
al0 ^= cl;
idx0 = al0;
// Lane 1.
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*) &l1[idx1 & MASK])[0] = al1;
((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
ah1 ^= ch;
al1 ^= cl;
idx1 = al1;
// Lane 2.
cl = ((uint64_t*) &l2[idx2 & MASK])[0];
ch = ((uint64_t*) &l2[idx2 & MASK])[1];
lo = __umul128(idx2, cl, &hi);
al2 += hi;
ah2 += lo;
((uint64_t*) &l2[idx2 & MASK])[0] = al2;
((uint64_t*) &l2[idx2 & MASK])[1] = ah2;
ah2 ^= ch;
al2 ^= cl;
idx2 = al2;
// Lane 3.
cl = ((uint64_t*) &l3[idx3 & MASK])[0];
ch = ((uint64_t*) &l3[idx3 & MASK])[1];
lo = __umul128(idx3, cl, &hi);
al3 += hi;
ah3 += lo;
((uint64_t*) &l3[idx3 & MASK])[0] = al3;
((uint64_t*) &l3[idx3 & MASK])[1] = ah3;
ah3 ^= ch;
al3 ^= cl;
idx3 = al3;
// Lane 4.
cl = ((uint64_t*) &l4[idx4 & MASK])[0];
ch = ((uint64_t*) &l4[idx4 & MASK])[1];
lo = __umul128(idx4, cl, &hi);
al4 += hi;
ah4 += lo;
((uint64_t*) &l4[idx4 & MASK])[0] = al4;
((uint64_t*) &l4[idx4 & MASK])[1] = ah4;
ah4 ^= ch;
al4 ^= cl;
idx4 = al4;
}
// Fold each scratchpad back into its state, then permute the state.
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l2, (__m128i*) h2);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l3, (__m128i*) h3);
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l4, (__m128i*) h4);
keccakf(h0, 24);
keccakf(h1, 24);
keccakf(h2, 24);
keccakf(h3, 24);
keccakf(h4, 24);
// The finalizer is picked from extra_hashes by the low two bits (index 0..3)
// of the first state element of each lane.
extra_hashes[ctx->state[0][0] & 3](ctx->state[0], 200, static_cast<char*>(output));
extra_hashes[ctx->state[1][0] & 3](ctx->state[1], 200, static_cast<char*>(output) + 32);
extra_hashes[ctx->state[2][0] & 3](ctx->state[2], 200, static_cast<char*>(output) + 64);
extra_hashes[ctx->state[3][0] & 3](ctx->state[3], 200, static_cast<char*>(output) + 96);
extra_hashes[ctx->state[4][0] & 3](ctx->state[4], 200, static_cast<char*>(output) + 128);
}
};
#endif /* __CRYPTONIGHT_X86_H__ */

View file

@ -1,7 +1,14 @@
{
"algo": "cryptonight", // cryptonight (default) or cryptonight-lite
"av": 0, // algorithm variation, 0 auto select
"doublehash-thread-mask" : null, // for av=2/4 only, limits doublehash to given threads (mask), mask "0x3" means run doublehash on thread 0 and 1 only (default: all threads)
"av": null, // DEPRECATED: algorithm variation, (0 auto,
// 1 -> (aesni=1, multihash-factor=1),
// 2 -> (aesni=1, multihash-factor=2),
// 3 -> (aesni=2, multihash-factor=1),
// 4 -> (aesni=2, multihash-factor=2))
"aesni": 0, // selection of AES-NI mode (0 auto, 1 on, 2 off)
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
"colors": true, // false to disable colored output
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
@ -14,7 +21,6 @@
"retry-pause": 5, // time to pause between retries
"safe": false, // true to safe adjust threads and av settings for current CPU
"syslog": false, // use system log for output messages
"threads": null, // number of miner threads
"pools": [
{
"url": "", // URL of mining server

View file

@ -26,24 +26,24 @@
#define __VERSION_H__
#ifdef XMRIG_CC_SERVER
#define APP_ID "xmrigCC"
#define APP_ID "XMRigCC"
#define APP_NAME "XMRigCC"
#define APP_DESC "XMRigCC Command'n'Control Server"
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
# else
#define APP_ID "xmrigCC"
#define APP_ID "XMRigCC"
#define APP_NAME "XMRigCC"
#define APP_DESC "XMRigCC CPU miner"
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
#endif
#define APP_VERSION "1.3.2 (based on XMRig 2.4.3)"
#define APP_VERSION "1.4.0 (based on XMRig 2.4.4)"
#define APP_DOMAIN ""
#define APP_SITE "https://github.com/Bendr0id/xmrigCC"
#define APP_KIND "cpu"
#define APP_VER_MAJOR 1
#define APP_VER_MINOR 3
#define APP_VER_BUILD 2
#define APP_VER_MINOR 4
#define APP_VER_BUILD 0
#define APP_VER_REV 0
#ifndef NDEBUG

View file

@ -1,152 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <thread>
#include "crypto/CryptoNight.h"
#include "workers/DoubleWorker.h"
#include "workers/Workers.h"
class DoubleWorker::State
{
public:
inline State() :
nonce1(0),
nonce2(0)
{}
Job job;
uint32_t nonce1;
uint32_t nonce2;
uint8_t blob[84 * 2];
};
DoubleWorker::DoubleWorker(Handle *handle)
: Worker(handle)
{
m_state = new State();
m_pausedState = new State();
}
DoubleWorker::~DoubleWorker()
{
delete m_state;
delete m_pausedState;
}
void DoubleWorker::start()
{
while (Workers::sequence() > 0) {
if (Workers::isPaused()) {
do {
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
while (Workers::isPaused());
if (Workers::sequence() == 0) {
break;
}
consumeJob();
}
while (!Workers::isOutdated(m_sequence)) {
if ((m_count & 0xF) == 0) {
storeStats();
}
m_count += 2;
*Job::nonce(m_state->blob) = ++m_state->nonce1;
*Job::nonce(m_state->blob + m_state->job.size()) = ++m_state->nonce2;
CryptoNight::hashDouble(m_state->blob, m_state->job.size(), m_hash, m_ctx);
if (*reinterpret_cast<uint64_t*>(m_hash + 24) < m_state->job.target()) {
Workers::submit(JobResult(m_state->job.poolId(), m_state->job.id(), m_state->nonce1, m_hash, m_state->job.diff()), m_id);
}
if (*reinterpret_cast<uint64_t*>(m_hash + 32 + 24) < m_state->job.target()) {
Workers::submit(JobResult(m_state->job.poolId(), m_state->job.id(), m_state->nonce2, m_hash + 32, m_state->job.diff()), m_id);
}
std::this_thread::yield();
}
consumeJob();
}
}
bool DoubleWorker::resume(const Job &job)
{
if (m_state->job.poolId() == -1 && job.poolId() >= 0 && job.id() == m_pausedState->job.id()) {
*m_state = *m_pausedState;
return true;
}
return false;
}
void DoubleWorker::consumeJob()
{
Job job = Workers::job();
m_sequence = Workers::sequence();
if (m_state->job == job) {
return;
}
save(job);
if (resume(job)) {
return;
}
m_state->job = std::move(job);
memcpy(m_state->blob, m_state->job.blob(), m_state->job.size());
memcpy(m_state->blob + m_state->job.size(), m_state->job.blob(), m_state->job.size());
if (m_state->job.isNicehash()) {
m_state->nonce1 = (*Job::nonce(m_state->blob) & 0xff000000U) + (0xffffffU / (m_threads * 2) * m_id);
m_state->nonce2 = (*Job::nonce(m_state->blob + m_state->job.size()) & 0xff000000U) + (0xffffffU / (m_threads * 2) * (m_id + m_threads));
}
else {
m_state->nonce1 = 0xffffffffU / (m_threads * 2) * m_id;
m_state->nonce2 = 0xffffffffU / (m_threads * 2) * (m_id + m_threads);
}
}
void DoubleWorker::save(const Job &job)
{
if (job.poolId() == -1 && m_state->job.poolId() >= 0) {
*m_pausedState = *m_state;
}
}

188
src/workers/MultiWorker.cpp Normal file
View file

@ -0,0 +1,188 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
* Copyright 2018 Sebastian Stolzenberg <https://github.com/sebastianstolzenberg>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cstring>
#include <limits>
#include <thread>
#include "crypto/CryptoNight.h"
#include "workers/MultiWorker.h"
#include "workers/Workers.h"
#include "Mem.h"
// Worker that computes `hashMultiplier` hashes per CryptoNight::hash() call.
// The class is private to this translation unit; other code obtains instances
// through createMultiWorker().
class MultiWorker : public Worker
{
public:
// handle:         passed through to the Worker base class.
// hashMultiplier: number of hashes computed per iteration; sizes m_hash and
//                 both State objects.
explicit MultiWorker(Handle *handle, size_t hashMultiplier);
~MultiWorker();
// Mining loop; runs until Workers::sequence() becomes 0.
void start() override;
private:
// Copies m_pausedState back into m_state when `job` matches the saved job.
bool resume(const Job &job);
// Fetches the current job from Workers and prepares blobs and nonces for it.
void consumeJob();
// Copies m_state into m_pausedState when `job` has pool id -1.
void save(const Job &job);
class State;
// Output buffer for the computed hashes: 32 bytes per hash.
uint8_t* m_hash;
// Job, nonces and blobs currently being hashed.
State *m_state;
// Copy of m_state taken by save(), restored by resume().
State *m_pausedState;
// Number of hashes computed per iteration.
size_t m_hashMultiplier;
};
// Per-worker hashing state: the job being worked on plus, for every hash
// computed in parallel, its current nonce and its own copy of the job blob
// (kBlobSize bytes are reserved per blob).
//
// The nonce and blob arrays are heap allocations owned by this object, so a
// copy has to duplicate them. MultiWorker::save() and MultiWorker::resume()
// assign one State to another; with the compiler-generated assignment only the
// pointers would be copied, leaking one pair of arrays and making both States
// delete[] the same memory on destruction.
class MultiWorker::State
{
public:
    // Bytes reserved for each blob copy.
    enum { kBlobSize = 84 };

    // Allocates room for `hashMultiplier` nonces and blobs; all nonces start at
    // 0 and the blob storage is zeroed so it can be copied before first use.
    explicit State(size_t hashMultiplier) :
        nonces(new uint32_t[hashMultiplier]()),
        blob(new uint8_t[kBlobSize * hashMultiplier]()),
        numHashes(hashMultiplier)
    {
    }

    // Deep copy: the new object gets its own arrays.
    State(const State& other) :
        job(other.job),
        nonces(new uint32_t[other.numHashes]),
        blob(new uint8_t[kBlobSize * other.numHashes]),
        numHashes(other.numHashes)
    {
        copyBuffersFrom(other);
    }

    // Deep assignment: copies the job and the contents of both arrays.
    State& operator=(const State& other)
    {
        if (this == &other) {
            return *this;
        }

        if (numHashes != other.numHashes) {
            // Take over correctly sized arrays from a temporary; its destructor
            // releases the arrays this object owned before.
            State resized(other.numHashes);

            uint32_t* oldNonces = nonces;
            uint8_t* oldBlob = blob;
            const size_t oldNumHashes = numHashes;

            nonces = resized.nonces;
            blob = resized.blob;
            numHashes = resized.numHashes;

            resized.nonces = oldNonces;
            resized.blob = oldBlob;
            resized.numHashes = oldNumHashes;
        }

        job = other.job;
        copyBuffersFrom(other);

        return *this;
    }

    ~State()
    {
        delete[] blob;
        delete[] nonces;
    }

    Job job;
    uint32_t* nonces;  // numHashes entries
    uint8_t* blob;     // kBlobSize * numHashes bytes
    size_t numHashes;  // number of nonces / blob copies held

private:
    // Copies the array contents of `other`; both objects must already have
    // arrays sized for the same numHashes.
    void copyBuffersFrom(const State& other)
    {
        for (size_t i = 0; i < numHashes; ++i) {
            nonces[i] = other.nonces[i];
        }

        const size_t blobBytes = kBlobSize * numHashes;
        for (size_t i = 0; i < blobBytes; ++i) {
            blob[i] = other.blob[i];
        }
    }
};
// Allocates the hash output buffer (32 bytes per hash) and the active and
// paused State objects, each sized for `hashMultiplier` hashes.
MultiWorker::MultiWorker(Handle *handle, size_t hashMultiplier)
: Worker(handle),
m_hash(new uint8_t[32 * hashMultiplier]),
m_state(new MultiWorker::State(hashMultiplier)),
m_pausedState(new MultiWorker::State(hashMultiplier)),
m_hashMultiplier(hashMultiplier)
{
}
// Releases everything allocated in the constructor.
MultiWorker::~MultiWorker()
{
delete[] m_hash;
delete m_state;
delete m_pausedState;
}
// Mining loop of this worker. Runs until Workers::sequence() reports 0.
void MultiWorker::start()
{
while (Workers::sequence() > 0) {
if (Workers::isPaused()) {
// Poll every 200 ms until the pause is lifted.
do {
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
while (Workers::isPaused());
// A sequence of 0 after the pause ends the outer loop.
if (Workers::sequence() == 0) {
break;
}
consumeJob();
}
// Hash until Workers reports the sequence recorded by consumeJob() as outdated.
while (!Workers::isOutdated(m_sequence)) {
// Store stats whenever the low four bits of the hash counter are zero.
if ((m_count & 0xF) == 0) {
storeStats();
}
m_count += m_hashMultiplier;
// Advance every nonce and write it into its copy of the blob.
for (size_t i=0; i < m_hashMultiplier; ++i) {
*Job::nonce(m_state->blob + i * m_state->job.size()) = ++m_state->nonces[i];
}
CryptoNight::hash(m_hashMultiplier, m_state->blob, m_state->job.size(), m_hash, m_ctx);
// Each hash occupies 32 bytes of m_hash; the 64-bit value at byte offset 24
// of a hash is compared against the job target.
for (size_t i=0; i < m_hashMultiplier; ++i) {
if (*reinterpret_cast<uint64_t *>(m_hash + 24 + i * 32) < m_state->job.target()) {
Workers::submit(JobResult(m_state->job.poolId(), m_state->job.id(), m_state->nonces[i], m_hash + i * 32,
m_state->job.diff()), m_id);
}
}
std::this_thread::yield();
}
consumeJob();
}
}
// Restores the saved state when the active job has pool id -1, the incoming
// job has a pool id >= 0 and its id equals the id of the saved job.
// Returns true when the saved state was restored.
bool MultiWorker::resume(const Job &job)
{
    if (m_state->job.poolId() != -1) {
        return false;
    }

    if (job.poolId() < 0) {
        return false;
    }

    if (!(job.id() == m_pausedState->job.id())) {
        return false;
    }

    *m_state = *m_pausedState;
    return true;
}
void MultiWorker::consumeJob()
{
Job job = Workers::job();
m_sequence = Workers::sequence();
if (m_state->job == job) {
return;
}
save(job);
if (resume(job)) {
return;
}
m_state->job = std::move(job);
for (size_t i=0; i < m_hashMultiplier; ++i) {
memcpy(m_state->blob + i * m_state->job.size(), m_state->job.blob(), m_state->job.size());
if (m_state->job.isNicehash()) {
m_state->nonces[i] = (*Job::nonce(m_state->blob + i * m_state->job.size()) & 0xff000000U) +
(0xffffffU / (m_threads * Mem::hashFactor()) * (m_id + i * m_threads));
}
else {
m_state->nonces[i] = std::numeric_limits<uint32_t>::max() / (m_threads *
Mem::hashFactor()) *
(m_id + i * m_threads);
}
}
}
// Copies the active state into m_pausedState when the incoming job has pool
// id -1 while the active job has a pool id >= 0.
void MultiWorker::save(const Job &job)
{
    if (job.poolId() != -1) {
        return;
    }

    if (m_state->job.poolId() < 0) {
        return;
    }

    *m_pausedState = *m_state;
}
// Factory: returns a newly allocated MultiWorker that computes numHashes
// hashes per round for the given handle.
Worker* createMultiWorker(size_t numHashes, Handle *handle) {
    MultiWorker *worker = new MultiWorker(handle, numHashes);
    return worker;
}

View file

@ -5,6 +5,7 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
* Copyright 2018 Sebastian Stolzenberg <https://github.com/sebastianstolzenberg>
*
*
* This program is free software: you can redistribute it and/or modify
@ -21,10 +22,11 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __SINGLEWORKER_H__
#define __SINGLEWORKER_H__
#ifndef __MULTIWORKER_H__
#define __MULTIWORKER_H__
#include "align.h"
#include "net/Job.h"
#include "net/JobResult.h"
#include "workers/Worker.h"
@ -32,23 +34,7 @@
class Handle;
// Worker that computes one hash per round.
class SingleWorker : public Worker
{
public:
// handle is forwarded to the Worker base class.
SingleWorker(Handle *handle);
// Hashing loop; runs until Workers::sequence() reports 0.
void start() override;
private:
// Restores m_paused when `job` has the same id; returns true when restored.
bool resume(const Job &job);
// Fetches the current job from Workers and sets the starting nonce.
void consumeJob();
// Copies m_job into m_paused when `job` has pool id -1 while m_job has a
// pool id >= 0.
void save(const Job &job);
// Job currently being hashed.
Job m_job;
// Copy of the job taken by save().
Job m_paused;
// Holds the current nonce and receives the hash output.
JobResult m_result;
};
// Factory for the worker implementation; numHashes is the number of hashes
// the returned worker computes per round.
Worker* createMultiWorker(size_t numHashes, Handle *handle);
#endif /* __SINGLEWORKER_H__ */

View file

@ -1,121 +0,0 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2016-2017 XMRig <support@xmrig.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <thread>
#include "crypto/CryptoNight.h"
#include "workers/SingleWorker.h"
#include "workers/Workers.h"
// Nothing to set up beyond the Worker base class.
SingleWorker::SingleWorker(Handle *handle) :
    Worker(handle)
{
}
// Main loop of the worker thread. Runs until Workers::sequence() reports 0.
// While the workers are paused the thread polls every 200 ms; otherwise it
// hashes one blob per round until the job is outdated.
void SingleWorker::start()
{
    const std::chrono::milliseconds pausePoll(200);

    while (Workers::sequence() > 0) {
        if (Workers::isPaused()) {
            // Sleep at least once, then keep polling until the pause is lifted.
            std::this_thread::sleep_for(pausePoll);
            while (Workers::isPaused()) {
                std::this_thread::sleep_for(pausePoll);
            }

            if (Workers::sequence() == 0) {
                break;
            }

            consumeJob();
        }

        // One pass of this loop is one hash; the thread yields after each.
        for (; !Workers::isOutdated(m_sequence); std::this_thread::yield()) {
            if ((m_count & 0xF) == 0) {
                storeStats();
            }

            m_count++;

            const auto nextNonce = ++m_result.nonce;
            *m_job.nonce() = nextNonce;

            CryptoNight::hash(m_job.blob(), m_job.size(), m_result.result, m_ctx);

            // The 64-bit value at offset 24 of the hash is compared against
            // the job target.
            const uint64_t value = *reinterpret_cast<uint64_t*>(m_result.result + 24);
            if (value < m_job.target()) {
                Workers::submit(m_result, m_id);
            }
        }

        consumeJob();
    }
}
// Restores the saved job when the active job has pool id -1, the incoming job
// has a pool id >= 0 and its id equals the id of the saved job. The result
// object is rebuilt from the restored job and continues at the nonce stored
// in its blob. Returns true when the saved job was restored.
bool SingleWorker::resume(const Job &job)
{
    if (m_job.poolId() != -1) {
        return false;
    }

    if (job.poolId() < 0) {
        return false;
    }

    if (!(job.id() == m_paused.id())) {
        return false;
    }

    m_job          = m_paused;
    m_result       = m_job;
    m_result.nonce = *m_job.nonce();

    return true;
}
void SingleWorker::consumeJob()
{
Job job = Workers::job();
m_sequence = Workers::sequence();
if (m_job == job) {
return;
}
save(job);
if (resume(job)) {
return;
}
m_job = std::move(job);
m_result = m_job;
if (m_job.isNicehash()) {
m_result.nonce = (*m_job.nonce() & 0xff000000U) + (0xffffffU / (m_threads * 2) * m_id);
}
else {
m_result.nonce = 0xffffffffU / (m_threads * 2) * m_id;
}
}
// Copies the active job into m_paused when the incoming job has pool id -1
// while the active job has a pool id >= 0.
void SingleWorker::save(const Job &job)
{
    if (job.poolId() != -1) {
        return;
    }

    if (m_job.poolId() < 0) {
        return;
    }

    m_paused = m_job;
}

View file

@ -30,11 +30,9 @@
#include "api/Api.h"
#include "interfaces/IJobResultListener.h"
#include "Mem.h"
#include "Options.h"
#include "workers/DoubleWorker.h"
#include "workers/MultiWorker.h"
#include "workers/Handle.h"
#include "workers/Hashrate.h"
#include "workers/SingleWorker.h"
#include "workers/Workers.h"
@ -118,7 +116,7 @@ void Workers::start(int64_t affinity, int priority)
uv_timer_start(&m_timer, Workers::onTick, 500, 500);
for (int i = 0; i < threads; ++i) {
Handle *handle = new Handle(i, threads, affinity, priority);
auto handle = new Handle(i, threads, affinity, priority);
m_workers.push_back(handle);
handle->start(Workers::onReady);
}
@ -134,8 +132,8 @@ void Workers::stop()
m_paused = 0;
m_sequence = 0;
for (size_t i = 0; i < m_workers.size(); ++i) {
m_workers[i]->join();
for (auto worker : m_workers) {
worker->join();
}
}
@ -153,13 +151,7 @@ void Workers::submit(const JobResult &result, int threadId)
void Workers::onReady(void *arg)
{
auto handle = static_cast<Handle*>(arg);
if (Mem::isDoubleHash(handle->threadId())) {
handle->setWorker(new DoubleWorker(handle));
}
else {
handle->setWorker(new SingleWorker(handle));
}
handle->setWorker(createMultiWorker(Mem::getThreadHashFactor(handle->threadId()), handle));
handle->worker()->start();
}
@ -185,12 +177,12 @@ void Workers::onResult(uv_async_t *handle)
void Workers::onTick(uv_timer_t *handle)
{
for (Handle *handle : m_workers) {
if (!handle->worker()) {
for (auto workerHandle : m_workers) {
if (!workerHandle->worker()) {
return;
}
m_hashrate->add(handle->threadId(), handle->worker()->hashCount(), handle->worker()->timestamp());
m_hashrate->add(workerHandle->threadId(), workerHandle->worker()->hashCount(), workerHandle->worker()->timestamp());
}
if ((m_ticks++ & 0xF) == 0) {

View file

@ -4,6 +4,7 @@ cmake_minimum_required(VERSION 3.0)
include(CTest)
add_subdirectory(unity)
add_subdirectory(cryptonight)
add_subdirectory(cryptonight_lite)
add_subdirectory(autoconf)
#add_subdirectory(cryptonight)
#add_subdirectory(cryptonight_lite)
#add_subdirectory(autoconf)
add_subdirectory(cpu)

15
test/cpu/CMakeLists.txt Normal file
View file

@ -0,0 +1,15 @@
# Unit test executable for src/Cpu.cpp. Cpu.cpp is compiled directly into the
# test together with test_cpu.cpp.
add_executable(test_cpu
    test_cpu.cpp
    ../../src/Cpu.cpp
)

# Everything the test needs is attached to the target itself instead of the
# directory, so nothing leaks into other targets.
target_include_directories(test_cpu PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../src")
target_compile_definitions(test_cpu PRIVATE BUILD_TEST)
target_link_libraries(test_cpu PRIVATE unity xmrig_os_dependencies ${EXTRA_LIBS})

# NOTE(review): these are C flags, but both sources of this target are C++,
# so they look like they have no effect here. Kept unchanged -- confirm
# whether CMAKE_CXX_FLAGS was intended.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -fno-strict-aliasing")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2")

# NAME/COMMAND form: the target name is resolved to the built executable.
add_test(NAME test_cpu COMMAND test_cpu)

281
test/cpu/test_cpu.cpp Normal file
View file

@ -0,0 +1,281 @@
#include <unity.h>
#include <libcpuid.h>
#include <iostream>
#include "Options.h"
#include "Cpu.h"
struct cpu_id_t mockCpuId;
// Test stand-in for cpuid_get_raw_data() (presumably replacing the libcpuid
// function of the same name): ignores its argument and reports success.
int cpuid_get_raw_data(struct cpu_raw_data_t* data)
{
    (void) data;

    const int success = 0;
    return success;
}
// Test stand-in for cpu_identify(): ignores the raw data and hands back a
// byte-wise copy of mockCpuId. Always returns 0.
int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
    (void) raw;

    memcpy(data, &mockCpuId, sizeof(mockCpuId));

    return 0;
}
void setMockedCpu(size_t numProcessors, size_t numCores, size_t numPusPerCore, size_t l3Cache)
{
strcpy(mockCpuId.brand_str, "CPU Test Brand");
mockCpuId.vendor = VENDOR_INTEL;
mockCpuId.num_cores = numCores;
mockCpuId.num_logical_cpus = numCores * numPusPerCore;
mockCpuId.total_logical_cpus = mockCpuId.num_logical_cpus * numProcessors;
mockCpuId.l3_cache = l3Cache;
mockCpuId.l2_cache = 128;
Cpu::init();
}
// Runs Cpu::optimizeParameters() on local copies of the given thread count
// and hash factor and returns the resulting (threads, hash factor) pair.
std::pair<size_t, size_t> testOptimize(size_t numThreads, size_t hashFactor, Options::Algo algo, bool safeMode,
                                       size_t maxCpuUsage = 100)
{
    Cpu::optimizeParameters(numThreads, hashFactor, algo, maxCpuUsage, safeMode);

    return std::make_pair(numThreads, hashFactor);
}
// Expected (thread count, hash factor) pair for one testOptimize() call.
// The expected hash factor is capped at MAX_NUM_HASH_BLOCKS. Comparing
// against an actual pair prints both pairs when they differ.
class Expected
{
public:
    typedef std::pair<size_t, size_t> value_type;

    Expected(size_t threadCount, size_t hashFactor) :
        m_expectedValues(threadCount, std::min(hashFactor, static_cast<size_t>(MAX_NUM_HASH_BLOCKS)))
    {
    }

    // Returns true when actualValues equals the expected pair; otherwise
    // writes a "Mismatch:" line to std::cout and returns false.
    bool operator==(const value_type& actualValues)
    {
        const bool same = (m_expectedValues == actualValues);

        if (!same) {
            std::cout << "Mismatch:";
            std::cout << " expected=(" << m_expectedValues.first << "," << m_expectedValues.second <<")";
            std::cout << " actual=(" << actualValues.first << "," << actualValues.second << ")";
            std::cout << std::endl;
        }

        return same;
    }

private:
    value_type m_expectedValues;
};
// Mocked CPU: 1 processor, 1 core, 1 PU per core, L3 cache value 1024.
// Each assertion compares Expected(threads, hashFactor) with the result of
// testOptimize(threads, hashFactor, algo, safeMode).
void test_cpu_optimizeparameters_p1_c1_v1_m1(void)
{
    const size_t NUM_PROCESSORS = 1;
    const size_t NUM_CORES = 1;
    const size_t NUM_PUS_PER_CORE = 1;
    const size_t L3_CACHE = 1024;

    setMockedCpu(NUM_PROCESSORS, NUM_CORES, NUM_PUS_PER_CORE, L3_CACHE);

    // Unity expects (expected, actual); this order keeps failure output labelled correctly.
    TEST_ASSERT_EQUAL_UINT32(L3_CACHE, Cpu::availableCache());

    // threads = 0, hash factor = 0
    TEST_ASSERT(Expected(1,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT_LITE, false));

    // threads given, hash factor = 0
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 0, Options::ALGO_CRYPTONIGHT_LITE, false));

    // threads = 0, hash factor given
    TEST_ASSERT(Expected(1,1) == testOptimize(0, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(0, 1, Options::ALGO_CRYPTONIGHT_LITE, false));

    // both given
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 1, Options::ALGO_CRYPTONIGHT_LITE, false));

    // 10 threads, without and with safeMode
    TEST_ASSERT(Expected(10,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(10,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(1,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT_LITE, true));

    // hash factor 10, without and with safeMode
    TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT_LITE, true));

    // 10 threads and hash factor 10, without and with safeMode
    TEST_ASSERT(Expected(10,10) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(10,10) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(1,1) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT_LITE, true));
}
// Mocked CPU: 1 processor, 1 core, 2 PUs per core, L3 cache value 2048.
// Each assertion compares Expected(threads, hashFactor) with the result of
// testOptimize(threads, hashFactor, algo, safeMode).
void test_cpu_optimizeparameters_p1_c1_v2_m2(void)
{
    const size_t NUM_PROCESSORS = 1;
    const size_t NUM_CORES = 1;
    const size_t NUM_PUS_PER_CORE = 2;
    const size_t L3_CACHE = 2048;

    setMockedCpu(NUM_PROCESSORS, NUM_CORES, NUM_PUS_PER_CORE, L3_CACHE);

    // Unity expects (expected, actual); this order keeps failure output labelled correctly.
    TEST_ASSERT_EQUAL_UINT32(L3_CACHE, Cpu::availableCache());

    // threads = 0, hash factor = 0
    TEST_ASSERT(Expected(1,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(2,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT_LITE, false));

    // threads given, hash factor = 0
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,2) == testOptimize(1, 0, Options::ALGO_CRYPTONIGHT_LITE, false));

    // threads = 0, hash factor given
    TEST_ASSERT(Expected(1,1) == testOptimize(0, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(2,1) == testOptimize(0, 1, Options::ALGO_CRYPTONIGHT_LITE, false));

    // both given
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 1, Options::ALGO_CRYPTONIGHT_LITE, false));

    // 10 threads, without and with safeMode
    TEST_ASSERT(Expected(10,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(10,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(2,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT_LITE, true));

    // hash factor 10, without and with safeMode
    TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(1,2) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT_LITE, true));

    // 10 threads and hash factor 10, without and with safeMode
    TEST_ASSERT(Expected(10,10) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(10,10) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(2,1) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT_LITE, true));
}
void test_cpu_optimizeparameters_p1_c4_v2_m8(void)
{
const size_t NUM_PROCESSORS = 1;
const size_t NUM_CORES = 4;
const size_t NUM_PUS_PER_CORE = 2;
const size_t L3_CACHE = 8 * 1024;
setMockedCpu(NUM_PROCESSORS, NUM_CORES, NUM_PUS_PER_CORE, L3_CACHE);
TEST_ASSERT_EQUAL_UINT32(Cpu::availableCache(), L3_CACHE);
TEST_ASSERT(Expected(4,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(4,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false, 80));
TEST_ASSERT(Expected(3,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false, 48));
TEST_ASSERT(Expected(3,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false, 38));
TEST_ASSERT(Expected(2,2) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false, 37));
TEST_ASSERT(Expected(2,2) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false, 25));
TEST_ASSERT(Expected(1,4) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false, 24));
TEST_ASSERT(Expected(1,4) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false, 1));
TEST_ASSERT(Expected(1,4) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false, 0));
TEST_ASSERT(Expected(8,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(1,4) == testOptimize(1, 0, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(1,8) == testOptimize(1, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(2,2) == testOptimize(2, 0, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(2,4) == testOptimize(2, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(3,1) == testOptimize(3, 0, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(3,2) == testOptimize(3, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(4,1) == testOptimize(4, 0, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(4,2) == testOptimize(4, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(4,1) == testOptimize(0, 1, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(8,1) == testOptimize(0, 1, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(2,2) == testOptimize(0, 2, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(4,2) == testOptimize(0, 2, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(1,3) == testOptimize(0, 3, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(2,3) == testOptimize(0, 3, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(1,4) == testOptimize(0, 4, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(2,4) == testOptimize(0, 4, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(1,1) == testOptimize(1, 1, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(1,1) == testOptimize(1, 1, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(10,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(10,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(4,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT, true));
TEST_ASSERT(Expected(8,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT_LITE, true));
TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(1,4) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT, true));
TEST_ASSERT(Expected(1,8) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT_LITE, true));
TEST_ASSERT(Expected(10,10) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT, false));
TEST_ASSERT(Expected(10,10) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT_LITE, false));
TEST_ASSERT(Expected(4,1) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT, true));
TEST_ASSERT(Expected(8,1) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT_LITE, true));
}
// Mocked CPU: 1 processor, 8 cores, 1 PU per core, L3 cache value 25 * 1024.
// Each assertion compares Expected(threads, hashFactor) with the result of
// testOptimize(threads, hashFactor, algo, safeMode). Expected caps its hash
// factor at MAX_NUM_HASH_BLOCKS.
void test_cpu_optimizeparameters_p1_c8_v1_m25(void)
{
    const size_t NUM_PROCESSORS = 1;
    const size_t NUM_CORES = 8;
    const size_t NUM_PUS_PER_CORE = 1;
    const size_t L3_CACHE = 25 * 1024;

    setMockedCpu(NUM_PROCESSORS, NUM_CORES, NUM_PUS_PER_CORE, L3_CACHE);

    // Unity expects (expected, actual); this order keeps failure output labelled correctly.
    TEST_ASSERT_EQUAL_UINT32(L3_CACHE, Cpu::availableCache());

    // threads = 0, hash factor = 0
    TEST_ASSERT(Expected(8,1) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(8,3) == testOptimize(0, 0, Options::ALGO_CRYPTONIGHT_LITE, false));

    // threads given, hash factor = 0
    TEST_ASSERT(Expected(1,12) == testOptimize(1, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,25) == testOptimize(1, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(2,6) == testOptimize(2, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(2,12) == testOptimize(2, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(3,4) == testOptimize(3, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(3,8) == testOptimize(3, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(4,3) == testOptimize(4, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(4,6) == testOptimize(4, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(5,2) == testOptimize(5, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(5,5) == testOptimize(5, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(6,2) == testOptimize(6, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(6,4) == testOptimize(6, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(7,1) == testOptimize(7, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(7,3) == testOptimize(7, 0, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(8,1) == testOptimize(8, 0, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(8,3) == testOptimize(8, 0, Options::ALGO_CRYPTONIGHT_LITE, false));

    // threads = 0, hash factor given
    TEST_ASSERT(Expected(8,1) == testOptimize(0, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(8,1) == testOptimize(0, 1, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(6,2) == testOptimize(0, 2, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(8,2) == testOptimize(0, 2, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(4,3) == testOptimize(0, 3, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(8,3) == testOptimize(0, 3, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(3,4) == testOptimize(0, 4, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(6,4) == testOptimize(0, 4, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(2,5) == testOptimize(0, 5, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(5,5) == testOptimize(0, 5, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(2,6) == testOptimize(0, 6, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(4,6) == testOptimize(0, 6, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,7) == testOptimize(0, 7, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(3,7) == testOptimize(0, 7, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,8) == testOptimize(0, 8, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(3,8) == testOptimize(0, 8, Options::ALGO_CRYPTONIGHT_LITE, false));

    // both given
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,1) == testOptimize(1, 1, Options::ALGO_CRYPTONIGHT_LITE, false));

    // 10 threads, without and with safeMode
    TEST_ASSERT(Expected(10,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(10,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(8,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(8,1) == testOptimize(10, 1, Options::ALGO_CRYPTONIGHT_LITE, true));

    // hash factor 10, without and with safeMode
    TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(1,10) == testOptimize(1, 10, Options::ALGO_CRYPTONIGHT_LITE, true));

    // 10 threads and hash factor 10, without and with safeMode
    TEST_ASSERT(Expected(10,10) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT, false));
    TEST_ASSERT(Expected(10,10) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT_LITE, false));
    TEST_ASSERT(Expected(8,1) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT, true));
    TEST_ASSERT(Expected(8,3) == testOptimize(10, 10, Options::ALGO_CRYPTONIGHT_LITE, true));
}
// Unity test runner: executes the four CPU optimisation test cases and
// returns the value produced by UNITY_END().
int main(void)
{
    UNITY_BEGIN();

    RUN_TEST(test_cpu_optimizeparameters_p1_c1_v1_m1);
    RUN_TEST(test_cpu_optimizeparameters_p1_c1_v2_m2);
    RUN_TEST(test_cpu_optimizeparameters_p1_c4_v2_m8);
    RUN_TEST(test_cpu_optimizeparameters_p1_c8_v1_m25);

    const int result = UNITY_END();
    return result;
}