Merge branch 'dev'
This commit is contained in:
commit
cfe2a098ce
19 changed files with 203 additions and 116 deletions
|
@ -1,3 +1,10 @@
|
||||||
|
# v6.3.5
|
||||||
|
- [#1845](https://github.com/xmrig/xmrig/pull/1845) [#1861](https://github.com/xmrig/xmrig/pull/1861) Fixed ARM build and added CMake option `WITH_SSE4_1`.
|
||||||
|
- [#1846](https://github.com/xmrig/xmrig/pull/1846) KawPow: fixed OpenCL memory leak.
|
||||||
|
- [#1849](https://github.com/xmrig/xmrig/pull/1849) [#1859](https://github.com/xmrig/xmrig/pull/1859) RandomX: optimized soft AES code.
|
||||||
|
- [#1850](https://github.com/xmrig/xmrig/pull/1850) [#1852](https://github.com/xmrig/xmrig/pull/1852) General code improvements.
|
||||||
|
- [#1853](https://github.com/xmrig/xmrig/issues/1853) [#1856](https://github.com/xmrig/xmrig/pull/1856) [#1857](https://github.com/xmrig/xmrig/pull/1857) Fixed crash on old CPUs.
|
||||||
|
|
||||||
# v6.3.4
|
# v6.3.4
|
||||||
- [#1823](https://github.com/xmrig/xmrig/pull/1823) RandomX: added new option `scratchpad_prefetch_mode`.
|
- [#1823](https://github.com/xmrig/xmrig/pull/1823) RandomX: added new option `scratchpad_prefetch_mode`.
|
||||||
- [#1827](https://github.com/xmrig/xmrig/pull/1827) [#1831](https://github.com/xmrig/xmrig/pull/1831) Improved nonce iteration performance.
|
- [#1827](https://github.com/xmrig/xmrig/pull/1827) [#1831](https://github.com/xmrig/xmrig/pull/1831) Improved nonce iteration performance.
|
||||||
|
|
|
@ -24,6 +24,7 @@ option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (
|
||||||
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||||
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
||||||
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
||||||
|
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
|
||||||
|
|
||||||
option(BUILD_STATIC "Build static binary" OFF)
|
option(BUILD_STATIC "Build static binary" OFF)
|
||||||
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
||||||
|
|
|
@ -2,9 +2,10 @@ if (NOT CMAKE_SYSTEM_PROCESSOR)
|
||||||
message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined")
|
message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
|
||||||
add_definitions(/DRAPIDJSON_SSE2)
|
add_definitions(/DRAPIDJSON_SSE2)
|
||||||
|
else()
|
||||||
|
set(WITH_SSE4_1 OFF)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT ARM_TARGET)
|
if (NOT ARM_TARGET)
|
||||||
|
@ -41,3 +42,7 @@ if (ARM_TARGET AND ARM_TARGET GREATER 6)
|
||||||
add_definitions(/DXMRIG_ARMv7)
|
add_definitions(/DXMRIG_ARMv7)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (WITH_SSE4_1)
|
||||||
|
add_definitions(/DXMRIG_FEATURE_SSE4_1)
|
||||||
|
endif()
|
||||||
|
|
|
@ -29,8 +29,8 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -msse4.1")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -msse4.1")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
||||||
|
|
||||||
add_definitions(/DHAVE_ROTR)
|
add_definitions(/DHAVE_ROTR)
|
||||||
endif()
|
endif()
|
||||||
|
@ -87,8 +87,8 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||||
else()
|
else()
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -msse4.1")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes -msse4.1")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
||||||
|
|
||||||
check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR)
|
check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR)
|
||||||
if (HAVE_ROTR)
|
if (HAVE_ROTR)
|
||||||
|
|
|
@ -64,6 +64,14 @@ if (WITH_RANDOMX)
|
||||||
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
|
set_property(SOURCE src/crypto/randomx/jit_compiler_a64_static.S PROPERTY LANGUAGE C)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (WITH_SSE4_1)
|
||||||
|
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/blake2b_sse41.c)
|
||||||
|
|
||||||
|
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||||
|
set_source_files_properties(src/crypto/randomx/blake2/blake2b_sse41.c PROPERTIES COMPILE_FLAGS -msse4.1)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
if (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||||
set_source_files_properties(src/crypto/randomx/jit_compiler_x86.cpp PROPERTIES COMPILE_FLAGS -Wno-unused-const-variable)
|
set_source_files_properties(src/crypto/randomx/jit_compiler_x86.cpp PROPERTIES COMPILE_FLAGS -Wno-unused-const-variable)
|
||||||
endif()
|
endif()
|
||||||
|
|
1
doc/build/CMAKE_OPTIONS.md
vendored
1
doc/build/CMAKE_OPTIONS.md
vendored
|
@ -22,6 +22,7 @@ This feature add external dependency to libhwloc (1.10.0+) (except MSVC builds).
|
||||||
* **`-DWITH_EMBEDDED_CONFIG=ON`** Enable [embedded](https://github.com/xmrig/xmrig/issues/957) config support.
|
* **`-DWITH_EMBEDDED_CONFIG=ON`** Enable [embedded](https://github.com/xmrig/xmrig/issues/957) config support.
|
||||||
* **`-DWITH_OPENCL=OFF`** Disable OpenCL backend.
|
* **`-DWITH_OPENCL=OFF`** Disable OpenCL backend.
|
||||||
* **`-DWITH_CUDA=OFF`** Disable CUDA backend.
|
* **`-DWITH_CUDA=OFF`** Disable CUDA backend.
|
||||||
|
* **`-DWITH_SSE4_1=OFF`** Disable SSE 4.1 for Blake2 (useful for arm builds).
|
||||||
|
|
||||||
## Debug options
|
## Debug options
|
||||||
|
|
||||||
|
|
|
@ -69,8 +69,6 @@ OclKawPowRunner::~OclKawPowRunner()
|
||||||
|
|
||||||
delete m_calculateDagKernel;
|
delete m_calculateDagKernel;
|
||||||
|
|
||||||
OclLib::release(m_searchKernel);
|
|
||||||
|
|
||||||
OclLib::release(m_controlQueue);
|
OclLib::release(m_controlQueue);
|
||||||
OclLib::release(m_stop);
|
OclLib::release(m_stop);
|
||||||
|
|
||||||
|
@ -120,8 +118,7 @@ void OclKawPowRunner::run(uint32_t nonce, uint32_t *hashOutput)
|
||||||
void OclKawPowRunner::set(const Job &job, uint8_t *blob)
|
void OclKawPowRunner::set(const Job &job, uint8_t *blob)
|
||||||
{
|
{
|
||||||
m_blockHeight = static_cast<uint32_t>(job.height());
|
m_blockHeight = static_cast<uint32_t>(job.height());
|
||||||
m_searchProgram = OclKawPow::get(*this, m_blockHeight, m_workGroupSize);
|
m_searchKernel = OclKawPow::get(*this, m_blockHeight, m_workGroupSize);
|
||||||
m_searchKernel = OclLib::createKernel(m_searchProgram, "progpow_search");
|
|
||||||
|
|
||||||
const uint32_t epoch = m_blockHeight / KPHash::EPOCH_LENGTH;
|
const uint32_t epoch = m_blockHeight / KPHash::EPOCH_LENGTH;
|
||||||
|
|
||||||
|
|
|
@ -69,7 +69,6 @@ private:
|
||||||
|
|
||||||
KawPow_CalculateDAGKernel* m_calculateDagKernel = nullptr;
|
KawPow_CalculateDAGKernel* m_calculateDagKernel = nullptr;
|
||||||
|
|
||||||
cl_program m_searchProgram = nullptr;
|
|
||||||
cl_kernel m_searchKernel = nullptr;
|
cl_kernel m_searchKernel = nullptr;
|
||||||
|
|
||||||
size_t m_workGroupSize = 256;
|
size_t m_workGroupSize = 256;
|
||||||
|
|
|
@ -54,8 +54,9 @@ namespace xmrig {
|
||||||
class KawPowCacheEntry
|
class KawPowCacheEntry
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program) :
|
inline KawPowCacheEntry(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program, cl_kernel kernel) :
|
||||||
program(program),
|
program(program),
|
||||||
|
kernel(kernel),
|
||||||
m_algo(algo),
|
m_algo(algo),
|
||||||
m_index(index),
|
m_index(index),
|
||||||
m_period(period),
|
m_period(period),
|
||||||
|
@ -65,9 +66,10 @@ public:
|
||||||
inline bool isExpired(uint64_t period) const { return m_period + 1 < period; }
|
inline bool isExpired(uint64_t period) const { return m_period + 1 < period; }
|
||||||
inline bool match(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index) const { return m_algo == algo && m_period == period && m_worksize == worksize && m_index == index; }
|
inline bool match(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index) const { return m_algo == algo && m_period == period && m_worksize == worksize && m_index == index; }
|
||||||
inline bool match(const IOclRunner &runner, uint64_t period, uint32_t worksize) const { return match(runner.algorithm(), period, worksize, runner.deviceIndex()); }
|
inline bool match(const IOclRunner &runner, uint64_t period, uint32_t worksize) const { return match(runner.algorithm(), period, worksize, runner.deviceIndex()); }
|
||||||
inline void release() { OclLib::release(program); }
|
inline void release() { OclLib::release(kernel); OclLib::release(program); }
|
||||||
|
|
||||||
cl_program program;
|
cl_program program;
|
||||||
|
cl_kernel kernel;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Algorithm m_algo;
|
Algorithm m_algo;
|
||||||
|
@ -82,16 +84,16 @@ class KawPowCache
|
||||||
public:
|
public:
|
||||||
KawPowCache() = default;
|
KawPowCache() = default;
|
||||||
|
|
||||||
inline cl_program search(const IOclRunner &runner, uint64_t period, uint32_t worksize) { return search(runner.algorithm(), period, worksize, runner.deviceIndex()); }
|
inline cl_kernel search(const IOclRunner &runner, uint64_t period, uint32_t worksize) { return search(runner.algorithm(), period, worksize, runner.deviceIndex()); }
|
||||||
|
|
||||||
|
|
||||||
inline cl_program search(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index)
|
inline cl_kernel search(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index)
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(m_mutex);
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
|
|
||||||
for (const auto &entry : m_data) {
|
for (const auto &entry : m_data) {
|
||||||
if (entry.match(algo, period, worksize, index)) {
|
if (entry.match(algo, period, worksize, index)) {
|
||||||
return entry.program;
|
return entry.kernel;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,9 +101,10 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void add(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program)
|
void add(const Algorithm &algo, uint64_t period, uint32_t worksize, uint32_t index, cl_program program, cl_kernel kernel)
|
||||||
{
|
{
|
||||||
if (search(algo, period, worksize, index)) {
|
if (search(algo, period, worksize, index)) {
|
||||||
|
OclLib::release(kernel);
|
||||||
OclLib::release(program);
|
OclLib::release(program);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -109,7 +112,7 @@ public:
|
||||||
std::lock_guard<std::mutex> lock(m_mutex);
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
|
|
||||||
gc(period);
|
gc(period);
|
||||||
m_data.emplace_back(algo, period, worksize, index, program);
|
m_data.emplace_back(algo, period, worksize, index, program, kernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -159,15 +162,15 @@ static KawPowCache cache;
|
||||||
class KawPowBuilder
|
class KawPowBuilder
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
cl_program build(const IOclRunner &runner, uint64_t period, uint32_t worksize)
|
cl_kernel build(const IOclRunner &runner, uint64_t period, uint32_t worksize)
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(m_mutex);
|
std::lock_guard<std::mutex> lock(m_mutex);
|
||||||
|
|
||||||
const uint64_t ts = Chrono::steadyMSecs();
|
const uint64_t ts = Chrono::steadyMSecs();
|
||||||
|
|
||||||
cl_program program = cache.search(runner, period, worksize);
|
cl_kernel kernel = cache.search(runner, period, worksize);
|
||||||
if (program) {
|
if (kernel) {
|
||||||
return program;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_int ret;
|
cl_int ret;
|
||||||
|
@ -175,7 +178,7 @@ public:
|
||||||
cl_device_id device = runner.data().device.id();
|
cl_device_id device = runner.data().device.id();
|
||||||
const char *s = source.c_str();
|
const char *s = source.c_str();
|
||||||
|
|
||||||
program = OclLib::createProgramWithSource(runner.ctx(), 1, &s, nullptr, &ret);
|
cl_program program = OclLib::createProgramWithSource(runner.ctx(), 1, &s, nullptr, &ret);
|
||||||
if (ret != CL_SUCCESS) {
|
if (ret != CL_SUCCESS) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@ -199,11 +202,17 @@ public:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
kernel = OclLib::createKernel(program, "progpow_search", &ret);
|
||||||
|
if (ret != CL_SUCCESS) {
|
||||||
|
OclLib::release(program);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
LOG_INFO("%s " YELLOW("KawPow") " program for period " WHITE_BOLD("%" PRIu64) " compiled " BLACK_BOLD("(%" PRIu64 "ms)"), Tags::opencl(), period, Chrono::steadyMSecs() - ts);
|
LOG_INFO("%s " YELLOW("KawPow") " program for period " WHITE_BOLD("%" PRIu64) " compiled " BLACK_BOLD("(%" PRIu64 "ms)"), Tags::opencl(), period, Chrono::steadyMSecs() - ts);
|
||||||
|
|
||||||
cache.add(runner.algorithm(), period, worksize, runner.deviceIndex(), program);
|
cache.add(runner.algorithm(), period, worksize, runner.deviceIndex(), program, kernel);
|
||||||
|
|
||||||
return program;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -382,7 +391,7 @@ public:
|
||||||
static KawPowBuilder builder;
|
static KawPowBuilder builder;
|
||||||
|
|
||||||
|
|
||||||
cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t worksize)
|
cl_kernel OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t worksize)
|
||||||
{
|
{
|
||||||
const uint64_t period = height / KPHash::PERIOD_LENGTH;
|
const uint64_t period = height / KPHash::PERIOD_LENGTH;
|
||||||
|
|
||||||
|
@ -396,9 +405,9 @@ cl_program OclKawPow::get(const IOclRunner &runner, uint64_t height, uint32_t wo
|
||||||
[](uv_work_t *req, int) { delete static_cast<KawPowBaton*>(req->data); }
|
[](uv_work_t *req, int) { delete static_cast<KawPowBaton*>(req->data); }
|
||||||
);
|
);
|
||||||
|
|
||||||
cl_program program = cache.search(runner, period, worksize);
|
cl_kernel kernel = cache.search(runner, period, worksize);
|
||||||
if (program) {
|
if (kernel) {
|
||||||
return program;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
return builder.build(runner, period, worksize);
|
return builder.build(runner, period, worksize);
|
||||||
|
|
|
@ -30,7 +30,7 @@
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
|
|
||||||
using cl_program = struct _cl_program *;
|
using cl_kernel = struct _cl_kernel *;
|
||||||
|
|
||||||
|
|
||||||
namespace xmrig {
|
namespace xmrig {
|
||||||
|
@ -42,7 +42,7 @@ class IOclRunner;
|
||||||
class OclKawPow
|
class OclKawPow
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static cl_program get(const IOclRunner &runner, uint64_t height, uint32_t worksize);
|
static cl_kernel get(const IOclRunner &runner, uint64_t height, uint32_t worksize);
|
||||||
static void clear();
|
static void clear();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -96,7 +96,7 @@ public:
|
||||||
|
|
||||||
inline bool isCN() const { auto f = family(); return f == CN || f == CN_LITE || f == CN_HEAVY || f == CN_PICO; }
|
inline bool isCN() const { auto f = family(); return f == CN || f == CN_LITE || f == CN_HEAVY || f == CN_PICO; }
|
||||||
inline bool isEqual(const Algorithm &other) const { return m_id == other.m_id; }
|
inline bool isEqual(const Algorithm &other) const { return m_id == other.m_id; }
|
||||||
inline bool isValid() const { return m_id != INVALID; }
|
inline bool isValid() const { return m_id != INVALID && family() != UNKNOWN; }
|
||||||
inline const char *name() const { return name(false); }
|
inline const char *name() const { return name(false); }
|
||||||
inline const char *shortName() const { return name(true); }
|
inline const char *shortName() const { return name(true); }
|
||||||
inline Family family() const { return family(m_id); }
|
inline Family family() const { return family(m_id); }
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
xmrig::String::String(const char *str) :
|
xmrig::String::String(const char *str) :
|
||||||
m_size(str == nullptr ? 0 : strlen(str))
|
m_size(str == nullptr ? 0 : strlen(str))
|
||||||
{
|
{
|
||||||
if (m_size == 0) {
|
if (str == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -121,7 +121,7 @@ public:
|
||||||
for (int i = 0; i < Algorithm::MAX; ++i) {
|
for (int i = 0; i < Algorithm::MAX; ++i) {
|
||||||
const Algorithm algo(static_cast<Algorithm::Id>(i));
|
const Algorithm algo(static_cast<Algorithm::Id>(i));
|
||||||
|
|
||||||
if (isEnabled(algo)) {
|
if (algo.isValid() && isEnabled(algo)) {
|
||||||
algorithms.push_back(algo);
|
algorithms.push_back(algo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -260,6 +260,8 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 2, fill_state2); \
|
||||||
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
|
rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + k * 4 + 3, fill_state3);
|
||||||
|
|
||||||
|
switch(softAes) {
|
||||||
|
case 0:
|
||||||
HASH_STATE(0);
|
HASH_STATE(0);
|
||||||
HASH_STATE(1);
|
HASH_STATE(1);
|
||||||
|
|
||||||
|
@ -271,6 +273,19 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
|
||||||
|
|
||||||
scratchpadPtr += 128;
|
scratchpadPtr += 128;
|
||||||
prefetchPtr += 128;
|
prefetchPtr += 128;
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
HASH_STATE(0);
|
||||||
|
FILL_STATE(0);
|
||||||
|
rx_prefetch_t0(prefetchPtr);
|
||||||
|
|
||||||
|
scratchpadPtr += 64;
|
||||||
|
prefetchPtr += 64;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
prefetchPtr = (const char*) scratchpad;
|
prefetchPtr = (const char*) scratchpad;
|
||||||
scratchpadEnd += PREFETCH_DISTANCE;
|
scratchpadEnd += PREFETCH_DISTANCE;
|
||||||
|
|
|
@ -39,40 +39,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "crypto/randomx/blake2/blake2.h"
|
#include "crypto/randomx/blake2/blake2.h"
|
||||||
#include "crypto/randomx/blake2/blake2-impl.h"
|
#include "crypto/randomx/blake2/blake2-impl.h"
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
const uint64_t blake2b_IV[8] = {
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#include <intrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <smmintrin.h>
|
|
||||||
#include "blake2b-round.h"
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static const uint64_t blake2b_IV[8] = {
|
|
||||||
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
||||||
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
||||||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||||
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
|
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
|
||||||
static const uint8_t blake2b_sigma_sse41[12][16] = {
|
|
||||||
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
|
||||||
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
|
||||||
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
|
|
||||||
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
|
|
||||||
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
|
|
||||||
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
|
|
||||||
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
|
|
||||||
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
|
|
||||||
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
|
|
||||||
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
|
|
||||||
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
|
||||||
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static const uint8_t blake2b_sigma[12][16] = {
|
static const uint8_t blake2b_sigma[12][16] = {
|
||||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||||
|
@ -207,46 +179,6 @@ int rx_blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
|
||||||
static void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
|
|
||||||
{
|
|
||||||
__m128i row1l, row1h;
|
|
||||||
__m128i row2l, row2h;
|
|
||||||
__m128i row3l, row3h;
|
|
||||||
__m128i row4l, row4h;
|
|
||||||
__m128i b0, b1;
|
|
||||||
__m128i t0, t1;
|
|
||||||
|
|
||||||
const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
|
|
||||||
const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
|
|
||||||
|
|
||||||
row1l = LOADU(&S->h[0]);
|
|
||||||
row1h = LOADU(&S->h[2]);
|
|
||||||
row2l = LOADU(&S->h[4]);
|
|
||||||
row2h = LOADU(&S->h[6]);
|
|
||||||
row3l = LOADU(&blake2b_IV[0]);
|
|
||||||
row3h = LOADU(&blake2b_IV[2]);
|
|
||||||
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
|
|
||||||
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
|
|
||||||
|
|
||||||
const uint64_t* m = (const uint64_t*)(block);
|
|
||||||
|
|
||||||
for (uint32_t r = 0; r < 12; ++r) {
|
|
||||||
ROUND(r);
|
|
||||||
}
|
|
||||||
|
|
||||||
row1l = _mm_xor_si128(row3l, row1l);
|
|
||||||
row1h = _mm_xor_si128(row3h, row1h);
|
|
||||||
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
|
|
||||||
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
|
|
||||||
row2l = _mm_xor_si128(row4l, row2l);
|
|
||||||
row2h = _mm_xor_si128(row4h, row2h);
|
|
||||||
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
|
|
||||||
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
|
|
||||||
}
|
|
||||||
#undef ROUND
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block) {
|
static void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block) {
|
||||||
uint64_t m[16];
|
uint64_t m[16];
|
||||||
uint64_t v[16];
|
uint64_t v[16];
|
||||||
|
@ -305,9 +237,10 @@ static void rx_blake2b_compress_integer(blake2b_state *S, const uint8_t *block)
|
||||||
#undef ROUND
|
#undef ROUND
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(XMRIG_FEATURE_SSE4_1)
|
||||||
|
|
||||||
uint32_t rx_blake2b_use_sse41 = 0;
|
uint32_t rx_blake2b_use_sse41 = 0;
|
||||||
|
void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t* block);
|
||||||
|
|
||||||
#define rx_blake2b_compress(S, block) \
|
#define rx_blake2b_compress(S, block) \
|
||||||
if (rx_blake2b_use_sse41) \
|
if (rx_blake2b_use_sse41) \
|
||||||
|
|
108
src/crypto/randomx/blake2/blake2b_sse41.c
Normal file
108
src/crypto/randomx/blake2/blake2b_sse41.c
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||||
|
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||||
|
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holder nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||||
|
* https://github.com/P-H-C/phc-winner-argon2
|
||||||
|
* Copyright 2015
|
||||||
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "crypto/randomx/blake2/blake2.h"
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <smmintrin.h>
|
||||||
|
#include "blake2b-round.h"
|
||||||
|
|
||||||
|
|
||||||
|
extern const uint64_t blake2b_IV[8];
|
||||||
|
|
||||||
|
|
||||||
|
static const uint8_t blake2b_sigma_sse41[12][16] = {
|
||||||
|
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
||||||
|
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
||||||
|
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
|
||||||
|
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
|
||||||
|
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
|
||||||
|
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
|
||||||
|
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
|
||||||
|
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
|
||||||
|
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
|
||||||
|
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
|
||||||
|
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
|
||||||
|
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
void rx_blake2b_compress_sse41(blake2b_state* S, const uint8_t *block)
|
||||||
|
{
|
||||||
|
__m128i row1l, row1h;
|
||||||
|
__m128i row2l, row2h;
|
||||||
|
__m128i row3l, row3h;
|
||||||
|
__m128i row4l, row4h;
|
||||||
|
__m128i b0, b1;
|
||||||
|
__m128i t0, t1;
|
||||||
|
|
||||||
|
const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
|
||||||
|
const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
|
||||||
|
|
||||||
|
row1l = LOADU(&S->h[0]);
|
||||||
|
row1h = LOADU(&S->h[2]);
|
||||||
|
row2l = LOADU(&S->h[4]);
|
||||||
|
row2h = LOADU(&S->h[6]);
|
||||||
|
row3l = LOADU(&blake2b_IV[0]);
|
||||||
|
row3h = LOADU(&blake2b_IV[2]);
|
||||||
|
row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
|
||||||
|
row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
|
||||||
|
|
||||||
|
const uint64_t* m = (const uint64_t*)(block);
|
||||||
|
|
||||||
|
for (uint32_t r = 0; r < 12; ++r) {
|
||||||
|
ROUND(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
row1l = _mm_xor_si128(row3l, row1l);
|
||||||
|
row1h = _mm_xor_si128(row3h, row1h);
|
||||||
|
STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
|
||||||
|
STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
|
||||||
|
row2l = _mm_xor_si128(row4l, row2l);
|
||||||
|
row2h = _mm_xor_si128(row4h, row2h);
|
||||||
|
STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
|
||||||
|
STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
|
||||||
|
}
|
||||||
|
#endif
|
|
@ -337,12 +337,16 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
||||||
INST_HANDLE(FDIV_M, FMUL_R);
|
INST_HANDLE(FDIV_M, FMUL_R);
|
||||||
INST_HANDLE(FSQRT_R, FDIV_M);
|
INST_HANDLE(FSQRT_R, FDIV_M);
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
if (xmrig::Cpu::info()->jccErratum()) {
|
if (xmrig::Cpu::info()->jccErratum()) {
|
||||||
INST_HANDLE2(CBRANCH, CBRANCH<true>, FSQRT_R);
|
INST_HANDLE2(CBRANCH, CBRANCH<true>, FSQRT_R);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
INST_HANDLE2(CBRANCH, CBRANCH<false>, FSQRT_R);
|
INST_HANDLE2(CBRANCH, CBRANCH<false>, FSQRT_R);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
INST_HANDLE(CBRANCH, FSQRT_R);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(_M_X64) || defined(__x86_64__)
|
||||||
if (xmrig::Cpu::info()->hasBMI2()) {
|
if (xmrig::Cpu::info()->hasBMI2()) {
|
||||||
|
|
|
@ -31,7 +31,7 @@
|
||||||
#include "crypto/rx/RxVm.h"
|
#include "crypto/rx/RxVm.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(_M_X64) || defined(__x86_64__)
|
#if defined(XMRIG_FEATURE_SSE4_1)
|
||||||
extern "C" uint32_t rx_blake2b_use_sse41;
|
extern "C" uint32_t rx_blake2b_use_sse41;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ randomx_vm* xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
|
||||||
flags |= RANDOMX_FLAG_AMD;
|
flags |= RANDOMX_FLAG_AMD;
|
||||||
}
|
}
|
||||||
|
|
||||||
# if defined(_M_X64) || defined(__x86_64__)
|
# if defined(XMRIG_FEATURE_SSE4_1)
|
||||||
rx_blake2b_use_sse41 = Cpu::info()->has(ICpuInfo::FLAG_SSE41) ? 1 : 0;
|
rx_blake2b_use_sse41 = Cpu::info()->has(ICpuInfo::FLAG_SSE41) ? 1 : 0;
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
#define APP_ID "xmrig"
|
#define APP_ID "xmrig"
|
||||||
#define APP_NAME "XMRig"
|
#define APP_NAME "XMRig"
|
||||||
#define APP_DESC "XMRig miner"
|
#define APP_DESC "XMRig miner"
|
||||||
#define APP_VERSION "6.3.4"
|
#define APP_VERSION "6.3.5-dev"
|
||||||
#define APP_DOMAIN "xmrig.com"
|
#define APP_DOMAIN "xmrig.com"
|
||||||
#define APP_SITE "www.xmrig.com"
|
#define APP_SITE "www.xmrig.com"
|
||||||
#define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
|
#define APP_COPYRIGHT "Copyright (C) 2016-2020 xmrig.com"
|
||||||
|
@ -36,7 +36,7 @@
|
||||||
|
|
||||||
#define APP_VER_MAJOR 6
|
#define APP_VER_MAJOR 6
|
||||||
#define APP_VER_MINOR 3
|
#define APP_VER_MINOR 3
|
||||||
#define APP_VER_PATCH 4
|
#define APP_VER_PATCH 5
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
# if (_MSC_VER >= 1920)
|
# if (_MSC_VER >= 1920)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue