Merge xmrig v6.16.5-dev into master
This commit is contained in:
commit
e4eb9ea581
86 changed files with 3070 additions and 359 deletions
|
@ -25,6 +25,7 @@
|
|||
#include "base/crypto/sha3.h"
|
||||
#include "base/tools/bswap_64.h"
|
||||
#include "crypto/cn/CryptoNight.h"
|
||||
#include "crypto/astrobwt/sort_indices2.h"
|
||||
|
||||
|
||||
#include <limits>
|
||||
|
@ -433,6 +434,45 @@ bool xmrig::astrobwt::astrobwt_dero(const void* input_data, uint32_t input_size,
|
|||
}
|
||||
|
||||
|
||||
bool xmrig::astrobwt::astrobwt_dero_v2(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash)
|
||||
{
|
||||
constexpr size_t N = 9973;
|
||||
constexpr size_t STRIDE = 10240;
|
||||
|
||||
alignas(8) uint8_t key[32];
|
||||
uint8_t* scratchpad_ptr = (uint8_t*)(scratchpad) + 64;
|
||||
uint8_t* v = scratchpad_ptr;
|
||||
uint32_t* indices = (uint32_t*)(scratchpad_ptr + STRIDE);
|
||||
uint32_t* tmp_indices = (uint32_t*)(scratchpad_ptr + STRIDE * 5);
|
||||
|
||||
#ifdef ASTROBWT_AVX2
|
||||
if (hasAVX2) {
|
||||
SHA3_256_AVX2_ASM(input_data, input_size, key);
|
||||
Salsa20_XORKeyStream_AVX256(key, v, N);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
sha3_HashBuffer(256, SHA3_FLAGS_NONE, input_data, input_size, key, sizeof(key));
|
||||
Salsa20_XORKeyStream(key, v, N);
|
||||
}
|
||||
|
||||
sort_indices_astrobwt_v2(N, v, indices, tmp_indices);
|
||||
|
||||
#ifdef ASTROBWT_AVX2
|
||||
if (hasAVX2) {
|
||||
SHA3_256_AVX2_ASM(indices, N * 2, output_hash);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
sha3_HashBuffer(256, SHA3_FLAGS_NONE, indices, N * 2, output_hash, 32);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void xmrig::astrobwt::init()
|
||||
{
|
||||
if (!astrobwtInitialized) {
|
||||
|
@ -450,3 +490,10 @@ void xmrig::astrobwt::single_hash<xmrig::Algorithm::ASTROBWT_DERO>(const uint8_t
|
|||
{
|
||||
astrobwt_dero(input, static_cast<uint32_t>(size), ctx[0]->memory, output, std::numeric_limits<int>::max(), true);
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
void xmrig::astrobwt::single_hash<xmrig::Algorithm::ASTROBWT_DERO_2>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t)
|
||||
{
|
||||
astrobwt_dero_v2(input, static_cast<uint32_t>(size), ctx[0]->memory, output);
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ namespace xmrig {
|
|||
namespace astrobwt {
|
||||
|
||||
bool astrobwt_dero(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash, int stage2_max_size, bool avx2);
|
||||
bool astrobwt_dero_v2(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash);
|
||||
void init();
|
||||
|
||||
template<Algorithm::Id ALGO>
|
||||
|
@ -40,5 +41,7 @@ void single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight
|
|||
template<>
|
||||
void single_hash<Algorithm::ASTROBWT_DERO>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t);
|
||||
|
||||
template<>
|
||||
void single_hash<Algorithm::ASTROBWT_DERO_2>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t);
|
||||
|
||||
}} // namespace xmrig::astrobwt
|
||||
|
|
208
src/crypto/astrobwt/sort_indices2.cpp
Normal file
208
src/crypto/astrobwt/sort_indices2.cpp
Normal file
|
@ -0,0 +1,208 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright (c) 2018-2019 tevador <tevador@gmail.com>
|
||||
* Copyright (c) 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
|
||||
* Copyright (c) 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "crypto/astrobwt/sort_indices2.h"
|
||||
#include "base/tools/bswap_64.h"
|
||||
#include <cstring>
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define NOINLINE __attribute__((noinline))
|
||||
#define RESTRICT __restrict__
|
||||
#elif _MSC_VER
|
||||
#define NOINLINE __declspec(noinline)
|
||||
#define RESTRICT __restrict
|
||||
#else
|
||||
#define NOINLINE
|
||||
#define RESTRICT
|
||||
#endif
|
||||
|
||||
|
||||
#if __has_cpp_attribute(unlikely)
|
||||
#define UNLIKELY(X) (X) [[unlikely]]
|
||||
#elif defined __GNUC__
|
||||
#define UNLIKELY(X) (__builtin_expect((X), 0))
|
||||
#else
|
||||
#define UNLIKELY(X) (X)
|
||||
#endif
|
||||
|
||||
|
||||
static NOINLINE void fix(const uint8_t* RESTRICT v, uint32_t* RESTRICT indices, int32_t i)
|
||||
{
|
||||
uint32_t prev_t = indices[i - 1];
|
||||
uint32_t t = indices[i];
|
||||
|
||||
const uint32_t data_a = bswap_32(*(const uint32_t*)(v + (t & 0xFFFF) + 2));
|
||||
if (data_a < bswap_32(*(const uint32_t*)(v + (prev_t & 0xFFFF) + 2)))
|
||||
{
|
||||
const uint32_t t2 = prev_t;
|
||||
int32_t j = i - 1;
|
||||
do
|
||||
{
|
||||
indices[j + 1] = prev_t;
|
||||
--j;
|
||||
|
||||
if (j < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
prev_t = indices[j];
|
||||
} while (((t ^ prev_t) <= 0xFFFF) && (data_a < bswap_32(*(const uint32_t*)(v + (prev_t & 0xFFFF) + 2))));
|
||||
indices[j + 1] = t;
|
||||
t = t2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Orders the N positions of v by the bytes that start at each position and
 * leaves the result at the front of `indices` as N packed 16-bit positions.
 *
 * Steps:
 *   1. Histogram of the byte values in v[0 .. N).
 *   2. Two counting-sort scatters: first keyed on v[p + 1] into tmp_indices,
 *      then keyed on v[p] into indices. Each 32-bit entry is
 *      (v[p] << 24) | (v[p + 1] << 16) | p.
 *   3. Neighbouring entries whose upper 16 bits are equal are handed to fix(),
 *      which orders them by the following four bytes.
 *   4. The lower 16 bits of every entry are compacted in place, so the first
 *      N * 2 bytes of `indices` hold the sorted positions.
 *
 * Requirements that follow from the code:
 *   - N <= 65536, because the position shares the 32-bit entry with two key bytes.
 *   - v must be readable past N: step 2 reads v[N] and fix() reads up to v[p + 5].
 *   - indices and tmp_indices must each hold N uint32_t values.
 *   - The uint16_t views in steps 3 and 4 assume a little-endian host.
 *
 * NOTE(review): step 2 keys position N - 1 on v[N] and leaves slot 0 of
 * tmp_indices free for it. That only yields a permutation when no other key is
 * smaller than v[N], so presumably callers guarantee v[N] == 0 - confirm.
 *
 * @param N            number of positions to sort; N == 0 is a no-op
 * @param v            data to sort
 * @param indices      output buffer, also used as 32-bit working storage
 * @param tmp_indices  32-bit working storage
 */
static NOINLINE void sort_indices(uint32_t N, const uint8_t* RESTRICT v, uint32_t* RESTRICT indices, uint32_t* RESTRICT tmp_indices)
{
    if (N == 0) {
        return;
    }

    // 32-bit histogram cells: a byte value that occurs more than 255 times must not wrap.
    uint32_t byte_counters[2][256] = {};
    uint32_t counters[2][256];

    // Step 1: byte_counters[1] counts v[0 .. N), byte_counters[0] counts v[1 .. N).
    {
#define ITER(X) ++byte_counters[1][v[i + X]]

        constexpr uint32_t unroll = 12;

        uint32_t i = 0;
        // Guard the subtraction: for N < unroll it would wrap around in uint32_t.
        const uint32_t n = (N >= unroll) ? (N - (unroll - 1)) : 0;
        for (; i < n; i += unroll) {
            ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8); ITER(9); ITER(10); ITER(11);
        }
        for (; i < N; ++i) {
            ITER(0);
        }
        memcpy(byte_counters[0], byte_counters[1], sizeof(byte_counters[0]));
        --byte_counters[0][v[0]];

#undef ITER
    }

    // Running totals give the highest slot of every bucket. counters[1] is
    // shifted down by one (it starts from -1 modulo 2^32); counters[0] is not,
    // which leaves slot 0 for the entry keyed on v[N].
    {
        uint32_t c0 = 0;
        uint32_t c1 = static_cast<uint32_t>(-1);
        for (uint32_t b = 0; b < 256; ++b) {
            c0 += byte_counters[0][b];
            c1 += byte_counters[1][b];
            counters[0][b] = c0;
            counters[1][b] = c1;
        }
    }

    // Step 2a: scatter by the second byte, walking positions from N - 1 down to 0.
    {
#define ITER(X) \
        do { \
            const uint32_t byte0 = v[i - X + 0]; \
            const uint32_t byte1 = v[i - X + 1]; \
            tmp_indices[counters[0][byte1]--] = (byte0 << 24) | (byte1 << 16) | (i - X); \
        } while (0)

        constexpr uint32_t unroll = 8;

        uint32_t i = N;
        for (; i >= unroll; i -= unroll) {
            ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8);
        }
        for (; i > 0; --i) {
            ITER(1);
        }

#undef ITER
    }

    // Step 2b: scatter by the first byte (bits 24..31 of each entry), again back to front.
    {
#define ITER(X) \
        do { \
            const uint32_t data = tmp_indices[i - X]; \
            indices[counters[1][data >> 24]--] = data; \
        } while (0)

        constexpr uint32_t unroll = 8;

        uint32_t i = N;
        for (; i >= unroll; i -= unroll) {
            ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8);
        }
        for (; i > 0; --i) {
            ITER(1);
        }

#undef ITER
    }

    // Step 3: `a` walks the upper 16-bit halves of consecutive entries
    // (little-endian layout); equal halves mean the two-byte keys tie.
    {
#define ITER(X) do { if UNLIKELY(a[X * 2] == a[(X + 1) * 2]) fix(v, indices, i + X); } while (0)

        constexpr uint32_t unroll = 16;

        uint32_t i = 1;
        // Guard the subtraction: for N < unroll it would wrap around in uint32_t.
        const uint32_t n = (N >= unroll) ? (N - (unroll - 1)) : 0;
        const uint16_t* a = ((const uint16_t*)indices) + 1;

        for (; i < n; i += unroll, a += unroll * 2) {
            ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
            ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15);
        }
        for (; i < N; ++i, a += 2) {
            ITER(0);
        }

#undef ITER
    }

    // Step 4: keep only the lower 16-bit half of every entry, packed to the front.
    {
#define ITER(X) a[X] = b[X * 2]

        constexpr uint32_t unroll = 32;

        uint16_t* a = (uint16_t*)indices;
        uint16_t* b = (uint16_t*)indices;
        // Guard the subtraction: for N < unroll it would wrap around in uint32_t.
        const uint32_t unrolled_end = (N >= unroll) ? (N - (unroll - 1)) : 0;
        uint16_t* e = ((uint16_t*)indices) + unrolled_end;

        for (; a < e; a += unroll, b += unroll * 2) {
            ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
            ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15);
            ITER(16); ITER(17); ITER(18); ITER(19); ITER(20); ITER(21); ITER(22); ITER(23);
            ITER(24); ITER(25); ITER(26); ITER(27); ITER(28); ITER(29); ITER(30); ITER(31);
        }

        e = ((uint16_t*)indices) + N;
        for (; a < e; ++a, b += 2) {
            ITER(0);
        }

#undef ITER
    }
}
|
||||
|
||||
|
||||
void sort_indices_astrobwt_v2(uint32_t N, const uint8_t* v, uint32_t* indices, uint32_t* tmp_indices)
|
||||
{
|
||||
sort_indices(N, v, indices, tmp_indices);
|
||||
}
|
26
src/crypto/astrobwt/sort_indices2.h
Normal file
26
src/crypto/astrobwt/sort_indices2.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* XMRig
|
||||
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright (c) 2018-2019 tevador <tevador@gmail.com>
|
||||
* Copyright (c) 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
|
||||
* Copyright (c) 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
// Sorts the N positions of v (implemented in sort_indices2.cpp).
//   N            number of positions to sort
//   v            data to sort; the implementation also reads a few bytes past v + N
//   indices      working buffer of N 32-bit entries; on return its first N * 2 bytes
//                hold the sorted positions as 16-bit values
//   tmp_indices  working buffer of N 32-bit entries
void sort_indices_astrobwt_v2(uint32_t N, const uint8_t* v, uint32_t* indices, uint32_t* tmp_indices);
|
|
@ -385,6 +385,10 @@ xmrig::CnHash::CnHash()
|
|||
m_map[Algorithm::ASTROBWT_DERO] = new cn_hash_fun_array{};
|
||||
m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
|
||||
m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
|
||||
|
||||
m_map[Algorithm::ASTROBWT_DERO_2] = new cn_hash_fun_array{};
|
||||
m_map[Algorithm::ASTROBWT_DERO_2]->data[AV_SINGLE][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO_2>;
|
||||
m_map[Algorithm::ASTROBWT_DERO_2]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO_2>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
|
|
|
@ -464,6 +464,19 @@ const static uint8_t astrobwt_dero_test_out[256] = {
|
|||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
|
||||
};
|
||||
|
||||
// "astrobwt/v2"
|
||||
// 256-byte buffer in which only the first 32 bytes are non-zero. The other
// 224 bytes are not spelled out: elements without an initialiser in an
// aggregate initialiser list are zero-initialised.
const static uint8_t astrobwt_dero_2_test_out[256] = {
    0x48, 0x9E, 0xD2, 0x66, 0x14, 0x27, 0x98, 0x65,
    0x03, 0xFB, 0x87, 0x25, 0xE1, 0xD3, 0x98, 0xDA,
    0x27, 0xEE, 0x25, 0x3D, 0xB4, 0x37, 0x87, 0x98,
    0xBF, 0x5A, 0x5C, 0x94, 0xEE, 0x0C, 0xE2, 0x2A
};
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -773,6 +773,11 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
|||
{
|
||||
constexpr uint32_t N = 8;
|
||||
|
||||
uint8_t* ctx_memory[N];
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
ctx_memory[i] = ctx[i]->memory;
|
||||
}
|
||||
|
||||
// PrevBlockHash (GhostRider's seed) is stored in bytes [4; 36)
|
||||
const uint8_t* seed = data + 4;
|
||||
|
||||
|
@ -800,30 +805,50 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
|||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
|
||||
const cn_hash_fun f[3] = {
|
||||
CnHash::fn(cn_hash[cn_indices[0]], av[step[cn_indices[0]]], Assembly::AUTO),
|
||||
CnHash::fn(cn_hash[cn_indices[1]], av[step[cn_indices[1]]], Assembly::AUTO),
|
||||
CnHash::fn(cn_hash[cn_indices[2]], av[step[cn_indices[2]]], Assembly::AUTO),
|
||||
};
|
||||
|
||||
uint8_t tmp[64 * N];
|
||||
|
||||
for (uint64_t part = 0; part < 3; ++part) {
|
||||
for (uint64_t i = 0; i < 5; ++i) {
|
||||
for (uint64_t j = 0; j < N; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](data + j * size, size, tmp + j * 64);
|
||||
data = tmp;
|
||||
size = 64;
|
||||
for (size_t part = 0; part < 3; ++part) {
|
||||
|
||||
// Allocate scratchpads
|
||||
{
|
||||
uint8_t* p = ctx_memory[0];
|
||||
|
||||
for (size_t i = 0, k = 0; i < N; ++i) {
|
||||
if ((i % step[cn_indices[part]]) == 0) {
|
||||
k = 0;
|
||||
p = ctx_memory[0];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
}
|
||||
for (uint64_t j = 0, k = step[cn_indices[part]]; j < N; j += k) {
|
||||
f[part](tmp + j * 64, 64, output + j * 32, ctx, 0);
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = 0; j < N; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](data + j * size, size, tmp + j * 64);
|
||||
}
|
||||
data = tmp;
|
||||
size = 64;
|
||||
}
|
||||
for (uint64_t j = 0; j < N; ++j) {
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[step[cn_indices[part]]], Assembly::AUTO);
|
||||
for (size_t j = 0; j < N; j += step[cn_indices[part]]) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx, 0);
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < N; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
ctx[i]->memory = ctx_memory[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue