Cleanup MoneroOcean patchset

This commit is contained in:
Tony Butler 2019-12-22 09:54:34 -07:00
parent aaaaf5c1ed
commit c4ff8c4064
49 changed files with 2094 additions and 2237 deletions

View file

@ -90,17 +90,17 @@ public:
case Algorithm::CN_DOUBLE:
return CN_ITER * 2;
# ifdef XMRIG_ALGO_CN_GPU
case Algorithm::CN_GPU:
return 0xC000;
# endif
# ifdef XMRIG_ALGO_CN_PICO
case Algorithm::CN_PICO_0:
case Algorithm::CN_PICO_TLO:
return CN_ITER / 8;
# endif
# ifdef XMRIG_ALGO_CN_GPU
case Algorithm::CN_GPU:
return 0xC000;
# endif
default:
break;
}
@ -110,18 +110,18 @@ public:
// Returns the mask value for the given algorithm id.
// CN_GPU and CN_PICO_0 (when compiled in) get fixed constants; any other id
// falls through to a value derived from memory(algo).
inline static uint32_t mask(Algorithm::Id algo)
{
// NOTE(review): the CN_GPU check below appears a second time after the
// CN_PICO check with the same constant. This listing looks like both sides of
// a reordering change; confirm which position is current before editing.
# ifdef XMRIG_ALGO_CN_GPU
if (algo == Algorithm::CN_GPU) {
return 0x1FFFC0;
}
# endif
// Only CN_PICO_0 is special-cased here; other ids use the generic formula.
# ifdef XMRIG_ALGO_CN_PICO
if (algo == Algorithm::CN_PICO_0) {
return 0x1FFF0;
}
# endif
# ifdef XMRIG_ALGO_CN_GPU
if (algo == Algorithm::CN_GPU) {
return 0x1FFFC0;
}
# endif
// Generic case: (memory(algo) - 1) rounded down to a multiple of 16.
return ((memory(algo) - 1) / 16) * 16;
}
@ -205,10 +205,10 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_XAO>::iterations() con
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_DOUBLE>::iterations() const { return CN_ITER * 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_RWZ>::iterations() const { return 0x60000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_ZLS>::iterations() const { return 0x60000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::iterations() const { return 0xC000; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_TLO>::iterations() const { return CN_ITER / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_CCX>::iterations() const { return CN_ITER / 2; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::iterations() const { return 0xC000; }
template<> constexpr inline size_t CnAlgo<Algorithm::CN_LITE_0>::memory() const { return CN_MEMORY / 2; }
@ -220,8 +220,8 @@ template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_0>::memory() const
template<> constexpr inline size_t CnAlgo<Algorithm::CN_PICO_TLO>::memory() const { return CN_MEMORY / 8; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const { return 0x1FFFC0; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GPU>::mask() const { return 0x1FFFC0; }
} /* namespace xmrig */

View file

@ -252,11 +252,6 @@ xmrig::CnHash::CnHash()
ADD_FN_ASM(Algorithm::CN_ZLS);
ADD_FN_ASM(Algorithm::CN_DOUBLE);
# ifdef XMRIG_ALGO_CN_GPU
m_map[Algorithm::CN_GPU][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
m_map[Algorithm::CN_GPU][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
# endif
# ifdef XMRIG_ALGO_CN_LITE
ADD_FN(Algorithm::CN_LITE_0);
ADD_FN(Algorithm::CN_LITE_1);
@ -289,6 +284,11 @@ xmrig::CnHash::CnHash()
m_map[Algorithm::ASTROBWT_DERO][AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
# endif
# ifdef XMRIG_ALGO_CN_GPU
m_map[Algorithm::CN_GPU][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, false>;
m_map[Algorithm::CN_GPU][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash_gpu<Algorithm::CN_GPU, true>;
# endif
# ifdef XMRIG_FEATURE_ASM
patchAsmVariants();
# endif

View file

@ -45,11 +45,11 @@ inline void prep_dv_avx(__m256i* idx, __m256i& v, __m256& n01)
n01 = _mm256_cvtepi32_ps(v);
}
inline __m256 fma_break(const __m256& x)
{
// Break the dependency chain by setitng the exp to ?????01
__m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x);
return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx);
inline __m256 fma_break(const __m256& x)
{
// Break the dependency chain by setting the exp to ?????01
__m256 xx = _mm256_and_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0xFEFFFFFF)), x);
return _mm256_or_ps(_mm256_castsi256_ps(_mm256_set1_epi32(0x00800000)), xx);
}
// 14
@ -151,13 +151,13 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad)
__m256 n01, n23;
prep_dv_avx(idx0, v01, n01);
prep_dv_avx(idx2, v23, n23);
__m256i out, out2;
__m256 n10, n22, n33;
n10 = _mm256_permute2f128_ps(n01, n01, 0x01);
n22 = _mm256_permute2f128_ps(n23, n23, 0x00);
n33 = _mm256_permute2f128_ps(n23, n23, 0x11);
out = _mm256_setzero_si256();
double_compute_wrap<0>(n01, n10, n22, n33, 1.3437500f, 1.4296875f, rc, suma, out);
double_compute_wrap<1>(n01, n22, n33, n10, 1.2812500f, 1.3984375f, rc, suma, out);
@ -166,7 +166,7 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad)
_mm256_store_si256(idx0, _mm256_xor_si256(v01, out));
sum0 = _mm256_add_ps(suma, sumb);
out2 = out;
__m256 n11, n02, n30;
n11 = _mm256_permute2f128_ps(n01, n01, 0x11);
n02 = _mm256_permute2f128_ps(n01, n23, 0x20);
@ -191,7 +191,7 @@ void cn_gpu_inner_avx(const uint8_t* spad, uint8_t* lpad)
__m128 sum = _mm256_castps256_ps128(sum0);
sum = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum); // take abs(va) by masking the float sign bit
// vs range 0 - 64
// vs range 0 - 64
__m128i v0 = _mm_cvttps_epi32(_mm_mul_ps(sum, _mm_set1_ps(16777216.0f)));
v0 = _mm_xor_si128(v0, _mm256_castsi256_si128(out2));
__m128i v1 = _mm_shuffle_epi32(v0, _MM_SHUFFLE(0, 1, 2, 3));

View file

@ -39,11 +39,11 @@ inline void prep_dv(__m128i* idx, __m128i& v, __m128& n)
n = _mm_cvtepi32_ps(v);
}
inline __m128 fma_break(__m128 x)
{
// Break the dependency chain by setitng the exp to ?????01
x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x);
return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x);
inline __m128 fma_break(__m128 x)
{
// Break the dependency chain by setting the exp to ?????01
x = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0xFEFFFFFF)), x);
return _mm_or_ps(_mm_castsi128_ps(_mm_set1_epi32(0x00800000)), x);
}
// 14
@ -136,13 +136,13 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad)
__m128i* idx2 = scratchpad_ptr<MASK>(lpad, s, 2);
__m128i* idx3 = scratchpad_ptr<MASK>(lpad, s, 3);
__m128 sum0 = _mm_setzero_ps();
for(size_t i = 0; i < ITER; i++)
{
__m128 n0, n1, n2, n3;
__m128i v0, v1, v2, v3;
__m128 suma, sumb, sum1, sum2, sum3;
prep_dv(idx0, v0, n0);
prep_dv(idx1, v1, n1);
prep_dv(idx2, v2, n2);
@ -158,7 +158,7 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad)
sum0 = _mm_add_ps(suma, sumb);
_mm_store_si128(idx0, _mm_xor_si128(v0, out));
out2 = out;
out = _mm_setzero_si128();
single_compute_wrap<0>(n1, n0, n2, n3, 1.4296875f, rc, suma, out);
single_compute_wrap<1>(n1, n2, n3, n0, 1.3984375f, rc, suma, out);
@ -190,7 +190,7 @@ void cn_gpu_inner_ssse3(const uint8_t* spad, uint8_t* lpad)
sum0 = _mm_add_ps(sum0, sum2);
sum0 = _mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)), sum0); // take abs(va) by masking the float sign bit
// vs range 0 - 64
// vs range 0 - 64
n0 = _mm_mul_ps(sum0, _mm_set1_ps(16777216.0f));
v0 = _mm_cvttps_epi32(n0);
v0 = _mm_xor_si128(v0, out2);

View file

@ -48,13 +48,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cassert>
extern "C" {
#include "crypto/defyx/yescrypt.h"
#include "crypto/defyx/KangarooTwelve.h"
#include "crypto/randomx/defyx/yescrypt.h"
#include "crypto/randomx/defyx/KangarooTwelve.h"
}
#define YESCRYPT_FLAGS YESCRYPT_RW
#define YESCRYPT_BASE_N 2048
#define YESCRYPT_R 8
#define YESCRYPT_P 1
RandomX_ConfigurationWownero::RandomX_ConfigurationWownero()
{
@ -127,8 +123,8 @@ RandomX_ConfigurationScala::RandomX_ConfigurationScala()
ScratchpadL1_Size = 65536;
ScratchpadL2_Size = 131072;
ScratchpadL3_Size = 262144;
ProgramSize = 64;
ProgramIterations = 1024;
ProgramSize = 64;
ProgramIterations = 1024;
ProgramCount = 4;
RANDOMX_FREQ_IADD_RS = 25;
@ -344,31 +340,25 @@ alignas(64) RandomX_ConfigurationBase RandomX_CurrentConfig;
static std::mutex vm_pool_mutex;
int sipesh(void *out, size_t outlen, const void *in, size_t inlen)
int rx_sipesh_k12(void *out, size_t outlen, const void *in, size_t inlen)
{
const void *salt = in;
size_t saltlen = inlen;
unsigned int t_cost = 0;
unsigned int m_cost = 0;
const void *salt = in;
size_t saltlen = inlen;
yescrypt_local_t local;
int retval;
if (yescrypt_init_local(&local))
return -1;
retval = yescrypt_kdf(NULL, &local, (const uint8_t*)in, inlen, (const uint8_t*)salt, saltlen,
(uint64_t)YESCRYPT_BASE_N << m_cost, YESCRYPT_R, YESCRYPT_P,
t_cost, 0, YESCRYPT_FLAGS, (uint8_t*)out, outlen);
if (yescrypt_free_local(&local))
return -1;
if (yescrypt_init_local(&local)) return -1;
retval = yescrypt_kdf(NULL, &local,
(const uint8_t*)in, inlen,
(const uint8_t*)salt, saltlen,
(uint64_t)2048, 8, 1, 0, 0, (yescrypt_flags_t)1,
(uint8_t*)out, outlen
);
if (yescrypt_free_local(&local) || retval) return -1;
retval = KangarooTwelve((const unsigned char *)in, inlen, (unsigned char *)out, 32, 0, 0);
return retval;
}
// Thin wrapper around KangarooTwelve: hashes `length` bytes starting at `data`
// and requests 32 bytes of output into `hash`. The KangarooTwelve status code
// is handed back to the caller unchanged.
int k12(void *hash, const void *data, size_t length)
{
    const unsigned char *message = static_cast<const unsigned char *>(data);
    unsigned char *digest = static_cast<unsigned char *>(hash);

    return KangarooTwelve(message, length, digest, 32, 0, 0);
}
extern "C" {
randomx_cache *randomx_create_cache(randomx_flags flags, uint8_t *memory) {
@ -569,12 +559,16 @@ extern "C" {
vm->~randomx_vm();
}
void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {
void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output, const xmrig::Algorithm algo) {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
alignas(16) uint64_t tempHash[8];
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
if (algo == xmrig::Algorithm::RX_DEFYX) {
rx_sipesh_k12(tempHash, sizeof(tempHash), input, inputSize);
} else {
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
}
machine->initScratchpad(&tempHash);
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
@ -585,12 +579,16 @@ extern "C" {
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize, const xmrig::Algorithm algo) {
if (algo == xmrig::Algorithm::RX_DEFYX) {
rx_sipesh_k12(tempHash, sizeof(tempHash), input, inputSize);
} else {
rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
}
machine->initScratchpad(tempHash);
}
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output, const xmrig::Algorithm algo) {
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash);
@ -599,44 +597,12 @@ extern "C" {
machine->run(&tempHash);
// Finish current hash and fill the scratchpad for the next hash at the same time
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
if (algo == xmrig::Algorithm::RX_DEFYX) {
rx_sipesh_k12(tempHash, sizeof(tempHash), nextInput, nextInputSize);
} else {
rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0);
}
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
}
// One-shot hash: derives a 64-byte tempHash from `input`, passes it to
// machine->initScratchpad, runs RandomX_CurrentConfig.ProgramCount programs on
// the VM, and writes RANDOMX_HASH_SIZE bytes of result to `output`.
//
// machine and output must be non-null; input may be null only when inputSize
// is 0. These preconditions are checked with assert() only.
void defyx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
alignas(16) uint64_t tempHash[8];
// sipesh() and then k12() are both given tempHash as their output buffer.
// k12() requests 32 bytes of output, i.e. half of tempHash. Neither return
// value is checked.
sipesh(tempHash, sizeof(tempHash), input, inputSize);
k12(tempHash, input, inputSize);
machine->initScratchpad(&tempHash);
machine->resetRoundingMode();
// Every program except the last: run it, then replace tempHash with the
// rx_blake2b hash of the VM register file.
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
// Last program: its result is read out with getFinalResult instead of being
// re-hashed here.
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
// Start of the split (first/next) hashing sequence: fills the caller-supplied
// tempHash from `input` via sipesh() followed by k12(), then passes it to
// machine->initScratchpad. No hash output is produced by this call.
// The return values of sipesh() and k12() are not checked.
void defyx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
sipesh(tempHash, sizeof(tempHash), input, inputSize);
k12(tempHash, input, inputSize);
machine->initScratchpad(tempHash);
}
// Continuation of the split hashing sequence: runs the VM programs using the
// tempHash passed in, then derives a new tempHash from `nextInput` and calls
// hashAndFill to write RANDOMX_HASH_SIZE bytes to `output`.
// NOTE(review): presumably called after defyx_calculate_hash_first or a
// previous call to this function on the same machine; confirm against callers.
void defyx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
machine->resetRoundingMode();
// Every program except the last: run it, then replace tempHash with the
// rx_blake2b hash of the VM register file.
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
machine->run(&tempHash);
rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
}
machine->run(&tempHash);
// Finish current hash and fill the scratchpad for the next hash at the same time
// tempHash is overwritten from nextInput (sipesh, then k12; return values
// unchecked) before hashAndFill receives it.
sipesh(tempHash, sizeof(tempHash), nextInput, nextInputSize);
k12(tempHash, nextInput, nextInputSize);
machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash);
}
}

View file

@ -32,6 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "base/crypto/Algorithm.h"
#include "crypto/randomx/intrin_portable.h"
#define RANDOMX_HASH_SIZE 32
@ -347,24 +348,10 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
* @param output is a pointer to memory where the hash will be stored. Must not
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
*/
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output, const xmrig::Algorithm algo);
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output);
/**
* Calculates a RandomX hash value (Scala variant).
*
* @param machine is a pointer to a randomx_vm structure. Must not be NULL.
* @param input is a pointer to memory to be hashed. Must not be NULL.
* @param inputSize is the number of bytes to be hashed.
* @param output is a pointer to memory where the hash will be stored. Must not
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
*/
RANDOMX_EXPORT void defyx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
RANDOMX_EXPORT void defyx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize);
RANDOMX_EXPORT void defyx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize, const xmrig::Algorithm algo);
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output, const xmrig::Algorithm algo);
#if defined(__cplusplus)
}