Implemented CryptoNight with C++ templates.
This commit is contained in:
parent
878e021ff6
commit
8b83a5fe2e
6 changed files with 242 additions and 66 deletions
|
@ -38,6 +38,9 @@ extern "C"
|
|||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
|
||||
__m128i soft_aesenc(__m128i in, __m128i key);
|
||||
__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
||||
}
|
||||
|
||||
|
||||
|
@ -64,14 +67,11 @@ static inline void do_skein_hash(const void* input, size_t len, char* output) {
|
|||
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
|
||||
|
||||
|
||||
__m128i soft_aesenc(__m128i in, __m128i key);
|
||||
__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
# define EXTRACT64(X) _mm_cvtsi128_si64(X)
|
||||
|
||||
static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
||||
static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
||||
{
|
||||
unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
|
||||
*hi = r >> 64;
|
||||
|
@ -86,7 +86,7 @@ static inline uint64_t _umul128(uint64_t a, uint64_t b, uint64_t* hi)
|
|||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
|
||||
((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
|
||||
|
||||
static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
|
||||
static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
|
||||
// multiplier = ab = a * 2^32 + b
|
||||
// multiplicand = cd = c * 2^32 + d
|
||||
// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
|
||||
|
@ -300,7 +300,7 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
|||
|
||||
|
||||
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
|
||||
void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
|
||||
inline void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
|
||||
{
|
||||
keccak(static_cast<const uint8_t*>(input), size, ctx->state0, 200);
|
||||
|
||||
|
@ -327,7 +327,7 @@ void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restr
|
|||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
@ -346,4 +346,86 @@ void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restr
|
|||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
|
||||
}
|
||||
|
||||
|
||||
template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
|
||||
inline void cryptonight_double_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY;
|
||||
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
|
||||
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
|
||||
|
||||
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < ITERATIONS, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & MASK])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & MASK])[1];
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & MASK])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & MASK])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & MASK])[1];
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & MASK])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, static_cast<char*>(output) + 32);
|
||||
}
|
||||
|
||||
#endif /* __CRYPTONIGHT_P_H__ */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue