Compare commits
30 commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
4fdec33c50 | ||
![]() |
30e5e4a492 | ||
![]() |
d92c1a54de | ||
![]() |
aa474fa51b | ||
![]() |
7976059367 | ||
![]() |
c5cbd9d8fe | ||
![]() |
ef2e8bed6e | ||
![]() |
7574bfab60 | ||
![]() |
27980f24f8 | ||
![]() |
5e6a69e16f | ||
![]() |
69513e7049 | ||
![]() |
b834c50aba | ||
![]() |
302ebe5a5b | ||
![]() |
b9096f2392 | ||
![]() |
b02f4ff163 | ||
![]() |
11748fad78 | ||
![]() |
e0dc51edf9 | ||
![]() |
779238fc85 | ||
![]() |
a06a224c0a | ||
![]() |
bf2eb1a685 | ||
![]() |
0bba8849f0 | ||
![]() |
1e22a984af | ||
![]() |
61b49137c7 | ||
![]() |
93d072ff6e | ||
![]() |
f0b293f650 | ||
![]() |
b93e7d9daa | ||
![]() |
0b4b07fcd6 | ||
![]() |
af62621169 | ||
![]() |
ed7260449a | ||
![]() |
33944595a2 |
62 changed files with 8696 additions and 1279 deletions
|
@ -1,3 +1,12 @@
|
|||
# v0.9.0
|
||||
- **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).**
|
||||
- Added option `--asm`, possible values `--asm auto`, `--asm none`, `--asm intel` and `--asm ryzen`.
|
||||
- Added support for new style long and short algorithm names, possible values: `cryptonight`, `cryptonight/0`, `cryptonight/1`, `cryptonight/2`, `cryptonight-lite`, `cryptonight-lite/0`, `cryptonight-lite/1` and short equvalents `cn/2` etc.
|
||||
- Added `--variant`, example `--algo cn --variant 2`, by default miner automaticaly detect proper variant for Monero by block version.
|
||||
- Added CryptoNight-Lite variant 1.
|
||||
- Added xmrig-proxy autodetection, nicehash will be enabled automaticaly.
|
||||
- Added workaround for xmrig-proxy [bug](https://github.com/xmrig/xmrig-proxy/commit/dfa1960fe3eeb13f80717b7dbfcc7c6e9f222d89).
|
||||
|
||||
# v0.8.2
|
||||
- Fixed L2 cache size detection for AMD CPUs (Bulldozer/Piledriver/Steamroller/Excavator architecture).
|
||||
- Fixed gcc 7.1 support.
|
||||
|
|
|
@ -3,22 +3,26 @@ project(xmrig C)
|
|||
|
||||
option(WITH_LIBCPUID "Use Libcpuid" ON)
|
||||
option(WITH_AEON "CryptoNight-Lite support" ON)
|
||||
option(WITH_ASM "Enable ASM PoW implementations" ON)
|
||||
|
||||
set(HEADERS
|
||||
compat.h
|
||||
algo/cryptonight/cryptonight.h
|
||||
algo/cryptonight/cryptonight_aesni.h
|
||||
algo/cryptonight/cryptonight_monero.h
|
||||
algo/cryptonight/cryptonight_softaes.h
|
||||
elist.h
|
||||
xmrig.h
|
||||
version.h
|
||||
options.h
|
||||
algo/cryptonight/cryptonight_test.h
|
||||
algo/cryptonight/variant4_random_math.h
|
||||
compat.h
|
||||
cpu.h
|
||||
persistent_memory.h
|
||||
stratum.h
|
||||
stats.h
|
||||
util.h
|
||||
donate.h
|
||||
elist.h
|
||||
options.h
|
||||
persistent_memory.h
|
||||
stats.h
|
||||
stratum.h
|
||||
util.h
|
||||
version.h
|
||||
xmrig.h
|
||||
)
|
||||
|
||||
set(HEADERS_CRYPTO
|
||||
|
@ -26,6 +30,7 @@ set(HEADERS_CRYPTO
|
|||
crypto/c_blake256.h
|
||||
crypto/c_jh.h
|
||||
crypto/c_skein.h
|
||||
crypto/soft_aes.h
|
||||
)
|
||||
|
||||
set(HEADERS_COMPAT
|
||||
|
@ -41,10 +46,14 @@ set(HEADERS_UTILS
|
|||
set(SOURCES
|
||||
xmrig.c
|
||||
algo/cryptonight/cryptonight.c
|
||||
algo/cryptonight/cryptonight_av1_aesni.c
|
||||
algo/cryptonight/cryptonight_av2_aesni_double.c
|
||||
algo/cryptonight/cryptonight_av3_softaes.c
|
||||
algo/cryptonight/cryptonight_av4_softaes_double.c
|
||||
algo/cryptonight/cryptonight_av1.c
|
||||
algo/cryptonight/cryptonight_av2.c
|
||||
algo/cryptonight/cryptonight_av3.c
|
||||
algo/cryptonight/cryptonight_av4.c
|
||||
algo/cryptonight/cryptonight_r_av1.c
|
||||
algo/cryptonight/cryptonight_r_av2.c
|
||||
algo/cryptonight/cryptonight_r_av3.c
|
||||
algo/cryptonight/cryptonight_r_av4.c
|
||||
util.c
|
||||
options.c
|
||||
stratum.c
|
||||
|
@ -58,7 +67,6 @@ set(SOURCES_CRYPTO
|
|||
crypto/c_blake256.c
|
||||
crypto/c_jh.c
|
||||
crypto/c_skein.c
|
||||
crypto/soft_aes.c
|
||||
)
|
||||
|
||||
set(SOURCES_UTILS
|
||||
|
@ -68,13 +76,13 @@ set(SOURCES_UTILS
|
|||
|
||||
if (WIN32)
|
||||
set(SOURCES_OS win/cpu_win.c win/memory_win.c win/xmrig_win.c win/app.rc compat/winansi.c)
|
||||
set(EXTRA_LIBS ws2_32)
|
||||
set(EXTRA_LIBS ws2_32 crypt32)
|
||||
add_definitions(/D_WIN32_WINNT=0x600)
|
||||
elseif (APPLE)
|
||||
set(SOURCES_OS mac/cpu_mac.c mac/memory_mac.c mac/xmrig_mac.c)
|
||||
else()
|
||||
set(SOURCES_OS unix/cpu_unix.c unix/memory_unix.c unix/xmrig_unix.c)
|
||||
set(EXTRA_LIBS pthread)
|
||||
set(EXTRA_LIBS pthread rt m)
|
||||
endif()
|
||||
|
||||
include_directories(.)
|
||||
|
@ -89,9 +97,9 @@ endif()
|
|||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes -Wno-pointer-to-int-cast")
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -fmerge-all-constants")
|
||||
else()
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -s -funroll-loops -fvariable-expansion-in-unroller -ftree-loop-if-convert-stores -fmerge-all-constants -fbranch-target-load-optimize2")
|
||||
endif()
|
||||
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -gdwarf-2")
|
||||
|
@ -123,12 +131,14 @@ else()
|
|||
set(SOURCES_CPUID cpu_stub.c)
|
||||
endif()
|
||||
|
||||
include(cmake/asm.cmake)
|
||||
|
||||
if (WITH_AEON)
|
||||
set(SOURCES_AEON
|
||||
algo/cryptonight-lite/cryptonight_lite_av1_aesni.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av2_aesni_double.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av3_softaes.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av4_softaes_double.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av1.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av2.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av3.c
|
||||
algo/cryptonight-lite/cryptonight_lite_av4.c
|
||||
algo/cryptonight-lite/cryptonight_lite_aesni.h
|
||||
algo/cryptonight-lite/cryptonight_lite_softaes.h
|
||||
)
|
||||
|
@ -137,10 +147,10 @@ else()
|
|||
endif()
|
||||
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON})
|
||||
target_link_libraries(xmrig jansson curl ${CPUID_LIB} ${EXTRA_LIBS})
|
||||
add_executable(xmrig ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON} ${XMRIG_ASM_SOURCES})
|
||||
target_link_libraries(xmrig ${XMRIG_ASM_LIBRARY} jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
|
||||
else()
|
||||
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON})
|
||||
target_link_libraries(xmrig32 jansson curl ${CPUID_LIB} ${EXTRA_LIBS})
|
||||
add_executable(xmrig32 ${HEADERS} ${HEADERS_CRYPTO} ${SOURCES} ${SOURCES_CRYPTO} ${HEADERS_UTILS} ${SOURCES_UTILS} ${HEADERS_COMPAT} ${SOURCES_COMPAT} ${SOURCES_OS} ${SOURCES_CPUID} ${SOURCES_AEON} ${XMRIG_ASM_SOURCES})
|
||||
target_link_libraries(xmrig32 ${XMRIG_ASM_LIBRARY} jansson ${CURL_LIBRARY} ${CPUID_LIB} ${EXTRA_LIBS})
|
||||
endif()
|
||||
|
||||
|
|
|
@ -99,7 +99,7 @@ Configure options for libcurl:
|
|||
```
|
||||
CMake options:
|
||||
```
|
||||
cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCURL_INCLUDE_DIR="c:\<path>\curl-7.53.1\include" -DCURL_LIBRARY="c:\<path>\curl-7.53.1\lib\.libs"
|
||||
cmake .. -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCURL_INCLUDE_DIR="c:\xmrig-deps\gcc\x64\include" -DCURL_LIBRARY="c:\xmrig-deps\gcc\x64\lib\libcurl.a"
|
||||
```
|
||||
|
||||
### Optional features
|
||||
|
|
|
@ -22,10 +22,12 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_LITE_AESNI_H__
|
||||
#define __CRYPTONIGHT_LITE_AESNI_H__
|
||||
#ifndef XMRIG_CRYPTONIGHT_LITE_AESNI_H
|
||||
#define XMRIG_CRYPTONIGHT_LITE_AESNI_H
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#define aes_genkey_sub(imm8) \
|
||||
|
@ -253,4 +255,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint
|
|||
#endif
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_LITE_AESNI_H__ */
|
||||
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
|
||||
{
|
||||
mem_out[0] = EXTRACT64(tmp);
|
||||
|
||||
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
|
||||
uint64_t vh = EXTRACT64(tmp);
|
||||
|
||||
uint8_t x = vh >> 24;
|
||||
static const uint16_t table = 0x7531;
|
||||
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
|
||||
vh ^= ((table >> index) & 0x3) << 28;
|
||||
|
||||
mem_out[1] = vh;
|
||||
}
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_LITE_AESNI_H */
|
||||
|
|
134
algo/cryptonight-lite/cryptonight_lite_av1.c
Normal file
134
algo/cryptonight-lite/cryptonight_lite_av1.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight_lite_aesni.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_lite_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "cryptonight_lite_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
}
|
202
algo/cryptonight-lite/cryptonight_lite_av2.c
Normal file
202
algo/cryptonight-lite/cryptonight_lite_av2.c
Normal file
|
@ -0,0 +1,202 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "cryptonight_lite_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_lite_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "cryptonight_lite_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY_LITE;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
uint64_t* h1 = (uint64_t*) ctx->state1;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
|
||||
}
|
134
algo/cryptonight-lite/cryptonight_lite_av3.c
Normal file
134
algo/cryptonight-lite/cryptonight_lite_av3.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "cryptonight_lite_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_lite_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "cryptonight_lite_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0xFFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*)&l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*)&l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
}
|
202
algo/cryptonight-lite/cryptonight_lite_av4.c
Normal file
202
algo/cryptonight-lite/cryptonight_lite_av4.c
Normal file
|
@ -0,0 +1,202 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "cryptonight_lite_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av4_v0(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_lite_av4_v1(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1 ^ tweak1_2_1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, (char*) output + 32);
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "cryptonight_lite_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_lite_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY_LITE;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
uint64_t* h1 = (uint64_t*) ctx->state1;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x40000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0xFFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0xFFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0xFFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0xFFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0xFFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0xFFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0xFFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0xFFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
|
||||
}
|
|
@ -4,9 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,13 +22,15 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_LITE_SOFTAES_H__
|
||||
#define __CRYPTONIGHT_LITE_SOFTAES_H__
|
||||
#ifndef XMRIG_CRYPTONIGHT_LITE_SOFTAES_H
|
||||
#define XMRIG_CRYPTONIGHT_LITE_SOFTAES_H
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern __m128i soft_aesenc(__m128i in, __m128i key);
|
||||
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
||||
|
||||
#include "crypto/soft_aes.h"
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
|
@ -234,4 +236,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint
|
|||
#endif
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_LITE_SOFTAES_H__ */
|
||||
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
|
||||
{
|
||||
mem_out[0] = EXTRACT64(tmp);
|
||||
|
||||
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
|
||||
uint64_t vh = EXTRACT64(tmp);
|
||||
|
||||
uint8_t x = vh >> 24;
|
||||
static const uint16_t table = 0x7531;
|
||||
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
|
||||
vh ^= ((table >> index) & 0x3) << 28;
|
||||
|
||||
mem_out[1] = vh;
|
||||
}
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_LITE_SOFTAES_H */
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,151 +24,287 @@
|
|||
*/
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <mm_malloc.h>
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
# include "xmrig.h"
|
||||
#endif
|
||||
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "cpu.h"
|
||||
#include "crypto/c_blake256.h"
|
||||
#include "crypto/c_groestl.h"
|
||||
#include "crypto/c_jh.h"
|
||||
#include "crypto/c_skein.h"
|
||||
#include "cryptonight_test.h"
|
||||
#include "cryptonight.h"
|
||||
#include "options.h"
|
||||
#include "persistent_memory.h"
|
||||
|
||||
|
||||
const static char test_input[152] = {
|
||||
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
|
||||
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
|
||||
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
|
||||
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
|
||||
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
|
||||
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
|
||||
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
|
||||
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
|
||||
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
|
||||
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
|
||||
};
|
||||
static cn_hash_fun asm_func_map[AV_MAX][VARIANT_MAX][ASM_MAX] = {};
|
||||
|
||||
|
||||
const static char test_output0[64] = {
|
||||
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
|
||||
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
|
||||
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
|
||||
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
|
||||
};
|
||||
void cryptonight_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av1_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av2_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av3_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_av4_v2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
|
||||
void cryptonight_r_av1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av2(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av3(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av4(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
|
||||
void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
const static char test_output1[64] = {
|
||||
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
|
||||
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
|
||||
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
||||
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
|
||||
};
|
||||
|
||||
void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx);
|
||||
void cryptonight_lite_av1_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av1_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av2_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av2_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av3_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av3_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av4_v0(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_lite_av4_v1(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
#endif
|
||||
|
||||
void (*cryptonight_hash_ctx)(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx) = NULL;
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
void cryptonight_single_hash_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_single_hash_asm_ryzen(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_single_hash_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_double_hash_asm(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
|
||||
void cryptonight_r_av1_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av1_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av2_asm_intel(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
void cryptonight_r_av2_asm_bulldozer(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
#endif
|
||||
|
||||
|
||||
static bool self_test() {
|
||||
if (cryptonight_hash_ctx == NULL) {
|
||||
static inline bool verify(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
|
||||
{
|
||||
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
|
||||
if (func == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
char output[64];
|
||||
func(test_input, 76, output, ctx);
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
|
||||
ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16);
|
||||
|
||||
cryptonight_hash_ctx(test_input, 76, output, ctx);
|
||||
|
||||
_mm_free(ctx->memory);
|
||||
_mm_free(ctx);
|
||||
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return memcmp(output, test_output1, (opt_double_hash ? 64 : 32)) == 0;
|
||||
}
|
||||
# endif
|
||||
|
||||
return memcmp(output, test_output0, (opt_double_hash ? 64 : 32)) == 0;
|
||||
return memcmp(output, referenceValue, opt_double_hash ? 64 : 32) == 0;
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
bool cryptonight_lite_init(int variant) {
|
||||
switch (variant) {
|
||||
case AEON_AV1_AESNI:
|
||||
cryptonight_hash_ctx = cryptonight_lite_av1_aesni;
|
||||
break;
|
||||
|
||||
case AEON_AV2_AESNI_DOUBLE:
|
||||
opt_double_hash = true;
|
||||
cryptonight_hash_ctx = cryptonight_lite_av2_aesni_double;
|
||||
break;
|
||||
|
||||
case AEON_AV3_SOFT_AES:
|
||||
cryptonight_hash_ctx = cryptonight_lite_av3_softaes;
|
||||
break;
|
||||
|
||||
case AEON_AV4_SOFT_AES_DOUBLE:
|
||||
opt_double_hash = true;
|
||||
cryptonight_hash_ctx = cryptonight_lite_av4_softaes_double;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
static inline bool verify2(enum Variant variant, uint8_t *output, struct cryptonight_ctx **ctx, const uint8_t *referenceValue)
|
||||
{
|
||||
cn_hash_fun func = cryptonight_hash_fn(opt_algo, opt_av, variant);
|
||||
if (func == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return self_test();
|
||||
if (opt_double_hash) {
|
||||
uint8_t input[128];
|
||||
|
||||
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
|
||||
const size_t size = cn_r_test_input[i].size;
|
||||
memcpy(input, cn_r_test_input[i].data, size);
|
||||
memcpy(input + size, cn_r_test_input[i].data, size);
|
||||
|
||||
ctx[0]->height = ctx[1]->height = cn_r_test_input[i].height;
|
||||
|
||||
func(input, size, output, ctx);
|
||||
|
||||
if (memcmp(output, referenceValue + i * 32, 32) != 0 || memcmp(output + 32, referenceValue + i * 32, 32) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) {
|
||||
ctx[0]->height = cn_r_test_input[i].height;
|
||||
|
||||
func(cn_r_test_input[i].data, cn_r_test_input[i].size, output, ctx);
|
||||
|
||||
if (memcmp(output, referenceValue + i * 32, 32) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool self_test() {
|
||||
struct cryptonight_ctx *ctx[2];
|
||||
uint8_t output[64];
|
||||
|
||||
const size_t count = opt_double_hash ? 2 : 1;
|
||||
const size_t size = opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE;
|
||||
bool result = false;
|
||||
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
ctx[i] = _mm_malloc(sizeof(struct cryptonight_ctx), 16);
|
||||
ctx[i]->memory = _mm_malloc(size, 16);
|
||||
|
||||
init_cn_r(ctx[i]);
|
||||
}
|
||||
|
||||
if (opt_algo == ALGO_CRYPTONIGHT) {
|
||||
result = verify(VARIANT_0, output, ctx, test_output_v0) &&
|
||||
verify(VARIANT_1, output, ctx, test_output_v1) &&
|
||||
verify(VARIANT_2, output, ctx, test_output_v2) &&
|
||||
verify2(VARIANT_4, output, ctx, test_output_r);
|
||||
}
|
||||
# ifndef XMRIG_NO_AEON
|
||||
else {
|
||||
result = verify(VARIANT_0, output, ctx, test_output_v0_lite) &&
|
||||
verify(VARIANT_1, output, ctx, test_output_v1_lite);
|
||||
}
|
||||
# endif
|
||||
|
||||
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
_mm_free(ctx[i]->memory);
|
||||
_mm_free(ctx[i]);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
cn_hash_fun cryptonight_hash_asm_fn(enum AlgoVariant av, enum Variant variant, enum Assembly assembly)
|
||||
{
|
||||
if (assembly == ASM_AUTO) {
|
||||
assembly = (enum Assembly) cpu_info.assembly;
|
||||
}
|
||||
|
||||
if (assembly == ASM_NONE) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return asm_func_map[av][variant][assembly];
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
bool cryptonight_init(int variant)
|
||||
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant)
|
||||
{
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return cryptonight_lite_init(variant);
|
||||
assert(av > AV_AUTO && av < AV_MAX);
|
||||
assert(variant > VARIANT_AUTO && variant < VARIANT_MAX);
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
if (algorithm == ALGO_CRYPTONIGHT) {
|
||||
cn_hash_fun fun = cryptonight_hash_asm_fn(av, variant, opt_assembly);
|
||||
if (fun) {
|
||||
return fun;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
switch (variant) {
|
||||
case XMR_AV1_AESNI:
|
||||
cryptonight_hash_ctx = cryptonight_av1_aesni;
|
||||
break;
|
||||
static const cn_hash_fun func_table[VARIANT_MAX * 4 * 2] = {
|
||||
cryptonight_av1_v0,
|
||||
cryptonight_av2_v0,
|
||||
cryptonight_av3_v0,
|
||||
cryptonight_av4_v0,
|
||||
cryptonight_av1_v1,
|
||||
cryptonight_av2_v1,
|
||||
cryptonight_av3_v1,
|
||||
cryptonight_av4_v1,
|
||||
cryptonight_av1_v2,
|
||||
cryptonight_av2_v2,
|
||||
cryptonight_av3_v2,
|
||||
cryptonight_av4_v2,
|
||||
|
||||
case XMR_AV2_AESNI_DOUBLE:
|
||||
opt_double_hash = true;
|
||||
cryptonight_hash_ctx = cryptonight_av2_aesni_double;
|
||||
break;
|
||||
cryptonight_r_av1,
|
||||
cryptonight_r_av2,
|
||||
cryptonight_r_av3,
|
||||
cryptonight_r_av4,
|
||||
|
||||
case XMR_AV3_SOFT_AES:
|
||||
cryptonight_hash_ctx = cryptonight_av3_softaes;
|
||||
break;
|
||||
# ifndef XMRIG_NO_AEON
|
||||
cryptonight_lite_av1_v0,
|
||||
cryptonight_lite_av2_v0,
|
||||
cryptonight_lite_av3_v0,
|
||||
cryptonight_lite_av4_v0,
|
||||
cryptonight_lite_av1_v1,
|
||||
cryptonight_lite_av2_v1,
|
||||
cryptonight_lite_av3_v1,
|
||||
cryptonight_lite_av4_v1,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
# else
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
# endif
|
||||
};
|
||||
|
||||
case XMR_AV4_SOFT_AES_DOUBLE:
|
||||
opt_double_hash = true;
|
||||
cryptonight_hash_ctx = cryptonight_av4_softaes_double;
|
||||
break;
|
||||
# ifndef NDEBUG
|
||||
const size_t index = VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
cn_hash_fun func = func_table[index];
|
||||
|
||||
assert(index < sizeof(func_table) / sizeof(func_table[0]));
|
||||
assert(func != NULL);
|
||||
|
||||
return func;
|
||||
# else
|
||||
return func_table[VARIANT_MAX * 4 * algorithm + 4 * variant + av - 1];
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
bool cryptonight_init(int av)
|
||||
{
|
||||
opt_double_hash = av == AV_DOUBLE || av == AV_DOUBLE_SOFT;
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
asm_func_map[AV_SINGLE][VARIANT_2][ASM_INTEL] = cryptonight_single_hash_asm_intel;
|
||||
asm_func_map[AV_SINGLE][VARIANT_2][ASM_RYZEN] = cryptonight_single_hash_asm_intel;
|
||||
asm_func_map[AV_SINGLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_single_hash_asm_bulldozer;
|
||||
|
||||
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_INTEL] = cryptonight_double_hash_asm;
|
||||
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_RYZEN] = cryptonight_double_hash_asm;
|
||||
asm_func_map[AV_DOUBLE][VARIANT_2][ASM_BULLDOZER] = cryptonight_double_hash_asm;
|
||||
|
||||
asm_func_map[AV_SINGLE][VARIANT_4][ASM_INTEL] = cryptonight_r_av1_asm_intel;
|
||||
asm_func_map[AV_SINGLE][VARIANT_4][ASM_RYZEN] = cryptonight_r_av1_asm_intel;
|
||||
asm_func_map[AV_SINGLE][VARIANT_4][ASM_BULLDOZER] = cryptonight_r_av1_asm_bulldozer;
|
||||
|
||||
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_INTEL] = cryptonight_r_av2_asm_intel;
|
||||
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_RYZEN] = cryptonight_r_av2_asm_intel;
|
||||
asm_func_map[AV_DOUBLE][VARIANT_4][ASM_BULLDOZER] = cryptonight_r_av2_asm_bulldozer;
|
||||
# endif
|
||||
|
||||
return self_test();
|
||||
}
|
||||
|
@ -195,12 +333,36 @@ static inline void do_skein_hash(const void* input, size_t len, char* output) {
|
|||
void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
|
||||
|
||||
|
||||
static inline enum Variant cryptonight_variant(uint8_t version)
|
||||
{
|
||||
if (opt_variant != VARIANT_AUTO) {
|
||||
return opt_variant;
|
||||
}
|
||||
|
||||
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return VARIANT_1;
|
||||
}
|
||||
|
||||
if (version >= 10) {
|
||||
return VARIANT_4;
|
||||
}
|
||||
|
||||
if (version >= 8) {
|
||||
return VARIANT_2;
|
||||
}
|
||||
|
||||
return version == 7 ? VARIANT_1 : VARIANT_0;
|
||||
}
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) {
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
|
||||
uint32_t *nonceptr = (uint32_t*) (((char*) blob) + 39);
|
||||
enum Variant variant = cryptonight_variant(blob[0]);
|
||||
|
||||
do {
|
||||
cryptonight_hash_ctx(blob, blob_size, hash, ctx);
|
||||
cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx);
|
||||
|
||||
(*hashes_done)++;
|
||||
|
||||
if (unlikely(hash[7] < target)) {
|
||||
|
@ -214,13 +376,14 @@ int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, si
|
|||
}
|
||||
|
||||
|
||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx) {
|
||||
int rc = 0;
|
||||
uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
|
||||
uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
|
||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx **restrict ctx) {
|
||||
int rc = 0;
|
||||
uint32_t *nonceptr0 = (uint32_t*) (((char*) blob) + 39);
|
||||
uint32_t *nonceptr1 = (uint32_t*) (((char*) blob) + 39 + blob_size);
|
||||
enum Variant variant = cryptonight_variant(blob[0]);
|
||||
|
||||
do {
|
||||
cryptonight_hash_ctx(blob, blob_size, hash, ctx);
|
||||
cryptonight_hash_fn(opt_algo, opt_av, variant)(blob, blob_size, (uint8_t *) hash, ctx);
|
||||
(*hashes_done) += 2;
|
||||
|
||||
if (unlikely(hash[7] < target)) {
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,27 +23,59 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_H__
|
||||
#define __CRYPTONIGHT_H__
|
||||
#ifndef XMRIG_CRYPTONIGHT_H
|
||||
#define XMRIG_CRYPTONIGHT_H
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
#include "options.h"
|
||||
|
||||
|
||||
#define MEMORY 2097152 /* 2 MiB */
|
||||
#define MEMORY_LITE 1048576 /* 1 MiB */
|
||||
|
||||
|
||||
#if defined _MSC_VER || defined XMRIG_ARM
|
||||
#define ABI_ATTRIBUTE
|
||||
#else
|
||||
#define ABI_ATTRIBUTE __attribute__((ms_abi))
|
||||
#endif
|
||||
|
||||
|
||||
struct cryptonight_ctx;
|
||||
typedef void(*cn_mainloop_fun_ms_abi)(struct cryptonight_ctx*) ABI_ATTRIBUTE;
|
||||
typedef void(*cn_mainloop_double_fun_ms_abi)(struct cryptonight_ctx*, struct cryptonight_ctx*) ABI_ATTRIBUTE;
|
||||
|
||||
|
||||
struct cryptonight_ctx {
|
||||
uint8_t state0[200] __attribute__((aligned(16)));
|
||||
uint8_t state1[200] __attribute__((aligned(16)));
|
||||
uint8_t* memory __attribute__((aligned(16)));
|
||||
uint8_t state[224] __attribute__((aligned(16)));
|
||||
uint8_t *memory __attribute__((aligned(16)));
|
||||
|
||||
uint8_t unused[40];
|
||||
const uint32_t *saes_table;
|
||||
|
||||
cn_mainloop_fun_ms_abi generated_code;
|
||||
cn_mainloop_double_fun_ms_abi generated_code_double;
|
||||
uint64_t generated_code_height;
|
||||
uint64_t generated_code_double_height;
|
||||
uint64_t height;
|
||||
};
|
||||
|
||||
|
||||
typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, struct cryptonight_ctx **ctx);
|
||||
|
||||
|
||||
extern void (* const extra_hashes[4])(const void *, size_t, char *);
|
||||
|
||||
bool cryptonight_init(int variant);
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint32_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx);
|
||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *restrict blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *restrict hashes_done, struct cryptonight_ctx *restrict ctx);
|
||||
cn_hash_fun cryptonight_hash_fn(enum Algo algorithm, enum AlgoVariant av, enum Variant variant);
|
||||
|
||||
#endif /* __CRYPTONIGHT_H__ */
|
||||
bool cryptonight_init(int av);
|
||||
int scanhash_cryptonight(int thr_id, uint32_t *hash, uint8_t *blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *hashes_done, struct cryptonight_ctx **ctx);
|
||||
int scanhash_cryptonight_double(int thr_id, uint32_t *hash, uint8_t *blob, size_t blob_size, uint32_t target, uint32_t max_nonce, unsigned long *hashes_done, struct cryptonight_ctx **ctx);
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_H */
|
||||
|
|
|
@ -22,10 +22,12 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_AESNI_H__
|
||||
#define __CRYPTONIGHT_AESNI_H__
|
||||
#ifndef XMRIG_CRYPTONIGHT_AESNI_H
|
||||
#define XMRIG_CRYPTONIGHT_AESNI_H
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#define aes_genkey_sub(imm8) \
|
||||
|
@ -253,4 +255,20 @@ static inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint
|
|||
#endif
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_AESNI_H__ */
|
||||
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
|
||||
{
|
||||
mem_out[0] = EXTRACT64(tmp);
|
||||
|
||||
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
|
||||
uint64_t vh = EXTRACT64(tmp);
|
||||
|
||||
uint8_t x = vh >> 24;
|
||||
static const uint16_t table = 0x7531;
|
||||
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
|
||||
vh ^= ((table >> index) & 0x3) << 28;
|
||||
|
||||
mem_out[1] = vh;
|
||||
}
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_AESNI_H */
|
||||
|
|
261
algo/cryptonight/cryptonight_av1.c
Normal file
261
algo/cryptonight/cryptonight_av1.c
Normal file
|
@ -0,0 +1,261 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "cryptonight_monero.h"
|
||||
|
||||
|
||||
void cryptonight_av1_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av1_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av1_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
||||
cx = _mm_aesenc_si128(cx, ax0);
|
||||
|
||||
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx);
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, hi, lo);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
bx1 = bx0;
|
||||
bx0 = cx;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
extern void cnv2_mainloop_ivybridge_asm(struct cryptonight_ctx *ctx);
|
||||
extern void cnv2_mainloop_ryzen_asm(struct cryptonight_ctx *ctx);
|
||||
extern void cnv2_mainloop_bulldozer_asm(struct cryptonight_ctx *ctx);
|
||||
extern void cnv2_double_mainloop_sandybridge_asm(struct cryptonight_ctx* ctx0, struct cryptonight_ctx* ctx1);
|
||||
|
||||
|
||||
void cryptonight_single_hash_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
cnv2_mainloop_ivybridge_asm(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_single_hash_asm_ryzen(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
cnv2_mainloop_ryzen_asm(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_single_hash_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
cnv2_mainloop_bulldozer_asm(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_double_hash_asm(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
||||
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx[0], ctx[1]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
||||
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
keccakf((uint64_t*) ctx[1]->state, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
#endif
|
|
@ -1,77 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_av1_aesni(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
}
|
304
algo/cryptonight/cryptonight_av2.c
Normal file
304
algo/cryptonight/cryptonight_av2.c
Normal file
|
@ -0,0 +1,304 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "cryptonight_monero.h"
|
||||
|
||||
|
||||
void cryptonight_av2_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av2_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av2_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_INIT(1);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||
|
||||
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
VARIANT2_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11);
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx0);
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, hi, lo);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(1, cl, cx1);
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, hi, lo);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_av2_aesni_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
uint64_t* h1 = (uint64_t*) ctx->state1;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
|
||||
}
|
193
algo/cryptonight/cryptonight_av3.c
Normal file
193
algo/cryptonight/cryptonight_av3.c
Normal file
|
@ -0,0 +1,193 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_monero.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
|
||||
|
||||
void cryptonight_av3_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av3_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av3_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
||||
cx = soft_aesenc(cx, ax0);
|
||||
|
||||
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx);
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, hi, lo);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
bx1 = bx0;
|
||||
bx0 = cx;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_av3_softaes(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx->state0, (__m128i*) ctx->memory);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx;
|
||||
cx = _mm_load_si128((__m128i *)&l0[idx0 & 0x1FFFF0]);
|
||||
cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
|
||||
|
||||
_mm_store_si128((__m128i *)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
idx0 = EXTRACT64(cx);
|
||||
bx0 = cx;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*)&l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx->memory, (__m128i*) ctx->state0);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
}
|
304
algo/cryptonight/cryptonight_av4.c
Normal file
304
algo/cryptonight/cryptonight_av4.c
Normal file
|
@ -0,0 +1,304 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2016-2018 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_monero.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
|
||||
|
||||
void cryptonight_av4_v0(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av4_v1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
cryptonight_monero_tweak((uint64_t*)&l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
cryptonight_monero_tweak((uint64_t*)&l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0 ^ tweak1_2_0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1 ^ tweak1_2_1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_av4_v2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_INIT(1);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
|
||||
cx0 = soft_aesenc(cx0, ax0);
|
||||
cx1 = soft_aesenc(cx1, ax1);
|
||||
|
||||
VARIANT2_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
VARIANT2_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11);
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(0, cl, cx0);
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, hi, lo);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT2_INTEGER_MATH(1, cl, cx1);
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
VARIANT2_SHUFFLE2(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, hi, lo);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
#include "crypto/c_keccak.h"
|
||||
|
||||
|
||||
void cryptonight_av4_softaes_double(const void *restrict input, size_t size, void *restrict output, struct cryptonight_ctx *restrict ctx)
|
||||
{
|
||||
keccak((const uint8_t *) input, size, ctx->state0, 200);
|
||||
keccak((const uint8_t *) input + size, size, ctx->state1, 200);
|
||||
|
||||
const uint8_t* l0 = ctx->memory;
|
||||
const uint8_t* l1 = ctx->memory + MEMORY;
|
||||
uint64_t* h0 = (uint64_t*) ctx->state0;
|
||||
uint64_t* h1 = (uint64_t*) ctx->state1;
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
|
||||
uint64_t idx0 = h0[0] ^ h0[4];
|
||||
uint64_t idx1 = h1[0] ^ h1[4];
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
|
||||
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
|
||||
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx0));
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx1, cx1));
|
||||
|
||||
idx0 = EXTRACT64(cx0);
|
||||
idx1 = EXTRACT64(cx1);
|
||||
|
||||
bx0 = cx0;
|
||||
bx1 = cx1;
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*) &l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
ah0 ^= ch;
|
||||
al0 ^= cl;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*) &l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
ah1 ^= ch;
|
||||
al1 ^= cl;
|
||||
idx1 = al1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, output);
|
||||
extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, (char*) output + 32);
|
||||
}
|
150
algo/cryptonight/cryptonight_monero.h
Normal file
150
algo/cryptonight/cryptonight_monero.h
Normal file
|
@ -0,0 +1,150 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_MONERO_H
|
||||
#define XMRIG_CRYPTONIGHT_MONERO_H
|
||||
|
||||
|
||||
#include <fenv.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
||||
static inline __m128i int_sqrt_v2(const uint64_t n0)
|
||||
{
|
||||
__m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(n0 >> 12), _mm_set_epi64x(0, 1023ULL << 52)));
|
||||
x = _mm_sqrt_sd(_mm_setzero_pd(), x);
|
||||
uint64_t r = (uint64_t)(_mm_cvtsi128_si64(_mm_castpd_si128(x)));
|
||||
|
||||
const uint64_t s = r >> 20;
|
||||
r >>= 19;
|
||||
|
||||
uint64_t x2 = (s - (1022ULL << 32)) * (r - s - (1022ULL << 32) + 1);
|
||||
# if (defined(_MSC_VER) || __GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ > 1)) && (defined(__x86_64__) || defined(_M_AMD64))
|
||||
_addcarry_u64(_subborrow_u64(0, x2, n0, (unsigned long long int*)&x2), r, 0, (unsigned long long int*)&r);
|
||||
# else
|
||||
if (x2 < n0) ++r;
|
||||
# endif
|
||||
|
||||
return _mm_cvtsi64_si128(r);
|
||||
}
|
||||
|
||||
|
||||
# define VARIANT1_INIT(part) \
|
||||
uint64_t tweak1_2_##part = (*(const uint64_t*)(input + 35 + part * size) ^ \
|
||||
*((const uint64_t*)(ctx[part]->state) + 24)); \
|
||||
|
||||
# define VARIANT2_INIT(part) \
|
||||
__m128i division_result_xmm_##part = _mm_cvtsi64_si128(h##part[12]); \
|
||||
__m128i sqrt_result_xmm_##part = _mm_cvtsi64_si128(h##part[13]);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# define VARIANT2_SET_ROUNDING_MODE() { _control87(RC_DOWN, MCW_RC); }
|
||||
#else
|
||||
# define VARIANT2_SET_ROUNDING_MODE() { fesetround(FE_DOWNWARD); }
|
||||
#endif
|
||||
|
||||
# define VARIANT2_INTEGER_MATH(part, cl, cx) \
|
||||
{ \
|
||||
const uint64_t sqrt_result = (uint64_t)(_mm_cvtsi128_si64(sqrt_result_xmm_##part)); \
|
||||
const uint64_t cx_0 = _mm_cvtsi128_si64(cx); \
|
||||
cl ^= (uint64_t)(_mm_cvtsi128_si64(division_result_xmm_##part)) ^ (sqrt_result << 32); \
|
||||
const uint32_t d = (uint32_t)(cx_0 + (sqrt_result << 1)) | 0x80000001UL; \
|
||||
const uint64_t cx_1 = _mm_cvtsi128_si64(_mm_srli_si128(cx, 8)); \
|
||||
const uint64_t division_result = (uint32_t)(cx_1 / d) + ((cx_1 % d) << 32); \
|
||||
division_result_xmm_##part = _mm_cvtsi64_si128((int64_t)(division_result)); \
|
||||
sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \
|
||||
}
|
||||
|
||||
# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1) \
|
||||
{ \
|
||||
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
|
||||
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||
}
|
||||
|
||||
# define VARIANT4_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \
|
||||
{ \
|
||||
const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \
|
||||
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||
_c = _mm_xor_si128(_mm_xor_si128(_c, chunk3), _mm_xor_si128(chunk1, chunk2)); \
|
||||
}
|
||||
|
||||
# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \
|
||||
{ \
|
||||
const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \
|
||||
const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \
|
||||
hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \
|
||||
lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \
|
||||
const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \
|
||||
_mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \
|
||||
}
|
||||
|
||||
|
||||
#ifndef NOINLINE
|
||||
#ifdef __GNUC__
|
||||
#define NOINLINE __attribute__ ((noinline))
|
||||
#elif _MSC_VER
|
||||
#define NOINLINE __declspec(noinline)
|
||||
#else
|
||||
#define NOINLINE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "variant4_random_math.h"
|
||||
|
||||
#define VARIANT4_RANDOM_MATH_INIT(part) \
|
||||
uint32_t r##part[9]; \
|
||||
struct V4_Instruction code##part[256]; \
|
||||
{ \
|
||||
r##part[0] = (uint32_t)(h##part[12]); \
|
||||
r##part[1] = (uint32_t)(h##part[12] >> 32); \
|
||||
r##part[2] = (uint32_t)(h##part[13]); \
|
||||
r##part[3] = (uint32_t)(h##part[13] >> 32); \
|
||||
} \
|
||||
v4_random_math_init(code##part, ctx[part]->height);
|
||||
|
||||
#define VARIANT4_RANDOM_MATH(part, al, ah, cl, bx0, bx1) \
|
||||
{ \
|
||||
cl ^= (r##part[0] + r##part[1]) | ((uint64_t)(r##part[2] + r##part[3]) << 32); \
|
||||
r##part[4] = (uint32_t)(al); \
|
||||
r##part[5] = (uint32_t)(ah); \
|
||||
r##part[6] = (uint32_t)(_mm_cvtsi128_si32(bx0)); \
|
||||
r##part[7] = (uint32_t)(_mm_cvtsi128_si32(bx1)); \
|
||||
r##part[8] = (uint32_t)(_mm_cvtsi128_si32(_mm_srli_si128(bx1, 8))); \
|
||||
v4_random_math(code##part, r##part); \
|
||||
}
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
|
143
algo/cryptonight/cryptonight_r_av1.c
Normal file
143
algo/cryptonight/cryptonight_r_av1.c
Normal file
|
@ -0,0 +1,143 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "cryptonight_monero.h"
|
||||
|
||||
|
||||
void cryptonight_r_av1(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
||||
cx = _mm_aesenc_si128(cx, ax0);
|
||||
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
bx1 = bx0;
|
||||
bx0 = cx;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||
|
||||
|
||||
void cryptonight_r_av1_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
|
||||
v4_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_INTEL);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
ctx[0]->generated_code(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_r_av1_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
|
||||
v4_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_BULLDOZER);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
ctx[0]->generated_code(ctx[0]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t*) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
#endif
|
202
algo/cryptonight/cryptonight_r_av2.c
Normal file
202
algo/cryptonight/cryptonight_r_av2.c
Normal file
|
@ -0,0 +1,202 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_aesni.h"
|
||||
#include "cryptonight_monero.h"
|
||||
|
||||
|
||||
void cryptonight_r_av2(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_INIT(1);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
VARIANT4_RANDOM_MATH_INIT(1);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
||||
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||
|
||||
|
||||
void cryptonight_r_av2_asm_intel(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
v4_compile_code_double(code, code_size, (void*)(ctx[0]->generated_code_double), ASM_INTEL);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
||||
|
||||
ctx[0]->generated_code_double(ctx[0], ctx[1]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
||||
|
||||
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||
keccakf((uint64_t *) ctx[1]->state, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
|
||||
|
||||
void cryptonight_r_av2_asm_bulldozer(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
v4_compile_code_double(code, code_size, (void*)(ctx[0]->generated_code_double), ASM_BULLDOZER);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
cn_explode_scratchpad((__m128i*) ctx[1]->state, (__m128i*) ctx[1]->memory);
|
||||
|
||||
ctx[0]->generated_code_double(ctx[0], ctx[1]);
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
cn_implode_scratchpad((__m128i*) ctx[1]->memory, (__m128i*) ctx[1]->state);
|
||||
|
||||
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||
keccakf((uint64_t *) ctx[1]->state, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
#endif
|
112
algo/cryptonight/cryptonight_r_av3.c
Normal file
112
algo/cryptonight/cryptonight_r_av3.c
Normal file
|
@ -0,0 +1,112 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_monero.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_ASM
|
||||
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM);
|
||||
#endif
|
||||
|
||||
|
||||
void cryptonight_r_av3(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
if (ctx[0]->generated_code_height != ctx[0]->height) {
|
||||
struct V4_Instruction code[256];
|
||||
const int code_size = v4_random_math_init(code, ctx[0]->height);
|
||||
|
||||
v4_soft_aes_compile_code(code, code_size, (void*)(ctx[0]->generated_code), ASM_NONE);
|
||||
ctx[0]->generated_code_height = ctx[0]->height;
|
||||
}
|
||||
|
||||
ctx[0]->saes_table = (const uint32_t*)saes_table;
|
||||
ctx[0]->generated_code(ctx[0]);
|
||||
# else
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx1 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
|
||||
cx = soft_aesenc(cx, ax0);
|
||||
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx0, cx));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx0, bx1);
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx0, bx1, cx);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
bx1 = bx0;
|
||||
bx0 = cx;
|
||||
}
|
||||
# endif
|
||||
|
||||
cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
|
||||
keccakf((uint64_t *) ctx[0]->state, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
143
algo/cryptonight/cryptonight_r_av4.c
Normal file
143
algo/cryptonight/cryptonight_r_av4.c
Normal file
|
@ -0,0 +1,143 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "crypto/c_keccak.h"
|
||||
#include "cryptonight.h"
|
||||
#include "cryptonight_monero.h"
|
||||
#include "cryptonight_softaes.h"
|
||||
|
||||
|
||||
void cryptonight_r_av4(const uint8_t *restrict input, size_t size, uint8_t *restrict output, struct cryptonight_ctx **restrict ctx)
|
||||
{
|
||||
keccak(input, size, ctx[0]->state, 200);
|
||||
keccak(input + size, size, ctx[1]->state, 200);
|
||||
|
||||
const uint8_t* l0 = ctx[0]->memory;
|
||||
const uint8_t* l1 = ctx[1]->memory;
|
||||
uint64_t* h0 = (uint64_t*) ctx[0]->state;
|
||||
uint64_t* h1 = (uint64_t*) ctx[1]->state;
|
||||
|
||||
VARIANT2_INIT(0);
|
||||
VARIANT2_INIT(1);
|
||||
VARIANT2_SET_ROUNDING_MODE();
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
VARIANT4_RANDOM_MATH_INIT(1);
|
||||
|
||||
cn_explode_scratchpad((__m128i*) h0, (__m128i*) l0);
|
||||
cn_explode_scratchpad((__m128i*) h1, (__m128i*) l1);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx01 = _mm_set_epi64x(h0[9] ^ h0[11], h0[8] ^ h0[10]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx11 = _mm_set_epi64x(h1[9] ^ h1[11], h1[8] ^ h1[10]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
|
||||
for (size_t i = 0; __builtin_expect(i < 0x80000, 1); i++) {
|
||||
__m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & 0x1FFFF0]);
|
||||
__m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & 0x1FFFF0]);
|
||||
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
|
||||
cx0 = soft_aesenc(cx0, ax0);
|
||||
cx1 = soft_aesenc(cx1, ax1);
|
||||
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||
_mm_store_si128((__m128i *) &l0[idx0 & 0x1FFFF0], _mm_xor_si128(bx00, cx0));
|
||||
|
||||
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||
_mm_store_si128((__m128i *) &l1[idx1 & 0x1FFFF0], _mm_xor_si128(bx10, cx1));
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
cl = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l0[idx0 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(0, al0, ah0, cl, bx00, bx01);
|
||||
al0 ^= r0[2] | ((uint64_t)(r0[3]) << 32);
|
||||
ah0 ^= r0[0] | ((uint64_t)(r0[1]) << 32);
|
||||
|
||||
lo = _umul128(idx0, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l0, idx0 & 0x1FFFF0, ax0, bx00, bx01, cx0);
|
||||
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & 0x1FFFF0])[1] = ah0;
|
||||
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
|
||||
cl = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[0];
|
||||
ch = ((uint64_t*) &l1[idx1 & 0x1FFFF0])[1];
|
||||
|
||||
VARIANT4_RANDOM_MATH(1, al1, ah1, cl, bx10, bx11);
|
||||
al1 ^= r1[2] | ((uint64_t)(r1[3]) << 32);
|
||||
ah1 ^= r1[0] | ((uint64_t)(r1[1]) << 32);
|
||||
|
||||
lo = _umul128(idx1, cl, &hi);
|
||||
VARIANT4_SHUFFLE(l1, idx1 & 0x1FFFF0, ax1, bx10, bx11, cx1);
|
||||
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & 0x1FFFF0])[1] = ah1;
|
||||
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
|
||||
bx01 = bx00;
|
||||
bx11 = bx10;
|
||||
|
||||
bx00 = cx0;
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad((__m128i*) l0, (__m128i*) h0);
|
||||
cn_implode_scratchpad((__m128i*) l1, (__m128i*) h1);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
|
@ -4,9 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017 fireice-uk <https://github.com/fireice-uk>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -22,13 +22,15 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CRYPTONIGHT_SOFTAES_H__
|
||||
#define __CRYPTONIGHT_SOFTAES_H__
|
||||
#ifndef XMRIG_CRYPTONIGHT_SOFTAES_H
|
||||
#define XMRIG_CRYPTONIGHT_SOFTAES_H
|
||||
|
||||
|
||||
#include <x86intrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern __m128i soft_aesenc(__m128i in, __m128i key);
|
||||
extern __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon);
|
||||
|
||||
#include "crypto/soft_aes.h"
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
|
@ -234,4 +236,20 @@ inline uint64_t _umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *p
|
|||
#endif
|
||||
|
||||
|
||||
#endif /* __CRYPTONIGHT_SOFTAES_H__ */
|
||||
static inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp)
|
||||
{
|
||||
mem_out[0] = EXTRACT64(tmp);
|
||||
|
||||
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
|
||||
uint64_t vh = EXTRACT64(tmp);
|
||||
|
||||
uint8_t x = vh >> 24;
|
||||
static const uint16_t table = 0x7531;
|
||||
const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1;
|
||||
vh ^= ((table >> index) & 0x3) << 28;
|
||||
|
||||
mem_out[1] = vh;
|
||||
}
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_SOFTAES_H */
|
||||
|
|
129
algo/cryptonight/cryptonight_test.h
Normal file
129
algo/cryptonight/cryptonight_test.h
Normal file
|
@ -0,0 +1,129 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_TEST_H
|
||||
#define XMRIG_CRYPTONIGHT_TEST_H
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
const static uint8_t test_input[152] = {
|
||||
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
|
||||
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
|
||||
0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
|
||||
0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
|
||||
0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01,
|
||||
0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
|
||||
0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
|
||||
0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
|
||||
0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
|
||||
0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02
|
||||
};
|
||||
|
||||
|
||||
const static uint8_t test_output_v0[64] = {
|
||||
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
|
||||
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
|
||||
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
|
||||
0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F
|
||||
};
|
||||
|
||||
|
||||
// Cryptonight variant 1 (Monero v7)
|
||||
const static uint8_t test_output_v1[64] = {
|
||||
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
|
||||
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
|
||||
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
|
||||
0x37, 0x6C, 0x6E, 0xC1, 0x90, 0x50, 0x1A, 0xA8, 0x4B, 0x04, 0xA4, 0xB4, 0xCF, 0x1E, 0xE1, 0x22
|
||||
};
|
||||
|
||||
|
||||
// Cryptonight variant 2 (Monero v8)
|
||||
const static uint8_t test_output_v2[64] = {
|
||||
0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
|
||||
0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
|
||||
0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
|
||||
0x1D, 0x40, 0x43, 0x88, 0x0B, 0x02, 0xDF, 0xA1, 0xBB, 0x3B, 0xE4, 0x98, 0xB5, 0x0E, 0x75, 0x78
|
||||
};
|
||||
|
||||
|
||||
struct cn_r_test_input_data
|
||||
{
|
||||
uint64_t height;
|
||||
size_t size;
|
||||
uint8_t data[64];
|
||||
};
|
||||
|
||||
|
||||
const static struct cn_r_test_input_data cn_r_test_input[] = {
|
||||
{ 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } },
|
||||
{ 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } },
|
||||
{ 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } },
|
||||
{ 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 0x69, 0x61, 0x6d, 0x2c } },
|
||||
{ 1806264, 46, { 0x71, 0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } },
|
||||
{ 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } },
|
||||
{ 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } },
|
||||
{ 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } },
|
||||
{ 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } },
|
||||
{ 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } },
|
||||
};
|
||||
|
||||
|
||||
// "cn/r"
|
||||
const static uint8_t test_output_r[] = {
|
||||
0xf7, 0x59, 0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc,
|
||||
0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66,
|
||||
0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab,
|
||||
0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb,
|
||||
0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02,
|
||||
0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef,
|
||||
0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd,
|
||||
0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4,
|
||||
0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3,
|
||||
0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e,
|
||||
};
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
const static uint8_t test_output_v0_lite[64] = {
|
||||
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
||||
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
|
||||
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
|
||||
0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD
|
||||
};
|
||||
|
||||
|
||||
// AEON v7
|
||||
const static uint8_t test_output_v1_lite[64] = {
|
||||
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
|
||||
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
|
||||
0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
|
||||
0x2D, 0xFA, 0xB5, 0x73, 0xB8, 0x2E, 0xC5, 0x2F, 0x15, 0x2B, 0x7F, 0xF9, 0x8E, 0x79, 0x44, 0x6F
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_TEST_H */
|
449
algo/cryptonight/variant4_random_math.h
Normal file
449
algo/cryptonight/variant4_random_math.h
Normal file
|
@ -0,0 +1,449 @@
|
|||
#ifndef VARIANT4_RANDOM_MATH_H
|
||||
#define VARIANT4_RANDOM_MATH_H
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
#include "crypto/c_blake256.h"
|
||||
|
||||
|
||||
enum V4_Settings
|
||||
{
|
||||
// Generate code with minimal theoretical latency = 45 cycles, which is equivalent to 15 multiplications
|
||||
TOTAL_LATENCY = 15 * 3,
|
||||
|
||||
// Always generate at least 60 instructions
|
||||
NUM_INSTRUCTIONS_MIN = 60,
|
||||
|
||||
// Never generate more than 70 instructions (final RET instruction doesn't count here)
|
||||
NUM_INSTRUCTIONS_MAX = 70,
|
||||
|
||||
// Available ALUs for MUL
|
||||
// Modern CPUs typically have only 1 ALU which can do multiplications
|
||||
ALU_COUNT_MUL = 1,
|
||||
|
||||
// Total available ALUs
|
||||
// Modern CPUs have 4 ALUs, but we use only 3 because random math executes together with other main loop code
|
||||
ALU_COUNT = 3,
|
||||
};
|
||||
|
||||
enum V4_InstructionList
|
||||
{
|
||||
MUL, // a*b
|
||||
ADD, // a+b + C, C is an unsigned 32-bit constant
|
||||
SUB, // a-b
|
||||
ROR, // rotate right "a" by "b & 31" bits
|
||||
ROL, // rotate left "a" by "b & 31" bits
|
||||
XOR, // a^b
|
||||
RET, // finish execution
|
||||
V4_INSTRUCTION_COUNT = RET,
|
||||
};
|
||||
|
||||
// V4_InstructionDefinition is used to generate code from random data
|
||||
// Every random sequence of bytes is a valid code
|
||||
//
|
||||
// There are 9 registers in total:
|
||||
// - 4 variable registers
|
||||
// - 5 constant registers initialized from loop variables
|
||||
// This is why dst_index is 2 bits
|
||||
enum V4_InstructionDefinition
|
||||
{
|
||||
V4_OPCODE_BITS = 3,
|
||||
V4_DST_INDEX_BITS = 2,
|
||||
V4_SRC_INDEX_BITS = 3,
|
||||
};
|
||||
|
||||
struct V4_Instruction
|
||||
{
|
||||
uint8_t opcode;
|
||||
uint8_t dst_index;
|
||||
uint8_t src_index;
|
||||
uint32_t C;
|
||||
};
|
||||
|
||||
#ifndef FORCEINLINE
|
||||
#ifdef __GNUC__
|
||||
#define FORCEINLINE __attribute__((always_inline)) inline
|
||||
#elif _MSC_VER
|
||||
#define FORCEINLINE __forceinline
|
||||
#else
|
||||
#define FORCEINLINE inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef UNREACHABLE_CODE
|
||||
#ifdef __GNUC__
|
||||
#define UNREACHABLE_CODE __builtin_unreachable()
|
||||
#elif _MSC_VER
|
||||
#define UNREACHABLE_CODE __assume(false)
|
||||
#else
|
||||
#define UNREACHABLE_CODE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define SWAP32LE(x) x
|
||||
#define SWAP64LE(x) x
|
||||
#define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length))
|
||||
|
||||
// Random math interpreter's loop is fully unrolled and inlined to achieve 100% branch prediction on CPU:
|
||||
// every switch-case will point to the same destination on every iteration of Cryptonight main loop
|
||||
//
|
||||
// This is about as fast as it can get without using low-level machine code generation
|
||||
//template<typename v4_reg>
|
||||
static void v4_random_math(const struct V4_Instruction* code, uint32_t r[9])
|
||||
{
|
||||
#define REG_BITS 32
|
||||
#define V4_EXEC(i) \
|
||||
{ \
|
||||
const struct V4_Instruction* op = code + i; \
|
||||
const uint32_t src = r[op->src_index]; \
|
||||
uint32_t *dst = r + op->dst_index; \
|
||||
switch (op->opcode) \
|
||||
{ \
|
||||
case MUL: \
|
||||
*dst *= src; \
|
||||
break; \
|
||||
case ADD: \
|
||||
*dst += src + op->C; \
|
||||
break; \
|
||||
case SUB: \
|
||||
*dst -= src; \
|
||||
break; \
|
||||
case ROR: \
|
||||
{ \
|
||||
const uint32_t shift = src % REG_BITS; \
|
||||
*dst = (*dst >> shift) | (*dst << ((REG_BITS - shift) % REG_BITS)); \
|
||||
} \
|
||||
break; \
|
||||
case ROL: \
|
||||
{ \
|
||||
const uint32_t shift = src % REG_BITS; \
|
||||
*dst = (*dst << shift) | (*dst >> ((REG_BITS - shift) % REG_BITS)); \
|
||||
} \
|
||||
break; \
|
||||
case XOR: \
|
||||
*dst ^= src; \
|
||||
break; \
|
||||
case RET: \
|
||||
return; \
|
||||
default: \
|
||||
UNREACHABLE_CODE; \
|
||||
break; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define V4_EXEC_10(j) \
|
||||
V4_EXEC(j + 0) \
|
||||
V4_EXEC(j + 1) \
|
||||
V4_EXEC(j + 2) \
|
||||
V4_EXEC(j + 3) \
|
||||
V4_EXEC(j + 4) \
|
||||
V4_EXEC(j + 5) \
|
||||
V4_EXEC(j + 6) \
|
||||
V4_EXEC(j + 7) \
|
||||
V4_EXEC(j + 8) \
|
||||
V4_EXEC(j + 9)
|
||||
|
||||
// Generated program can have 60 + a few more (usually 2-3) instructions to achieve required latency
|
||||
// I've checked all block heights < 10,000,000 and here is the distribution of program sizes:
|
||||
//
|
||||
// 60 27960
|
||||
// 61 105054
|
||||
// 62 2452759
|
||||
// 63 5115997
|
||||
// 64 1022269
|
||||
// 65 1109635
|
||||
// 66 153145
|
||||
// 67 8550
|
||||
// 68 4529
|
||||
// 69 102
|
||||
|
||||
// Unroll 70 instructions here
|
||||
V4_EXEC_10(0); // instructions 0-9
|
||||
V4_EXEC_10(10); // instructions 10-19
|
||||
V4_EXEC_10(20); // instructions 20-29
|
||||
V4_EXEC_10(30); // instructions 30-39
|
||||
V4_EXEC_10(40); // instructions 40-49
|
||||
V4_EXEC_10(50); // instructions 50-59
|
||||
V4_EXEC_10(60); // instructions 60-69
|
||||
|
||||
#undef V4_EXEC_10
|
||||
#undef V4_EXEC
|
||||
#undef REG_BITS
|
||||
}
|
||||
|
||||
// If we don't have enough data available, generate more
|
||||
static FORCEINLINE void check_data(size_t* data_index, const size_t bytes_needed, int8_t* data, const size_t data_size)
|
||||
{
|
||||
if (*data_index + bytes_needed > data_size)
|
||||
{
|
||||
hash_extra_blake(data, data_size, (char*) data);
|
||||
*data_index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Generates as many random math operations as possible with given latency and ALU restrictions
|
||||
// "code" array must have space for NUM_INSTRUCTIONS_MAX+1 instructions
|
||||
static int v4_random_math_init(struct V4_Instruction* code, const uint64_t height)
|
||||
{
|
||||
// MUL is 3 cycles, 3-way addition and rotations are 2 cycles, SUB/XOR are 1 cycle
|
||||
// These latencies match real-life instruction latencies for Intel CPUs starting from Sandy Bridge and up to Skylake/Coffee lake
|
||||
//
|
||||
// AMD Ryzen has the same latencies except 1-cycle ROR/ROL, so it'll be a bit faster than Intel Sandy Bridge and newer processors
|
||||
// Surprisingly, Intel Nehalem also has 1-cycle ROR/ROL, so it'll also be faster than Intel Sandy Bridge and newer processors
|
||||
// AMD Bulldozer has 4 cycles latency for MUL (slower than Intel) and 1 cycle for ROR/ROL (faster than Intel), so average performance will be the same
|
||||
// Source: https://www.agner.org/optimize/instruction_tables.pdf
|
||||
const int op_latency[V4_INSTRUCTION_COUNT] = { 3, 2, 1, 2, 2, 1 };
|
||||
|
||||
// Instruction latencies for theoretical ASIC implementation
|
||||
const int asic_op_latency[V4_INSTRUCTION_COUNT] = { 3, 1, 1, 1, 1, 1 };
|
||||
|
||||
// Available ALUs for each instruction
|
||||
const int op_ALUs[V4_INSTRUCTION_COUNT] = { ALU_COUNT_MUL, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT, ALU_COUNT };
|
||||
|
||||
int8_t data[32];
|
||||
memset(data, 0, sizeof(data));
|
||||
uint64_t tmp = SWAP64LE(height);
|
||||
memcpy(data, &tmp, sizeof(uint64_t));
|
||||
data[20] = -38;
|
||||
|
||||
// Set data_index past the last byte in data
|
||||
// to trigger full data update with blake hash
|
||||
// before we start using it
|
||||
size_t data_index = sizeof(data);
|
||||
|
||||
int code_size;
|
||||
|
||||
// There is a small chance (1.8%) that register R8 won't be used in the generated program
|
||||
// So we keep track of it and try again if it's not used
|
||||
bool r8_used;
|
||||
do {
|
||||
int latency[9];
|
||||
int asic_latency[9];
|
||||
|
||||
// Tracks previous instruction and value of the source operand for registers R0-R3 throughout code execution
|
||||
// byte 0: current value of the destination register
|
||||
// byte 1: instruction opcode
|
||||
// byte 2: current value of the source register
|
||||
//
|
||||
// Registers R4-R8 are constant and are treated as having the same value because when we do
|
||||
// the same operation twice with two constant source registers, it can be optimized into a single operation
|
||||
uint32_t inst_data[9] = { 0, 1, 2, 3, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF };
|
||||
|
||||
bool alu_busy[TOTAL_LATENCY + 1][ALU_COUNT];
|
||||
bool is_rotation[V4_INSTRUCTION_COUNT];
|
||||
bool rotated[4];
|
||||
int rotate_count = 0;
|
||||
|
||||
memset(latency, 0, sizeof(latency));
|
||||
memset(asic_latency, 0, sizeof(asic_latency));
|
||||
memset(alu_busy, 0, sizeof(alu_busy));
|
||||
memset(is_rotation, 0, sizeof(is_rotation));
|
||||
memset(rotated, 0, sizeof(rotated));
|
||||
is_rotation[ROR] = true;
|
||||
is_rotation[ROL] = true;
|
||||
|
||||
int num_retries = 0;
|
||||
code_size = 0;
|
||||
|
||||
int total_iterations = 0;
|
||||
r8_used = false;
|
||||
|
||||
// Generate random code to achieve minimal required latency for our abstract CPU
|
||||
// Try to get this latency for all 4 registers
|
||||
while (((latency[0] < TOTAL_LATENCY) || (latency[1] < TOTAL_LATENCY) || (latency[2] < TOTAL_LATENCY) || (latency[3] < TOTAL_LATENCY)) && (num_retries < 64))
|
||||
{
|
||||
// Fail-safe to guarantee loop termination
|
||||
++total_iterations;
|
||||
if (total_iterations > 256)
|
||||
break;
|
||||
|
||||
check_data(&data_index, 1, data, sizeof(data));
|
||||
|
||||
const uint8_t c = ((uint8_t*)data)[data_index++];
|
||||
|
||||
// MUL = opcodes 0-2
|
||||
// ADD = opcode 3
|
||||
// SUB = opcode 4
|
||||
// ROR/ROL = opcode 5, shift direction is selected randomly
|
||||
// XOR = opcodes 6-7
|
||||
uint8_t opcode = c & ((1 << V4_OPCODE_BITS) - 1);
|
||||
if (opcode == 5)
|
||||
{
|
||||
check_data(&data_index, 1, data, sizeof(data));
|
||||
opcode = (data[data_index++] >= 0) ? ROR : ROL;
|
||||
}
|
||||
else if (opcode >= 6)
|
||||
{
|
||||
opcode = XOR;
|
||||
}
|
||||
else
|
||||
{
|
||||
opcode = (opcode <= 2) ? MUL : (opcode - 2);
|
||||
}
|
||||
|
||||
uint8_t dst_index = (c >> V4_OPCODE_BITS) & ((1 << V4_DST_INDEX_BITS) - 1);
|
||||
uint8_t src_index = (c >> (V4_OPCODE_BITS + V4_DST_INDEX_BITS)) & ((1 << V4_SRC_INDEX_BITS) - 1);
|
||||
|
||||
const int a = dst_index;
|
||||
int b = src_index;
|
||||
|
||||
// Don't do ADD/SUB/XOR with the same register
|
||||
if (((opcode == ADD) || (opcode == SUB) || (opcode == XOR)) && (a == b))
|
||||
{
|
||||
// a is always < 4, so we don't need to check bounds here
|
||||
b = 8;
|
||||
src_index = b;
|
||||
}
|
||||
|
||||
// Don't do rotation with the same destination twice because it's equal to a single rotation
|
||||
if (is_rotation[opcode] && rotated[a])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Don't do the same instruction (except MUL) with the same source value twice because all other cases can be optimized:
|
||||
// 2xADD(a, b, C) = ADD(a, b*2, C1+C2), same for SUB and rotations
|
||||
// 2xXOR(a, b) = NOP
|
||||
if ((opcode != MUL) && ((inst_data[a] & 0xFFFF00) == (opcode << 8) + ((inst_data[b] & 255) << 16)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Find which ALU is available (and when) for this instruction
|
||||
int next_latency = (latency[a] > latency[b]) ? latency[a] : latency[b];
|
||||
int alu_index = -1;
|
||||
while (next_latency < TOTAL_LATENCY)
|
||||
{
|
||||
for (int i = op_ALUs[opcode] - 1; i >= 0; --i)
|
||||
{
|
||||
if (!alu_busy[next_latency][i])
|
||||
{
|
||||
// ADD is implemented as two 1-cycle instructions on a real CPU, so do an additional availability check
|
||||
if ((opcode == ADD) && alu_busy[next_latency + 1][i])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Rotation can only start when previous rotation is finished, so do an additional availability check
|
||||
if (is_rotation[opcode] && (next_latency < rotate_count * op_latency[opcode]))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
alu_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (alu_index >= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
++next_latency;
|
||||
}
|
||||
|
||||
// Don't generate instructions that leave some register unchanged for more than 7 cycles
|
||||
if (next_latency > latency[a] + 7)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
next_latency += op_latency[opcode];
|
||||
|
||||
if (next_latency <= TOTAL_LATENCY)
|
||||
{
|
||||
if (is_rotation[opcode])
|
||||
{
|
||||
++rotate_count;
|
||||
}
|
||||
|
||||
// Mark ALU as busy only for the first cycle when it starts executing the instruction because ALUs are fully pipelined
|
||||
alu_busy[next_latency - op_latency[opcode]][alu_index] = true;
|
||||
latency[a] = next_latency;
|
||||
|
||||
// ASIC is supposed to have enough ALUs to run as many independent instructions per cycle as possible, so latency calculation for ASIC is simple
|
||||
asic_latency[a] = ((asic_latency[a] > asic_latency[b]) ? asic_latency[a] : asic_latency[b]) + asic_op_latency[opcode];
|
||||
|
||||
rotated[a] = is_rotation[opcode];
|
||||
|
||||
inst_data[a] = code_size + (opcode << 8) + ((inst_data[b] & 255) << 16);
|
||||
|
||||
code[code_size].opcode = opcode;
|
||||
code[code_size].dst_index = dst_index;
|
||||
code[code_size].src_index = src_index;
|
||||
code[code_size].C = 0;
|
||||
|
||||
if (src_index == 8)
|
||||
{
|
||||
r8_used = true;
|
||||
}
|
||||
|
||||
if (opcode == ADD)
|
||||
{
|
||||
// ADD instruction is implemented as two 1-cycle instructions on a real CPU, so mark ALU as busy for the next cycle too
|
||||
alu_busy[next_latency - op_latency[opcode] + 1][alu_index] = true;
|
||||
|
||||
// ADD instruction requires 4 more random bytes for 32-bit constant "C" in "a = a + b + C"
|
||||
check_data(&data_index, sizeof(uint32_t), data, sizeof(data));
|
||||
uint32_t t;
|
||||
memcpy(&t, data + data_index, sizeof(uint32_t));
|
||||
code[code_size].C = SWAP32LE(t);
|
||||
data_index += sizeof(uint32_t);
|
||||
}
|
||||
|
||||
++code_size;
|
||||
if (code_size >= NUM_INSTRUCTIONS_MIN)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
++num_retries;
|
||||
}
|
||||
}
|
||||
|
||||
// ASIC has more execution resources and can extract as much parallelism from the code as possible
|
||||
// We need to add a few more MUL and ROR instructions to achieve minimal required latency for ASIC
|
||||
// Get this latency for at least 1 of the 4 registers
|
||||
const int prev_code_size = code_size;
|
||||
while ((code_size < NUM_INSTRUCTIONS_MAX) && (asic_latency[0] < TOTAL_LATENCY) && (asic_latency[1] < TOTAL_LATENCY) && (asic_latency[2] < TOTAL_LATENCY) && (asic_latency[3] < TOTAL_LATENCY))
|
||||
{
|
||||
int min_idx = 0;
|
||||
int max_idx = 0;
|
||||
for (int i = 1; i < 4; ++i)
|
||||
{
|
||||
if (asic_latency[i] < asic_latency[min_idx]) min_idx = i;
|
||||
if (asic_latency[i] > asic_latency[max_idx]) max_idx = i;
|
||||
}
|
||||
|
||||
const uint8_t pattern[3] = { ROR, MUL, MUL };
|
||||
const uint8_t opcode = pattern[(code_size - prev_code_size) % 3];
|
||||
latency[min_idx] = latency[max_idx] + op_latency[opcode];
|
||||
asic_latency[min_idx] = asic_latency[max_idx] + asic_op_latency[opcode];
|
||||
|
||||
code[code_size].opcode = opcode;
|
||||
code[code_size].dst_index = min_idx;
|
||||
code[code_size].src_index = max_idx;
|
||||
code[code_size].C = 0;
|
||||
++code_size;
|
||||
}
|
||||
|
||||
// There is ~98.15% chance that loop condition is false, so this loop will execute only 1 iteration most of the time
|
||||
// It never does more than 4 iterations for all block heights < 10,000,000
|
||||
} while (!r8_used || (code_size < NUM_INSTRUCTIONS_MIN) || (code_size > NUM_INSTRUCTIONS_MAX));
|
||||
|
||||
// It's guaranteed that NUM_INSTRUCTIONS_MIN <= code_size <= NUM_INSTRUCTIONS_MAX here
|
||||
// Add final instruction to stop the interpreter
|
||||
code[code_size].opcode = RET;
|
||||
code[code_size].dst_index = 0;
|
||||
code[code_size].src_index = 0;
|
||||
code[code_size].C = 0;
|
||||
|
||||
return code_size;
|
||||
}
|
||||
|
||||
#endif
|
27
cmake/asm.cmake
Normal file
27
cmake/asm.cmake
Normal file
|
@ -0,0 +1,27 @@
|
|||
if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(XMRIG_ASM_LIBRARY "xmrig-asm")
|
||||
|
||||
enable_language(ASM)
|
||||
|
||||
if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||
set(XMRIG_ASM_FILES
|
||||
"crypto/asm/win64/cn_main_loop.S"
|
||||
"crypto/asm/CryptonightR_template.S"
|
||||
)
|
||||
else()
|
||||
set(XMRIG_ASM_FILES
|
||||
"crypto/asm/cn_main_loop.S"
|
||||
"crypto/asm/CryptonightR_template.S"
|
||||
)
|
||||
endif()
|
||||
|
||||
set_property(SOURCE ${XMRIG_ASM_FILES} PROPERTY C)
|
||||
|
||||
add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES})
|
||||
set(XMRIG_ASM_SOURCES "crypto/CryptonightR_gen.c")
|
||||
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
|
||||
else()
|
||||
set(XMRIG_ASM_SOURCES "")
|
||||
set(XMRIG_ASM_LIBRARY "")
|
||||
add_definitions(/DXMRIG_NO_ASM)
|
||||
endif()
|
15
cpu.c
15
cpu.c
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -31,6 +32,7 @@
|
|||
#endif
|
||||
|
||||
#include "cpu.h"
|
||||
#include "options.h"
|
||||
|
||||
|
||||
#ifndef BUILD_TEST
|
||||
|
@ -68,6 +70,15 @@ void cpu_init_common() {
|
|||
if (data.flags[CPU_FEATURE_BMI2]) {
|
||||
cpu_info.flags |= CPU_FLAG_BMI2;
|
||||
}
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
if (data.vendor == VENDOR_AMD) {
|
||||
cpu_info.assembly = (data.ext_family >= 23) ? ASM_RYZEN : ASM_BULLDOZER;
|
||||
}
|
||||
else if (data.vendor == VENDOR_INTEL) {
|
||||
cpu_info.assembly = ASM_INTEL;
|
||||
}
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
12
cpu.h
12
cpu.h
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,8 +22,8 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __CPU_H__
|
||||
#define __CPU_H__
|
||||
#ifndef XMRIG_CPU_H
|
||||
#define XMRIG_CPU_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
|
@ -34,6 +35,7 @@ struct cpu_info {
|
|||
int l2_cache;
|
||||
int l3_cache;
|
||||
char brand[64];
|
||||
int assembly;
|
||||
};
|
||||
|
||||
extern struct cpu_info cpu_info;
|
||||
|
@ -50,4 +52,4 @@ void cpu_init();
|
|||
int get_optimal_threads_count(int algo, bool double_hash, int max_cpu_usage);
|
||||
int affine_to_cpu_mask(int id, unsigned long mask);
|
||||
|
||||
#endif /* __CPU_H__ */
|
||||
#endif /* XMRIG_CPU_H */
|
||||
|
|
28
cpu_stub.c
28
cpu_stub.c
|
@ -24,7 +24,11 @@
|
|||
#include <cpuid.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#include "cpu.h"
|
||||
#include "options.h"
|
||||
|
||||
|
||||
#define VENDOR_ID (0)
|
||||
|
@ -53,7 +57,7 @@ static inline void cpuid(int level, int output[4]) {
|
|||
|
||||
|
||||
static void cpu_brand_string(char* s) {
|
||||
int cpu_info[4] = { 0 };
|
||||
int32_t cpu_info[4] = { 0 };
|
||||
cpuid(VENDOR_ID, cpu_info);
|
||||
|
||||
if (cpu_info[EAX_Reg] >= 4) {
|
||||
|
@ -68,7 +72,7 @@ static void cpu_brand_string(char* s) {
|
|||
|
||||
static bool has_aes_ni()
|
||||
{
|
||||
int cpu_info[4] = { 0 };
|
||||
int32_t cpu_info[4] = { 0 };
|
||||
cpuid(PROCESSOR_INFO, cpu_info);
|
||||
|
||||
return cpu_info[ECX_Reg] & bit_AES;
|
||||
|
@ -76,7 +80,7 @@ static bool has_aes_ni()
|
|||
|
||||
|
||||
static bool has_bmi2() {
|
||||
int cpu_info[4] = { 0 };
|
||||
int32_t cpu_info[4] = { 0 };
|
||||
cpuid(EXTENDED_FEATURES, cpu_info);
|
||||
|
||||
return cpu_info[EBX_Reg] & bit_BMI2;
|
||||
|
@ -93,6 +97,24 @@ void cpu_init_common() {
|
|||
|
||||
if (has_aes_ni()) {
|
||||
cpu_info.flags |= CPU_FLAG_AES;
|
||||
|
||||
# ifndef XMRIG_NO_ASM
|
||||
char vendor[13] = { 0 };
|
||||
int32_t data[4] = { 0 };
|
||||
|
||||
cpuid(0, data);
|
||||
|
||||
memcpy(vendor + 0, &data[1], 4);
|
||||
memcpy(vendor + 4, &data[3], 4);
|
||||
memcpy(vendor + 8, &data[2], 4);
|
||||
|
||||
if (memcmp(vendor, "GenuineIntel", 12) == 0) {
|
||||
cpu_info.assembly = ASM_INTEL;
|
||||
}
|
||||
else if (memcmp(vendor, "AuthenticAMD", 12) == 0) {
|
||||
cpu_info.assembly = ASM_RYZEN;
|
||||
}
|
||||
# endif
|
||||
}
|
||||
|
||||
if (has_bmi2()) {
|
||||
|
|
146
crypto/CryptonightR_gen.c
Normal file
146
crypto/CryptonightR_gen.c
Normal file
|
@ -0,0 +1,146 @@
|
|||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "algo/cryptonight/cryptonight_monero.h"
|
||||
#include "crypto/asm/CryptonightR_template.h"
|
||||
#include "persistent_memory.h"
|
||||
|
||||
|
||||
static inline void add_code(uint8_t **p, void (*p1)(), void (*p2)())
|
||||
{
|
||||
const ptrdiff_t size = (const uint8_t*)(p2) - (const uint8_t*)(p1);
|
||||
if (size > 0) {
|
||||
memcpy(*p, (const void *) p1, size);
|
||||
*p += size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void add_random_math(uint8_t **p, const struct V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, enum Assembly ASM)
|
||||
{
|
||||
uint32_t prev_rot_src = (uint32_t)(-1);
|
||||
|
||||
for (int i = 0;; ++i) {
|
||||
const struct V4_Instruction inst = code[i];
|
||||
if (inst.opcode == RET) {
|
||||
break;
|
||||
}
|
||||
|
||||
uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2);
|
||||
uint8_t dst_index = inst.dst_index;
|
||||
uint8_t src_index = inst.src_index;
|
||||
|
||||
const uint32_t a = inst.dst_index;
|
||||
const uint32_t b = inst.src_index;
|
||||
const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS));
|
||||
|
||||
switch (inst.opcode) {
|
||||
case ROR:
|
||||
case ROL:
|
||||
if (b != prev_rot_src) {
|
||||
prev_rot_src = b;
|
||||
add_code(p, instructions_mov[c], instructions_mov[c + 1]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (a == prev_rot_src) {
|
||||
prev_rot_src = (uint32_t)(-1);
|
||||
}
|
||||
|
||||
void_func begin = instructions[c];
|
||||
|
||||
if ((ASM = ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) {
|
||||
// AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
|
||||
// Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
|
||||
uint8_t* prefix = (uint8_t*) begin;
|
||||
|
||||
if (*prefix == 0x49) {
|
||||
**p = 0x41;
|
||||
*p += 1;
|
||||
}
|
||||
|
||||
begin = (void_func)(prefix + 1);
|
||||
}
|
||||
|
||||
add_code(p, begin, instructions[c + 1]);
|
||||
|
||||
if (inst.opcode == ADD) {
|
||||
*(uint32_t*)(*p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C;
|
||||
if (is_64_bit) {
|
||||
prev_rot_src = (uint32_t)(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||
{
|
||||
uint8_t* p0 = machine_code;
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(&p, CryptonightR_template_part1, CryptonightR_template_part2);
|
||||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(&p, CryptonightR_template_part2, CryptonightR_template_part3);
|
||||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0));
|
||||
add_code(&p, CryptonightR_template_part3, CryptonightR_template_end);
|
||||
|
||||
flush_instruction_cache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
|
||||
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||
{
|
||||
uint8_t* p0 = (uint8_t*) machine_code;
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(&p, CryptonightR_template_double_part1, CryptonightR_template_double_part2);
|
||||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(&p, CryptonightR_template_double_part2, CryptonightR_template_double_part3);
|
||||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(&p, CryptonightR_template_double_part3, CryptonightR_template_double_part4);
|
||||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0));
|
||||
add_code(&p, CryptonightR_template_double_part4, CryptonightR_template_double_end);
|
||||
|
||||
flush_instruction_cache(machine_code, p - p0);
|
||||
}
|
||||
|
||||
|
||||
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM)
|
||||
{
|
||||
uint8_t* p0 = machine_code;
|
||||
uint8_t* p = p0;
|
||||
|
||||
add_code(&p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
|
||||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM);
|
||||
add_code(&p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
|
||||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
|
||||
add_code(&p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
|
||||
|
||||
flush_instruction_cache(machine_code, p - p0);
|
||||
}
|
279
crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
279
crypto/asm/CryptonightR_soft_aes_template.inc
Normal file
|
@ -0,0 +1,279 @@
|
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part1):
|
||||
mov QWORD PTR [rsp+8], rcx
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 232
|
||||
|
||||
mov eax, [rcx+96]
|
||||
mov ebx, [rcx+100]
|
||||
mov esi, [rcx+104]
|
||||
mov edx, [rcx+108]
|
||||
mov [rsp+144], eax
|
||||
mov [rsp+148], ebx
|
||||
mov [rsp+152], esi
|
||||
mov [rsp+156], edx
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r10, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+40]
|
||||
xor r9, QWORD PTR [rcx+8]
|
||||
movq xmm4, rax
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov r11, QWORD PTR [rcx+224]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r10+72]
|
||||
mov rax, QWORD PTR [r10+80]
|
||||
movq xmm0, rdx
|
||||
xor rax, QWORD PTR [r10+64]
|
||||
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+48], xmm8
|
||||
movaps XMMWORD PTR [rsp+64], xmm9
|
||||
movaps XMMWORD PTR [rsp+80], xmm10
|
||||
movaps XMMWORD PTR [rsp+96], xmm11
|
||||
movaps XMMWORD PTR [rsp+112], xmm12
|
||||
movaps XMMWORD PTR [rsp+128], xmm13
|
||||
|
||||
movq xmm5, rax
|
||||
|
||||
mov rax, r8
|
||||
punpcklqdq xmm4, xmm0
|
||||
and eax, 2097136
|
||||
movq xmm10, QWORD PTR [r10+96]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+104]
|
||||
xorps xmm9, xmm9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
movq xmm12, r11
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm13, rcx
|
||||
mov r12d, 524288
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_mainloop):
|
||||
movd xmm11, r12d
|
||||
mov r12, QWORD PTR [r10+272]
|
||||
lea r13, QWORD PTR [rax+r11]
|
||||
mov esi, DWORD PTR [r13]
|
||||
movq xmm0, r9
|
||||
mov r10d, DWORD PTR [r13+4]
|
||||
movq xmm7, r8
|
||||
mov ebp, DWORD PTR [r13+12]
|
||||
mov r14d, DWORD PTR [r13+8]
|
||||
mov rdx, QWORD PTR [rsp+328]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r15d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
mov edi, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov ebx, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, r14b
|
||||
shr r14d, 8
|
||||
mov eax, r14d
|
||||
shr eax, 8
|
||||
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||
add eax, 256
|
||||
movzx ecx, bpl
|
||||
shr ebp, 8
|
||||
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||
movzx ecx, sil
|
||||
shr esi, 8
|
||||
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||
add r12, 2048
|
||||
movzx ecx, r10b
|
||||
shr r10d, 8
|
||||
add r10d, 256
|
||||
mov r11d, DWORD PTR [r12+rax*4]
|
||||
xor r11d, DWORD PTR [r12+rcx*4]
|
||||
xor r11d, r9d
|
||||
movzx ecx, sil
|
||||
mov r10d, DWORD PTR [r12+r10*4]
|
||||
shr esi, 8
|
||||
add esi, 256
|
||||
xor r10d, DWORD PTR [r12+rcx*4]
|
||||
movzx ecx, bpl
|
||||
xor r10d, ebx
|
||||
shr ebp, 8
|
||||
movd xmm1, r11d
|
||||
add ebp, 256
|
||||
movq r11, xmm12
|
||||
mov r9d, DWORD PTR [r12+rcx*4]
|
||||
xor r9d, DWORD PTR [r12+rsi*4]
|
||||
mov eax, DWORD PTR [r12+rbp*4]
|
||||
xor r9d, edi
|
||||
movzx ecx, r14b
|
||||
movd xmm0, r10d
|
||||
movd xmm2, r9d
|
||||
xor eax, DWORD PTR [r12+rcx*4]
|
||||
mov rcx, rdx
|
||||
xor eax, r15d
|
||||
punpckldq xmm2, xmm1
|
||||
xor rcx, 16
|
||||
movd xmm6, eax
|
||||
mov rax, rdx
|
||||
punpckldq xmm6, xmm0
|
||||
xor rax, 32
|
||||
punpckldq xmm6, xmm2
|
||||
xor rdx, 48
|
||||
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm7
|
||||
paddq xmm2, xmm4
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||
pxor xmm6, xmm1
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movq rcx, xmm13
|
||||
paddq xmm1, xmm7
|
||||
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||
movq rdi, xmm6
|
||||
mov r10, rdi
|
||||
and r10d, 2097136
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [r13], xmm0
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov ebp, [rsp+152]
|
||||
add ebx, [rsp+148]
|
||||
add ebp, [rsp+156]
|
||||
shl rbp, 32
|
||||
or rbx, rbp
|
||||
|
||||
xor rbx, QWORD PTR [r10+r11]
|
||||
lea r14, QWORD PTR [r10+r11]
|
||||
mov rbp, QWORD PTR [r14+8]
|
||||
|
||||
mov [rsp+160], rbx
|
||||
mov [rsp+168], rdi
|
||||
mov [rsp+176], rbp
|
||||
mov [rsp+184], r10
|
||||
mov r10, rsp
|
||||
|
||||
mov ebx, [rsp+144]
|
||||
mov esi, [rsp+148]
|
||||
mov edi, [rsp+152]
|
||||
mov ebp, [rsp+156]
|
||||
|
||||
movd esp, xmm7
|
||||
movaps xmm0, xmm7
|
||||
psrldq xmm0, 8
|
||||
movd r15d, xmm0
|
||||
movd eax, xmm4
|
||||
movd edx, xmm5
|
||||
movaps xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movd r9d, xmm0
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part2):
|
||||
mov rsp, r10
|
||||
mov [rsp+144], ebx
|
||||
mov [rsp+148], esi
|
||||
mov [rsp+152], edi
|
||||
mov [rsp+156], ebp
|
||||
|
||||
mov edi, edi
|
||||
shl rbp, 32
|
||||
or rbp, rdi
|
||||
xor r8, rbp
|
||||
|
||||
mov ebx, ebx
|
||||
shl rsi, 32
|
||||
or rsi, rbx
|
||||
xor QWORD PTR [rsp+320], rsi
|
||||
|
||||
mov rbx, [rsp+160]
|
||||
mov rdi, [rsp+168]
|
||||
mov rbp, [rsp+176]
|
||||
mov r10, [rsp+184]
|
||||
|
||||
mov r9, r10
|
||||
xor r9, 16
|
||||
mov rcx, r10
|
||||
xor rcx, 32
|
||||
xor r10, 48
|
||||
mov rax, rbx
|
||||
mul rdi
|
||||
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||
pxor xmm6, xmm2
|
||||
pxor xmm6, xmm1
|
||||
paddq xmm1, xmm7
|
||||
add r8, rdx
|
||||
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm6, xmm0
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqa xmm5, xmm4
|
||||
mov r9, QWORD PTR [rsp+320]
|
||||
movdqa xmm4, xmm6
|
||||
add r9, rax
|
||||
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||
mov r10, QWORD PTR [rsp+304]
|
||||
movd r12d, xmm11
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rbx
|
||||
mov rax, r8
|
||||
mov QWORD PTR [r14+8], r9
|
||||
and eax, 2097136
|
||||
xor r9, rbp
|
||||
mov QWORD PTR [rsp+320], r9
|
||||
mov QWORD PTR [rsp+328], rax
|
||||
sub r12d, 1
|
||||
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_part3):
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||
|
||||
add rsp, 232
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
FN_PREFIX(CryptonightR_soft_aes_template_end):
|
1593
crypto/asm/CryptonightR_template.S
Normal file
1593
crypto/asm/CryptonightR_template.S
Normal file
File diff suppressed because it is too large
Load diff
1060
crypto/asm/CryptonightR_template.h
Normal file
1060
crypto/asm/CryptonightR_template.h
Normal file
File diff suppressed because it is too large
Load diff
531
crypto/asm/CryptonightR_template.inc
Normal file
531
crypto/asm/CryptonightR_template.inc
Normal file
|
@ -0,0 +1,531 @@
|
|||
PUBLIC FN_PREFIX(CryptonightR_template_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_end)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part1)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part2)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part3)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_part4)
|
||||
PUBLIC FN_PREFIX(CryptonightR_template_double_end)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_part1):
|
||||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push r10
|
||||
push r11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
push rdi
|
||||
sub rsp, 64
|
||||
mov r12, rcx
|
||||
mov r8, QWORD PTR [r12+32]
|
||||
mov rdx, r12
|
||||
xor r8, QWORD PTR [r12]
|
||||
mov r15, QWORD PTR [r12+40]
|
||||
mov r9, r8
|
||||
xor r15, QWORD PTR [r12+8]
|
||||
mov r11, QWORD PTR [r12+224]
|
||||
mov r12, QWORD PTR [r12+56]
|
||||
xor r12, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm0, r12
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
movaps XMMWORD PTR [rsp], xmm9
|
||||
mov r12, QWORD PTR [rdx+88]
|
||||
xor r12, QWORD PTR [rdx+72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
and r9d, 2097136
|
||||
movq xmm0, r12
|
||||
movq xmm7, rax
|
||||
punpcklqdq xmm7, xmm0
|
||||
mov r10d, r9d
|
||||
movq xmm9, rsp
|
||||
mov rsp, r8
|
||||
mov r8d, 524288
|
||||
|
||||
mov ebx, [rdx+96]
|
||||
mov esi, [rdx+100]
|
||||
mov edi, [rdx+104]
|
||||
mov ebp, [rdx+108]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_mainloop):
|
||||
movdqa xmm5, XMMWORD PTR [r9+r11]
|
||||
movq xmm0, r15
|
||||
movq xmm4, rsp
|
||||
punpcklqdq xmm4, xmm0
|
||||
lea rdx, QWORD PTR [r9+r11]
|
||||
|
||||
aesenc xmm5, xmm4
|
||||
|
||||
mov r13d, r9d
|
||||
mov eax, r9d
|
||||
xor r9d, 48
|
||||
xor r13d, 16
|
||||
xor eax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r9+r11]
|
||||
movaps xmm3, xmm0
|
||||
movdqu xmm2, XMMWORD PTR [r13+r11]
|
||||
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm5, xmm1
|
||||
pxor xmm5, xmm0
|
||||
|
||||
movq r12, xmm5
|
||||
movd r10d, xmm5
|
||||
and r10d, 2097136
|
||||
|
||||
paddq xmm3, xmm7
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm1, xmm4
|
||||
movdqu XMMWORD PTR [r13+r11], xmm3
|
||||
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||
movdqu XMMWORD PTR [r9+r11], xmm1
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
|
||||
lea r13d, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or r13, rdx
|
||||
|
||||
movd eax, xmm6
|
||||
movd edx, xmm7
|
||||
pextrd r9d, xmm7, 2
|
||||
|
||||
xor r13, QWORD PTR [r10+r11]
|
||||
mov r14, QWORD PTR [r10+r11+8]
|
||||
|
||||
FN_PREFIX(CryptonightR_template_part2):
|
||||
lea rcx, [r10+r11]
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor rsp, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov rax, r13
|
||||
mul r12
|
||||
add r15, rax
|
||||
add rsp, rdx
|
||||
|
||||
mov r9d, r10d
|
||||
mov r12d, r10d
|
||||
xor r9d, 16
|
||||
xor r12d, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [r12+r11]
|
||||
movaps xmm3, xmm1
|
||||
movdqa xmm2, XMMWORD PTR [r9+r11]
|
||||
movdqa xmm0, XMMWORD PTR [r10+r11]
|
||||
pxor xmm1, xmm2
|
||||
pxor xmm5, xmm0
|
||||
pxor xmm5, xmm1
|
||||
paddq xmm3, xmm4
|
||||
paddq xmm2, xmm6
|
||||
paddq xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||
movdqu XMMWORD PTR [r12+r11], xmm2
|
||||
movdqu XMMWORD PTR [r10+r11], xmm3
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
mov QWORD PTR [rcx], rsp
|
||||
xor rsp, r13
|
||||
mov r9d, esp
|
||||
mov QWORD PTR [rcx+8], r15
|
||||
and r9d, 2097136
|
||||
xor r15, r14
|
||||
movdqa xmm6, xmm5
|
||||
dec r8d
|
||||
jnz FN_PREFIX(CryptonightR_template_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_template_part3):
|
||||
movq rsp, xmm9
|
||||
|
||||
mov rbx, QWORD PTR [rsp+136]
|
||||
mov rbp, QWORD PTR [rsp+144]
|
||||
mov rsi, QWORD PTR [rsp+152]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
movaps xmm8, XMMWORD PTR [rsp+16]
|
||||
movaps xmm9, XMMWORD PTR [rsp]
|
||||
add rsp, 64
|
||||
pop rdi
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop r11
|
||||
pop r10
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightR_template_end):
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_double_part1):
|
||||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 320
|
||||
mov r14, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r14, QWORD PTR [rcx]
|
||||
mov r12, QWORD PTR [rcx+40]
|
||||
mov ebx, r14d
|
||||
mov rsi, QWORD PTR [rcx+224]
|
||||
and ebx, 2097136
|
||||
xor r12, QWORD PTR [rcx+8]
|
||||
mov rcx, QWORD PTR [rcx+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov r15, QWORD PTR [rdx+32]
|
||||
xor r15, QWORD PTR [rdx]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
mov r13, QWORD PTR [rdx+40]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
xor r13, QWORD PTR [rdx+8]
|
||||
movaps XMMWORD PTR [rsp+160], xmm6
|
||||
movaps XMMWORD PTR [rsp+176], xmm7
|
||||
movaps XMMWORD PTR [rsp+192], xmm8
|
||||
movaps XMMWORD PTR [rsp+208], xmm9
|
||||
movaps XMMWORD PTR [rsp+224], xmm10
|
||||
movaps XMMWORD PTR [rsp+240], xmm11
|
||||
movaps XMMWORD PTR [rsp+256], xmm12
|
||||
movaps XMMWORD PTR [rsp+272], xmm13
|
||||
movaps XMMWORD PTR [rsp+288], xmm14
|
||||
movaps XMMWORD PTR [rsp+304], xmm15
|
||||
movq xmm7, rax
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
|
||||
movaps xmm1, XMMWORD PTR [rdx+96]
|
||||
movaps xmm2, XMMWORD PTR [r8+96]
|
||||
movaps XMMWORD PTR [rsp], xmm1
|
||||
movaps XMMWORD PTR [rsp+16], xmm2
|
||||
|
||||
mov r8d, r15d
|
||||
punpcklqdq xmm7, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movq xmm9, rax
|
||||
mov QWORD PTR [rsp+128], rsi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
punpcklqdq xmm9, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [rdx+88]
|
||||
xor rcx, QWORD PTR [rdx+72]
|
||||
movq xmm8, rax
|
||||
mov QWORD PTR [rsp+136], rdi
|
||||
mov rax, QWORD PTR [rdx+80]
|
||||
xor rax, QWORD PTR [rdx+64]
|
||||
punpcklqdq xmm8, xmm0
|
||||
and r8d, 2097136
|
||||
movq xmm0, rcx
|
||||
mov r11d, 524288
|
||||
movq xmm10, rax
|
||||
punpcklqdq xmm10, xmm0
|
||||
|
||||
movq xmm14, QWORD PTR [rsp+128]
|
||||
movq xmm15, QWORD PTR [rsp+136]
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(CryptonightR_template_double_mainloop):
|
||||
movdqu xmm6, XMMWORD PTR [rbx+rsi]
|
||||
movq xmm0, r12
|
||||
mov ecx, ebx
|
||||
movq xmm3, r14
|
||||
punpcklqdq xmm3, xmm0
|
||||
xor ebx, 16
|
||||
aesenc xmm6, xmm3
|
||||
movq xmm4, r15
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
xor ebx, 48
|
||||
paddq xmm0, xmm7
|
||||
movdqu xmm1, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm0
|
||||
paddq xmm1, xmm3
|
||||
xor ebx, 16
|
||||
mov eax, ebx
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbx+rsi]
|
||||
pxor xmm6, xmm0
|
||||
movq rdx, xmm6
|
||||
movdqu XMMWORD PTR [rbx+rsi], xmm1
|
||||
paddq xmm0, xmm9
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm0
|
||||
mov esi, edx
|
||||
movdqu xmm5, XMMWORD PTR [r8+rdi]
|
||||
and esi, 2097136
|
||||
mov ecx, r8d
|
||||
movq xmm0, r13
|
||||
punpcklqdq xmm4, xmm0
|
||||
xor r8d, 16
|
||||
aesenc xmm5, xmm4
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
xor r8d, 48
|
||||
paddq xmm0, xmm8
|
||||
movdqu xmm1, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm1
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm0
|
||||
paddq xmm1, xmm4
|
||||
xor r8d, 16
|
||||
mov eax, r8d
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [r8+rdi]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [r8+rdi], xmm1
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rdi], xmm0
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rcx+rdi], xmm0
|
||||
movq rdi, xmm5
|
||||
movq rcx, xmm14
|
||||
mov ebp, edi
|
||||
mov r8, QWORD PTR [rcx+rsi]
|
||||
mov r10, QWORD PTR [rcx+rsi+8]
|
||||
lea r9, QWORD PTR [rcx+rsi]
|
||||
xor esi, 16
|
||||
|
||||
movq xmm0, rsp
|
||||
movq xmm1, rsi
|
||||
movq xmm2, rdi
|
||||
movq xmm11, rbp
|
||||
movq xmm12, r15
|
||||
movq xmm13, rdx
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp+16]
|
||||
mov esi, DWORD PTR [rsp+20]
|
||||
mov edi, DWORD PTR [rsp+24]
|
||||
mov ebp, DWORD PTR [rsp+28]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r8, rax
|
||||
|
||||
movd esp, xmm3
|
||||
pextrd r15d, xmm3, 2
|
||||
movd eax, xmm7
|
||||
movd edx, xmm9
|
||||
pextrd r9d, xmm9, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part2):
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r14, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r12, rax
|
||||
|
||||
movq rsp, xmm0
|
||||
mov DWORD PTR [rsp+16], ebx
|
||||
mov DWORD PTR [rsp+20], esi
|
||||
mov DWORD PTR [rsp+24], edi
|
||||
mov DWORD PTR [rsp+28], ebp
|
||||
|
||||
movq rsi, xmm1
|
||||
movq rdi, xmm2
|
||||
movq rbp, xmm11
|
||||
movq r15, xmm12
|
||||
movq rdx, xmm13
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rbx, r8
|
||||
mov rax, r8
|
||||
mul rdx
|
||||
and ebp, 2097136
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rsi]
|
||||
pxor xmm6, xmm1
|
||||
xor esi, 48
|
||||
paddq xmm1, xmm7
|
||||
movdqu xmm2, XMMWORD PTR [rsi+rcx]
|
||||
pxor xmm6, xmm2
|
||||
paddq xmm2, xmm3
|
||||
movdqu XMMWORD PTR [rsi+rcx], xmm1
|
||||
xor esi, 16
|
||||
mov eax, esi
|
||||
mov rsi, rcx
|
||||
movdqu xmm0, XMMWORD PTR [rax+rcx]
|
||||
pxor xmm6, xmm0
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm2
|
||||
paddq xmm0, xmm9
|
||||
add r12, r8
|
||||
xor rax, 32
|
||||
add r14, rdx
|
||||
movdqa xmm9, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
mov QWORD PTR [r9+8], r12
|
||||
xor r12, r10
|
||||
mov QWORD PTR [r9], r14
|
||||
movq rcx, xmm15
|
||||
xor r14, rbx
|
||||
mov r10d, ebp
|
||||
mov ebx, r14d
|
||||
xor ebp, 16
|
||||
and ebx, 2097136
|
||||
mov r8, QWORD PTR [r10+rcx]
|
||||
mov r9, QWORD PTR [r10+rcx+8]
|
||||
|
||||
movq xmm0, rsp
|
||||
movq xmm1, rbx
|
||||
movq xmm2, rsi
|
||||
movq xmm11, rdi
|
||||
movq xmm12, rbp
|
||||
movq xmm13, r15
|
||||
mov [rsp+104], rcx
|
||||
mov [rsp+112], r9
|
||||
|
||||
mov ebx, DWORD PTR [rsp]
|
||||
mov esi, DWORD PTR [rsp+4]
|
||||
mov edi, DWORD PTR [rsp+8]
|
||||
mov ebp, DWORD PTR [rsp+12]
|
||||
|
||||
lea eax, [ebx+esi]
|
||||
lea edx, [edi+ebp]
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
|
||||
xor r8, rax
|
||||
movq xmm3, r8
|
||||
|
||||
movd esp, xmm4
|
||||
pextrd r15d, xmm4, 2
|
||||
movd eax, xmm8
|
||||
movd edx, xmm10
|
||||
pextrd r9d, xmm10, 2
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part3):
|
||||
|
||||
movq r15, xmm13
|
||||
|
||||
mov eax, edi
|
||||
mov edx, ebp
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r15, rax
|
||||
|
||||
mov eax, ebx
|
||||
mov edx, esi
|
||||
shl rdx, 32
|
||||
or rax, rdx
|
||||
xor r13, rax
|
||||
|
||||
movq rsp, xmm0
|
||||
mov DWORD PTR [rsp], ebx
|
||||
mov DWORD PTR [rsp+4], esi
|
||||
mov DWORD PTR [rsp+8], edi
|
||||
mov DWORD PTR [rsp+12], ebp
|
||||
|
||||
movq rbx, xmm1
|
||||
movq rsi, xmm2
|
||||
movq rdi, xmm11
|
||||
movq rbp, xmm12
|
||||
mov rcx, [rsp+104]
|
||||
mov r9, [rsp+112]
|
||||
|
||||
mov rax, r8
|
||||
mul rdi
|
||||
mov rdi, rcx
|
||||
mov r8, rax
|
||||
movdqu xmm1, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm1
|
||||
xor ebp, 48
|
||||
paddq xmm1, xmm8
|
||||
add r13, r8
|
||||
movdqu xmm2, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm2
|
||||
add r15, rdx
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm1
|
||||
paddq xmm2, xmm4
|
||||
xor ebp, 16
|
||||
mov eax, ebp
|
||||
xor rax, 32
|
||||
movdqu xmm0, XMMWORD PTR [rbp+rcx]
|
||||
pxor xmm5, xmm0
|
||||
movdqu XMMWORD PTR [rbp+rcx], xmm2
|
||||
paddq xmm0, xmm10
|
||||
movdqu XMMWORD PTR [rax+rcx], xmm0
|
||||
movq rax, xmm3
|
||||
movdqa xmm10, xmm8
|
||||
mov QWORD PTR [r10+rcx], r15
|
||||
movdqa xmm8, xmm5
|
||||
xor r15, rax
|
||||
mov QWORD PTR [r10+rcx+8], r13
|
||||
mov r8d, r15d
|
||||
xor r13, r9
|
||||
and r8d, 2097136
|
||||
dec r11d
|
||||
jnz FN_PREFIX(CryptonightR_template_double_mainloop)
|
||||
|
||||
FN_PREFIX(CryptonightR_template_double_part4):
|
||||
|
||||
mov rbx, QWORD PTR [rsp+400]
|
||||
movaps xmm6, XMMWORD PTR [rsp+160]
|
||||
movaps xmm7, XMMWORD PTR [rsp+176]
|
||||
movaps xmm8, XMMWORD PTR [rsp+192]
|
||||
movaps xmm9, XMMWORD PTR [rsp+208]
|
||||
movaps xmm10, XMMWORD PTR [rsp+224]
|
||||
movaps xmm11, XMMWORD PTR [rsp+240]
|
||||
movaps xmm12, XMMWORD PTR [rsp+256]
|
||||
movaps xmm13, XMMWORD PTR [rsp+272]
|
||||
movaps xmm14, XMMWORD PTR [rsp+288]
|
||||
movaps xmm15, XMMWORD PTR [rsp+304]
|
||||
add rsp, 320
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
ret 0
|
||||
FN_PREFIX(CryptonightR_template_double_end):
|
410
crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc
Normal file
410
crypto/asm/cn2/cnv2_double_main_loop_sandybridge.inc
Normal file
|
@ -0,0 +1,410 @@
|
|||
mov rax, rsp
|
||||
push rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 184
|
||||
|
||||
stmxcsr DWORD PTR [rsp+272]
|
||||
mov DWORD PTR [rsp+276], 24448
|
||||
ldmxcsr DWORD PTR [rsp+276]
|
||||
|
||||
mov r13, QWORD PTR [rcx+224]
|
||||
mov r9, rdx
|
||||
mov r10, QWORD PTR [rcx+32]
|
||||
mov r8, rcx
|
||||
xor r10, QWORD PTR [rcx]
|
||||
mov r14d, 524288
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rsi, QWORD PTR [rdx+224]
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
mov rbp, QWORD PTR [r9+40]
|
||||
xor rbp, QWORD PTR [r9+8]
|
||||
movq xmm0, rdx
|
||||
movaps XMMWORD PTR [rax-88], xmm6
|
||||
movaps XMMWORD PTR [rax-104], xmm7
|
||||
movaps XMMWORD PTR [rax-120], xmm8
|
||||
movaps XMMWORD PTR [rsp+112], xmm9
|
||||
movaps XMMWORD PTR [rsp+96], xmm10
|
||||
movaps XMMWORD PTR [rsp+80], xmm11
|
||||
movaps XMMWORD PTR [rsp+64], xmm12
|
||||
movaps XMMWORD PTR [rsp+48], xmm13
|
||||
movaps XMMWORD PTR [rsp+32], xmm14
|
||||
movaps XMMWORD PTR [rsp+16], xmm15
|
||||
mov rdx, r10
|
||||
movq xmm4, QWORD PTR [r8+96]
|
||||
and edx, 2097136
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
xorps xmm13, xmm13
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r8+72]
|
||||
movq xmm5, QWORD PTR [r8+104]
|
||||
movq xmm7, rax
|
||||
|
||||
mov eax, 1
|
||||
shl rax, 52
|
||||
movq xmm14, rax
|
||||
punpcklqdq xmm14, xmm14
|
||||
|
||||
mov eax, 1023
|
||||
shl rax, 52
|
||||
movq xmm12, rax
|
||||
punpcklqdq xmm12, xmm12
|
||||
|
||||
mov rax, QWORD PTR [r8+80]
|
||||
xor rax, QWORD PTR [r8+64]
|
||||
punpcklqdq xmm7, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+56]
|
||||
xor rcx, QWORD PTR [r9+24]
|
||||
movq xmm3, rax
|
||||
mov rax, QWORD PTR [r9+48]
|
||||
xor rax, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp], r13
|
||||
mov rcx, QWORD PTR [r9+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm6, rax
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp+256], r10
|
||||
mov rcx, rdi
|
||||
mov QWORD PTR [rsp+264], r11
|
||||
movq xmm8, rax
|
||||
and ecx, 2097136
|
||||
punpcklqdq xmm8, xmm0
|
||||
movq xmm0, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, QWORD PTR [r9+104]
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
movdqu xmm11, XMMWORD PTR [r8]
|
||||
punpcklqdq xmm5, xmm0
|
||||
lea r9, QWORD PTR [rdx+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r9]
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_double_sandybridge:
|
||||
movdqu xmm9, xmm15
|
||||
mov eax, edx
|
||||
mov ebx, edx
|
||||
xor eax, 16
|
||||
xor ebx, 32
|
||||
xor edx, 48
|
||||
|
||||
movq xmm0, r11
|
||||
movq xmm2, r10
|
||||
punpcklqdq xmm2, xmm0
|
||||
aesenc xmm9, xmm2
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||
|
||||
movq r11, xmm9
|
||||
mov edx, r11d
|
||||
and edx, 2097136
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm7
|
||||
movdqu XMMWORD PTR [r9], xmm0
|
||||
|
||||
lea rbx, QWORD PTR [rdx+r13]
|
||||
mov r10, QWORD PTR [rdx+r13]
|
||||
|
||||
movdqu xmm10, xmm11
|
||||
movq xmm0, rbp
|
||||
movq xmm11, rdi
|
||||
punpcklqdq xmm11, xmm0
|
||||
aesenc xmm10, xmm11
|
||||
|
||||
mov eax, ecx
|
||||
mov r12d, ecx
|
||||
xor eax, 16
|
||||
xor r12d, 32
|
||||
xor ecx, 48
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||
paddq xmm0, xmm6
|
||||
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||
paddq xmm1, xmm11
|
||||
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
paddq xmm0, xmm8
|
||||
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||
|
||||
movq rcx, xmm10
|
||||
and ecx, 2097136
|
||||
|
||||
movdqa xmm0, xmm10
|
||||
pxor xmm0, xmm6
|
||||
movdqu XMMWORD PTR [r8], xmm0
|
||||
mov r12, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov r9, QWORD PTR [rbx+8]
|
||||
|
||||
xor edx, 16
|
||||
mov r8d, edx
|
||||
mov r15d, edx
|
||||
|
||||
movq rdx, xmm5
|
||||
shl rdx, 32
|
||||
movq rax, xmm4
|
||||
xor rdx, rax
|
||||
xor r10, rdx
|
||||
mov rax, r10
|
||||
mul r11
|
||||
mov r11d, r8d
|
||||
xor r11d, 48
|
||||
movq xmm0, rdx
|
||||
xor rdx, [r11+r13]
|
||||
movq xmm1, rax
|
||||
xor rax, [r11+r13+8]
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||
xor r8d, 32
|
||||
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||
paddq xmm0, xmm7
|
||||
paddq xmm1, xmm2
|
||||
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||
paddq xmm0, xmm3
|
||||
movdqu XMMWORD PTR [r15+r13], xmm0
|
||||
|
||||
mov r11, QWORD PTR [rsp+256]
|
||||
add r11, rdx
|
||||
mov rdx, QWORD PTR [rsp+264]
|
||||
add rdx, rax
|
||||
mov QWORD PTR [rbx], r11
|
||||
xor r11, r10
|
||||
mov QWORD PTR [rbx+8], rdx
|
||||
xor rdx, r9
|
||||
mov QWORD PTR [rsp+256], r11
|
||||
and r11d, 2097136
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov QWORD PTR [rsp+8], r11
|
||||
lea r15, QWORD PTR [r11+r13]
|
||||
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||
lea r13, QWORD PTR [rsi+rcx]
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movaps xmm2, xmm13
|
||||
movq r10, xmm0
|
||||
psllq xmm5, 1
|
||||
shl r10, 32
|
||||
movdqa xmm0, xmm9
|
||||
psrldq xmm0, 8
|
||||
movdqa xmm1, xmm10
|
||||
movq r11, xmm0
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
psrldq xmm4, 8
|
||||
movaps xmm0, xmm13
|
||||
movq rax, xmm4
|
||||
xor r10, rax
|
||||
movaps xmm1, xmm13
|
||||
xor r10, r12
|
||||
lea rax, QWORD PTR [r11+1]
|
||||
shr rax, 1
|
||||
movdqa xmm3, xmm9
|
||||
punpcklqdq xmm3, xmm10
|
||||
paddq xmm5, xmm3
|
||||
movq rdx, xmm5
|
||||
psrldq xmm5, 8
|
||||
cvtsi2sd xmm2, rax
|
||||
or edx, -2147483647
|
||||
lea rax, QWORD PTR [r8+1]
|
||||
shr rax, 1
|
||||
movq r9, xmm5
|
||||
cvtsi2sd xmm0, rax
|
||||
or r9d, -2147483647
|
||||
cvtsi2sd xmm1, rdx
|
||||
unpcklpd xmm2, xmm0
|
||||
movaps xmm0, xmm13
|
||||
cvtsi2sd xmm0, r9
|
||||
unpcklpd xmm1, xmm0
|
||||
divpd xmm2, xmm1
|
||||
paddq xmm2, xmm14
|
||||
cvttsd2si rax, xmm2
|
||||
psrldq xmm2, 8
|
||||
mov rbx, rax
|
||||
imul rax, rdx
|
||||
sub r11, rax
|
||||
js div_fix_1_sandybridge
|
||||
div_fix_1_ret_sandybridge:
|
||||
|
||||
cvttsd2si rdx, xmm2
|
||||
mov rax, rdx
|
||||
imul rax, r9
|
||||
movd xmm2, r11d
|
||||
movd xmm4, ebx
|
||||
sub r8, rax
|
||||
js div_fix_2_sandybridge
|
||||
div_fix_2_ret_sandybridge:
|
||||
|
||||
movd xmm1, r8d
|
||||
movd xmm0, edx
|
||||
punpckldq xmm2, xmm1
|
||||
punpckldq xmm4, xmm0
|
||||
punpckldq xmm4, xmm2
|
||||
paddq xmm3, xmm4
|
||||
movdqa xmm0, xmm3
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm12
|
||||
sqrtpd xmm1, xmm0
|
||||
movq r9, xmm1
|
||||
movdqa xmm5, xmm1
|
||||
psrlq xmm5, 19
|
||||
test r9, 524287
|
||||
je sqrt_fix_1_sandybridge
|
||||
sqrt_fix_1_ret_sandybridge:
|
||||
|
||||
movq r9, xmm10
|
||||
psrldq xmm1, 8
|
||||
movq r8, xmm1
|
||||
test r8, 524287
|
||||
je sqrt_fix_2_sandybridge
|
||||
sqrt_fix_2_ret_sandybridge:
|
||||
|
||||
mov r12d, ecx
|
||||
mov r8d, ecx
|
||||
xor r12d, 16
|
||||
xor r8d, 32
|
||||
xor ecx, 48
|
||||
mov rax, r10
|
||||
mul r9
|
||||
movq xmm0, rax
|
||||
movq xmm3, rdx
|
||||
punpcklqdq xmm3, xmm0
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||
pxor xmm0, xmm3
|
||||
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||
xor rdx, [r8+rsi]
|
||||
xor rax, [r8+rsi+8]
|
||||
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||
paddq xmm0, xmm6
|
||||
paddq xmm1, xmm11
|
||||
paddq xmm3, xmm8
|
||||
movdqu XMMWORD PTR [r8+rsi], xmm0
|
||||
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||
movdqu XMMWORD PTR [r12+rsi], xmm3
|
||||
|
||||
add rdi, rdx
|
||||
mov QWORD PTR [r13], rdi
|
||||
xor rdi, r10
|
||||
mov ecx, edi
|
||||
and ecx, 2097136
|
||||
lea r8, QWORD PTR [rcx+rsi]
|
||||
|
||||
mov rdx, QWORD PTR [r13+8]
|
||||
add rbp, rax
|
||||
mov QWORD PTR [r13+8], rbp
|
||||
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||
xor rbp, rdx
|
||||
mov r13, QWORD PTR [rsp]
|
||||
movdqa xmm3, xmm7
|
||||
mov rdx, QWORD PTR [rsp+8]
|
||||
movdqa xmm8, xmm6
|
||||
mov r10, QWORD PTR [rsp+256]
|
||||
movdqa xmm7, xmm9
|
||||
mov r11, QWORD PTR [rsp+264]
|
||||
movdqa xmm6, xmm10
|
||||
mov r9, r15
|
||||
dec r14d
|
||||
jne main_loop_double_sandybridge
|
||||
|
||||
ldmxcsr DWORD PTR [rsp+272]
|
||||
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+184]
|
||||
movaps xmm6, XMMWORD PTR [r11-24]
|
||||
movaps xmm7, XMMWORD PTR [r11-40]
|
||||
movaps xmm8, XMMWORD PTR [r11-56]
|
||||
movaps xmm9, XMMWORD PTR [r11-72]
|
||||
movaps xmm10, XMMWORD PTR [r11-88]
|
||||
movaps xmm11, XMMWORD PTR [r11-104]
|
||||
movaps xmm12, XMMWORD PTR [r11-120]
|
||||
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop rbx
|
||||
jmp cnv2_double_mainloop_asm_sandybridge_endp
|
||||
|
||||
div_fix_1_sandybridge:
|
||||
dec rbx
|
||||
add r11, rdx
|
||||
jmp div_fix_1_ret_sandybridge
|
||||
|
||||
div_fix_2_sandybridge:
|
||||
dec rdx
|
||||
add r8, r9
|
||||
jmp div_fix_2_ret_sandybridge
|
||||
|
||||
sqrt_fix_1_sandybridge:
|
||||
movq r8, xmm3
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
dec r9
|
||||
mov r11d, -1022
|
||||
shl r11, 32
|
||||
mov rax, r9
|
||||
shr r9, 19
|
||||
shr rax, 20
|
||||
mov rdx, r9
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+r11+1]
|
||||
add rax, r11
|
||||
imul rdx, rax
|
||||
sub rdx, r8
|
||||
adc r9, 0
|
||||
movq xmm5, r9
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp sqrt_fix_1_ret_sandybridge
|
||||
|
||||
sqrt_fix_2_sandybridge:
|
||||
psrldq xmm3, 8
|
||||
movq r11, xmm3
|
||||
dec r8
|
||||
mov ebx, -1022
|
||||
shl rbx, 32
|
||||
mov rax, r8
|
||||
shr r8, 19
|
||||
shr rax, 20
|
||||
mov rdx, r8
|
||||
sub rdx, rax
|
||||
lea rdx, [rdx+rbx+1]
|
||||
add rax, rbx
|
||||
imul rdx, rax
|
||||
sub rdx, r11
|
||||
adc r8, 0
|
||||
movq xmm0, r8
|
||||
punpcklqdq xmm5, xmm0
|
||||
jmp sqrt_fix_2_ret_sandybridge
|
||||
|
||||
cnv2_double_mainloop_asm_sandybridge_endp:
|
180
crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
Normal file
180
crypto/asm/cn2/cnv2_main_loop_bulldozer.inc
Normal file
|
@ -0,0 +1,180 @@
|
|||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 64
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov ebp, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm3, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
mov rdi, QWORD PTR [r9+104]
|
||||
and r10d, 2097136
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm4, rax
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
xorps xmm8, xmm8
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm7, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm4, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_main_loop_bulldozer:
|
||||
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||
movq xmm6, r8
|
||||
pinsrq xmm6, r11, 1
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
lea r9, QWORD PTR [rdi+rdi]
|
||||
shl rdi, 32
|
||||
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
aesenc xmm5, xmm6
|
||||
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
paddq xmm0, xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movaps xmm1, xmm8
|
||||
mov rsi, r15
|
||||
xor rsi, rdi
|
||||
|
||||
mov edi, 1023
|
||||
shl rdi, 52
|
||||
|
||||
movq r14, xmm5
|
||||
pextrq rax, xmm5, 1
|
||||
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm3
|
||||
mov r10, r14
|
||||
and r10d, 2097136
|
||||
movdqa XMMWORD PTR [rdx], xmm0
|
||||
xor rsi, QWORD PTR [r10+rbx]
|
||||
lea r12, QWORD PTR [r10+rbx]
|
||||
mov r13, QWORD PTR [r10+rbx+8]
|
||||
|
||||
add r9d, r14d
|
||||
or r9d, -2147483647
|
||||
xor edx, edx
|
||||
div r9
|
||||
mov eax, eax
|
||||
shl rdx, 32
|
||||
lea r15, [rax+rdx]
|
||||
lea rax, [r14+r15]
|
||||
shr rax, 12
|
||||
add rax, rdi
|
||||
movq xmm0, rax
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdi, xmm1
|
||||
test rdi, 524287
|
||||
je sqrt_fixup_bulldozer
|
||||
shr rdi, 19
|
||||
|
||||
sqrt_fixup_bulldozer_ret:
|
||||
mov rax, rsi
|
||||
mul r14
|
||||
movq xmm1, rax
|
||||
movq xmm0, rdx
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
mov r9d, r10d
|
||||
mov ecx, r10d
|
||||
xor r9d, 16
|
||||
xor ecx, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||
xor rdx, [rcx+rbx]
|
||||
xor rax, [rcx+rbx+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||
pxor xmm2, xmm0
|
||||
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
add r8, rdx
|
||||
add r11, rax
|
||||
mov QWORD PTR [r12], r8
|
||||
xor r8, rsi
|
||||
mov QWORD PTR [r12+8], r11
|
||||
mov r10, r8
|
||||
xor r11, r13
|
||||
and r10d, 2097136
|
||||
movdqa xmm3, xmm5
|
||||
dec ebp
|
||||
jne cnv2_main_loop_bulldozer
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+64]
|
||||
mov rbx, QWORD PTR [r11+56]
|
||||
mov rbp, QWORD PTR [r11+64]
|
||||
mov rsi, QWORD PTR [r11+72]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
jmp cnv2_main_loop_bulldozer_endp
|
||||
|
||||
sqrt_fixup_bulldozer:
|
||||
movq r9, xmm5
|
||||
add r9, r15
|
||||
dec rdi
|
||||
mov edx, -1022
|
||||
shl rdx, 32
|
||||
mov rax, rdi
|
||||
shr rdi, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdi
|
||||
sub rcx, rax
|
||||
lea rcx, [rcx+rdx+1]
|
||||
add rax, rdx
|
||||
imul rcx, rax
|
||||
sub rcx, r9
|
||||
adc rdi, 0
|
||||
jmp sqrt_fixup_bulldozer_ret
|
||||
|
||||
cnv2_main_loop_bulldozer_endp:
|
186
crypto/asm/cn2/cnv2_main_loop_ivybridge.inc
Normal file
186
crypto/asm/cn2/cnv2_main_loop_ivybridge.inc
Normal file
|
@ -0,0 +1,186 @@
|
|||
mov QWORD PTR [rsp+24], rbx
|
||||
push rbp
|
||||
push rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov esi, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
mov r13d, -2147483647
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm4, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
movq xmm3, QWORD PTR [r9+104]
|
||||
movaps XMMWORD PTR [rsp+64], xmm6
|
||||
movaps XMMWORD PTR [rsp+48], xmm7
|
||||
movaps XMMWORD PTR [rsp+32], xmm8
|
||||
and r10d, 2097136
|
||||
movq xmm5, rax
|
||||
|
||||
xor eax, eax
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm8, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm5, xmm0
|
||||
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_ivybridge:
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
mov rdi, r15
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
movq xmm0, r11
|
||||
movq xmm7, r8
|
||||
punpcklqdq xmm7, xmm0
|
||||
aesenc xmm6, xmm7
|
||||
movq rbp, xmm6
|
||||
mov r9, rbp
|
||||
and r9d, 2097136
|
||||
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm1, xmm7
|
||||
paddq xmm0, xmm5
|
||||
paddq xmm2, xmm4
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
mov r10, r9
|
||||
xor r10d, 32
|
||||
movq rcx, xmm3
|
||||
mov rax, rcx
|
||||
shl rax, 32
|
||||
xor rdi, rax
|
||||
movdqa xmm0, xmm6
|
||||
pxor xmm0, xmm4
|
||||
movdqu XMMWORD PTR [rdx], xmm0
|
||||
xor rdi, QWORD PTR [r9+rbx]
|
||||
lea r14, QWORD PTR [r9+rbx]
|
||||
mov r12, QWORD PTR [r14+8]
|
||||
xor edx, edx
|
||||
lea r9d, DWORD PTR [ecx+ecx]
|
||||
add r9d, ebp
|
||||
movdqa xmm0, xmm6
|
||||
psrldq xmm0, 8
|
||||
or r9d, r13d
|
||||
movq rax, xmm0
|
||||
div r9
|
||||
xorps xmm3, xmm3
|
||||
mov eax, eax
|
||||
shl rdx, 32
|
||||
add rdx, rax
|
||||
lea r9, QWORD PTR [rdx+rbp]
|
||||
mov r15, rdx
|
||||
mov rax, r9
|
||||
shr rax, 12
|
||||
movq xmm0, rax
|
||||
paddq xmm0, xmm8
|
||||
sqrtsd xmm3, xmm0
|
||||
psubq xmm3, XMMWORD PTR [rsp+16]
|
||||
movq rdx, xmm3
|
||||
test edx, 524287
|
||||
je sqrt_fixup_ivybridge
|
||||
psrlq xmm3, 19
|
||||
sqrt_fixup_ivybridge_ret:
|
||||
|
||||
mov ecx, r10d
|
||||
mov rax, rdi
|
||||
mul rbp
|
||||
movq xmm2, rdx
|
||||
xor rdx, [rcx+rbx]
|
||||
add r8, rdx
|
||||
mov QWORD PTR [r14], r8
|
||||
xor r8, rdi
|
||||
mov edi, r8d
|
||||
and edi, 2097136
|
||||
movq xmm0, rax
|
||||
xor rax, [rcx+rbx+8]
|
||||
add r11, rax
|
||||
mov QWORD PTR [r14+8], r11
|
||||
punpcklqdq xmm2, xmm0
|
||||
|
||||
mov r9d, r10d
|
||||
xor r9d, 48
|
||||
xor r10d, 16
|
||||
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm0, xmm5
|
||||
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||
paddq xmm2, xmm4
|
||||
paddq xmm1, xmm7
|
||||
movdqa xmm5, xmm4
|
||||
movdqu XMMWORD PTR [r9+rbx], xmm0
|
||||
movdqa xmm4, xmm6
|
||||
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||
movdqu xmm6, [rdi+rbx]
|
||||
mov r10d, edi
|
||||
xor r11, r12
|
||||
dec rsi
|
||||
jne main_loop_ivybridge
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
mov rbx, QWORD PTR [rsp+160]
|
||||
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||
add rsp, 80
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
jmp cnv2_main_loop_ivybridge_endp
|
||||
|
||||
sqrt_fixup_ivybridge:
|
||||
dec rdx
|
||||
mov r13d, -1022
|
||||
shl r13, 32
|
||||
mov rax, rdx
|
||||
shr rdx, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdx
|
||||
sub rcx, rax
|
||||
add rax, r13
|
||||
not r13
|
||||
sub rcx, r13
|
||||
mov r13d, -2147483647
|
||||
imul rcx, rax
|
||||
sub rcx, r9
|
||||
adc rdx, 0
|
||||
movq xmm3, rdx
|
||||
jmp sqrt_fixup_ivybridge_ret
|
||||
|
||||
cnv2_main_loop_ivybridge_endp:
|
179
crypto/asm/cn2/cnv2_main_loop_ryzen.inc
Normal file
179
crypto/asm/cn2/cnv2_main_loop_ryzen.inc
Normal file
|
@ -0,0 +1,179 @@
|
|||
mov QWORD PTR [rsp+16], rbx
|
||||
mov QWORD PTR [rsp+24], rbp
|
||||
mov QWORD PTR [rsp+32], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 64
|
||||
|
||||
stmxcsr DWORD PTR [rsp]
|
||||
mov DWORD PTR [rsp+4], 24448
|
||||
ldmxcsr DWORD PTR [rsp+4]
|
||||
|
||||
mov rax, QWORD PTR [rcx+48]
|
||||
mov r9, rcx
|
||||
xor rax, QWORD PTR [rcx+16]
|
||||
mov ebp, 524288
|
||||
mov r8, QWORD PTR [rcx+32]
|
||||
xor r8, QWORD PTR [rcx]
|
||||
mov r11, QWORD PTR [rcx+40]
|
||||
mov r10, r8
|
||||
mov rdx, QWORD PTR [rcx+56]
|
||||
movq xmm3, rax
|
||||
xor rdx, QWORD PTR [rcx+24]
|
||||
xor r11, QWORD PTR [rcx+8]
|
||||
mov rbx, QWORD PTR [rcx+224]
|
||||
mov rax, QWORD PTR [r9+80]
|
||||
xor rax, QWORD PTR [r9+64]
|
||||
movq xmm0, rdx
|
||||
mov rcx, QWORD PTR [rcx+88]
|
||||
xor rcx, QWORD PTR [r9+72]
|
||||
mov rdi, QWORD PTR [r9+104]
|
||||
and r10d, 2097136
|
||||
movaps XMMWORD PTR [rsp+48], xmm6
|
||||
movq xmm4, rax
|
||||
movaps XMMWORD PTR [rsp+32], xmm7
|
||||
movaps XMMWORD PTR [rsp+16], xmm8
|
||||
xorps xmm8, xmm8
|
||||
mov ax, 1023
|
||||
shl rax, 52
|
||||
movq xmm7, rax
|
||||
mov r15, QWORD PTR [r9+96]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
punpcklqdq xmm4, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_ryzen:
|
||||
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||
movq xmm0, r11
|
||||
movq xmm6, r8
|
||||
punpcklqdq xmm6, xmm0
|
||||
lea rdx, QWORD PTR [r10+rbx]
|
||||
lea r9, QWORD PTR [rdi+rdi]
|
||||
shl rdi, 32
|
||||
|
||||
mov ecx, r10d
|
||||
mov eax, r10d
|
||||
xor ecx, 16
|
||||
xor eax, 32
|
||||
xor r10d, 48
|
||||
aesenc xmm5, xmm6
|
||||
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
paddq xmm0, xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movaps xmm1, xmm8
|
||||
mov rsi, r15
|
||||
xor rsi, rdi
|
||||
movq r14, xmm5
|
||||
movdqa xmm0, xmm5
|
||||
pxor xmm0, xmm3
|
||||
mov r10, r14
|
||||
and r10d, 2097136
|
||||
movdqa XMMWORD PTR [rdx], xmm0
|
||||
xor rsi, QWORD PTR [r10+rbx]
|
||||
lea r12, QWORD PTR [r10+rbx]
|
||||
mov r13, QWORD PTR [r10+rbx+8]
|
||||
|
||||
add r9d, r14d
|
||||
or r9d, -2147483647
|
||||
xor edx, edx
|
||||
movdqa xmm0, xmm5
|
||||
psrldq xmm0, 8
|
||||
movq rax, xmm0
|
||||
|
||||
div r9
|
||||
movq xmm0, rax
|
||||
movq xmm1, rdx
|
||||
punpckldq xmm0, xmm1
|
||||
movq r15, xmm0
|
||||
paddq xmm0, xmm5
|
||||
movdqa xmm2, xmm0
|
||||
psrlq xmm0, 12
|
||||
paddq xmm0, xmm7
|
||||
sqrtsd xmm1, xmm0
|
||||
movq rdi, xmm1
|
||||
test rdi, 524287
|
||||
je sqrt_fixup_ryzen
|
||||
shr rdi, 19
|
||||
|
||||
sqrt_fixup_ryzen_ret:
|
||||
mov rax, rsi
|
||||
mul r14
|
||||
movq xmm1, rax
|
||||
movq xmm0, rdx
|
||||
punpcklqdq xmm0, xmm1
|
||||
|
||||
mov r9d, r10d
|
||||
mov ecx, r10d
|
||||
xor r9d, 16
|
||||
xor ecx, 32
|
||||
xor r10d, 48
|
||||
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||
xor rdx, [rcx+rbx]
|
||||
xor rax, [rcx+rbx+8]
|
||||
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||
pxor xmm2, xmm0
|
||||
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||
paddq xmm2, xmm3
|
||||
paddq xmm1, xmm6
|
||||
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||
|
||||
movdqa xmm4, xmm3
|
||||
add r8, rdx
|
||||
add r11, rax
|
||||
mov QWORD PTR [r12], r8
|
||||
xor r8, rsi
|
||||
mov QWORD PTR [r12+8], r11
|
||||
mov r10, r8
|
||||
xor r11, r13
|
||||
and r10d, 2097136
|
||||
movdqa xmm3, xmm5
|
||||
dec ebp
|
||||
jne main_loop_ryzen
|
||||
|
||||
ldmxcsr DWORD PTR [rsp]
|
||||
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||
lea r11, QWORD PTR [rsp+64]
|
||||
mov rbx, QWORD PTR [r11+56]
|
||||
mov rbp, QWORD PTR [r11+64]
|
||||
mov rsi, QWORD PTR [r11+72]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
jmp cnv2_main_loop_ryzen_endp
|
||||
|
||||
sqrt_fixup_ryzen:
|
||||
movq r9, xmm2
|
||||
dec rdi
|
||||
mov edx, -1022
|
||||
shl rdx, 32
|
||||
mov rax, rdi
|
||||
shr rdi, 19
|
||||
shr rax, 20
|
||||
mov rcx, rdi
|
||||
sub rcx, rax
|
||||
lea rcx, [rcx+rdx+1]
|
||||
add rax, rdx
|
||||
imul rcx, rax
|
||||
sub rcx, r9
|
||||
adc rdi, 0
|
||||
jmp sqrt_fixup_ryzen_ret
|
||||
|
||||
cnv2_main_loop_ryzen_endp:
|
54
crypto/asm/cn_main_loop.S
Normal file
54
crypto/asm/cn_main_loop.S
Normal file
|
@ -0,0 +1,54 @@
|
|||
#ifdef __APPLE__
|
||||
# define ALIGN(x) .align 6
|
||||
#else
|
||||
# define ALIGN(x) .align 64
|
||||
#endif
|
||||
.intel_syntax noprefix
|
||||
#ifdef __APPLE__
|
||||
# define FN_PREFIX(fn) _ ## fn
|
||||
.text
|
||||
#else
|
||||
# define FN_PREFIX(fn) fn
|
||||
.section .text
|
||||
#endif
|
||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn2/cnv2_main_loop_ivybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_mainloop_ryzen_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn2/cnv2_main_loop_ryzen.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_mainloop_bulldozer_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
#include "cn2/cnv2_main_loop_bulldozer.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||
sub rsp, 48
|
||||
mov rcx, rdi
|
||||
mov rdx, rsi
|
||||
#include "cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
mov eax, 3735929054
|
31
crypto/asm/win64/cn_main_loop.S
Normal file
31
crypto/asm/win64/cn_main_loop.S
Normal file
|
@ -0,0 +1,31 @@
|
|||
#define ALIGN(x) .align 64
|
||||
.intel_syntax noprefix
|
||||
.section .text
|
||||
.global cnv2_mainloop_ivybridge_asm
|
||||
.global cnv2_mainloop_ryzen_asm
|
||||
.global cnv2_mainloop_bulldozer_asm
|
||||
.global cnv2_double_mainloop_sandybridge_asm
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm:
|
||||
#include "../cn2/cnv2_main_loop_ivybridge.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ryzen_asm:
|
||||
#include "../cn2/cnv2_main_loop_ryzen.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_bulldozer_asm:
|
||||
#include "../cn2/cnv2_main_loop_bulldozer.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_double_mainloop_sandybridge_asm:
|
||||
#include "../cn2/cnv2_double_main_loop_sandybridge.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
|
@ -1,212 +0,0 @@
|
|||
/*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Additional permission under GNU GPL version 3 section 7
|
||||
*
|
||||
* If you modify this Program, or any covered work, by linking or combining
|
||||
* it with OpenSSL (or a modified version of that library), containing parts
|
||||
* covered by the terms of OpenSSL License and SSLeay License, the licensors
|
||||
* of this Program grant you additional permission to convey the resulting work.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* The orginal author of this AES implementation is Karl Malbrain.
|
||||
*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif // __GNUC__
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#define TABLE_ALIGN 32
|
||||
#define WPOLY 0x011b
|
||||
#define N_COLS 4
|
||||
#define AES_BLOCK_SIZE 16
|
||||
#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define ALIGN __declspec(align(TABLE_ALIGN))
|
||||
#elif defined(__GNUC__)
|
||||
#define ALIGN __attribute__ ((aligned(16)))
|
||||
#else
|
||||
#define ALIGN
|
||||
#endif
|
||||
|
||||
#define rf1(r,c) (r)
|
||||
#define word_in(x,c) (*((uint32_t*)(x)+(c)))
|
||||
#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
|
||||
|
||||
#define s(x,c) x[c]
|
||||
#define si(y,x,c) (s(y,c) = word_in(x, c))
|
||||
#define so(y,x,c) word_out(y, c, s(x,c))
|
||||
#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
|
||||
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
|
||||
#define round(y,x,k) \
|
||||
y[0] = (k)[0] ^ (t_fn[0][x[0] & 0xff] ^ t_fn[1][(x[1] >> 8) & 0xff] ^ t_fn[2][(x[2] >> 16) & 0xff] ^ t_fn[3][x[3] >> 24]); \
|
||||
y[1] = (k)[1] ^ (t_fn[0][x[1] & 0xff] ^ t_fn[1][(x[2] >> 8) & 0xff] ^ t_fn[2][(x[3] >> 16) & 0xff] ^ t_fn[3][x[0] >> 24]); \
|
||||
y[2] = (k)[2] ^ (t_fn[0][x[2] & 0xff] ^ t_fn[1][(x[3] >> 8) & 0xff] ^ t_fn[2][(x[0] >> 16) & 0xff] ^ t_fn[3][x[1] >> 24]); \
|
||||
y[3] = (k)[3] ^ (t_fn[0][x[3] & 0xff] ^ t_fn[1][(x[0] >> 8) & 0xff] ^ t_fn[2][(x[1] >> 16) & 0xff] ^ t_fn[3][x[2] >> 24]);
|
||||
#define to_byte(x) ((x) & 0xff)
|
||||
#define bval(x,n) to_byte((x) >> (8 * (n)))
|
||||
|
||||
#define fwd_var(x,r,c)\
|
||||
( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
|
||||
: r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
|
||||
: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
|
||||
: ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
|
||||
|
||||
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
|
||||
|
||||
#define sb_data(w) {\
|
||||
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
|
||||
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
|
||||
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
|
||||
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
|
||||
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
|
||||
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
|
||||
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
|
||||
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
|
||||
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
|
||||
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
|
||||
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
|
||||
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
|
||||
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
|
||||
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
|
||||
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
|
||||
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
|
||||
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
|
||||
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
|
||||
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
|
||||
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
|
||||
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
|
||||
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
|
||||
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
|
||||
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
|
||||
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
|
||||
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
|
||||
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
|
||||
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
|
||||
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
|
||||
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
|
||||
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
|
||||
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
|
||||
|
||||
#define rc_data(w) {\
|
||||
w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
|
||||
w(0x1b), w(0x36) }
|
||||
|
||||
#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
|
||||
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
|
||||
|
||||
#define h0(x) (x)
|
||||
#define w0(p) bytes2word(p, 0, 0, 0)
|
||||
#define w1(p) bytes2word(0, p, 0, 0)
|
||||
#define w2(p) bytes2word(0, 0, p, 0)
|
||||
#define w3(p) bytes2word(0, 0, 0, p)
|
||||
|
||||
#define u0(p) bytes2word(f2(p), p, p, f3(p))
|
||||
#define u1(p) bytes2word(f3(p), f2(p), p, p)
|
||||
#define u2(p) bytes2word(p, f3(p), f2(p), p)
|
||||
#define u3(p) bytes2word(p, p, f3(p), f2(p))
|
||||
|
||||
#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p))
|
||||
#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p))
|
||||
#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p))
|
||||
#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p))
|
||||
|
||||
#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY))
|
||||
#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
|
||||
#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
|
||||
#define f3(x) (f2(x) ^ x)
|
||||
#define f9(x) (f8(x) ^ x)
|
||||
#define fb(x) (f8(x) ^ f2(x) ^ x)
|
||||
#define fd(x) (f8(x) ^ f4(x) ^ x)
|
||||
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
|
||||
|
||||
#define t_dec(m,n) t_##m##n
|
||||
#define t_set(m,n) t_##m##n
|
||||
#define t_use(m,n) t_##m##n
|
||||
|
||||
#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
|
||||
|
||||
#define four_tables(x,tab,vf,rf,c) \
|
||||
(tab[0][bval(vf(x,0,c),rf(0,c))] \
|
||||
^ tab[1][bval(vf(x,1,c),rf(1,c))] \
|
||||
^ tab[2][bval(vf(x,2,c),rf(2,c))] \
|
||||
^ tab[3][bval(vf(x,3,c),rf(3,c))])
|
||||
|
||||
d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
|
||||
|
||||
__m128i soft_aesenc(__m128i in, __m128i key)
|
||||
{
|
||||
uint32_t x0, x1, x2, x3;
|
||||
x0 = _mm_cvtsi128_si32(in);
|
||||
x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
|
||||
x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
|
||||
x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
|
||||
|
||||
__m128i out = _mm_set_epi32(
|
||||
(t_fn[0][x3 & 0xff] ^ t_fn[1][(x0 >> 8) & 0xff] ^ t_fn[2][(x1 >> 16) & 0xff] ^ t_fn[3][x2 >> 24]),
|
||||
(t_fn[0][x2 & 0xff] ^ t_fn[1][(x3 >> 8) & 0xff] ^ t_fn[2][(x0 >> 16) & 0xff] ^ t_fn[3][x1 >> 24]),
|
||||
(t_fn[0][x1 & 0xff] ^ t_fn[1][(x2 >> 8) & 0xff] ^ t_fn[2][(x3 >> 16) & 0xff] ^ t_fn[3][x0 >> 24]),
|
||||
(t_fn[0][x0 & 0xff] ^ t_fn[1][(x1 >> 8) & 0xff] ^ t_fn[2][(x2 >> 16) & 0xff] ^ t_fn[3][x3 >> 24]));
|
||||
|
||||
return _mm_xor_si128(out, key);
|
||||
}
|
||||
|
||||
uint8_t Sbox[256] = { // forward s-box
|
||||
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
|
||||
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
|
||||
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
|
||||
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
|
||||
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
|
||||
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
|
||||
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
|
||||
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
|
||||
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
|
||||
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
|
||||
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
|
||||
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
|
||||
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
|
||||
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
|
||||
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
|
||||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
|
||||
|
||||
static inline void sub_word(uint8_t* key)
|
||||
{
|
||||
key[0] = Sbox[key[0]];
|
||||
key[1] = Sbox[key[1]];
|
||||
key[2] = Sbox[key[2]];
|
||||
key[3] = Sbox[key[3]];
|
||||
}
|
||||
|
||||
#ifdef __clang__
|
||||
uint32_t _rotr(uint32_t value, uint32_t amount)
|
||||
{
|
||||
return (value >> amount) | (value << ((32 - amount) & 31));
|
||||
}
|
||||
#endif
|
||||
|
||||
__m128i soft_aeskeygenassist(__m128i key, uint8_t rcon)
|
||||
{
|
||||
uint32_t X1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55));
|
||||
uint32_t X3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF));
|
||||
sub_word((uint8_t*)&X1);
|
||||
sub_word((uint8_t*)&X3);
|
||||
return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3,_rotr(X1, 8) ^ rcon, X1);
|
||||
}
|
131
crypto/soft_aes.h
Normal file
131
crypto/soft_aes.h
Normal file
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Additional permission under GNU GPL version 3 section 7
|
||||
*
|
||||
* If you modify this Program, or any covered work, by linking or combining
|
||||
* it with OpenSSL (or a modified version of that library), containing parts
|
||||
* covered by the terms of OpenSSL License and SSLeay License, the licensors
|
||||
* of this Program grant you additional permission to convey the resulting work.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Parts of this file are originally copyright (c) 2014-2017, The Monero Project
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
|
||||
#if defined(XMRIG_ARM)
|
||||
# include "crypto/SSE2NEON.h"
|
||||
#elif defined(__GNUC__)
|
||||
# include <x86intrin.h>
|
||||
#else
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
|
||||
#define saes_data(w) {\
|
||||
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
|
||||
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
|
||||
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
|
||||
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
|
||||
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
|
||||
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
|
||||
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
|
||||
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
|
||||
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
|
||||
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
|
||||
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
|
||||
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
|
||||
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
|
||||
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
|
||||
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
|
||||
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
|
||||
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
|
||||
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
|
||||
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
|
||||
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
|
||||
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
|
||||
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
|
||||
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
|
||||
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
|
||||
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
|
||||
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
|
||||
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
|
||||
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
|
||||
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
|
||||
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
|
||||
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
|
||||
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
|
||||
|
||||
#define SAES_WPOLY 0x011b
|
||||
|
||||
#define saes_b2w(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
|
||||
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
|
||||
|
||||
#define saes_f2(x) ((x<<1) ^ (((x>>7) & 1) * SAES_WPOLY))
|
||||
#define saes_f3(x) (saes_f2(x) ^ x)
|
||||
#define saes_h0(x) (x)
|
||||
|
||||
#define saes_u0(p) saes_b2w(saes_f2(p), p, p, saes_f3(p))
|
||||
#define saes_u1(p) saes_b2w(saes_f3(p), saes_f2(p), p, p)
|
||||
#define saes_u2(p) saes_b2w( p, saes_f3(p), saes_f2(p), p)
|
||||
#define saes_u3(p) saes_b2w( p, p, saes_f3(p), saes_f2(p))
|
||||
|
||||
__attribute__((aligned(16))) const static uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) };
|
||||
__attribute__((aligned(16))) const static uint8_t saes_sbox[256] = saes_data(saes_h0);
|
||||
|
||||
|
||||
static inline __m128i soft_aesenc(__m128i in, __m128i key)
|
||||
{
|
||||
uint32_t x0, x1, x2, x3;
|
||||
x0 = _mm_cvtsi128_si32(in);
|
||||
x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
|
||||
x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
|
||||
x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
|
||||
|
||||
__m128i out = _mm_set_epi32(
|
||||
(saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
|
||||
(saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
|
||||
(saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
|
||||
(saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
|
||||
|
||||
return _mm_xor_si128(out, key);
|
||||
}
|
||||
|
||||
static inline uint32_t sub_word(uint32_t key)
|
||||
{
|
||||
return (saes_sbox[key >> 24 ] << 24) |
|
||||
(saes_sbox[(key >> 16) & 0xff] << 16 ) |
|
||||
(saes_sbox[(key >> 8) & 0xff] << 8 ) |
|
||||
saes_sbox[key & 0xff];
|
||||
}
|
||||
|
||||
#if defined(__clang__) || defined(XMRIG_ARM)
|
||||
static inline uint32_t _rotr(uint32_t value, uint32_t amount)
|
||||
{
|
||||
return (value >> amount) | (value << ((32 - amount) & 31));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
static inline __m128i soft_aeskeygenassist(__m128i key, uint8_t rcon)
|
||||
{
|
||||
const uint32_t X1 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)));
|
||||
const uint32_t X3 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)));
|
||||
return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3, _rotr(X1, 8) ^ rcon, X1);
|
||||
}
|
2
donate.h
2
donate.h
|
@ -24,6 +24,6 @@
|
|||
#ifndef __DONATE_H__
|
||||
#define __DONATE_H__
|
||||
|
||||
#define DONATE_LEVEL 5
|
||||
#define DONATE_LEVEL 0
|
||||
|
||||
#endif /* __DONATE_H__ */
|
||||
|
|
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -74,3 +75,21 @@ void persistent_memory_free() {
|
|||
_mm_free(persistent_memory);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void *allocate_executable_memory(size_t size)
|
||||
{
|
||||
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||
}
|
||||
|
||||
|
||||
void protect_executable_memory(void *p, size_t size)
|
||||
{
|
||||
mprotect(p, size, PROT_READ | PROT_EXEC);
|
||||
}
|
||||
|
||||
|
||||
void flush_instruction_cache(void *p, size_t size)
|
||||
{
|
||||
__builtin___clear_cache((char*) p, (char*)(p) + size);
|
||||
}
|
||||
|
|
65
memory.c
65
memory.c
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -24,33 +25,15 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "persistent_memory.h"
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
#include "options.h"
|
||||
|
||||
|
||||
static size_t offset = 0;
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
static void * create_persistent_ctx_lite(int thr_id) {
|
||||
struct cryptonight_ctx *ctx = NULL;
|
||||
|
||||
if (!opt_double_hash) {
|
||||
const size_t offset = MEMORY * (thr_id + 1);
|
||||
|
||||
ctx = (struct cryptonight_ctx *) &persistent_memory[offset + MEMORY_LITE];
|
||||
ctx->memory = (uint8_t*) &persistent_memory[offset];
|
||||
return ctx;
|
||||
}
|
||||
|
||||
ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
|
||||
ctx->memory = (uint8_t*) &persistent_memory[MEMORY * (thr_id + 1)];
|
||||
|
||||
return ctx;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void * persistent_calloc(size_t num, size_t size) {
|
||||
size += size % 16;
|
||||
|
||||
void *mem = &persistent_memory[offset];
|
||||
offset += (num * size);
|
||||
|
||||
|
@ -60,17 +43,29 @@ void * persistent_calloc(size_t num, size_t size) {
|
|||
}
|
||||
|
||||
|
||||
void * create_persistent_ctx(int thr_id) {
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (opt_algo == ALGO_CRYPTONIGHT_LITE) {
|
||||
return create_persistent_ctx_lite(thr_id);
|
||||
}
|
||||
# endif
|
||||
void init_cn_r(struct cryptonight_ctx *ctx)
|
||||
{
|
||||
uint8_t *p = allocate_executable_memory(0x4000);
|
||||
|
||||
struct cryptonight_ctx *ctx = (struct cryptonight_ctx *) &persistent_memory[MEMORY - sizeof(struct cryptonight_ctx) * (thr_id + 1)];
|
||||
|
||||
const int ratio = opt_double_hash ? 2 : 1;
|
||||
ctx->memory = (uint8_t*) &persistent_memory[MEMORY * (thr_id * ratio + 1)];
|
||||
|
||||
return ctx;
|
||||
ctx->generated_code = (cn_mainloop_fun_ms_abi) p;
|
||||
ctx->generated_code_double = (cn_mainloop_double_fun_ms_abi)(p + 0x2000);
|
||||
ctx->generated_code_height = ctx->generated_code_double_height = (uint64_t)(-1);
|
||||
ctx->height = 0;
|
||||
}
|
||||
|
||||
|
||||
void create_cryptonight_ctx(struct cryptonight_ctx **ctx, int thr_id)
|
||||
{
|
||||
const int ratio = (opt_double_hash && opt_algo == ALGO_CRYPTONIGHT) ? 2 : 1;
|
||||
ctx[0] = persistent_calloc(1, sizeof(struct cryptonight_ctx));
|
||||
ctx[0]->memory = &persistent_memory[MEMORY * (thr_id * ratio + 1)];
|
||||
|
||||
init_cn_r(ctx[0]);
|
||||
|
||||
if (opt_double_hash) {
|
||||
ctx[1] = persistent_calloc(1, sizeof(struct cryptonight_ctx));
|
||||
ctx[1]->memory = ctx[0]->memory + (opt_algo == ALGO_CRYPTONIGHT ? MEMORY : MEMORY_LITE);
|
||||
|
||||
init_cn_r(ctx[1]);
|
||||
}
|
||||
}
|
||||
|
|
132
options.c
132
options.c
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -38,7 +39,6 @@
|
|||
|
||||
int64_t opt_affinity = -1L;
|
||||
int opt_n_threads = 0;
|
||||
int opt_algo_variant = 0;
|
||||
int opt_retries = 5;
|
||||
int opt_retry_pause = 5;
|
||||
int opt_donate_level = DONATE_LEVEL;
|
||||
|
@ -55,13 +55,43 @@ char *opt_userpass = NULL;
|
|||
char *opt_user = NULL;
|
||||
char *opt_pass = NULL;
|
||||
|
||||
enum mining_algo opt_algo = ALGO_CRYPTONIGHT;
|
||||
enum Algo opt_algo = ALGO_CRYPTONIGHT;
|
||||
enum Variant opt_variant = VARIANT_AUTO;
|
||||
enum AlgoVariant opt_av = AV_AUTO;
|
||||
enum Assembly opt_assembly = ASM_AUTO;
|
||||
|
||||
|
||||
struct AlgoData
|
||||
{
|
||||
const char *name;
|
||||
const char *shortName;
|
||||
enum Algo algo;
|
||||
enum Variant variant;
|
||||
};
|
||||
|
||||
|
||||
static struct AlgoData const algorithms[] = {
|
||||
{ "cryptonight", "cn", ALGO_CRYPTONIGHT, VARIANT_AUTO },
|
||||
{ "cryptonight/0", "cn/0", ALGO_CRYPTONIGHT, VARIANT_0 },
|
||||
{ "cryptonight/1", "cn/1", ALGO_CRYPTONIGHT, VARIANT_1 },
|
||||
{ "cryptonight/2", "cn/2", ALGO_CRYPTONIGHT, VARIANT_2 },
|
||||
{ "cryptonight/4", "cn/4", ALGO_CRYPTONIGHT, VARIANT_4 },
|
||||
{ "cryptonight/r", "cn/r", ALGO_CRYPTONIGHT, VARIANT_4 },
|
||||
|
||||
# ifndef XMRIG_NO_AEON
|
||||
{ "cryptonight-lite", "cn-lite", ALGO_CRYPTONIGHT_LITE, VARIANT_AUTO },
|
||||
{ "cryptonight-light", "cn-light", ALGO_CRYPTONIGHT_LITE, VARIANT_AUTO },
|
||||
{ "cryptonight-lite/0", "cn-lite/0", ALGO_CRYPTONIGHT_LITE, VARIANT_0 },
|
||||
{ "cryptonight-lite/1", "cn-lite/1", ALGO_CRYPTONIGHT_LITE, VARIANT_1 },
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
static char const usage[] = "\
|
||||
Usage: " APP_ID " [OPTIONS]\n\
|
||||
Options:\n\
|
||||
-a, --algo=ALGO cryptonight (default) or cryptonight-lite\n\
|
||||
--variant=N cryptonight variant: 0-4\n\
|
||||
-o, --url=URL URL of mining server\n\
|
||||
-b, --backup-url=URL URL of backup mining server\n\
|
||||
-O, --userpass=U:P username:password pair for mining server\n\
|
||||
|
@ -97,7 +127,7 @@ static struct option const options[] = {
|
|||
{ "cpu-affinity", 1, NULL, 1020 },
|
||||
{ "donate-level", 1, NULL, 1003 },
|
||||
{ "help", 0, NULL, 'h' },
|
||||
{ "keepalive", 0, NULL ,'k' },
|
||||
{ "keepalive", 0, NULL, 'k' },
|
||||
{ "max-cpu-usage", 1, NULL, 1004 },
|
||||
{ "nicehash", 0, NULL, 1006 },
|
||||
{ "no-color", 0, NULL, 1002 },
|
||||
|
@ -110,25 +140,45 @@ static struct option const options[] = {
|
|||
{ "user", 1, NULL, 'u' },
|
||||
{ "userpass", 1, NULL, 'O' },
|
||||
{ "version", 0, NULL, 'V' },
|
||||
{ 0, 0, 0, 0 }
|
||||
{ "variant", 1, NULL, 1021 },
|
||||
{ "asm", 1, NULL, 1022 },
|
||||
{ NULL, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
|
||||
static const char *algo_names[] = {
|
||||
[ALGO_CRYPTONIGHT] = "cryptonight",
|
||||
"cryptonight",
|
||||
# ifndef XMRIG_NO_AEON
|
||||
[ALGO_CRYPTONIGHT_LITE] = "cryptonight-lite"
|
||||
"cryptonight-lite"
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
static const char *variant_names[] = {
|
||||
"auto",
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"4"
|
||||
};
|
||||
|
||||
|
||||
static const char *asm_names[] = {
|
||||
"none",
|
||||
"auto",
|
||||
"intel",
|
||||
"ryzen",
|
||||
"bulldozer"
|
||||
};
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
static int get_cryptonight_lite_variant(int variant) {
|
||||
if (variant <= AEON_AV0_AUTO || variant >= AEON_AV_MAX) {
|
||||
return (cpu_info.flags & CPU_FLAG_AES) ? AEON_AV2_AESNI_DOUBLE : AEON_AV4_SOFT_AES_DOUBLE;
|
||||
if (variant <= AV_AUTO || variant >= AV_MAX) {
|
||||
return (cpu_info.flags & CPU_FLAG_AES) ? AV_DOUBLE : AV_DOUBLE_SOFT;
|
||||
}
|
||||
|
||||
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AEON_AV2_AESNI_DOUBLE) {
|
||||
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AV_DOUBLE) {
|
||||
return variant + 2;
|
||||
}
|
||||
|
||||
|
@ -144,11 +194,11 @@ static int get_algo_variant(int algo, int variant) {
|
|||
}
|
||||
# endif
|
||||
|
||||
if (variant <= XMR_AV0_AUTO || variant >= XMR_AV_MAX) {
|
||||
return (cpu_info.flags & CPU_FLAG_AES) ? XMR_AV1_AESNI : XMR_AV3_SOFT_AES;
|
||||
if (variant <= AV_AUTO || variant >= AV_MAX) {
|
||||
return (cpu_info.flags & CPU_FLAG_AES) ? AV_SINGLE : AV_SINGLE_SOFT;
|
||||
}
|
||||
|
||||
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= XMR_AV2_AESNI_DOUBLE) {
|
||||
if (opt_safe && !(cpu_info.flags & CPU_FLAG_AES) && variant <= AV_DOUBLE) {
|
||||
return variant + 2;
|
||||
}
|
||||
|
||||
|
@ -167,18 +217,21 @@ static void parse_arg(int key, char *arg) {
|
|||
|
||||
switch (key)
|
||||
{
|
||||
case 'a':
|
||||
for (int i = 0; i < ARRAY_SIZE(algo_names); i++) {
|
||||
if (algo_names[i] && !strcmp(arg, algo_names[i])) {
|
||||
opt_algo = i;
|
||||
case 'a': /* --algo */
|
||||
for (size_t i = 0; i < ARRAY_SIZE(algorithms); i++) {
|
||||
if ((strcasecmp(arg, algorithms[i].name) == 0) || (strcasecmp(arg, algorithms[i].shortName) == 0)) {
|
||||
opt_algo = algorithms[i].algo;
|
||||
opt_variant = algorithms[i].variant;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
# ifndef XMRIG_NO_AEON
|
||||
if (i == ARRAY_SIZE(algo_names) - 1 && !strcmp(arg, "cryptonight-light")) {
|
||||
opt_algo = i = ALGO_CRYPTONIGHT_LITE;
|
||||
case 1022: /* --asm */
|
||||
for (size_t i = 0; i < ARRAY_SIZE(asm_names); i++) {
|
||||
if (strcasecmp(arg, asm_names[i]) == 0) {
|
||||
opt_assembly = i;
|
||||
}
|
||||
# endif
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -300,11 +353,11 @@ static void parse_arg(int key, char *arg) {
|
|||
|
||||
case 'v': /* --av */
|
||||
v = atoi(arg);
|
||||
if (v < 0 || v > 1000) {
|
||||
if (v <= AV_AUTO || v >= AV_MAX) {
|
||||
show_usage_and_exit(1);
|
||||
}
|
||||
|
||||
opt_algo_variant = v;
|
||||
opt_av = v;
|
||||
break;
|
||||
|
||||
case 1020: /* --cpu-affinity */
|
||||
|
@ -322,12 +375,23 @@ static void parse_arg(int key, char *arg) {
|
|||
break;
|
||||
|
||||
case 1003: /* --donate-level */
|
||||
// v = atoi(arg);
|
||||
// if (v < 1 || v > 99) {
|
||||
// show_usage_and_exit(1);
|
||||
// }
|
||||
|
||||
// opt_donate_level = v;
|
||||
break;
|
||||
|
||||
case 1021: /* --variant */
|
||||
v = atoi(arg);
|
||||
if (v < 1 || v > 99) {
|
||||
show_usage_and_exit(1);
|
||||
if (v == 4 || strcasecmp(arg, "r") == 0) {
|
||||
opt_variant = VARIANT_4;
|
||||
}
|
||||
else if (v > VARIANT_AUTO && v < VARIANT_MAX) {
|
||||
opt_variant = v;
|
||||
}
|
||||
|
||||
opt_donate_level = v;
|
||||
break;
|
||||
|
||||
case 1006: /* --nicehash */
|
||||
|
@ -342,7 +406,7 @@ static void parse_arg(int key, char *arg) {
|
|||
|
||||
static void parse_config(json_t *config, char *ref)
|
||||
{
|
||||
int i;
|
||||
size_t i;
|
||||
char buf[16];
|
||||
json_t *val;
|
||||
|
||||
|
@ -451,9 +515,9 @@ void parse_cmdline(int argc, char *argv[]) {
|
|||
sprintf(opt_userpass, "%s:%s", opt_user, opt_pass);
|
||||
}
|
||||
|
||||
opt_algo_variant = get_algo_variant(opt_algo, opt_algo_variant);
|
||||
opt_av = get_algo_variant(opt_algo, opt_av);
|
||||
|
||||
if (!cryptonight_init(opt_algo_variant)) {
|
||||
if (!cryptonight_init(opt_av)) {
|
||||
applog(LOG_ERR, "Cryptonight hash self-test failed. This might be caused by bad compiler optimizations.");
|
||||
proper_exit(1);
|
||||
}
|
||||
|
@ -511,6 +575,12 @@ void show_version_and_exit(void) {
|
|||
}
|
||||
|
||||
|
||||
const char* get_current_algo_name(void) {
|
||||
const char *get_current_algo_name(void) {
|
||||
return algo_names[opt_algo];
|
||||
}
|
||||
|
||||
|
||||
const char *get_current_variant_name(void)
|
||||
{
|
||||
return variant_names[opt_variant + 1];
|
||||
}
|
||||
|
|
64
options.h
64
options.h
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,43 +22,52 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __OPTIONS_H__
|
||||
#define __OPTIONS_H__
|
||||
#ifndef XMRIG_OPTIONS_H
|
||||
#define XMRIG_OPTIONS_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
#endif
|
||||
|
||||
|
||||
enum mining_algo {
|
||||
enum Algo {
|
||||
ALGO_CRYPTONIGHT, /* CryptoNight (Monero) */
|
||||
ALGO_CRYPTONIGHT_LITE, /* CryptoNight-Lite (AEON) */
|
||||
};
|
||||
|
||||
|
||||
enum xmr_algo_variant {
|
||||
XMR_AV0_AUTO,
|
||||
XMR_AV1_AESNI,
|
||||
XMR_AV2_AESNI_DOUBLE,
|
||||
XMR_AV3_SOFT_AES,
|
||||
XMR_AV4_SOFT_AES_DOUBLE,
|
||||
XMR_AV_MAX
|
||||
enum Variant {
|
||||
VARIANT_AUTO = -1,
|
||||
VARIANT_0 = 0,
|
||||
VARIANT_1 = 1,
|
||||
VARIANT_2 = 2,
|
||||
VARIANT_4 = 3,
|
||||
VARIANT_MAX
|
||||
};
|
||||
|
||||
|
||||
#ifndef XMRIG_NO_AEON
|
||||
enum aeon_algo_variant {
|
||||
AEON_AV0_AUTO,
|
||||
AEON_AV1_AESNI,
|
||||
AEON_AV2_AESNI_DOUBLE,
|
||||
AEON_AV3_SOFT_AES,
|
||||
AEON_AV4_SOFT_AES_DOUBLE,
|
||||
AEON_AV_MAX
|
||||
enum AlgoVariant {
|
||||
AV_AUTO, // --av=0 Automatic mode.
|
||||
AV_SINGLE, // --av=1 Single hash mode
|
||||
AV_DOUBLE, // --av=2 Double hash mode
|
||||
AV_SINGLE_SOFT, // --av=3 Single hash mode (Software AES)
|
||||
AV_DOUBLE_SOFT, // --av=4 Double hash mode (Software AES)
|
||||
AV_MAX
|
||||
};
|
||||
|
||||
|
||||
enum Assembly {
|
||||
ASM_NONE,
|
||||
ASM_AUTO,
|
||||
ASM_INTEL,
|
||||
ASM_RYZEN,
|
||||
ASM_BULLDOZER,
|
||||
ASM_MAX
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
extern bool opt_colors;
|
||||
|
@ -72,20 +82,24 @@ extern char *opt_userpass;
|
|||
extern char *opt_user;
|
||||
extern char *opt_pass;
|
||||
extern int opt_n_threads;
|
||||
extern int opt_algo_variant;
|
||||
extern int opt_retry_pause;
|
||||
extern int opt_retries;
|
||||
extern int opt_donate_level;
|
||||
extern int opt_max_cpu_usage;
|
||||
extern int64_t opt_affinity;
|
||||
extern enum mining_algo opt_algo;
|
||||
|
||||
extern enum Algo opt_algo;
|
||||
extern enum Variant opt_variant;
|
||||
extern enum AlgoVariant opt_av;
|
||||
extern enum Assembly opt_assembly;
|
||||
|
||||
void parse_cmdline(int argc, char *argv[]);
|
||||
void show_usage_and_exit(int status);
|
||||
void show_version_and_exit(void);
|
||||
const char* get_current_algo_name(void);
|
||||
const char *get_current_algo_name(void);
|
||||
const char *get_current_variant_name(void);
|
||||
|
||||
extern void proper_exit(int reason);
|
||||
|
||||
|
||||
#endif /* __OPTIONS_H__ */
|
||||
#endif /* XMRIG_OPTIONS_H */
|
||||
|
|
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,12 +22,16 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __PERSISTENT_MEMORY_H__
|
||||
#define __PERSISTENT_MEMORY_H__
|
||||
#ifndef XMRIG_PERSISTENT_MEMORY_H
|
||||
#define XMRIG_PERSISTENT_MEMORY_H
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
|
||||
#include "algo/cryptonight/cryptonight.h"
|
||||
|
||||
|
||||
enum memory_flags {
|
||||
MEMORY_HUGEPAGES_AVAILABLE = 1,
|
||||
MEMORY_HUGEPAGES_ENABLED = 2,
|
||||
|
@ -43,8 +48,15 @@ extern int persistent_memory_flags;
|
|||
|
||||
const char * persistent_memory_allocate();
|
||||
void persistent_memory_free();
|
||||
void * persistent_calloc(size_t num, size_t size);
|
||||
void * create_persistent_ctx(int thr_id);
|
||||
void *persistent_calloc(size_t num, size_t size);
|
||||
void create_cryptonight_ctx(struct cryptonight_ctx **ctx, int thr_id);
|
||||
|
||||
|
||||
#endif /* __PERSISTENT_MEMORY_H__ */
|
||||
void *allocate_executable_memory(size_t size);
|
||||
void flush_instruction_cache(void *p, size_t size);
|
||||
void init_cn_r(struct cryptonight_ctx *ctx);
|
||||
void protect_executable_memory(void *p, size_t size);
|
||||
|
||||
|
||||
|
||||
#endif /* XMRIG_PERSISTENT_MEMORY_H */
|
||||
|
|
39
stratum.c
39
stratum.c
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -40,11 +41,12 @@
|
|||
# include <netinet/in.h>
|
||||
#endif
|
||||
|
||||
#include "stratum.h"
|
||||
#include "version.h"
|
||||
#include "options.h"
|
||||
#include "stats.h"
|
||||
#include "stratum.h"
|
||||
#include "util.h"
|
||||
#include "utils/applog.h"
|
||||
#include "version.h"
|
||||
|
||||
|
||||
#ifdef WIN32
|
||||
|
@ -73,6 +75,7 @@ static int sockopt_keepalive_cb(void *userdata, curl_socket_t fd, curlsocktype p
|
|||
static curl_socket_t opensocket_grab_cb(void *clientp, curlsocktype purpose, struct curl_sockaddr *addr);
|
||||
static int closesocket_cb(void *clientp, curl_socket_t item);
|
||||
static bool login_decode(struct stratum_ctx *sctx, const json_t *val);
|
||||
static void extensions_decode(const json_t *val);
|
||||
static bool job_decode(const json_t *job);
|
||||
static bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen);
|
||||
|
||||
|
@ -625,6 +628,11 @@ static bool login_decode(struct stratum_ctx *sctx, const json_t *val) {
|
|||
memcpy(&sctx->id, id, strlen(id));
|
||||
|
||||
const char *s = json_string_value(json_object_get(res, "status"));
|
||||
if (!s) {
|
||||
// Workaround for xmrig-proxy bug https://github.com/xmrig/xmrig-proxy/commit/dfa1960fe3eeb13f80717b7dbfcc7c6e9f222d89
|
||||
s = json_string_value(json_object_get(val, "status"));
|
||||
}
|
||||
|
||||
if (!s) {
|
||||
applog(LOG_ERR, "JSON invalid status");
|
||||
return false;
|
||||
|
@ -635,10 +643,31 @@ static bool login_decode(struct stratum_ctx *sctx, const json_t *val) {
|
|||
return false;
|
||||
}
|
||||
|
||||
extensions_decode(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static void extensions_decode(const json_t *res)
|
||||
{
|
||||
json_t *extensions = json_object_get(res, "extensions");
|
||||
if (!extensions || json_array_size(extensions) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t index;
|
||||
json_t *value;
|
||||
|
||||
json_array_foreach(extensions, index, value) {
|
||||
const char *s = json_string_value(value);
|
||||
if (s && strcmp(s, "nicehash")) {
|
||||
opt_nicehash = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief job_decode
|
||||
* @param sctx
|
||||
|
@ -681,6 +710,8 @@ static bool job_decode(const json_t *job) {
|
|||
memset(work.job_id, 0, sizeof(work.job_id));
|
||||
memcpy(work.job_id, job_id, strlen(job_id));
|
||||
|
||||
work.height = (uint64_t) json_integer_value(json_object_get(job, "height"));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
13
stratum.h
13
stratum.h
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,8 +22,9 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __STRATUM_H__
|
||||
#define __STRATUM_H__
|
||||
#ifndef XMRIG_STRATUM_H
|
||||
#define XMRIG_STRATUM_H
|
||||
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <inttypes.h>
|
||||
|
@ -41,6 +43,7 @@ struct work {
|
|||
uint32_t target __attribute__((aligned(16)));
|
||||
uint32_t hash[8] __attribute__((aligned(16)));
|
||||
char job_id[64] __attribute__((aligned(16)));
|
||||
uint64_t height;
|
||||
};
|
||||
|
||||
|
||||
|
@ -75,4 +78,4 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s);
|
|||
bool stratum_handle_response(char *buf);
|
||||
bool stratum_keepalived(struct stratum_ctx *sctx);
|
||||
|
||||
#endif /* __STRATUM_H__ */
|
||||
#endif /* XMRIG_STRATUM_H */
|
||||
|
|
|
@ -74,3 +74,23 @@ void persistent_memory_free() {
|
|||
_mm_free(persistent_memory);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void *allocate_executable_memory(size_t size)
|
||||
{
|
||||
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
}
|
||||
|
||||
|
||||
void protect_executable_memory(void *p, size_t size)
|
||||
{
|
||||
mprotect(p, size, PROT_READ | PROT_EXEC);
|
||||
}
|
||||
|
||||
|
||||
void flush_instruction_cache(void *p, size_t size)
|
||||
{
|
||||
# ifndef __FreeBSD__
|
||||
__builtin___clear_cache((char*) p, (char*)(p) + size);
|
||||
# endif
|
||||
}
|
||||
|
|
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -77,10 +78,10 @@ static void print_threads() {
|
|||
}
|
||||
|
||||
if (opt_colors) {
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, %s, donate=%d%%%s", opt_n_threads, opt_algo_variant, get_current_algo_name(), opt_donate_level, extra);
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT "THREADS: " CL_WHT "%d" CL_WHT ", av=%d, %s/%s, donate=%d%%%s", opt_n_threads, opt_av, get_current_algo_name(), get_current_variant_name(), opt_donate_level, extra);
|
||||
}
|
||||
else {
|
||||
applog_notime(LOG_INFO, " * THREADS: %d, av=%d, %s, donate=%d%%%s", opt_n_threads, opt_algo_variant, get_current_algo_name(), opt_donate_level, extra);
|
||||
applog_notime(LOG_INFO, " * THREADS: %d, av=%d, %s/%s, donate=%d%%%s", opt_n_threads, opt_av, get_current_algo_name(), get_current_variant_name(), opt_donate_level, extra);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -105,10 +106,10 @@ static void print_stratum() {
|
|||
|
||||
void print_summary() {
|
||||
if (opt_colors) {
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT APP_NAME " " APP_VERSION " " CL_LCY APP_SITE);
|
||||
applog_notime(LOG_INFO, CL_LGR " * " CL_WHT APP_NAME " " APP_VERSION " " CL_LCY APP_SITE);
|
||||
}
|
||||
else {
|
||||
applog_notime(LOG_INFO, " * " APP_NAME " " APP_VERSION " " APP_SITE);
|
||||
applog_notime(LOG_INFO, " * " APP_NAME " " APP_VERSION " " APP_SITE);
|
||||
}
|
||||
|
||||
print_memory();
|
||||
|
|
19
version.h
19
version.h
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,20 +22,20 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __VERSION_H__
|
||||
#define __VERSION_H__
|
||||
#ifndef XMRIG_VERSION_H
|
||||
#define XMRIG_VERSION_H
|
||||
|
||||
#define APP_ID "xmrig"
|
||||
#define APP_NAME "XMRig"
|
||||
#define APP_DESC "Monero (XMR) CPU miner"
|
||||
#define APP_VERSION "0.8.3-dev"
|
||||
#define APP_VERSION "0.10.0"
|
||||
#define APP_DOMAIN "xmrig.com"
|
||||
#define APP_SITE "www.xmrig.com"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2016-2017 xmrig.com"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com"
|
||||
|
||||
#define APP_VER_MAJOR 0
|
||||
#define APP_VER_MINOR 8
|
||||
#define APP_VER_BUILD 3
|
||||
#define APP_VER_MINOR 10
|
||||
#define APP_VER_BUILD 0
|
||||
#define APP_VER_REV 0
|
||||
|
||||
#endif /* __VERSION_H__ */
|
||||
#endif /* XMRIG_VERSION_H */
|
||||
|
|
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,9 +22,6 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __MEMORY_H__
|
||||
#define __MEMORY_H__
|
||||
|
||||
#include <windows.h>
|
||||
#include <ntsecapi.h>
|
||||
#include <tchar.h>
|
||||
|
@ -172,4 +170,21 @@ void persistent_memory_free() {
|
|||
}
|
||||
}
|
||||
|
||||
#endif /* __MEMORY_H__ */
|
||||
|
||||
void *allocate_executable_memory(size_t size)
|
||||
{
|
||||
return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
|
||||
}
|
||||
|
||||
|
||||
void protect_executable_memory(void *p, size_t size)
|
||||
{
|
||||
DWORD oldProtect;
|
||||
VirtualProtect(p, size, PAGE_EXECUTE_READ, &oldProtect);
|
||||
}
|
||||
|
||||
|
||||
void flush_instruction_cache(void *p, size_t size)
|
||||
{
|
||||
FlushInstructionCache(GetCurrentProcess(), p, size);
|
||||
}
|
||||
|
|
18
xmrig.c
18
xmrig.c
|
@ -4,8 +4,9 @@
|
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2016-2017 XMRig <support@xmrig.com>
|
||||
*
|
||||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -260,7 +261,8 @@ static void *miner_thread(void *userdata) {
|
|||
uint32_t max_nonce;
|
||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
|
||||
|
||||
struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) create_persistent_ctx(thr_id);
|
||||
struct cryptonight_ctx *persistentctx[1];
|
||||
create_cryptonight_ctx(persistentctx, thr_id);
|
||||
|
||||
if (cpu_info.total_logical_cpus > 1 && opt_affinity != -1L) {
|
||||
affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity);
|
||||
|
@ -305,8 +307,10 @@ static void *miner_thread(void *userdata) {
|
|||
struct timeval tv_start;
|
||||
gettimeofday(&tv_start, NULL);
|
||||
|
||||
persistentctx[0]->height = work.height;
|
||||
|
||||
/* scan nonces for a proof-of-work hash */
|
||||
const int rc = scanhash_cryptonight(thr_id, hash, work.blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx);
|
||||
const int rc = scanhash_cryptonight(thr_id, hash, (uint8_t *) work.blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx);
|
||||
stats_add_hashes(thr_id, &tv_start, hashes_done);
|
||||
|
||||
if (!rc) {
|
||||
|
@ -335,7 +339,8 @@ static void *miner_thread_double(void *userdata) {
|
|||
uint32_t max_nonce;
|
||||
uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
|
||||
|
||||
struct cryptonight_ctx *persistentctx = (struct cryptonight_ctx *) create_persistent_ctx(thr_id);
|
||||
struct cryptonight_ctx *persistentctx[2];
|
||||
create_cryptonight_ctx(persistentctx, thr_id);
|
||||
|
||||
if (cpu_info.total_logical_cpus > 1 && opt_affinity != -1L) {
|
||||
affine_to_cpu_mask(thr_id, (unsigned long) opt_affinity);
|
||||
|
@ -389,6 +394,9 @@ static void *miner_thread_double(void *userdata) {
|
|||
struct timeval tv_start;
|
||||
gettimeofday(&tv_start, NULL);
|
||||
|
||||
persistentctx[0]->height = work.height;
|
||||
persistentctx[1]->height = work.height;
|
||||
|
||||
/* scan nonces for a proof-of-work hash */
|
||||
const int rc = scanhash_cryptonight_double(thr_id, (uint32_t *) double_hash, double_blob, work.blob_size, work.target, max_nonce, &hashes_done, persistentctx);
|
||||
stats_add_hashes(thr_id, &tv_start, hashes_done);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue