Merge branch 'dev' into sync-base

XMRig 2022-06-14 05:19:28 +07:00
commit 3ded8e6734
No known key found for this signature in database
GPG key ID: 446A53638BE94409
119 changed files with 5867 additions and 14547 deletions


@@ -1,139 +0,0 @@
/* XMRig
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright (c) 2018-2019 tevador <tevador@gmail.com>
* Copyright (c) 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
* Copyright (c) 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "crypto/astrobwt/AstroBWT.h"
#include "backend/cpu/Cpu.h"
#include "base/crypto/sha3.h"
#include "base/tools/bswap_64.h"
#include "crypto/cn/CryptoNight.h"
#include "crypto/astrobwt/sort_indices2.h"
#include <limits>
static bool astrobwtInitialized = false;
#ifdef ASTROBWT_AVX2
static bool hasAVX2 = false;
extern "C"
#ifndef _MSC_VER
__attribute__((ms_abi))
#endif
void SHA3_256_AVX2_ASM(const void* in, size_t inBytes, void* out);
#endif
#ifdef XMRIG_ARM
extern "C" {
#include "salsa20_ref/ecrypt-sync.h"
}
static void Salsa20_XORKeyStream(const void* key, void* output, size_t size)
{
uint8_t iv[8] = {};
ECRYPT_ctx ctx;
ECRYPT_keysetup(&ctx, static_cast<const uint8_t*>(key), 256, 64);
ECRYPT_ivsetup(&ctx, iv);
ECRYPT_keystream_bytes(&ctx, static_cast<uint8_t*>(output), size);
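// Zero 16-byte guard regions just before and after the keystream -- presumably
// padding for the BWT stage, which appears to touch both ends (an assumption;
// the deleted code does not document it).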
memset(static_cast<uint8_t*>(output) - 16, 0, 16);
memset(static_cast<uint8_t*>(output) + size, 0, 16);
}
#else
#include "Salsa20.hpp"
static void Salsa20_XORKeyStream(const void* key, void* output, size_t size)
{
const uint64_t iv = 0;
ZeroTier::Salsa20 s(key, &iv);
s.XORKeyStream(output, static_cast<uint32_t>(size));
memset(static_cast<uint8_t*>(output) - 16, 0, 16);
memset(static_cast<uint8_t*>(output) + size, 0, 16);
}
extern "C" int salsa20_stream_avx2(void* c, uint64_t clen, const void* iv, const void* key);
static void Salsa20_XORKeyStream_AVX256(const void* key, void* output, size_t size)
{
const uint64_t iv = 0;
salsa20_stream_avx2(output, size, &iv, key);
memset(static_cast<uint8_t*>(output) - 16, 0, 16);
memset(static_cast<uint8_t*>(output) + size, 0, 16);
}
#endif
bool xmrig::astrobwt::astrobwt_dero_v2(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash)
{
constexpr size_t N = 9973;
constexpr size_t STRIDE = 10240;
alignas(8) uint8_t key[32];
uint8_t* scratchpad_ptr = (uint8_t*)(scratchpad) + 64;
uint8_t* v = scratchpad_ptr;
uint32_t* indices = (uint32_t*)(scratchpad_ptr + STRIDE);
uint32_t* tmp_indices = (uint32_t*)(scratchpad_ptr + STRIDE * 5);
#ifdef ASTROBWT_AVX2
if (hasAVX2) {
SHA3_256_AVX2_ASM(input_data, input_size, key);
Salsa20_XORKeyStream_AVX256(key, v, N);
}
else
#endif
{
sha3_HashBuffer(256, SHA3_FLAGS_NONE, input_data, input_size, key, sizeof(key));
Salsa20_XORKeyStream(key, v, N);
}
sort_indices_astrobwt_v2(N, v, indices, tmp_indices);
#ifdef ASTROBWT_AVX2
if (hasAVX2) {
SHA3_256_AVX2_ASM(indices, N * 2, output_hash);
}
else
#endif
{
sha3_HashBuffer(256, SHA3_FLAGS_NONE, indices, N * 2, output_hash, 32);
}
return true;
}
void xmrig::astrobwt::init()
{
if (!astrobwtInitialized) {
# ifdef ASTROBWT_AVX2
hasAVX2 = Cpu::info()->hasAVX2();
# endif
astrobwtInitialized = true;
}
}
template<>
void xmrig::astrobwt::single_hash<xmrig::Algorithm::ASTROBWT_DERO_2>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t)
{
astrobwt_dero_v2(input, static_cast<uint32_t>(size), ctx[0]->memory, output);
}
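
For orientation, a minimal sketch of how this removed entry point was driven; the buffer sizing is a hypothetical illustration (real callers pass a CryptoNight scratchpad via ctx[0]->memory, as in single_hash above):

/* Illustrative only -- not part of the deleted file. The sizing assumes the
 * layout above: 64-byte prefix, STRIDE-byte stream region and two index
 * arrays, i.e. 64 + 10240 * 9 bytes in total. */
#include <cstdint>
#include <vector>
#include "crypto/astrobwt/AstroBWT.h"

void example_hash(const uint8_t* blob, uint32_t blob_size, uint8_t (&hash)[32])
{
    xmrig::astrobwt::init();                          // one-time AVX2 detection
    std::vector<uint8_t> scratchpad(64 + 10240 * 9);  // hypothetical sizing
    xmrig::astrobwt::astrobwt_dero_v2(blob, blob_size, scratchpad.data(), hash);
}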


@@ -1,43 +0,0 @@
/* XMRig
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright (c) 2018-2019 tevador <tevador@gmail.com>
* Copyright (c) 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
* Copyright (c) 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "base/crypto/Algorithm.h"
struct cryptonight_ctx;
namespace xmrig {
namespace astrobwt {
bool astrobwt_dero_v2(const void* input_data, uint32_t input_size, void* scratchpad, uint8_t* output_hash);
void init();
template<Algorithm::Id ALGO>
void single_hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t);
template<>
void single_hash<Algorithm::ASTROBWT_DERO_2>(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx** ctx, uint64_t);
}} // namespace xmrig::astrobwt


@@ -1,352 +0,0 @@
/*
* Based on public domain code available at: http://cr.yp.to/snuffle.html
*
* Modifications and C-native SSE macro based SSE implementation by
* Adam Ierymenko <adam.ierymenko@zerotier.com>.
*
* Additional modifications and code cleanup for AstroBWT by
* SChernykh <https://github.com/SChernykh>
*
* Since the original was public domain, this is too.
*/
#include "Salsa20.hpp"
// Statically compute and define SSE constants
class _s20sseconsts
{
public:
_s20sseconsts()
{
maskLo32 = _mm_shuffle_epi32(_mm_cvtsi32_si128(-1), _MM_SHUFFLE(1, 0, 1, 0));
maskHi32 = _mm_slli_epi64(maskLo32, 32);
}
__m128i maskLo32,maskHi32;
};
static const _s20sseconsts _S20SSECONSTANTS;
namespace ZeroTier {
void Salsa20::init(const void *key,const void *iv)
{
const uint32_t *const k = (const uint32_t *)key;
_state.i[0] = 0x61707865;
_state.i[1] = 0x3320646e;
_state.i[2] = 0x79622d32;
_state.i[3] = 0x6b206574;
_state.i[4] = k[3];
_state.i[5] = 0;
_state.i[6] = k[7];
_state.i[7] = k[2];
_state.i[8] = 0;
_state.i[9] = k[6];
_state.i[10] = k[1];
_state.i[11] = ((const uint32_t *)iv)[1];
_state.i[12] = k[5];
_state.i[13] = k[0];
_state.i[14] = ((const uint32_t *)iv)[0];
_state.i[15] = k[4];
}
void Salsa20::XORKeyStream(void *out,unsigned int bytes)
{
uint8_t tmp[64];
uint8_t *c = (uint8_t *)out;
uint8_t *ctarget = c;
unsigned int i;
if (!bytes)
return;
for (;;) {
if (bytes < 64) {
for (i = 0;i < bytes;++i)
tmp[i] = 0;
ctarget = c;
c = tmp;
}
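// Ten "2X round" blocks follow: the full 20 rounds of Salsa20/20, unrolled.
// Unlike canonical Salsa20, this trimmed copy stores the raw keystream to the
// output (see the _mm_storeu_ps calls at the end of the loop) instead of
// XORing it with the buffer's prior contents.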
__m128i X0 = _mm_loadu_si128((const __m128i *)&(_state.v[0]));
__m128i X1 = _mm_loadu_si128((const __m128i *)&(_state.v[1]));
__m128i X2 = _mm_loadu_si128((const __m128i *)&(_state.v[2]));
__m128i X3 = _mm_loadu_si128((const __m128i *)&(_state.v[3]));
__m128i T;
__m128i X0s = X0;
__m128i X1s = X1;
__m128i X2s = X2;
__m128i X3s = X3;
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
// 2X round -------------------------------------------------------------
T = _mm_add_epi32(X0, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X1, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X3, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x93);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x39);
T = _mm_add_epi32(X0, X1);
X3 = _mm_xor_si128(_mm_xor_si128(X3, _mm_slli_epi32(T, 7)), _mm_srli_epi32(T, 25));
T = _mm_add_epi32(X3, X0);
X2 = _mm_xor_si128(_mm_xor_si128(X2, _mm_slli_epi32(T, 9)), _mm_srli_epi32(T, 23));
T = _mm_add_epi32(X2, X3);
X1 = _mm_xor_si128(_mm_xor_si128(X1, _mm_slli_epi32(T, 13)), _mm_srli_epi32(T, 19));
T = _mm_add_epi32(X1, X2);
X0 = _mm_xor_si128(_mm_xor_si128(X0, _mm_slli_epi32(T, 18)), _mm_srli_epi32(T, 14));
X1 = _mm_shuffle_epi32(X1, 0x39);
X2 = _mm_shuffle_epi32(X2, 0x4E);
X3 = _mm_shuffle_epi32(X3, 0x93);
X0 = _mm_add_epi32(X0s,X0);
X1 = _mm_add_epi32(X1s,X1);
X2 = _mm_add_epi32(X2s,X2);
X3 = _mm_add_epi32(X3s,X3);
__m128i k02 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X0, 32), _mm_srli_epi64(X3, 32)), _MM_SHUFFLE(0, 1, 2, 3));
__m128i k13 = _mm_shuffle_epi32(_mm_or_si128(_mm_slli_epi64(X1, 32), _mm_srli_epi64(X0, 32)), _MM_SHUFFLE(0, 1, 2, 3));
__m128i k20 = _mm_or_si128(_mm_and_si128(X2, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X1, _S20SSECONSTANTS.maskHi32));
__m128i k31 = _mm_or_si128(_mm_and_si128(X3, _S20SSECONSTANTS.maskLo32), _mm_and_si128(X2, _S20SSECONSTANTS.maskHi32));
_mm_storeu_ps(reinterpret_cast<float *>(c),_mm_castsi128_ps(_mm_unpackhi_epi64(k02,k20)));
_mm_storeu_ps(reinterpret_cast<float *>(c) + 4,_mm_castsi128_ps(_mm_unpackhi_epi64(k13,k31)));
_mm_storeu_ps(reinterpret_cast<float *>(c) + 8,_mm_castsi128_ps(_mm_unpacklo_epi64(k20,k02)));
_mm_storeu_ps(reinterpret_cast<float *>(c) + 12,_mm_castsi128_ps(_mm_unpacklo_epi64(k31,k13)));
if (!(++_state.i[8])) {
++_state.i[5]; // state reordered for SSE
/* stopping at 2^70 bytes per nonce is user's responsibility */
}
if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i)
ctarget[i] = c[i];
}
return;
}
bytes -= 64;
c += 64;
}
}
} // namespace ZeroTier


@@ -1,52 +0,0 @@
/*
* Based on public domain code available at: http://cr.yp.to/snuffle.html
*
* This therefore is public domain.
*/
#ifndef ZT_SALSA20_HPP
#define ZT_SALSA20_HPP
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <emmintrin.h>
namespace ZeroTier {
/**
* Salsa20 stream cipher
*/
class Salsa20
{
public:
/**
* @param key 256-bit (32 byte) key
* @param iv 64-bit initialization vector
*/
Salsa20(const void *key,const void *iv)
{
init(key,iv);
}
/**
* Initialize cipher
*
* @param key Key bits
* @param iv 64-bit initialization vector
*/
void init(const void *key,const void *iv);
void XORKeyStream(void *out,unsigned int bytes);
private:
union {
__m128i v[4];
uint32_t i[16];
} _state;
};
} // namespace ZeroTier
#endif
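
A minimal usage sketch against this header (illustrative, not part of the commit). Note that in this AstroBWT-trimmed copy XORKeyStream() stores the raw keystream into the output buffer rather than XORing it with existing contents, so no zero-fill is needed first:

#include <cstdint>
#include "Salsa20.hpp"

// Illustrative only: 64 bytes of Salsa20/20 keystream from a 256-bit key and
// the all-zero 64-bit IV that AstroBWT.cpp always uses.
void example_keystream(const uint8_t (&key)[32], uint8_t (&stream)[64])
{
    const uint64_t iv = 0;
    ZeroTier::Salsa20 cipher(key, &iv);
    cipher.XORKeyStream(stream, sizeof(stream));
}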


@@ -1,272 +0,0 @@
/* ecrypt-config.h */
/* *** Normally, it should not be necessary to edit this file. *** */
#ifndef ECRYPT_CONFIG
#define ECRYPT_CONFIG
/* ------------------------------------------------------------------------- */
/* Guess the endianness of the target architecture. */
/*
* The LITTLE endian machines:
*/
#if defined(__ultrix) /* Older MIPS */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__alpha) /* Alpha */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(i386) /* x86 (gcc) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__i386) /* x86 (gcc) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(_M_IX86) /* x86 (MSC, Borland) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(_MSC_VER) /* x86 (surely MSC) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */
#define ECRYPT_LITTLE_ENDIAN
/*
* The BIG endian machines:
*/
#elif defined(sun) /* Newer Sparc's */
#define ECRYPT_BIG_ENDIAN
#elif defined(__ppc__) /* PowerPC */
#define ECRYPT_BIG_ENDIAN
/*
* Finally machines with UNKNOWN endianness:
*/
#elif defined (_AIX) /* RS6000 */
#define ECRYPT_UNKNOWN
#elif defined(__hpux) /* HP-PA */
#define ECRYPT_UNKNOWN
#elif defined(__aux) /* 68K */
#define ECRYPT_UNKNOWN
#elif defined(__dgux) /* 88K (but P6 in latest boxes) */
#define ECRYPT_UNKNOWN
#elif defined(__sgi) /* Newer MIPS */
#define ECRYPT_UNKNOWN
#else /* Any other processor */
#define ECRYPT_UNKNOWN
#endif
/* ------------------------------------------------------------------------- */
/*
* Find minimal-width types to store 8-bit, 16-bit, 32-bit, and 64-bit
* integers.
*
* Note: to enable 64-bit types on 32-bit compilers, it might be
* necessary to switch from ISO C90 mode to ISO C99 mode (e.g., gcc
* -std=c99).
*/
#include <limits.h>
/* --- check char --- */
#if (UCHAR_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T char
#define U8C(v) (v##U)
#if (UCHAR_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (UCHAR_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T char
#define U16C(v) (v##U)
#endif
#if (UCHAR_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T char
#define U32C(v) (v##U)
#endif
#if (UCHAR_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T char
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check short --- */
#if (USHRT_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T short
#define U8C(v) (v##U)
#if (USHRT_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (USHRT_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T short
#define U16C(v) (v##U)
#endif
#if (USHRT_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T short
#define U32C(v) (v##U)
#endif
#if (USHRT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T short
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check int --- */
#if (UINT_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T int
#define U8C(v) (v##U)
#if (UINT_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (UINT_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T int
#define U16C(v) (v##U)
#endif
#if (UINT_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T int
#define U32C(v) (v##U)
#endif
#if (UINT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T int
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check long --- */
#if (ULONG_MAX / 0xFUL > 0xFUL)
#ifndef I8T
#define I8T long
#define U8C(v) (v##UL)
#if (ULONG_MAX == 0xFFUL)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (ULONG_MAX / 0xFFUL > 0xFFUL)
#ifndef I16T
#define I16T long
#define U16C(v) (v##UL)
#endif
#if (ULONG_MAX / 0xFFFFUL > 0xFFFFUL)
#ifndef I32T
#define I32T long
#define U32C(v) (v##UL)
#endif
#if (ULONG_MAX / 0xFFFFFFFFUL > 0xFFFFFFFFUL)
#ifndef I64T
#define I64T long
#define U64C(v) (v##UL)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check long long --- */
#ifdef ULLONG_MAX
#if (ULLONG_MAX / 0xFULL > 0xFULL)
#ifndef I8T
#define I8T long long
#define U8C(v) (v##ULL)
#if (ULLONG_MAX == 0xFFULL)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (ULLONG_MAX / 0xFFULL > 0xFFULL)
#ifndef I16T
#define I16T long long
#define U16C(v) (v##ULL)
#endif
#if (ULLONG_MAX / 0xFFFFULL > 0xFFFFULL)
#ifndef I32T
#define I32T long long
#define U32C(v) (v##ULL)
#endif
#if (ULLONG_MAX / 0xFFFFFFFFULL > 0xFFFFFFFFULL)
#ifndef I64T
#define I64T long long
#define U64C(v) (v##ULL)
#endif
#endif
#endif
#endif
#endif
#endif
/* --- check __int64 --- */
#ifdef _UI64_MAX
#if (_UI64_MAX / 0xFFFFFFFFui64 > 0xFFFFFFFFui64)
#ifndef I64T
#define I64T __int64
#define U64C(v) (v##ui64)
#endif
#endif
#endif
/* ------------------------------------------------------------------------- */
#endif
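
For a sense of what all the probing above selects in practice (an illustration assuming a typical LP64 gcc target, not something the header itself guarantees):

/* Illustrative only: on an LP64 gcc target the checks above resolve to
 *   I8T  = char   (UCHAR_MAX == 0xFF, so ECRYPT_I8T_IS_BYTE is defined)
 *   I16T = short
 *   I32T = int
 *   I64T = long   (and ECRYPT_NATIVE64 is defined)
 */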


@@ -1,46 +0,0 @@
/* ecrypt-machine.h */
/*
* This file is included by 'ecrypt-portable.h'. It lets you override
* the default macros for specific platforms. Please carefully check
* the machine code generated by your compiler (with optimisations
* turned on) before deciding to edit this file.
*/
/* ------------------------------------------------------------------------- */
#if (defined(ECRYPT_DEFAULT_ROT) && !defined(ECRYPT_MACHINE_ROT))
#define ECRYPT_MACHINE_ROT
#if (defined(WIN32) && defined(_MSC_VER))
#undef ROTL32
#undef ROTR32
#undef ROTL64
#undef ROTR64
#include <stdlib.h>
#define ROTL32(v, n) _lrotl(v, n)
#define ROTR32(v, n) _lrotr(v, n)
#define ROTL64(v, n) _rotl64(v, n)
#define ROTR64(v, n) _rotr64(v, n)
#endif
#endif
/* ------------------------------------------------------------------------- */
#if (defined(ECRYPT_DEFAULT_SWAP) && !defined(ECRYPT_MACHINE_SWAP))
#define ECRYPT_MACHINE_SWAP
/*
* If you want to override the default swap macros, put them here.
*/
#endif
/* ------------------------------------------------------------------------- */


@@ -1,303 +0,0 @@
/* ecrypt-portable.h */
/*
* WARNING: the conversions defined below are implemented as macros,
* and should be used carefully. They should NOT be used with
* parameters which perform some action. E.g., the following two lines
* are not equivalent:
*
* 1) ++x; y = ROTL32(x, n);
* 2) y = ROTL32(++x, n);
*/
/*
* *** Please do not edit this file. ***
*
* The default macros can be overridden for specific architectures by
* editing 'ecrypt-machine.h'.
*/
#ifndef ECRYPT_PORTABLE
#define ECRYPT_PORTABLE
#include "ecrypt-config.h"
/* ------------------------------------------------------------------------- */
/*
* The following types are defined (if available):
*
* u8: unsigned integer type, at least 8 bits
* u16: unsigned integer type, at least 16 bits
* u32: unsigned integer type, at least 32 bits
* u64: unsigned integer type, at least 64 bits
*
* s8, s16, s32, s64 -> signed counterparts of u8, u16, u32, u64
*
* The selection of minimum-width integer types is taken care of by
* 'ecrypt-config.h'. Note: to enable 64-bit types on 32-bit
* compilers, it might be necessary to switch from ISO C90 mode to ISO
* C99 mode (e.g., gcc -std=c99).
*/
#ifdef I8T
typedef signed I8T s8;
typedef unsigned I8T u8;
#endif
#ifdef I16T
typedef signed I16T s16;
typedef unsigned I16T u16;
#endif
#ifdef I32T
typedef signed I32T s32;
typedef unsigned I32T u32;
#endif
#ifdef I64T
typedef signed I64T s64;
typedef unsigned I64T u64;
#endif
/*
* The following macros are used to obtain exact-width results.
*/
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U16V(v) ((u16)(v) & U16C(0xFFFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
#define U64V(v) ((u64)(v) & U64C(0xFFFFFFFFFFFFFFFF))
/* ------------------------------------------------------------------------- */
/*
* The following macros return words with their bits rotated over n
* positions to the left/right.
*/
#define ECRYPT_DEFAULT_ROT
#define ROTL8(v, n) \
(U8V((v) << (n)) | ((v) >> (8 - (n))))
#define ROTL16(v, n) \
(U16V((v) << (n)) | ((v) >> (16 - (n))))
#define ROTL32(v, n) \
(U32V((v) << (n)) | ((v) >> (32 - (n))))
#define ROTL64(v, n) \
(U64V((v) << (n)) | ((v) >> (64 - (n))))
#define ROTR8(v, n) ROTL8(v, 8 - (n))
#define ROTR16(v, n) ROTL16(v, 16 - (n))
#define ROTR32(v, n) ROTL32(v, 32 - (n))
#define ROTR64(v, n) ROTL64(v, 64 - (n))
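/*
 * Worked example (illustrative, not part of the original header):
 * ROTL32(0x80000001, 1) == U32V(0x80000001 << 1) | (0x80000001 >> 31)
 *                       == 0x00000002 | 0x00000001 == 0x00000003,
 * i.e. the bit shifted out at the top wraps around to bit 0.
 */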
#include "ecrypt-machine.h"
/* ------------------------------------------------------------------------- */
/*
* The following macros return a word with bytes in reverse order.
*/
#define ECRYPT_DEFAULT_SWAP
#define SWAP16(v) \
ROTL16(v, 8)
#define SWAP32(v) \
((ROTL32(v, 8) & U32C(0x00FF00FF)) | \
(ROTL32(v, 24) & U32C(0xFF00FF00)))
#ifdef ECRYPT_NATIVE64
#define SWAP64(v) \
((ROTL64(v, 8) & U64C(0x000000FF000000FF)) | \
(ROTL64(v, 24) & U64C(0x0000FF000000FF00)) | \
(ROTL64(v, 40) & U64C(0x00FF000000FF0000)) | \
(ROTL64(v, 56) & U64C(0xFF000000FF000000)))
#else
#define SWAP64(v) \
(((u64)SWAP32(U32V(v)) << 32) | (u64)SWAP32(U32V(v >> 32)))
#endif
#include "ecrypt-machine.h"
#define ECRYPT_DEFAULT_WTOW
#ifdef ECRYPT_LITTLE_ENDIAN
#define U16TO16_LITTLE(v) (v)
#define U32TO32_LITTLE(v) (v)
#define U64TO64_LITTLE(v) (v)
#define U16TO16_BIG(v) SWAP16(v)
#define U32TO32_BIG(v) SWAP32(v)
#define U64TO64_BIG(v) SWAP64(v)
#endif
#ifdef ECRYPT_BIG_ENDIAN
#define U16TO16_LITTLE(v) SWAP16(v)
#define U32TO32_LITTLE(v) SWAP32(v)
#define U64TO64_LITTLE(v) SWAP64(v)
#define U16TO16_BIG(v) (v)
#define U32TO32_BIG(v) (v)
#define U64TO64_BIG(v) (v)
#endif
#include "ecrypt-machine.h"
/*
* The following macros load words from an array of bytes with
* different types of endianness, and vice versa.
*/
#define ECRYPT_DEFAULT_BTOW
#if (!defined(ECRYPT_UNKNOWN) && defined(ECRYPT_I8T_IS_BYTE))
#define U8TO16_LITTLE(p) U16TO16_LITTLE(((u16*)(p))[0])
#define U8TO32_LITTLE(p) U32TO32_LITTLE(((u32*)(p))[0])
#define U8TO64_LITTLE(p) U64TO64_LITTLE(((u64*)(p))[0])
#define U8TO16_BIG(p) U16TO16_BIG(((u16*)(p))[0])
#define U8TO32_BIG(p) U32TO32_BIG(((u32*)(p))[0])
#define U8TO64_BIG(p) U64TO64_BIG(((u64*)(p))[0])
#define U16TO8_LITTLE(p, v) (((u16*)(p))[0] = U16TO16_LITTLE(v))
#define U32TO8_LITTLE(p, v) (((u32*)(p))[0] = U32TO32_LITTLE(v))
#define U64TO8_LITTLE(p, v) (((u64*)(p))[0] = U64TO64_LITTLE(v))
#define U16TO8_BIG(p, v) (((u16*)(p))[0] = U16TO16_BIG(v))
#define U32TO8_BIG(p, v) (((u32*)(p))[0] = U32TO32_BIG(v))
#define U64TO8_BIG(p, v) (((u64*)(p))[0] = U64TO64_BIG(v))
#else
#define U8TO16_LITTLE(p) \
(((u16)((p)[0]) ) | \
((u16)((p)[1]) << 8))
#define U8TO32_LITTLE(p) \
(((u32)((p)[0]) ) | \
((u32)((p)[1]) << 8) | \
((u32)((p)[2]) << 16) | \
((u32)((p)[3]) << 24))
#ifdef ECRYPT_NATIVE64
#define U8TO64_LITTLE(p) \
(((u64)((p)[0]) ) | \
((u64)((p)[1]) << 8) | \
((u64)((p)[2]) << 16) | \
((u64)((p)[3]) << 24) | \
((u64)((p)[4]) << 32) | \
((u64)((p)[5]) << 40) | \
((u64)((p)[6]) << 48) | \
((u64)((p)[7]) << 56))
#else
#define U8TO64_LITTLE(p) \
((u64)U8TO32_LITTLE(p) | ((u64)U8TO32_LITTLE((p) + 4) << 32))
#endif
#define U8TO16_BIG(p) \
(((u16)((p)[0]) << 8) | \
((u16)((p)[1]) ))
#define U8TO32_BIG(p) \
(((u32)((p)[0]) << 24) | \
((u32)((p)[1]) << 16) | \
((u32)((p)[2]) << 8) | \
((u32)((p)[3]) ))
#ifdef ECRYPT_NATIVE64
#define U8TO64_BIG(p) \
(((u64)((p)[0]) << 56) | \
((u64)((p)[1]) << 48) | \
((u64)((p)[2]) << 40) | \
((u64)((p)[3]) << 32) | \
((u64)((p)[4]) << 24) | \
((u64)((p)[5]) << 16) | \
((u64)((p)[6]) << 8) | \
((u64)((p)[7]) ))
#else
#define U8TO64_BIG(p) \
(((u64)U8TO32_BIG(p) << 32) | (u64)U8TO32_BIG((p) + 4))
#endif
#define U16TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
} while (0)
#define U32TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
(p)[2] = U8V((v) >> 16); \
(p)[3] = U8V((v) >> 24); \
} while (0)
#ifdef ECRYPT_NATIVE64
#define U64TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
(p)[2] = U8V((v) >> 16); \
(p)[3] = U8V((v) >> 24); \
(p)[4] = U8V((v) >> 32); \
(p)[5] = U8V((v) >> 40); \
(p)[6] = U8V((v) >> 48); \
(p)[7] = U8V((v) >> 56); \
} while (0)
#else
#define U64TO8_LITTLE(p, v) \
do { \
U32TO8_LITTLE((p), U32V((v) )); \
U32TO8_LITTLE((p) + 4, U32V((v) >> 32)); \
} while (0)
#endif
#define U16TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
} while (0)
#define U32TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) >> 24); \
(p)[1] = U8V((v) >> 16); \
(p)[2] = U8V((v) >> 8); \
(p)[3] = U8V((v) ); \
} while (0)
#ifdef ECRYPT_NATIVE64
#define U64TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) >> 56); \
(p)[1] = U8V((v) >> 48); \
(p)[2] = U8V((v) >> 40); \
(p)[3] = U8V((v) >> 32); \
(p)[4] = U8V((v) >> 24); \
(p)[5] = U8V((v) >> 16); \
(p)[6] = U8V((v) >> 8); \
(p)[7] = U8V((v) ); \
} while (0)
#else
#define U64TO8_BIG(p, v) \
do { \
U32TO8_BIG((p), U32V((v) >> 32)); \
U32TO8_BIG((p) + 4, U32V((v) )); \
} while (0)
#endif
#endif
#include "ecrypt-machine.h"
/* ------------------------------------------------------------------------- */
#endif


@@ -1,279 +0,0 @@
/* ecrypt-sync.h */
/*
* Header file for synchronous stream ciphers without authentication
* mechanism.
*
* *** Please only edit parts marked with "[edit]". ***
*/
#ifndef ECRYPT_SYNC
#define ECRYPT_SYNC
#include "ecrypt-portable.h"
/* ------------------------------------------------------------------------- */
/* Cipher parameters */
/*
* The name of your cipher.
*/
#define ECRYPT_NAME "Salsa20" /* [edit] */
#define ECRYPT_PROFILE "S!_H."
/*
* Specify which key and IV sizes are supported by your cipher. A user
* should be able to enumerate the supported sizes by running the
* following code:
*
* for (i = 0; ECRYPT_KEYSIZE(i) <= ECRYPT_MAXKEYSIZE; ++i)
* {
* keysize = ECRYPT_KEYSIZE(i);
*
* ...
* }
*
* All sizes are in bits.
*/
#define ECRYPT_MAXKEYSIZE 256 /* [edit] */
#define ECRYPT_KEYSIZE(i) (128 + (i)*128) /* [edit] */
#define ECRYPT_MAXIVSIZE 64 /* [edit] */
#define ECRYPT_IVSIZE(i) (64 + (i)*64) /* [edit] */
/* ------------------------------------------------------------------------- */
/* Data structures */
/*
* ECRYPT_ctx is the structure containing the representation of the
* internal state of your cipher.
*/
typedef struct
{
u32 input[16]; /* could be compressed */
/*
* [edit]
*
* Put here all state variables needed during the encryption process.
*/
} ECRYPT_ctx;
/* ------------------------------------------------------------------------- */
/* Mandatory functions */
/*
* Key and message independent initialization. This function will be
* called once when the program starts (e.g., to build expanded S-box
* tables).
*/
void ECRYPT_init();
/*
* Key setup. It is the user's responsibility to select the values of
* keysize and ivsize from the set of supported values specified
* above.
*/
void ECRYPT_keysetup(
ECRYPT_ctx* ctx,
const u8* key,
u32 keysize, /* Key size in bits. */
u32 ivsize); /* IV size in bits. */
/*
* IV setup. After having called ECRYPT_keysetup(), the user is
* allowed to call ECRYPT_ivsetup() multiple times in order to
* encrypt/decrypt different messages with the same key but different
* IV's.
*/
void ECRYPT_ivsetup(
ECRYPT_ctx* ctx,
const u8* iv);
/*
* Encryption/decryption of arbitrary length messages.
*
* For efficiency reasons, the API provides two types of
* encrypt/decrypt functions. The ECRYPT_encrypt_bytes() function
* (declared here) encrypts byte strings of arbitrary length, while
* the ECRYPT_encrypt_blocks() function (defined later) only accepts
* lengths which are multiples of ECRYPT_BLOCKLENGTH.
*
* The user is allowed to make multiple calls to
* ECRYPT_encrypt_blocks() to incrementally encrypt a long message,
* but NOT to make additional encryption calls once
* ECRYPT_encrypt_bytes() has been called (unless a new message is
* started, of course). For example, this sequence of calls is acceptable:
*
* ECRYPT_keysetup();
*
* ECRYPT_ivsetup();
* ECRYPT_encrypt_blocks();
* ECRYPT_encrypt_blocks();
* ECRYPT_encrypt_bytes();
*
* ECRYPT_ivsetup();
* ECRYPT_encrypt_blocks();
* ECRYPT_encrypt_blocks();
*
* ECRYPT_ivsetup();
* ECRYPT_encrypt_bytes();
*
* The following sequence is not:
*
* ECRYPT_keysetup();
* ECRYPT_ivsetup();
* ECRYPT_encrypt_blocks();
* ECRYPT_encrypt_bytes();
* ECRYPT_encrypt_blocks();
*/
void ECRYPT_encrypt_bytes(
ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 msglen); /* Message length in bytes. */
void ECRYPT_decrypt_bytes(
ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 msglen); /* Message length in bytes. */
/* ------------------------------------------------------------------------- */
/* Optional features */
/*
* For testing purposes it can sometimes be useful to have a function
* which immediately generates keystream without having to provide it
* with a zero plaintext. If your cipher cannot provide this function
* (e.g., because it is not strictly a synchronous cipher), please
* reset the ECRYPT_GENERATES_KEYSTREAM flag.
*/
#define ECRYPT_GENERATES_KEYSTREAM
#ifdef ECRYPT_GENERATES_KEYSTREAM
void ECRYPT_keystream_bytes(
ECRYPT_ctx* ctx,
u8* keystream,
u32 length); /* Length of keystream in bytes. */
#endif
/* ------------------------------------------------------------------------- */
/* Optional optimizations */
/*
* By default, the functions in this section are implemented using
* calls to functions declared above. However, you might want to
* implement them differently for performance reasons.
*/
/*
* All-in-one encryption/decryption of (short) packets.
*
* The default definitions of these functions can be found in
* "ecrypt-sync.c". If you want to implement them differently, please
* undef the ECRYPT_USES_DEFAULT_ALL_IN_ONE flag.
*/
#define ECRYPT_USES_DEFAULT_ALL_IN_ONE /* [edit] */
void ECRYPT_encrypt_packet(
ECRYPT_ctx* ctx,
const u8* iv,
const u8* plaintext,
u8* ciphertext,
u32 msglen);
void ECRYPT_decrypt_packet(
ECRYPT_ctx* ctx,
const u8* iv,
const u8* ciphertext,
u8* plaintext,
u32 msglen);
/*
* Encryption/decryption of blocks.
*
* By default, these functions are defined as macros. If you want to
* provide a different implementation, please undef the
* ECRYPT_USES_DEFAULT_BLOCK_MACROS flag and implement the functions
* declared below.
*/
#define ECRYPT_BLOCKLENGTH 64 /* [edit] */
#define ECRYPT_USES_DEFAULT_BLOCK_MACROS /* [edit] */
#ifdef ECRYPT_USES_DEFAULT_BLOCK_MACROS
#define ECRYPT_encrypt_blocks(ctx, plaintext, ciphertext, blocks) \
ECRYPT_encrypt_bytes(ctx, plaintext, ciphertext, \
(blocks) * ECRYPT_BLOCKLENGTH)
#define ECRYPT_decrypt_blocks(ctx, ciphertext, plaintext, blocks) \
ECRYPT_decrypt_bytes(ctx, ciphertext, plaintext, \
(blocks) * ECRYPT_BLOCKLENGTH)
#ifdef ECRYPT_GENERATES_KEYSTREAM
#define ECRYPT_keystream_blocks(ctx, keystream, blocks) \
ECRYPT_keystream_bytes(ctx, keystream, \
(blocks) * ECRYPT_BLOCKLENGTH)
#endif
#else
void ECRYPT_encrypt_blocks(
ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 blocks); /* Message length in blocks. */
void ECRYPT_decrypt_blocks(
ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 blocks); /* Message length in blocks. */
#ifdef ECRYPT_GENERATES_KEYSTREAM
void ECRYPT_keystream_blocks(
ECRYPT_ctx* ctx,
const u8* keystream,
u32 blocks); /* Keystream length in blocks. */
#endif
#endif
/*
* If your cipher can be implemented in different ways, you can use
* the ECRYPT_VARIANT parameter to allow the user to choose between
* them at compile time (e.g., gcc -DECRYPT_VARIANT=3 ...). Please
* only use this possibility if you really think it could make a
* significant difference and keep the number of variants
* (ECRYPT_MAXVARIANT) as small as possible (definitely not more than
* 10). Note also that all variants should have exactly the same
* external interface (i.e., the same ECRYPT_BLOCKLENGTH, etc.).
*/
#define ECRYPT_MAXVARIANT 1 /* [edit] */
#ifndef ECRYPT_VARIANT
#define ECRYPT_VARIANT 1
#endif
#if (ECRYPT_VARIANT > ECRYPT_MAXVARIANT)
#error this variant does not exist
#endif
/* ------------------------------------------------------------------------- */
#endif
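
Putting the mandatory functions together, a keystream sketch (illustrative; it mirrors the Salsa20_XORKeyStream wrapper in AstroBWT.cpp, minus that wrapper's guard-byte memsets):

#include "ecrypt-sync.h"

/* Illustrative only: 256-bit key, zero 64-bit IV, 64 bytes of keystream.
 * ECRYPT_keysetup() takes the key and IV sizes in bits, per the comments above. */
static void example_keystream(const u8 key[32], u8 stream[64])
{
    ECRYPT_ctx ctx;
    const u8 iv[8] = {0};
    ECRYPT_keysetup(&ctx, key, 256, 64);
    ECRYPT_ivsetup(&ctx, iv);
    ECRYPT_keystream_bytes(&ctx, stream, 64);
}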


@@ -1,219 +0,0 @@
/*
salsa20-merged.c version 20051118
D. J. Bernstein
Public domain.
*/
#include "ecrypt-sync.h"
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))
void ECRYPT_init(void)
{
return;
}
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
void ECRYPT_keysetup(ECRYPT_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
{
const char *constants;
x->input[1] = U8TO32_LITTLE(k + 0);
x->input[2] = U8TO32_LITTLE(k + 4);
x->input[3] = U8TO32_LITTLE(k + 8);
x->input[4] = U8TO32_LITTLE(k + 12);
if (kbits == 256) { /* recommended */
k += 16;
constants = sigma;
} else { /* kbits == 128 */
constants = tau;
}
x->input[11] = U8TO32_LITTLE(k + 0);
x->input[12] = U8TO32_LITTLE(k + 4);
x->input[13] = U8TO32_LITTLE(k + 8);
x->input[14] = U8TO32_LITTLE(k + 12);
x->input[0] = U8TO32_LITTLE(constants + 0);
x->input[5] = U8TO32_LITTLE(constants + 4);
x->input[10] = U8TO32_LITTLE(constants + 8);
x->input[15] = U8TO32_LITTLE(constants + 12);
}
void ECRYPT_ivsetup(ECRYPT_ctx *x,const u8 *iv)
{
x->input[6] = U8TO32_LITTLE(iv + 0);
x->input[7] = U8TO32_LITTLE(iv + 4);
x->input[8] = 0;
x->input[9] = 0;
}
void ECRYPT_encrypt_bytes(ECRYPT_ctx *x,const u8 *m,u8 *c,u32 bytes)
{
u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
u8 *ctarget = 0;
u8 tmp[64];
int i;
if (!bytes) return;
j0 = x->input[0];
j1 = x->input[1];
j2 = x->input[2];
j3 = x->input[3];
j4 = x->input[4];
j5 = x->input[5];
j6 = x->input[6];
j7 = x->input[7];
j8 = x->input[8];
j9 = x->input[9];
j10 = x->input[10];
j11 = x->input[11];
j12 = x->input[12];
j13 = x->input[13];
j14 = x->input[14];
j15 = x->input[15];
for (;;) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) tmp[i] = m[i];
m = tmp;
ctarget = c;
c = tmp;
}
x0 = j0;
x1 = j1;
x2 = j2;
x3 = j3;
x4 = j4;
x5 = j5;
x6 = j6;
x7 = j7;
x8 = j8;
x9 = j9;
x10 = j10;
x11 = j11;
x12 = j12;
x13 = j13;
x14 = j14;
x15 = j15;
for (i = 20;i > 0;i -= 2) {
x4 = XOR( x4,ROTATE(PLUS( x0,x12), 7));
x8 = XOR( x8,ROTATE(PLUS( x4, x0), 9));
x12 = XOR(x12,ROTATE(PLUS( x8, x4),13));
x0 = XOR( x0,ROTATE(PLUS(x12, x8),18));
x9 = XOR( x9,ROTATE(PLUS( x5, x1), 7));
x13 = XOR(x13,ROTATE(PLUS( x9, x5), 9));
x1 = XOR( x1,ROTATE(PLUS(x13, x9),13));
x5 = XOR( x5,ROTATE(PLUS( x1,x13),18));
x14 = XOR(x14,ROTATE(PLUS(x10, x6), 7));
x2 = XOR( x2,ROTATE(PLUS(x14,x10), 9));
x6 = XOR( x6,ROTATE(PLUS( x2,x14),13));
x10 = XOR(x10,ROTATE(PLUS( x6, x2),18));
x3 = XOR( x3,ROTATE(PLUS(x15,x11), 7));
x7 = XOR( x7,ROTATE(PLUS( x3,x15), 9));
x11 = XOR(x11,ROTATE(PLUS( x7, x3),13));
x15 = XOR(x15,ROTATE(PLUS(x11, x7),18));
x1 = XOR( x1,ROTATE(PLUS( x0, x3), 7));
x2 = XOR( x2,ROTATE(PLUS( x1, x0), 9));
x3 = XOR( x3,ROTATE(PLUS( x2, x1),13));
x0 = XOR( x0,ROTATE(PLUS( x3, x2),18));
x6 = XOR( x6,ROTATE(PLUS( x5, x4), 7));
x7 = XOR( x7,ROTATE(PLUS( x6, x5), 9));
x4 = XOR( x4,ROTATE(PLUS( x7, x6),13));
x5 = XOR( x5,ROTATE(PLUS( x4, x7),18));
x11 = XOR(x11,ROTATE(PLUS(x10, x9), 7));
x8 = XOR( x8,ROTATE(PLUS(x11,x10), 9));
x9 = XOR( x9,ROTATE(PLUS( x8,x11),13));
x10 = XOR(x10,ROTATE(PLUS( x9, x8),18));
x12 = XOR(x12,ROTATE(PLUS(x15,x14), 7));
x13 = XOR(x13,ROTATE(PLUS(x12,x15), 9));
x14 = XOR(x14,ROTATE(PLUS(x13,x12),13));
x15 = XOR(x15,ROTATE(PLUS(x14,x13),18));
}
x0 = PLUS(x0,j0);
x1 = PLUS(x1,j1);
x2 = PLUS(x2,j2);
x3 = PLUS(x3,j3);
x4 = PLUS(x4,j4);
x5 = PLUS(x5,j5);
x6 = PLUS(x6,j6);
x7 = PLUS(x7,j7);
x8 = PLUS(x8,j8);
x9 = PLUS(x9,j9);
x10 = PLUS(x10,j10);
x11 = PLUS(x11,j11);
x12 = PLUS(x12,j12);
x13 = PLUS(x13,j13);
x14 = PLUS(x14,j14);
x15 = PLUS(x15,j15);
x0 = XOR(x0,U8TO32_LITTLE(m + 0));
x1 = XOR(x1,U8TO32_LITTLE(m + 4));
x2 = XOR(x2,U8TO32_LITTLE(m + 8));
x3 = XOR(x3,U8TO32_LITTLE(m + 12));
x4 = XOR(x4,U8TO32_LITTLE(m + 16));
x5 = XOR(x5,U8TO32_LITTLE(m + 20));
x6 = XOR(x6,U8TO32_LITTLE(m + 24));
x7 = XOR(x7,U8TO32_LITTLE(m + 28));
x8 = XOR(x8,U8TO32_LITTLE(m + 32));
x9 = XOR(x9,U8TO32_LITTLE(m + 36));
x10 = XOR(x10,U8TO32_LITTLE(m + 40));
x11 = XOR(x11,U8TO32_LITTLE(m + 44));
x12 = XOR(x12,U8TO32_LITTLE(m + 48));
x13 = XOR(x13,U8TO32_LITTLE(m + 52));
x14 = XOR(x14,U8TO32_LITTLE(m + 56));
x15 = XOR(x15,U8TO32_LITTLE(m + 60));
j8 = PLUSONE(j8);
if (!j8) {
j9 = PLUSONE(j9);
/* stopping at 2^70 bytes per nonce is user's responsibility */
}
U32TO8_LITTLE(c + 0,x0);
U32TO8_LITTLE(c + 4,x1);
U32TO8_LITTLE(c + 8,x2);
U32TO8_LITTLE(c + 12,x3);
U32TO8_LITTLE(c + 16,x4);
U32TO8_LITTLE(c + 20,x5);
U32TO8_LITTLE(c + 24,x6);
U32TO8_LITTLE(c + 28,x7);
U32TO8_LITTLE(c + 32,x8);
U32TO8_LITTLE(c + 36,x9);
U32TO8_LITTLE(c + 40,x10);
U32TO8_LITTLE(c + 44,x11);
U32TO8_LITTLE(c + 48,x12);
U32TO8_LITTLE(c + 52,x13);
U32TO8_LITTLE(c + 56,x14);
U32TO8_LITTLE(c + 60,x15);
if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) ctarget[i] = c[i];
}
x->input[8] = j8;
x->input[9] = j9;
return;
}
bytes -= 64;
c += 64;
m += 64;
}
}
void ECRYPT_decrypt_bytes(ECRYPT_ctx *x,const u8 *c,u8 *m,u32 bytes)
{
ECRYPT_encrypt_bytes(x,c,m,bytes);
}
void ECRYPT_keystream_bytes(ECRYPT_ctx *x,u8 *stream,u32 bytes)
{
u32 i;
for (i = 0; i < bytes; ++i) stream[i] = 0;
ECRYPT_encrypt_bytes(x,stream,stream,bytes);
}
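
Each group of four XOR/ROTATE lines in the unrolled loop above is one Salsa20 quarter-round; written as a macro the pattern is (a sketch, not part of the original file):

/* Illustrative only: the quarter-round applied to state words (a,b,c,d);
 * e.g. the first group in the loop above is QUARTERROUND(x0, x4, x8, x12). */
#define QUARTERROUND(a, b, c, d)         \
    b = XOR(b, ROTATE(PLUS(a, d),  7));  \
    c = XOR(c, ROTATE(PLUS(b, a),  9));  \
    d = XOR(d, ROTATE(PLUS(c, b), 13));  \
    a = XOR(a, ROTATE(PLUS(d, c), 18));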


@@ -1,57 +0,0 @@
;# XMRig
;# Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
;# Copyright 2012-2014 pooler <pooler@litecoinpool.org>
;# Copyright 2014 Lucas Jones <https://github.com/lucasjones>
;# Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
;# Copyright 2016 Jay D Dee <jayddee246@gmail.com>
;# Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
;# Copyright 2018 Lee Clagett <https://github.com/vtnerd>
;# Copyright 2018-2019 tevador <tevador@gmail.com>
;# Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
;# Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
;# Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
;# Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
;#
;# This program is free software: you can redistribute it and/or modify
;# it under the terms of the GNU General Public License as published by
;# the Free Software Foundation, either version 3 of the License, or
;# (at your option) any later version.
;#
;# This program is distributed in the hope that it will be useful,
;# but WITHOUT ANY WARRANTY; without even the implied warranty of
;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;# GNU General Public License for more details.
;#
;# You should have received a copy of the GNU General Public License
;# along with this program. If not, see <http://www.gnu.org/licenses/>.
;#
.intel_syntax noprefix
#if defined(__APPLE__)
.text
#define DECL(x) _##x
#else
.section .text
#define DECL(x) x
#endif
#define ALIGN .balign
#define dq .quad
.global DECL(SHA3_256_AVX2_ASM)
ALIGN 64
DECL(SHA3_256_AVX2_ASM):
#include "sha3_256_avx2.inc"
KeccakF1600_AVX2_ASM:
lea r8,[rip+rot_left+96]
lea r9,[rip+rot_right+96]
lea r10,[rip+rndc]
#include "sha3_256_keccakf1600_avx2.inc"
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -1,45 +0,0 @@
;# XMRig
;# Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
;# Copyright 2012-2014 pooler <pooler@litecoinpool.org>
;# Copyright 2014 Lucas Jones <https://github.com/lucasjones>
;# Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
;# Copyright 2016 Jay D Dee <jayddee246@gmail.com>
;# Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
;# Copyright 2018 Lee Clagett <https://github.com/vtnerd>
;# Copyright 2018-2019 tevador <tevador@gmail.com>
;# Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
;# Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
;# Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
;# Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
;#
;# This program is free software: you can redistribute it and/or modify
;# it under the terms of the GNU General Public License as published by
;# the Free Software Foundation, either version 3 of the License, or
;# (at your option) any later version.
;#
;# This program is distributed in the hope that it will be useful,
;# but WITHOUT ANY WARRANTY; without even the implied warranty of
;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;# GNU General Public License for more details.
;#
;# You should have received a copy of the GNU General Public License
;# along with this program. If not, see <http://www.gnu.org/licenses/>.
;#
_SHA3_256_AVX2_ASM SEGMENT PAGE READ EXECUTE
PUBLIC SHA3_256_AVX2_ASM
ALIGN 64
SHA3_256_AVX2_ASM:
include sha3_256_avx2.inc
KeccakF1600_AVX2_ASM:
lea r8,[rot_left+96]
lea r9,[rot_right+96]
lea r10,[rndc]
include sha3_256_keccakf1600_avx2.inc
_SHA3_256_AVX2_ASM ENDS
END


@@ -1,162 +0,0 @@
;# XMRig
;# Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
;# Copyright 2012-2014 pooler <pooler@litecoinpool.org>
;# Copyright 2014 Lucas Jones <https://github.com/lucasjones>
;# Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
;# Copyright 2016 Jay D Dee <jayddee246@gmail.com>
;# Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
;# Copyright 2018 Lee Clagett <https://github.com/vtnerd>
;# Copyright 2018-2019 tevador <tevador@gmail.com>
;# Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
;# Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
;# Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
;# Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
;#
;# This program is free software: you can redistribute it and/or modify
;# it under the terms of the GNU General Public License as published by
;# the Free Software Foundation, either version 3 of the License, or
;# (at your option) any later version.
;#
;# This program is distributed in the hope that it will be useful,
;# but WITHOUT ANY WARRANTY; without even the implied warranty of
;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;# GNU General Public License for more details.
;#
;# You should have received a copy of the GNU General Public License
;# along with this program. If not, see <http://www.gnu.org/licenses/>.
;#
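;#
;# Register contract, as read from the code below (it matches the ms_abi
;# declaration in AstroBWT.cpp): rcx = input, rdx = input size in bytes,
;# r8 = 32-byte output. The size is split into whole 8-byte lanes (r14)
;# and a 0-7 byte tail (r12).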
vzeroupper
mov qword ptr [rsp+8],rbx
mov qword ptr [rsp+16],rsi
mov qword ptr [rsp+24],rdi
push rbp
push r12
push r13
push r14
push r15
sub rsp, 80
movdqu xmmword ptr [rsp+64], xmm6
movdqu xmmword ptr [rsp+48], xmm7
movdqu xmmword ptr [rsp+32], xmm8
movdqu xmmword ptr [rsp+16], xmm9
movdqu xmmword ptr [rsp+0], xmm10
sub rsp, 80
movdqu xmmword ptr [rsp+64], xmm11
movdqu xmmword ptr [rsp+48], xmm12
movdqu xmmword ptr [rsp+32], xmm13
movdqu xmmword ptr [rsp+16], xmm14
movdqu xmmword ptr [rsp+0], xmm15
sub rsp,320
lea rbp,[rsp+64]
and rbp,-32
vpxor xmm0,xmm0,xmm0
xor edi,edi
mov dword ptr [rbp],50462976
mov r12,rdx
mov dword ptr [rbp+4],169150212
mov r14,rdx
mov dword ptr [rbp+8],218436623
shr r14,3
and r12d,7
mov dword ptr [rbp+12],135009046
mov r13,r8
mov byte ptr [rbp+16],9
mov rsi,rcx
mov ebx,edi
vmovdqa ymmword ptr [rbp+32],ymm0
vmovdqa ymmword ptr [rbp+64],ymm0
vmovdqa ymmword ptr [rbp+96],ymm0
vmovdqa ymmword ptr [rbp+128],ymm0
vmovdqa ymmword ptr [rbp+160],ymm0
vmovdqa ymmword ptr [rbp+192],ymm0
vmovdqa ymmword ptr [rbp+224],ymm0
test r14,r14
je sha3_main_loop_end
sha3_main_loop:
movzx eax,byte ptr [rbp+rbx]
lea rcx,[rbp+32]
lea rcx,[rcx+rax*8]
mov rax,qword ptr [rsi]
xor qword ptr [rcx],rax
lea r15,[rbx+1]
cmp rbx,16
jne skip_keccak
lea rcx,[rbp+32]
call KeccakF1600_AVX2_ASM
skip_keccak:
cmp rbx,16
mov rax,rdi
cmovne rax,r15
add rsi,8
mov rbx,rax
sub r14,1
jne sha3_main_loop
sha3_main_loop_end:
mov rdx,rdi
test r12,r12
je sha3_tail_loop_end
mov r8,rdi
sha3_tail_loop:
movzx eax,byte ptr [rdx+rsi]
inc rdx
shlx rcx,rax,r8
or rdi,rcx
add r8,8
cmp rdx,r12
jb sha3_tail_loop
sha3_tail_loop_end:
movzx eax,byte ptr [rbp+rbx]
lea rdx,[rbp+32]
lea rdx,[rdx+rax*8]
mov ecx,6
lea rax,[r12*8]
shlx rcx,rcx,rax
xor rcx,qword ptr [rdx]
mov eax,1
shl rax,63
xor rcx,rdi
mov qword ptr [rdx],rcx
xor qword ptr [rbp+104],rax
lea rcx,[rbp+32]
call KeccakF1600_AVX2_ASM
vmovups ymm0,ymmword ptr [rbp+32]
vmovups ymmword ptr [r13],ymm0
vzeroupper
add rsp,320
movdqu xmm15, xmmword ptr [rsp]
movdqu xmm14, xmmword ptr [rsp+16]
movdqu xmm13, xmmword ptr [rsp+32]
movdqu xmm12, xmmword ptr [rsp+48]
movdqu xmm11, xmmword ptr [rsp+64]
add rsp, 80
movdqu xmm10, xmmword ptr [rsp]
movdqu xmm9, xmmword ptr [rsp+16]
movdqu xmm8, xmmword ptr [rsp+32]
movdqu xmm7, xmmword ptr [rsp+48]
movdqu xmm6, xmmword ptr [rsp+64]
add rsp, 80
pop r15
pop r14
pop r13
pop r12
pop rbp
mov rbx,qword ptr [rsp+8]
mov rsi,qword ptr [rsp+16]
mov rdi,qword ptr [rsp+24]
ret


@@ -1,203 +0,0 @@
;# XMRig
;# Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
;# Copyright 2012-2014 pooler <pooler@litecoinpool.org>
;# Copyright 2014 Lucas Jones <https://github.com/lucasjones>
;# Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
;# Copyright 2016 Jay D Dee <jayddee246@gmail.com>
;# Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
;# Copyright 2018 Lee Clagett <https://github.com/vtnerd>
;# Copyright 2018-2019 tevador <tevador@gmail.com>
;# Copyright 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
;# Copyright 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
;# Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
;# Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
;#
;# This program is free software: you can redistribute it and/or modify
;# it under the terms of the GNU General Public License as published by
;# the Free Software Foundation, either version 3 of the License, or
;# (at your option) any later version.
;#
;# This program is distributed in the hope that it will be useful,
;# but WITHOUT ANY WARRANTY; without even the implied warranty of
;# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;# GNU General Public License for more details.
;#
;# You should have received a copy of the GNU General Public License
;# along with this program. If not, see <http://www.gnu.org/licenses/>.
;#
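;#
;# Inputs, as set up by the SHA3_256_AVX2_ASM callers above: rcx = the
;# Keccak-f[1600] state, r8/r9 = left/right rotation-count tables (pre-biased
;# by +96), r10 = round-constant table. The loop below runs all 24 rounds.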
mov eax,24
lea rcx,[rcx+96]
vpbroadcastq ymm0,QWORD PTR [rcx-96]
vmovdqu ymm1,YMMWORD PTR [rcx-88]
vmovdqu ymm2,YMMWORD PTR [rcx-56]
vmovdqu ymm3,YMMWORD PTR [rcx-24]
vmovdqu ymm4,YMMWORD PTR [rcx+8]
vmovdqu ymm5,YMMWORD PTR [rcx+40]
vmovdqu ymm6,YMMWORD PTR [rcx+72]
ALIGN 64
Loop_avx2:
vpshufd ymm13,ymm2,78
vpxor ymm12,ymm5,ymm3
vpxor ymm9,ymm4,ymm6
vpxor ymm12,ymm12,ymm1
vpxor ymm12,ymm12,ymm9
vpermq ymm11,ymm12,147
vpxor ymm13,ymm13,ymm2
vpermq ymm7,ymm13,78
vpsrlq ymm8,ymm12,63
vpaddq ymm9,ymm12,ymm12
vpor ymm8,ymm8,ymm9
vpermq ymm15,ymm8,57
vpxor ymm14,ymm8,ymm11
vpermq ymm14,ymm14,0
vpxor ymm13,ymm13,ymm0
vpxor ymm13,ymm13,ymm7
vpsrlq ymm7,ymm13,63
vpaddq ymm8,ymm13,ymm13
vpor ymm8,ymm8,ymm7
vpxor ymm2,ymm2,ymm14
vpxor ymm0,ymm0,ymm14
vpblendd ymm15,ymm15,ymm8,192
vpblendd ymm11,ymm11,ymm13,3
vpxor ymm15,ymm15,ymm11
vpsllvq ymm10,ymm2,YMMWORD PTR [r8-96]
vpsrlvq ymm2,ymm2,YMMWORD PTR [r9-96]
vpor ymm2,ymm2,ymm10
vpxor ymm3,ymm3,ymm15
vpsllvq ymm11,ymm3,YMMWORD PTR [r8-32]
vpsrlvq ymm3,ymm3,YMMWORD PTR [r9-32]
vpor ymm3,ymm3,ymm11
vpxor ymm4,ymm4,ymm15
vpsllvq ymm12,ymm4,YMMWORD PTR [r8]
vpsrlvq ymm4,ymm4,YMMWORD PTR [r9]
vpor ymm4,ymm4,ymm12
vpxor ymm5,ymm5,ymm15
vpsllvq ymm13,ymm5,YMMWORD PTR [r8+32]
vpsrlvq ymm5,ymm5,YMMWORD PTR [r9+32]
vpor ymm5,ymm5,ymm13
vpxor ymm6,ymm6,ymm15
vpermq ymm10,ymm2,141
vpermq ymm11,ymm3,141
vpsllvq ymm14,ymm6,YMMWORD PTR [r8+64]
vpsrlvq ymm8,ymm6,YMMWORD PTR [r9+64]
vpor ymm8,ymm8,ymm14
vpxor ymm1,ymm1,ymm15
vpermq ymm12,ymm4,27
vpermq ymm13,ymm5,114
vpsllvq ymm15,ymm1,YMMWORD PTR [r8-64]
vpsrlvq ymm9,ymm1,YMMWORD PTR [r9-64]
vpor ymm9,ymm9,ymm15
vpsrldq ymm14,ymm8,8
vpandn ymm7,ymm8,ymm14
vpblendd ymm3,ymm9,ymm13,12
vpblendd ymm15,ymm11,ymm9,12
vpblendd ymm5,ymm10,ymm11,12
vpblendd ymm14,ymm9,ymm10,12
vpblendd ymm3,ymm3,ymm11,48
vpblendd ymm15,ymm15,ymm12,48
vpblendd ymm5,ymm5,ymm9,48
vpblendd ymm14,ymm14,ymm13,48
vpblendd ymm3,ymm3,ymm12,192
vpblendd ymm15,ymm15,ymm13,192
vpblendd ymm5,ymm5,ymm13,192
vpblendd ymm14,ymm14,ymm11,192
vpandn ymm3,ymm3,ymm15
vpandn ymm5,ymm5,ymm14
vpblendd ymm6,ymm12,ymm9,12
vpblendd ymm15,ymm10,ymm12,12
vpxor ymm3,ymm3,ymm10
vpblendd ymm6,ymm6,ymm10,48
vpblendd ymm15,ymm15,ymm11,48
vpxor ymm5,ymm5,ymm12
vpblendd ymm6,ymm6,ymm11,192
vpblendd ymm15,ymm15,ymm9,192
vpandn ymm6,ymm6,ymm15
vpxor ymm6,ymm6,ymm13
vpermq ymm4,ymm8,30
vpblendd ymm15,ymm4,ymm0,48
vpermq ymm1,ymm8,57
vpblendd ymm1,ymm1,ymm0,192
vpandn ymm1,ymm1,ymm15
vpblendd ymm2,ymm11,ymm12,12
vpblendd ymm14,ymm13,ymm11,12
vpblendd ymm2,ymm2,ymm13,48
vpblendd ymm14,ymm14,ymm10,48
vpblendd ymm2,ymm2,ymm10,192
vpblendd ymm14,ymm14,ymm12,192
vpandn ymm2,ymm2,ymm14
vpxor ymm2,ymm2,ymm9
vpermq ymm7,ymm7,0
vpermq ymm3,ymm3,27
vpermq ymm5,ymm5,141
vpermq ymm6,ymm6,114
vpblendd ymm4,ymm13,ymm10,12
vpblendd ymm14,ymm12,ymm13,12
vpblendd ymm4,ymm4,ymm12,48
vpblendd ymm14,ymm14,ymm9,48
vpblendd ymm4,ymm4,ymm9,192
vpblendd ymm14,ymm14,ymm10,192
vpandn ymm4,ymm4,ymm14
vpxor ymm0,ymm0,ymm7
vpxor ymm1,ymm1,ymm8
vpxor ymm4,ymm4,ymm11
vpxor ymm0,ymm0,YMMWORD PTR [r10]
lea r10,[r10+32]
dec eax
jnz Loop_avx2
vmovq QWORD PTR [rcx-96],xmm0
vmovdqu YMMWORD PTR [rcx-88],ymm1
vmovdqu YMMWORD PTR [rcx-56],ymm2
vmovdqu YMMWORD PTR [rcx-24],ymm3
vmovdqu YMMWORD PTR [rcx+8],ymm4
vmovdqu YMMWORD PTR [rcx+40],ymm5
vmovdqu YMMWORD PTR [rcx+72],ymm6
ret
ALIGN 32
rot_left:
dq 3, 18, 36, 41
dq 1, 62, 28, 27
dq 45, 6, 56, 39
dq 10, 61, 55, 8
dq 2, 15, 25, 20
dq 44, 43, 21, 14
ALIGN 32
rot_right:
dq 64-3, 64-18, 64-36, 64-41
dq 64-1, 64-62, 64-28, 64-27
dq 64-45, 64-6, 64-56, 64-39
dq 64-10, 64-61, 64-55, 64-8
dq 64-2, 64-15, 64-25, 64-20
dq 64-44, 64-43, 64-21, 64-14
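; rot_left holds the per-group rho rotation counts r and rot_right the
; complements 64-r: each vpsllvq/vpsrlvq/vpor triple in Loop_avx2 builds the
; 64-bit rotate (x << r) | (x >> (64 - r)) that AVX2 lacks as a single
; instruction.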
ALIGN 32
rndc:
dq 1, 1, 1, 1
dq 32898, 32898, 32898, 32898
dq 9223372036854808714, 9223372036854808714, 9223372036854808714, 9223372036854808714
dq 9223372039002292224, 9223372039002292224, 9223372039002292224, 9223372039002292224
dq 32907, 32907, 32907, 32907
dq 2147483649, 2147483649, 2147483649, 2147483649
dq 9223372039002292353, 9223372039002292353, 9223372039002292353, 9223372039002292353
dq 9223372036854808585, 9223372036854808585, 9223372036854808585, 9223372036854808585
dq 138, 138, 138, 138
dq 136, 136, 136, 136
dq 2147516425, 2147516425, 2147516425, 2147516425
dq 2147483658, 2147483658, 2147483658, 2147483658
dq 2147516555, 2147516555, 2147516555, 2147516555
dq 9223372036854775947, 9223372036854775947, 9223372036854775947, 9223372036854775947
dq 9223372036854808713, 9223372036854808713, 9223372036854808713, 9223372036854808713
dq 9223372036854808579, 9223372036854808579, 9223372036854808579, 9223372036854808579
dq 9223372036854808578, 9223372036854808578, 9223372036854808578, 9223372036854808578
dq 9223372036854775936, 9223372036854775936, 9223372036854775936, 9223372036854775936
dq 32778, 32778, 32778, 32778
dq 9223372039002259466, 9223372039002259466, 9223372039002259466, 9223372039002259466
dq 9223372039002292353, 9223372039002292353, 9223372039002292353, 9223372039002292353
dq 9223372036854808704, 9223372036854808704, 9223372036854808704, 9223372036854808704
dq 2147483649, 2147483649, 2147483649, 2147483649
dq 9223372039002292232, 9223372039002292232, 9223372039002292232, 9223372039002292232
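rndc holds the 24 Keccak-f[1600] round constants, one row per round, each value repeated four times so the per-round vpxor into ymm0 (r10 appears to walk this table) can load a whole 32-byte row. The values match the FIPS 202 LFSR derivation; the following standalone C sketch (illustrative only, not part of the source tree; lfsr86540 follows the compact Keccak reference code) prints the same 24 values in decimal:

#include <stdint.h>
#include <stdio.h>

/* rc(t) LFSR from FIPS 202 (polynomial x^8 + x^6 + x^5 + x^4 + 1):
   returns the low bit and steps the state. */
static int lfsr86540(uint8_t* r)
{
    const int bit = *r & 1;
    *r = (*r & 0x80) ? (uint8_t)((*r << 1) ^ 0x71) : (uint8_t)(*r << 1);
    return bit;
}

int main(void)
{
    uint8_t state = 0x01;
    int round, j;
    for (round = 0; round < 24; ++round) {
        uint64_t rc = 0;
        for (j = 0; j < 7; ++j) {                 /* constant has bits at positions 2^j - 1 */
            if (lfsr86540(&state)) {
                rc |= 1ULL << ((1u << j) - 1);
            }
        }
        printf("%llu\n", (unsigned long long)rc); /* decimal, like the dq rows above */
    }
    return 0;
}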


@@ -1,208 +0,0 @@
/* XMRig
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright (c) 2018-2019 tevador <tevador@gmail.com>
* Copyright (c) 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
* Copyright (c) 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "crypto/astrobwt/sort_indices2.h"
#include "base/tools/bswap_64.h"
#include <cstring>
#ifdef __GNUC__
#define NOINLINE __attribute__((noinline))
#define RESTRICT __restrict__
#elif _MSC_VER
#define NOINLINE __declspec(noinline)
#define RESTRICT __restrict
#else
#define NOINLINE
#define RESTRICT
#endif
#if __has_cpp_attribute(unlikely)
#define UNLIKELY(X) (X) [[unlikely]]
#elif defined __GNUC__
#define UNLIKELY(X) (__builtin_expect((X), 0))
#else
#define UNLIKELY(X) (X)
#endif
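/* fix(): bounded insertion step used after the radix passes below. When
   entry i ties with its neighbors on the first two bytes (same high 16 bits
   of the packed value), it is shifted left until the next four suffix bytes
   (read at position + 2 and compared big-endian via bswap_32) are in order. */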
static NOINLINE void fix(const uint8_t* RESTRICT v, uint32_t* RESTRICT indices, int32_t i)
{
uint32_t prev_t = indices[i - 1];
uint32_t t = indices[i];
const uint32_t data_a = bswap_32(*(const uint32_t*)(v + (t & 0xFFFF) + 2));
if (data_a < bswap_32(*(const uint32_t*)(v + (prev_t & 0xFFFF) + 2)))
{
const uint32_t t2 = prev_t;
int32_t j = i - 1;
do
{
indices[j + 1] = prev_t;
--j;
if (j < 0) {
break;
}
prev_t = indices[j];
} while (((t ^ prev_t) <= 0xFFFF) && (data_a < bswap_32(*(const uint32_t*)(v + (prev_t & 0xFFFF) + 2))));
indices[j + 1] = t;
t = t2;
}
}
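/* sort_indices(): two counting-sort passes (low key byte, then high key
   byte) order all N suffix positions by their first two bytes, packed as
   (byte0 << 24) | (byte1 << 16) | pos; fix() then insertion-sorts runs that
   share those two bytes using the next four bytes; finally the 32-bit
   entries are compacted down to 16-bit positions. */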
static NOINLINE void sort_indices(uint32_t N, const uint8_t* RESTRICT v, uint32_t* RESTRICT indices, uint32_t* RESTRICT tmp_indices)
{
uint8_t byte_counters[2][256] = {};
uint32_t counters[2][256];
{
#define ITER(X) ++byte_counters[1][v[i + X]];
enum { unroll = 12 };
uint32_t i = 0;
const uint32_t n = N - (unroll - 1);
for (; i < n; i += unroll) {
ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8); ITER(9); ITER(10); ITER(11);
}
for (; i < N; ++i) {
ITER(0);
}
memcpy(&byte_counters[0], &byte_counters[1], 256);
--byte_counters[0][v[0]];
#undef ITER
}
{
uint32_t c0 = byte_counters[0][0];
uint32_t c1 = byte_counters[1][0] - 1;
counters[0][0] = c0;
counters[1][0] = c1;
uint8_t* src = &byte_counters[0][0] + 1;
uint32_t* dst = &counters[0][0] + 1;
const uint8_t* const e = &byte_counters[0][0] + 256;
do {
c0 += src[0];
c1 += src[256];
dst[0] = c0;
dst[256] = c1;
++src;
++dst;
} while (src < e);
}
{
#define ITER(X) \
do { \
const uint32_t byte0 = v[i - X + 0]; \
const uint32_t byte1 = v[i - X + 1]; \
tmp_indices[counters[0][byte1]--] = (byte0 << 24) | (byte1 << 16) | (i - X); \
} while (0)
enum { unroll = 8 };
uint32_t i = N;
for (; i >= unroll; i -= unroll) {
ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8);
}
for (; i > 0; --i) {
ITER(1);
}
#undef ITER
}
{
#define ITER(X) \
do { \
const uint32_t data = tmp_indices[i - X]; \
indices[counters[1][data >> 24]--] = data; \
} while (0)
enum { unroll = 8 };
uint32_t i = N;
for (; i >= unroll; i -= unroll) {
ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7); ITER(8);
}
for (; i > 0; --i) {
ITER(1);
}
#undef ITER
}
{
#define ITER(X) do { if UNLIKELY(a[X * 2] == a[(X + 1) * 2]) fix(v, indices, i + X); } while (0)
enum { unroll = 16 };
uint32_t i = 1;
const uint32_t n = N - (unroll - 1);
const uint16_t* a = ((const uint16_t*)indices) + 1;
for (; i < n; i += unroll, a += unroll * 2) {
ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15);
}
for (; i < N; ++i, a += 2) {
ITER(0);
}
#undef ITER
}
{
#define ITER(X) a[X] = b[X * 2];
enum { unroll = 32 };
uint16_t* a = (uint16_t*)indices;
uint16_t* b = (uint16_t*)indices;
uint16_t* e = ((uint16_t*)indices) + (N - (unroll - 1));
for (; a < e; a += unroll, b += unroll * 2) {
ITER(0); ITER(1); ITER(2); ITER(3); ITER(4); ITER(5); ITER(6); ITER(7);
ITER(8); ITER(9); ITER(10); ITER(11); ITER(12); ITER(13); ITER(14); ITER(15);
ITER(16); ITER(17); ITER(18); ITER(19); ITER(20); ITER(21); ITER(22); ITER(23);
ITER(24); ITER(25); ITER(26); ITER(27); ITER(28); ITER(29); ITER(30); ITER(31);
}
e = ((uint16_t*)indices) + N;
for (; a < e; ++a, b += 2) {
ITER(0);
}
#undef ITER
}
}
void sort_indices_astrobwt_v2(uint32_t N, const uint8_t* v, uint32_t* indices, uint32_t* tmp_indices)
{
sort_indices(N, v, indices, tmp_indices);
}
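For reference, the two scatter passes in sort_indices() are a textbook stable counting sort: build a histogram, turn it into inclusive prefix sums that point at each bucket's last slot, then scatter in reverse so equal keys keep their relative order. A standalone sketch of that pattern (counting_sort_bytes is a hypothetical helper, not part of XMRig):

#include <stdint.h>

/* Stable counting sort of n bytes: histogram, inclusive prefix sums to each
   bucket's last slot, reverse scatter (mirrors the counters[] /
   tmp_indices[] passes above). */
static void counting_sort_bytes(const uint8_t* in, uint8_t* out, uint32_t n)
{
    uint32_t count[256] = {0};
    uint32_t pos[256];
    uint32_t sum = 0;
    uint32_t i;
    for (i = 0; i < n; ++i) ++count[in[i]];
    for (i = 0; i < 256; ++i) { sum += count[i]; pos[i] = sum - 1; } /* wraps only for empty buckets, never read */
    for (i = n; i > 0; --i) out[pos[in[i - 1]]--] = in[i - 1];       /* reverse scatter keeps the order stable */
}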


@@ -1,26 +0,0 @@
/* XMRig
* Copyright (c) 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright (c) 2018-2019 tevador <tevador@gmail.com>
* Copyright (c) 2000 Transmeta Corporation <https://github.com/intel/msr-tools>
* Copyright (c) 2004-2008 H. Peter Anvin <https://github.com/intel/msr-tools>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
void sort_indices_astrobwt_v2(uint32_t N, const uint8_t* v, uint32_t* indices, uint32_t* tmp_indices);


@@ -1,98 +0,0 @@
/*
* ISC License
*
* Copyright (c) 2013-2021
* Frank Denis <j at pureftpd dot org>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
#include <tmmintrin.h>
#define ROUNDS 20
typedef struct salsa_ctx {
uint32_t input[16];
} salsa_ctx;
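/* TR[] stores the 4x4 Salsa20 state diagonal by diagonal ({0,5,10,15},
   {12,1,6,11}, {8,13,2,7}, {4,9,14,3}) so that each 128-bit load in the
   u*.h round code picks up a whole diagonal. */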
static const int TR[16] = {
0, 5, 10, 15, 12, 1, 6, 11, 8, 13, 2, 7, 4, 9, 14, 3
};
#define LOAD32_LE(p) *((uint32_t*)(p))
#define STORE32_LE(dst, src) memcpy((dst), &(src), sizeof(uint32_t))
static void
salsa_keysetup(salsa_ctx *ctx, const uint8_t *k)
{
ctx->input[TR[1]] = LOAD32_LE(k + 0);
ctx->input[TR[2]] = LOAD32_LE(k + 4);
ctx->input[TR[3]] = LOAD32_LE(k + 8);
ctx->input[TR[4]] = LOAD32_LE(k + 12);
ctx->input[TR[11]] = LOAD32_LE(k + 16);
ctx->input[TR[12]] = LOAD32_LE(k + 20);
ctx->input[TR[13]] = LOAD32_LE(k + 24);
ctx->input[TR[14]] = LOAD32_LE(k + 28);
ctx->input[TR[0]] = 0x61707865;
ctx->input[TR[5]] = 0x3320646e;
ctx->input[TR[10]] = 0x79622d32;
ctx->input[TR[15]] = 0x6b206574;
}
static void
salsa_ivsetup(salsa_ctx *ctx, const uint8_t *iv, const uint8_t *counter)
{
ctx->input[TR[6]] = LOAD32_LE(iv + 0);
ctx->input[TR[7]] = LOAD32_LE(iv + 4);
ctx->input[TR[8]] = counter == NULL ? 0 : LOAD32_LE(counter + 0);
ctx->input[TR[9]] = counter == NULL ? 0 : LOAD32_LE(counter + 4);
}
static void
salsa20_encrypt_bytes(salsa_ctx *ctx, const uint8_t *m, uint8_t *c,
unsigned long long bytes)
{
uint32_t * const x = &ctx->input[0];
if (!bytes) {
return; /* LCOV_EXCL_LINE */
}
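    /* each include consumes what it can and falls through: u8.h handles
       512-byte chunks (8 blocks, AVX2), u4.h 256-byte chunks (4 blocks,
       SSE), u1.h whole 64-byte blocks, u0.h the final partial block */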
#include "u8.h"
#include "u4.h"
#include "u1.h"
#include "u0.h"
}
int salsa20_stream_avx2(void* c, uint64_t clen, const void* iv, const void* key)
{
struct salsa_ctx ctx;
if (!clen) {
return 0;
}
salsa_keysetup(&ctx, (const uint8_t*)key);
salsa_ivsetup(&ctx, (const uint8_t*)iv, NULL);
memset(c, 0, clen);
salsa20_encrypt_bytes(&ctx, (const uint8_t*)c, (uint8_t*)c, clen);
return 0;
}
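salsa20_stream_avx2() acts as a keystream generator: it zeroes the caller's buffer and then encrypts it in place, and a zero buffer XORed with the cipher stream is the raw stream. A minimal usage sketch (xor_with_keystream is a hypothetical wrapper, not from the source tree; assumes a 32-byte key, an 8-byte IV and an AVX2-capable CPU):

#include <stdint.h>

int salsa20_stream_avx2(void* c, uint64_t clen, const void* iv, const void* key);

/* XOR buf with Salsa20/20 keystream; the same call encrypts and decrypts. */
static void xor_with_keystream(uint8_t* buf, uint32_t len,
                               const uint8_t key[32], const uint8_t iv[8])
{
    uint8_t stream[256];
    uint32_t i;
    if (len > sizeof(stream)) {
        return;                        /* demo-sized scratch buffer */
    }
    salsa20_stream_avx2(stream, len, iv, key);
    for (i = 0; i < len; ++i) {
        buf[i] ^= stream[i];
    }
}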


@@ -1,193 +0,0 @@
if (bytes > 0) {
__m128i diag0 = _mm_loadu_si128((const __m128i *) (x + 0));
__m128i diag1 = _mm_loadu_si128((const __m128i *) (x + 4));
__m128i diag2 = _mm_loadu_si128((const __m128i *) (x + 8));
__m128i diag3 = _mm_loadu_si128((const __m128i *) (x + 12));
__m128i a0, a1, a2, a3, a4, a5, a6, a7;
__m128i b0, b1, b2, b3, b4, b5, b6, b7;
uint8_t partialblock[64];
unsigned int i;
a0 = diag1;
for (i = 0; i < ROUNDS; i += 4) {
a0 = _mm_add_epi32(a0, diag0);
a1 = diag0;
b0 = a0;
a0 = _mm_slli_epi32(a0, 7);
b0 = _mm_srli_epi32(b0, 25);
diag3 = _mm_xor_si128(diag3, a0);
diag3 = _mm_xor_si128(diag3, b0);
a1 = _mm_add_epi32(a1, diag3);
a2 = diag3;
b1 = a1;
a1 = _mm_slli_epi32(a1, 9);
b1 = _mm_srli_epi32(b1, 23);
diag2 = _mm_xor_si128(diag2, a1);
diag3 = _mm_shuffle_epi32(diag3, 0x93);
diag2 = _mm_xor_si128(diag2, b1);
a2 = _mm_add_epi32(a2, diag2);
a3 = diag2;
b2 = a2;
a2 = _mm_slli_epi32(a2, 13);
b2 = _mm_srli_epi32(b2, 19);
diag1 = _mm_xor_si128(diag1, a2);
diag2 = _mm_shuffle_epi32(diag2, 0x4e);
diag1 = _mm_xor_si128(diag1, b2);
a3 = _mm_add_epi32(a3, diag1);
a4 = diag3;
b3 = a3;
a3 = _mm_slli_epi32(a3, 18);
b3 = _mm_srli_epi32(b3, 14);
diag0 = _mm_xor_si128(diag0, a3);
diag1 = _mm_shuffle_epi32(diag1, 0x39);
diag0 = _mm_xor_si128(diag0, b3);
a4 = _mm_add_epi32(a4, diag0);
a5 = diag0;
b4 = a4;
a4 = _mm_slli_epi32(a4, 7);
b4 = _mm_srli_epi32(b4, 25);
diag1 = _mm_xor_si128(diag1, a4);
diag1 = _mm_xor_si128(diag1, b4);
a5 = _mm_add_epi32(a5, diag1);
a6 = diag1;
b5 = a5;
a5 = _mm_slli_epi32(a5, 9);
b5 = _mm_srli_epi32(b5, 23);
diag2 = _mm_xor_si128(diag2, a5);
diag1 = _mm_shuffle_epi32(diag1, 0x93);
diag2 = _mm_xor_si128(diag2, b5);
a6 = _mm_add_epi32(a6, diag2);
a7 = diag2;
b6 = a6;
a6 = _mm_slli_epi32(a6, 13);
b6 = _mm_srli_epi32(b6, 19);
diag3 = _mm_xor_si128(diag3, a6);
diag2 = _mm_shuffle_epi32(diag2, 0x4e);
diag3 = _mm_xor_si128(diag3, b6);
a7 = _mm_add_epi32(a7, diag3);
a0 = diag1;
b7 = a7;
a7 = _mm_slli_epi32(a7, 18);
b7 = _mm_srli_epi32(b7, 14);
diag0 = _mm_xor_si128(diag0, a7);
diag3 = _mm_shuffle_epi32(diag3, 0x39);
diag0 = _mm_xor_si128(diag0, b7);
a0 = _mm_add_epi32(a0, diag0);
a1 = diag0;
b0 = a0;
a0 = _mm_slli_epi32(a0, 7);
b0 = _mm_srli_epi32(b0, 25);
diag3 = _mm_xor_si128(diag3, a0);
diag3 = _mm_xor_si128(diag3, b0);
a1 = _mm_add_epi32(a1, diag3);
a2 = diag3;
b1 = a1;
a1 = _mm_slli_epi32(a1, 9);
b1 = _mm_srli_epi32(b1, 23);
diag2 = _mm_xor_si128(diag2, a1);
diag3 = _mm_shuffle_epi32(diag3, 0x93);
diag2 = _mm_xor_si128(diag2, b1);
a2 = _mm_add_epi32(a2, diag2);
a3 = diag2;
b2 = a2;
a2 = _mm_slli_epi32(a2, 13);
b2 = _mm_srli_epi32(b2, 19);
diag1 = _mm_xor_si128(diag1, a2);
diag2 = _mm_shuffle_epi32(diag2, 0x4e);
diag1 = _mm_xor_si128(diag1, b2);
a3 = _mm_add_epi32(a3, diag1);
a4 = diag3;
b3 = a3;
a3 = _mm_slli_epi32(a3, 18);
b3 = _mm_srli_epi32(b3, 14);
diag0 = _mm_xor_si128(diag0, a3);
diag1 = _mm_shuffle_epi32(diag1, 0x39);
diag0 = _mm_xor_si128(diag0, b3);
a4 = _mm_add_epi32(a4, diag0);
a5 = diag0;
b4 = a4;
a4 = _mm_slli_epi32(a4, 7);
b4 = _mm_srli_epi32(b4, 25);
diag1 = _mm_xor_si128(diag1, a4);
diag1 = _mm_xor_si128(diag1, b4);
a5 = _mm_add_epi32(a5, diag1);
a6 = diag1;
b5 = a5;
a5 = _mm_slli_epi32(a5, 9);
b5 = _mm_srli_epi32(b5, 23);
diag2 = _mm_xor_si128(diag2, a5);
diag1 = _mm_shuffle_epi32(diag1, 0x93);
diag2 = _mm_xor_si128(diag2, b5);
a6 = _mm_add_epi32(a6, diag2);
a7 = diag2;
b6 = a6;
a6 = _mm_slli_epi32(a6, 13);
b6 = _mm_srli_epi32(b6, 19);
diag3 = _mm_xor_si128(diag3, a6);
diag2 = _mm_shuffle_epi32(diag2, 0x4e);
diag3 = _mm_xor_si128(diag3, b6);
a7 = _mm_add_epi32(a7, diag3);
a0 = diag1;
b7 = a7;
a7 = _mm_slli_epi32(a7, 18);
b7 = _mm_srli_epi32(b7, 14);
diag0 = _mm_xor_si128(diag0, a7);
diag3 = _mm_shuffle_epi32(diag3, 0x39);
diag0 = _mm_xor_si128(diag0, b7);
}
diag0 = _mm_add_epi32(diag0, _mm_loadu_si128((const __m128i *) (x + 0)));
diag1 = _mm_add_epi32(diag1, _mm_loadu_si128((const __m128i *) (x + 4)));
diag2 = _mm_add_epi32(diag2, _mm_loadu_si128((const __m128i *) (x + 8)));
diag3 = _mm_add_epi32(diag3, _mm_loadu_si128((const __m128i *) (x + 12)));
#define ONEQUAD_SHUFFLE(A, B, C, D) \
do { \
uint32_t in##A = _mm_cvtsi128_si32(diag0); \
uint32_t in##B = _mm_cvtsi128_si32(diag1); \
uint32_t in##C = _mm_cvtsi128_si32(diag2); \
uint32_t in##D = _mm_cvtsi128_si32(diag3); \
diag0 = _mm_shuffle_epi32(diag0, 0x39); \
diag1 = _mm_shuffle_epi32(diag1, 0x39); \
diag2 = _mm_shuffle_epi32(diag2, 0x39); \
diag3 = _mm_shuffle_epi32(diag3, 0x39); \
*(uint32_t *) (partialblock + (A * 4)) = in##A; \
*(uint32_t *) (partialblock + (B * 4)) = in##B; \
*(uint32_t *) (partialblock + (C * 4)) = in##C; \
*(uint32_t *) (partialblock + (D * 4)) = in##D; \
} while (0)
#define ONEQUAD(A, B, C, D) ONEQUAD_SHUFFLE(A, B, C, D)
ONEQUAD(0, 12, 8, 4);
ONEQUAD(5, 1, 13, 9);
ONEQUAD(10, 6, 2, 14);
ONEQUAD(15, 11, 7, 3);
#undef ONEQUAD
#undef ONEQUAD_SHUFFLE
for (i = 0; i < bytes; i++) {
c[i] = m[i] ^ partialblock[i];
}
}


@@ -1,207 +0,0 @@
while (bytes >= 64) {
__m128i diag0 = _mm_loadu_si128((const __m128i *) (x + 0));
__m128i diag1 = _mm_loadu_si128((const __m128i *) (x + 4));
__m128i diag2 = _mm_loadu_si128((const __m128i *) (x + 8));
__m128i diag3 = _mm_loadu_si128((const __m128i *) (x + 12));
__m128i a0, a1, a2, a3, a4, a5, a6, a7;
__m128i b0, b1, b2, b3, b4, b5, b6, b7;
uint32_t in8;
uint32_t in9;
int i;
a0 = diag1;
for (i = 0; i < ROUNDS; i += 4) {
a0 = _mm_add_epi32(a0, diag0);
a1 = diag0;
b0 = a0;
a0 = _mm_slli_epi32(a0, 7);
b0 = _mm_srli_epi32(b0, 25);
diag3 = _mm_xor_si128(diag3, a0);
diag3 = _mm_xor_si128(diag3, b0);
a1 = _mm_add_epi32(a1, diag3);
a2 = diag3;
b1 = a1;
a1 = _mm_slli_epi32(a1, 9);
b1 = _mm_srli_epi32(b1, 23);
diag2 = _mm_xor_si128(diag2, a1);
diag3 = _mm_shuffle_epi32(diag3, 0x93);
diag2 = _mm_xor_si128(diag2, b1);
a2 = _mm_add_epi32(a2, diag2);
a3 = diag2;
b2 = a2;
a2 = _mm_slli_epi32(a2, 13);
b2 = _mm_srli_epi32(b2, 19);
diag1 = _mm_xor_si128(diag1, a2);
diag2 = _mm_shuffle_epi32(diag2, 0x4e);
diag1 = _mm_xor_si128(diag1, b2);
a3 = _mm_add_epi32(a3, diag1);
a4 = diag3;
b3 = a3;
a3 = _mm_slli_epi32(a3, 18);
b3 = _mm_srli_epi32(b3, 14);
diag0 = _mm_xor_si128(diag0, a3);
diag1 = _mm_shuffle_epi32(diag1, 0x39);
diag0 = _mm_xor_si128(diag0, b3);
a4 = _mm_add_epi32(a4, diag0);
a5 = diag0;
b4 = a4;
a4 = _mm_slli_epi32(a4, 7);
b4 = _mm_srli_epi32(b4, 25);
diag1 = _mm_xor_si128(diag1, a4);
diag1 = _mm_xor_si128(diag1, b4);
a5 = _mm_add_epi32(a5, diag1);
a6 = diag1;
b5 = a5;
a5 = _mm_slli_epi32(a5, 9);
b5 = _mm_srli_epi32(b5, 23);
diag2 = _mm_xor_si128(diag2, a5);
diag1 = _mm_shuffle_epi32(diag1, 0x93);
diag2 = _mm_xor_si128(diag2, b5);
a6 = _mm_add_epi32(a6, diag2);
a7 = diag2;
b6 = a6;
a6 = _mm_slli_epi32(a6, 13);
b6 = _mm_srli_epi32(b6, 19);
diag3 = _mm_xor_si128(diag3, a6);
diag2 = _mm_shuffle_epi32(diag2, 0x4e);
diag3 = _mm_xor_si128(diag3, b6);
a7 = _mm_add_epi32(a7, diag3);
a0 = diag1;
b7 = a7;
a7 = _mm_slli_epi32(a7, 18);
b7 = _mm_srli_epi32(b7, 14);
diag0 = _mm_xor_si128(diag0, a7);
diag3 = _mm_shuffle_epi32(diag3, 0x39);
diag0 = _mm_xor_si128(diag0, b7);
a0 = _mm_add_epi32(a0, diag0);
a1 = diag0;
b0 = a0;
a0 = _mm_slli_epi32(a0, 7);
b0 = _mm_srli_epi32(b0, 25);
diag3 = _mm_xor_si128(diag3, a0);
diag3 = _mm_xor_si128(diag3, b0);
a1 = _mm_add_epi32(a1, diag3);
a2 = diag3;
b1 = a1;
a1 = _mm_slli_epi32(a1, 9);
b1 = _mm_srli_epi32(b1, 23);
diag2 = _mm_xor_si128(diag2, a1);
diag3 = _mm_shuffle_epi32(diag3, 0x93);
diag2 = _mm_xor_si128(diag2, b1);
a2 = _mm_add_epi32(a2, diag2);
a3 = diag2;
b2 = a2;
a2 = _mm_slli_epi32(a2, 13);
b2 = _mm_srli_epi32(b2, 19);
diag1 = _mm_xor_si128(diag1, a2);
diag2 = _mm_shuffle_epi32(diag2, 0x4e);
diag1 = _mm_xor_si128(diag1, b2);
a3 = _mm_add_epi32(a3, diag1);
a4 = diag3;
b3 = a3;
a3 = _mm_slli_epi32(a3, 18);
b3 = _mm_srli_epi32(b3, 14);
diag0 = _mm_xor_si128(diag0, a3);
diag1 = _mm_shuffle_epi32(diag1, 0x39);
diag0 = _mm_xor_si128(diag0, b3);
a4 = _mm_add_epi32(a4, diag0);
a5 = diag0;
b4 = a4;
a4 = _mm_slli_epi32(a4, 7);
b4 = _mm_srli_epi32(b4, 25);
diag1 = _mm_xor_si128(diag1, a4);
diag1 = _mm_xor_si128(diag1, b4);
a5 = _mm_add_epi32(a5, diag1);
a6 = diag1;
b5 = a5;
a5 = _mm_slli_epi32(a5, 9);
b5 = _mm_srli_epi32(b5, 23);
diag2 = _mm_xor_si128(diag2, a5);
diag1 = _mm_shuffle_epi32(diag1, 0x93);
diag2 = _mm_xor_si128(diag2, b5);
a6 = _mm_add_epi32(a6, diag2);
a7 = diag2;
b6 = a6;
a6 = _mm_slli_epi32(a6, 13);
b6 = _mm_srli_epi32(b6, 19);
diag3 = _mm_xor_si128(diag3, a6);
diag2 = _mm_shuffle_epi32(diag2, 0x4e);
diag3 = _mm_xor_si128(diag3, b6);
a7 = _mm_add_epi32(a7, diag3);
a0 = diag1;
b7 = a7;
a7 = _mm_slli_epi32(a7, 18);
b7 = _mm_srli_epi32(b7, 14);
diag0 = _mm_xor_si128(diag0, a7);
diag3 = _mm_shuffle_epi32(diag3, 0x39);
diag0 = _mm_xor_si128(diag0, b7);
}
diag0 = _mm_add_epi32(diag0, _mm_loadu_si128((const __m128i *) (x + 0)));
diag1 = _mm_add_epi32(diag1, _mm_loadu_si128((const __m128i *) (x + 4)));
diag2 = _mm_add_epi32(diag2, _mm_loadu_si128((const __m128i *) (x + 8)));
diag3 = _mm_add_epi32(diag3, _mm_loadu_si128((const __m128i *) (x + 12)));
#define ONEQUAD_SHUFFLE(A, B, C, D) \
do { \
uint32_t in##A = _mm_cvtsi128_si32(diag0); \
uint32_t in##B = _mm_cvtsi128_si32(diag1); \
uint32_t in##C = _mm_cvtsi128_si32(diag2); \
uint32_t in##D = _mm_cvtsi128_si32(diag3); \
diag0 = _mm_shuffle_epi32(diag0, 0x39); \
diag1 = _mm_shuffle_epi32(diag1, 0x39); \
diag2 = _mm_shuffle_epi32(diag2, 0x39); \
diag3 = _mm_shuffle_epi32(diag3, 0x39); \
in##A ^= *(const uint32_t *) (m + (A * 4)); \
in##B ^= *(const uint32_t *) (m + (B * 4)); \
in##C ^= *(const uint32_t *) (m + (C * 4)); \
in##D ^= *(const uint32_t *) (m + (D * 4)); \
*(uint32_t *) (c + (A * 4)) = in##A; \
*(uint32_t *) (c + (B * 4)) = in##B; \
*(uint32_t *) (c + (C * 4)) = in##C; \
*(uint32_t *) (c + (D * 4)) = in##D; \
} while (0)
#define ONEQUAD(A, B, C, D) ONEQUAD_SHUFFLE(A, B, C, D)
ONEQUAD(0, 12, 8, 4);
ONEQUAD(5, 1, 13, 9);
ONEQUAD(10, 6, 2, 14);
ONEQUAD(15, 11, 7, 3);
#undef ONEQUAD
#undef ONEQUAD_SHUFFLE
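/* advance the 64-bit block counter, kept as two 32-bit words (x[8] = low,
   x[13] = high in this diagonal layout), with an explicit carry */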
in8 = x[8];
in9 = x[13];
in8++;
if (in8 == 0) {
in9++;
}
x[8] = in8;
x[13] = in9;
c += 64;
m += 64;
bytes -= 64;
}

View file

@@ -1,547 +0,0 @@
if (bytes >= 256) {
__m128i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14,
y15;
__m128i z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14,
z15;
__m128i orig0, orig1, orig2, orig3, orig4, orig5, orig6, orig7, orig8,
orig9, orig10, orig11, orig12, orig13, orig14, orig15;
uint32_t in8;
uint32_t in9;
int i;
/* the element-broadcast immediates for _mm_shuffle_epi32 are, in order:
0x00, 0x55, 0xaa, 0xff */
z0 = _mm_loadu_si128((const __m128i *) (x + 0));
z5 = _mm_shuffle_epi32(z0, 0x55);
z10 = _mm_shuffle_epi32(z0, 0xaa);
z15 = _mm_shuffle_epi32(z0, 0xff);
z0 = _mm_shuffle_epi32(z0, 0x00);
z1 = _mm_loadu_si128((const __m128i *) (x + 4));
z6 = _mm_shuffle_epi32(z1, 0xaa);
z11 = _mm_shuffle_epi32(z1, 0xff);
z12 = _mm_shuffle_epi32(z1, 0x00);
z1 = _mm_shuffle_epi32(z1, 0x55);
z2 = _mm_loadu_si128((const __m128i *) (x + 8));
z7 = _mm_shuffle_epi32(z2, 0xff);
z13 = _mm_shuffle_epi32(z2, 0x55);
z2 = _mm_shuffle_epi32(z2, 0xaa);
/* no z8 -> first half of the nonce, will fill later */
z3 = _mm_loadu_si128((const __m128i *) (x + 12));
z4 = _mm_shuffle_epi32(z3, 0x00);
z14 = _mm_shuffle_epi32(z3, 0xaa);
z3 = _mm_shuffle_epi32(z3, 0xff);
/* no z9 -> second half of the nonce, will fill later */
orig0 = z0;
orig1 = z1;
orig2 = z2;
orig3 = z3;
orig4 = z4;
orig5 = z5;
orig6 = z6;
orig7 = z7;
orig10 = z10;
orig11 = z11;
orig12 = z12;
orig13 = z13;
orig14 = z14;
orig15 = z15;
while (bytes >= 256) {
/* vector implementation for z8 and z9 */
/* not sure if it helps for only 4 blocks */
const __m128i addv8 = _mm_set_epi64x(1, 0);
const __m128i addv9 = _mm_set_epi64x(3, 2);
__m128i t8, t9;
uint64_t in89;
in8 = x[8];
in9 = x[13];
in89 = ((uint64_t) in8) | (((uint64_t) in9) << 32);
t8 = _mm_set1_epi64x(in89);
t9 = _mm_set1_epi64x(in89);
z8 = _mm_add_epi64(addv8, t8);
z9 = _mm_add_epi64(addv9, t9);
t8 = _mm_unpacklo_epi32(z8, z9);
t9 = _mm_unpackhi_epi32(z8, z9);
z8 = _mm_unpacklo_epi32(t8, t9);
z9 = _mm_unpackhi_epi32(t8, t9);
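/* after the two unpack passes z8 holds the four per-block counter low
   words and z9 the four high words, one per block lane */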
orig8 = z8;
orig9 = z9;
in89 += 4;
x[8] = in89 & 0xFFFFFFFF;
x[13] = (in89 >> 32) & 0xFFFFFFFF;
z5 = orig5;
z10 = orig10;
z15 = orig15;
z14 = orig14;
z3 = orig3;
z6 = orig6;
z11 = orig11;
z1 = orig1;
z7 = orig7;
z13 = orig13;
z2 = orig2;
z9 = orig9;
z0 = orig0;
z12 = orig12;
z4 = orig4;
z8 = orig8;
for (i = 0; i < ROUNDS; i += 2) {
/* the inner loop is a direct translation (regexp search/replace)
* from the amd64-xmm6 ASM */
__m128i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13,
r14, r15;
y4 = z12;
y4 = _mm_add_epi32(y4, z0);
r4 = y4;
y4 = _mm_slli_epi32(y4, 7);
z4 = _mm_xor_si128(z4, y4);
r4 = _mm_srli_epi32(r4, 25);
z4 = _mm_xor_si128(z4, r4);
y9 = z1;
y9 = _mm_add_epi32(y9, z5);
r9 = y9;
y9 = _mm_slli_epi32(y9, 7);
z9 = _mm_xor_si128(z9, y9);
r9 = _mm_srli_epi32(r9, 25);
z9 = _mm_xor_si128(z9, r9);
y8 = z0;
y8 = _mm_add_epi32(y8, z4);
r8 = y8;
y8 = _mm_slli_epi32(y8, 9);
z8 = _mm_xor_si128(z8, y8);
r8 = _mm_srli_epi32(r8, 23);
z8 = _mm_xor_si128(z8, r8);
y13 = z5;
y13 = _mm_add_epi32(y13, z9);
r13 = y13;
y13 = _mm_slli_epi32(y13, 9);
z13 = _mm_xor_si128(z13, y13);
r13 = _mm_srli_epi32(r13, 23);
z13 = _mm_xor_si128(z13, r13);
y12 = z4;
y12 = _mm_add_epi32(y12, z8);
r12 = y12;
y12 = _mm_slli_epi32(y12, 13);
z12 = _mm_xor_si128(z12, y12);
r12 = _mm_srli_epi32(r12, 19);
z12 = _mm_xor_si128(z12, r12);
y1 = z9;
y1 = _mm_add_epi32(y1, z13);
r1 = y1;
y1 = _mm_slli_epi32(y1, 13);
z1 = _mm_xor_si128(z1, y1);
r1 = _mm_srli_epi32(r1, 19);
z1 = _mm_xor_si128(z1, r1);
y0 = z8;
y0 = _mm_add_epi32(y0, z12);
r0 = y0;
y0 = _mm_slli_epi32(y0, 18);
z0 = _mm_xor_si128(z0, y0);
r0 = _mm_srli_epi32(r0, 14);
z0 = _mm_xor_si128(z0, r0);
y5 = z13;
y5 = _mm_add_epi32(y5, z1);
r5 = y5;
y5 = _mm_slli_epi32(y5, 18);
z5 = _mm_xor_si128(z5, y5);
r5 = _mm_srli_epi32(r5, 14);
z5 = _mm_xor_si128(z5, r5);
y14 = z6;
y14 = _mm_add_epi32(y14, z10);
r14 = y14;
y14 = _mm_slli_epi32(y14, 7);
z14 = _mm_xor_si128(z14, y14);
r14 = _mm_srli_epi32(r14, 25);
z14 = _mm_xor_si128(z14, r14);
y3 = z11;
y3 = _mm_add_epi32(y3, z15);
r3 = y3;
y3 = _mm_slli_epi32(y3, 7);
z3 = _mm_xor_si128(z3, y3);
r3 = _mm_srli_epi32(r3, 25);
z3 = _mm_xor_si128(z3, r3);
y2 = z10;
y2 = _mm_add_epi32(y2, z14);
r2 = y2;
y2 = _mm_slli_epi32(y2, 9);
z2 = _mm_xor_si128(z2, y2);
r2 = _mm_srli_epi32(r2, 23);
z2 = _mm_xor_si128(z2, r2);
y7 = z15;
y7 = _mm_add_epi32(y7, z3);
r7 = y7;
y7 = _mm_slli_epi32(y7, 9);
z7 = _mm_xor_si128(z7, y7);
r7 = _mm_srli_epi32(r7, 23);
z7 = _mm_xor_si128(z7, r7);
y6 = z14;
y6 = _mm_add_epi32(y6, z2);
r6 = y6;
y6 = _mm_slli_epi32(y6, 13);
z6 = _mm_xor_si128(z6, y6);
r6 = _mm_srli_epi32(r6, 19);
z6 = _mm_xor_si128(z6, r6);
y11 = z3;
y11 = _mm_add_epi32(y11, z7);
r11 = y11;
y11 = _mm_slli_epi32(y11, 13);
z11 = _mm_xor_si128(z11, y11);
r11 = _mm_srli_epi32(r11, 19);
z11 = _mm_xor_si128(z11, r11);
y10 = z2;
y10 = _mm_add_epi32(y10, z6);
r10 = y10;
y10 = _mm_slli_epi32(y10, 18);
z10 = _mm_xor_si128(z10, y10);
r10 = _mm_srli_epi32(r10, 14);
z10 = _mm_xor_si128(z10, r10);
y1 = z3;
y1 = _mm_add_epi32(y1, z0);
r1 = y1;
y1 = _mm_slli_epi32(y1, 7);
z1 = _mm_xor_si128(z1, y1);
r1 = _mm_srli_epi32(r1, 25);
z1 = _mm_xor_si128(z1, r1);
y15 = z7;
y15 = _mm_add_epi32(y15, z11);
r15 = y15;
y15 = _mm_slli_epi32(y15, 18);
z15 = _mm_xor_si128(z15, y15);
r15 = _mm_srli_epi32(r15, 14);
z15 = _mm_xor_si128(z15, r15);
y6 = z4;
y6 = _mm_add_epi32(y6, z5);
r6 = y6;
y6 = _mm_slli_epi32(y6, 7);
z6 = _mm_xor_si128(z6, y6);
r6 = _mm_srli_epi32(r6, 25);
z6 = _mm_xor_si128(z6, r6);
y2 = z0;
y2 = _mm_add_epi32(y2, z1);
r2 = y2;
y2 = _mm_slli_epi32(y2, 9);
z2 = _mm_xor_si128(z2, y2);
r2 = _mm_srli_epi32(r2, 23);
z2 = _mm_xor_si128(z2, r2);
y7 = z5;
y7 = _mm_add_epi32(y7, z6);
r7 = y7;
y7 = _mm_slli_epi32(y7, 9);
z7 = _mm_xor_si128(z7, y7);
r7 = _mm_srli_epi32(r7, 23);
z7 = _mm_xor_si128(z7, r7);
y3 = z1;
y3 = _mm_add_epi32(y3, z2);
r3 = y3;
y3 = _mm_slli_epi32(y3, 13);
z3 = _mm_xor_si128(z3, y3);
r3 = _mm_srli_epi32(r3, 19);
z3 = _mm_xor_si128(z3, r3);
y4 = z6;
y4 = _mm_add_epi32(y4, z7);
r4 = y4;
y4 = _mm_slli_epi32(y4, 13);
z4 = _mm_xor_si128(z4, y4);
r4 = _mm_srli_epi32(r4, 19);
z4 = _mm_xor_si128(z4, r4);
y0 = z2;
y0 = _mm_add_epi32(y0, z3);
r0 = y0;
y0 = _mm_slli_epi32(y0, 18);
z0 = _mm_xor_si128(z0, y0);
r0 = _mm_srli_epi32(r0, 14);
z0 = _mm_xor_si128(z0, r0);
y5 = z7;
y5 = _mm_add_epi32(y5, z4);
r5 = y5;
y5 = _mm_slli_epi32(y5, 18);
z5 = _mm_xor_si128(z5, y5);
r5 = _mm_srli_epi32(r5, 14);
z5 = _mm_xor_si128(z5, r5);
y11 = z9;
y11 = _mm_add_epi32(y11, z10);
r11 = y11;
y11 = _mm_slli_epi32(y11, 7);
z11 = _mm_xor_si128(z11, y11);
r11 = _mm_srli_epi32(r11, 25);
z11 = _mm_xor_si128(z11, r11);
y12 = z14;
y12 = _mm_add_epi32(y12, z15);
r12 = y12;
y12 = _mm_slli_epi32(y12, 7);
z12 = _mm_xor_si128(z12, y12);
r12 = _mm_srli_epi32(r12, 25);
z12 = _mm_xor_si128(z12, r12);
y8 = z10;
y8 = _mm_add_epi32(y8, z11);
r8 = y8;
y8 = _mm_slli_epi32(y8, 9);
z8 = _mm_xor_si128(z8, y8);
r8 = _mm_srli_epi32(r8, 23);
z8 = _mm_xor_si128(z8, r8);
y13 = z15;
y13 = _mm_add_epi32(y13, z12);
r13 = y13;
y13 = _mm_slli_epi32(y13, 9);
z13 = _mm_xor_si128(z13, y13);
r13 = _mm_srli_epi32(r13, 23);
z13 = _mm_xor_si128(z13, r13);
y9 = z11;
y9 = _mm_add_epi32(y9, z8);
r9 = y9;
y9 = _mm_slli_epi32(y9, 13);
z9 = _mm_xor_si128(z9, y9);
r9 = _mm_srli_epi32(r9, 19);
z9 = _mm_xor_si128(z9, r9);
y14 = z12;
y14 = _mm_add_epi32(y14, z13);
r14 = y14;
y14 = _mm_slli_epi32(y14, 13);
z14 = _mm_xor_si128(z14, y14);
r14 = _mm_srli_epi32(r14, 19);
z14 = _mm_xor_si128(z14, r14);
y10 = z8;
y10 = _mm_add_epi32(y10, z9);
r10 = y10;
y10 = _mm_slli_epi32(y10, 18);
z10 = _mm_xor_si128(z10, y10);
r10 = _mm_srli_epi32(r10, 14);
z10 = _mm_xor_si128(z10, r10);
y15 = z13;
y15 = _mm_add_epi32(y15, z14);
r15 = y15;
y15 = _mm_slli_epi32(y15, 18);
z15 = _mm_xor_si128(z15, y15);
r15 = _mm_srli_epi32(r15, 14);
z15 = _mm_xor_si128(z15, r15);
}
/* store data ; this macro replicates the original amd64-xmm6 code */
#define ONEQUAD_SHUFFLE(A, B, C, D) \
z##A = _mm_add_epi32(z##A, orig##A); \
z##B = _mm_add_epi32(z##B, orig##B); \
z##C = _mm_add_epi32(z##C, orig##C); \
z##D = _mm_add_epi32(z##D, orig##D); \
in##A = _mm_cvtsi128_si32(z##A); \
in##B = _mm_cvtsi128_si32(z##B); \
in##C = _mm_cvtsi128_si32(z##C); \
in##D = _mm_cvtsi128_si32(z##D); \
z##A = _mm_shuffle_epi32(z##A, 0x39); \
z##B = _mm_shuffle_epi32(z##B, 0x39); \
z##C = _mm_shuffle_epi32(z##C, 0x39); \
z##D = _mm_shuffle_epi32(z##D, 0x39); \
\
in##A ^= *(uint32_t *) (m + 0); \
in##B ^= *(uint32_t *) (m + 4); \
in##C ^= *(uint32_t *) (m + 8); \
in##D ^= *(uint32_t *) (m + 12); \
\
*(uint32_t *) (c + 0) = in##A; \
*(uint32_t *) (c + 4) = in##B; \
*(uint32_t *) (c + 8) = in##C; \
*(uint32_t *) (c + 12) = in##D; \
\
in##A = _mm_cvtsi128_si32(z##A); \
in##B = _mm_cvtsi128_si32(z##B); \
in##C = _mm_cvtsi128_si32(z##C); \
in##D = _mm_cvtsi128_si32(z##D); \
z##A = _mm_shuffle_epi32(z##A, 0x39); \
z##B = _mm_shuffle_epi32(z##B, 0x39); \
z##C = _mm_shuffle_epi32(z##C, 0x39); \
z##D = _mm_shuffle_epi32(z##D, 0x39); \
\
in##A ^= *(uint32_t *) (m + 64); \
in##B ^= *(uint32_t *) (m + 68); \
in##C ^= *(uint32_t *) (m + 72); \
in##D ^= *(uint32_t *) (m + 76); \
*(uint32_t *) (c + 64) = in##A; \
*(uint32_t *) (c + 68) = in##B; \
*(uint32_t *) (c + 72) = in##C; \
*(uint32_t *) (c + 76) = in##D; \
\
in##A = _mm_cvtsi128_si32(z##A); \
in##B = _mm_cvtsi128_si32(z##B); \
in##C = _mm_cvtsi128_si32(z##C); \
in##D = _mm_cvtsi128_si32(z##D); \
z##A = _mm_shuffle_epi32(z##A, 0x39); \
z##B = _mm_shuffle_epi32(z##B, 0x39); \
z##C = _mm_shuffle_epi32(z##C, 0x39); \
z##D = _mm_shuffle_epi32(z##D, 0x39); \
\
in##A ^= *(uint32_t *) (m + 128); \
in##B ^= *(uint32_t *) (m + 132); \
in##C ^= *(uint32_t *) (m + 136); \
in##D ^= *(uint32_t *) (m + 140); \
*(uint32_t *) (c + 128) = in##A; \
*(uint32_t *) (c + 132) = in##B; \
*(uint32_t *) (c + 136) = in##C; \
*(uint32_t *) (c + 140) = in##D; \
\
in##A = _mm_cvtsi128_si32(z##A); \
in##B = _mm_cvtsi128_si32(z##B); \
in##C = _mm_cvtsi128_si32(z##C); \
in##D = _mm_cvtsi128_si32(z##D); \
\
in##A ^= *(uint32_t *) (m + 192); \
in##B ^= *(uint32_t *) (m + 196); \
in##C ^= *(uint32_t *) (m + 200); \
in##D ^= *(uint32_t *) (m + 204); \
*(uint32_t *) (c + 192) = in##A; \
*(uint32_t *) (c + 196) = in##B; \
*(uint32_t *) (c + 200) = in##C; \
*(uint32_t *) (c + 204) = in##D
/* store data; this macro replaces shuffle+mov with a direct extract; not much
* difference */
#define ONEQUAD_EXTRACT(A, B, C, D) \
z##A = _mm_add_epi32(z##A, orig##A); \
z##B = _mm_add_epi32(z##B, orig##B); \
z##C = _mm_add_epi32(z##C, orig##C); \
z##D = _mm_add_epi32(z##D, orig##D); \
in##A = _mm_cvtsi128_si32(z##A); \
in##B = _mm_cvtsi128_si32(z##B); \
in##C = _mm_cvtsi128_si32(z##C); \
in##D = _mm_cvtsi128_si32(z##D); \
in##A ^= *(uint32_t *) (m + 0); \
in##B ^= *(uint32_t *) (m + 4); \
in##C ^= *(uint32_t *) (m + 8); \
in##D ^= *(uint32_t *) (m + 12); \
*(uint32_t *) (c + 0) = in##A; \
*(uint32_t *) (c + 4) = in##B; \
*(uint32_t *) (c + 8) = in##C; \
*(uint32_t *) (c + 12) = in##D; \
\
in##A = _mm_extract_epi32(z##A, 1); \
in##B = _mm_extract_epi32(z##B, 1); \
in##C = _mm_extract_epi32(z##C, 1); \
in##D = _mm_extract_epi32(z##D, 1); \
\
in##A ^= *(uint32_t *) (m + 64); \
in##B ^= *(uint32_t *) (m + 68); \
in##C ^= *(uint32_t *) (m + 72); \
in##D ^= *(uint32_t *) (m + 76); \
*(uint32_t *) (c + 64) = in##A; \
*(uint32_t *) (c + 68) = in##B; \
*(uint32_t *) (c + 72) = in##C; \
*(uint32_t *) (c + 76) = in##D; \
\
in##A = _mm_extract_epi32(z##A, 2); \
in##B = _mm_extract_epi32(z##B, 2); \
in##C = _mm_extract_epi32(z##C, 2); \
in##D = _mm_extract_epi32(z##D, 2); \
\
in##A ^= *(uint32_t *) (m + 128); \
in##B ^= *(uint32_t *) (m + 132); \
in##C ^= *(uint32_t *) (m + 136); \
in##D ^= *(uint32_t *) (m + 140); \
*(uint32_t *) (c + 128) = in##A; \
*(uint32_t *) (c + 132) = in##B; \
*(uint32_t *) (c + 136) = in##C; \
*(uint32_t *) (c + 140) = in##D; \
\
in##A = _mm_extract_epi32(z##A, 3); \
in##B = _mm_extract_epi32(z##B, 3); \
in##C = _mm_extract_epi32(z##C, 3); \
in##D = _mm_extract_epi32(z##D, 3); \
\
in##A ^= *(uint32_t *) (m + 192); \
in##B ^= *(uint32_t *) (m + 196); \
in##C ^= *(uint32_t *) (m + 200); \
in##D ^= *(uint32_t *) (m + 204); \
*(uint32_t *) (c + 192) = in##A; \
*(uint32_t *) (c + 196) = in##B; \
*(uint32_t *) (c + 200) = in##C; \
*(uint32_t *) (c + 204) = in##D
/* store data; this macro first transposes the data in registers, and then
* stores it to memory. much faster with icc. */
#define ONEQUAD_TRANSPOSE(A, B, C, D) \
z##A = _mm_add_epi32(z##A, orig##A); \
z##B = _mm_add_epi32(z##B, orig##B); \
z##C = _mm_add_epi32(z##C, orig##C); \
z##D = _mm_add_epi32(z##D, orig##D); \
y##A = _mm_unpacklo_epi32(z##A, z##B); \
y##B = _mm_unpacklo_epi32(z##C, z##D); \
y##C = _mm_unpackhi_epi32(z##A, z##B); \
y##D = _mm_unpackhi_epi32(z##C, z##D); \
z##A = _mm_unpacklo_epi64(y##A, y##B); \
z##B = _mm_unpackhi_epi64(y##A, y##B); \
z##C = _mm_unpacklo_epi64(y##C, y##D); \
z##D = _mm_unpackhi_epi64(y##C, y##D); \
y##A = _mm_xor_si128(z##A, _mm_loadu_si128((const __m128i *) (m + 0))); \
_mm_storeu_si128((__m128i *) (c + 0), y##A); \
y##B = _mm_xor_si128(z##B, _mm_loadu_si128((const __m128i *) (m + 64))); \
_mm_storeu_si128((__m128i *) (c + 64), y##B); \
y##C = _mm_xor_si128(z##C, _mm_loadu_si128((const __m128i *) (m + 128))); \
_mm_storeu_si128((__m128i *) (c + 128), y##C); \
y##D = _mm_xor_si128(z##D, _mm_loadu_si128((const __m128i *) (m + 192))); \
_mm_storeu_si128((__m128i *) (c + 192), y##D)
#define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D)
ONEQUAD(0, 1, 2, 3);
m += 16;
c += 16;
ONEQUAD(4, 5, 6, 7);
m += 16;
c += 16;
ONEQUAD(8, 9, 10, 11);
m += 16;
c += 16;
ONEQUAD(12, 13, 14, 15);
m -= 48;
c -= 48;
#undef ONEQUAD
#undef ONEQUAD_TRANSPOSE
#undef ONEQUAD_EXTRACT
#undef ONEQUAD_SHUFFLE
bytes -= 256;
c += 256;
m += 256;
}
}


@@ -1,477 +0,0 @@
if (bytes >= 512) {
__m256i y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13, y14,
y15;
/* the naive way seems as fast as (if not a bit faster than) the vector way */
__m256i z0 = _mm256_set1_epi32(x[0]);
__m256i z5 = _mm256_set1_epi32(x[1]);
__m256i z10 = _mm256_set1_epi32(x[2]);
__m256i z15 = _mm256_set1_epi32(x[3]);
__m256i z12 = _mm256_set1_epi32(x[4]);
__m256i z1 = _mm256_set1_epi32(x[5]);
__m256i z6 = _mm256_set1_epi32(x[6]);
__m256i z11 = _mm256_set1_epi32(x[7]);
__m256i z8; /* useless */
__m256i z13 = _mm256_set1_epi32(x[9]);
__m256i z2 = _mm256_set1_epi32(x[10]);
__m256i z7 = _mm256_set1_epi32(x[11]);
__m256i z4 = _mm256_set1_epi32(x[12]);
__m256i z9; /* useless */
__m256i z14 = _mm256_set1_epi32(x[14]);
__m256i z3 = _mm256_set1_epi32(x[15]);
__m256i orig0 = z0;
__m256i orig1 = z1;
__m256i orig2 = z2;
__m256i orig3 = z3;
__m256i orig4 = z4;
__m256i orig5 = z5;
__m256i orig6 = z6;
__m256i orig7 = z7;
__m256i orig8;
__m256i orig9;
__m256i orig10 = z10;
__m256i orig11 = z11;
__m256i orig12 = z12;
__m256i orig13 = z13;
__m256i orig14 = z14;
__m256i orig15 = z15;
uint32_t in8;
uint32_t in9;
int i;
while (bytes >= 512) {
/* vector implementation for z8 and z9 */
/* faster than the naive version for 8 blocks */
const __m256i addv8 = _mm256_set_epi64x(3, 2, 1, 0);
const __m256i addv9 = _mm256_set_epi64x(7, 6, 5, 4);
const __m256i permute = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
__m256i t8, t9;
uint64_t in89;
in8 = x[8];
in9 = x[13]; /* see arrays above for the address translation */
in89 = ((uint64_t) in8) | (((uint64_t) in9) << 32);
z8 = z9 = _mm256_broadcastq_epi64(_mm_cvtsi64_si128(in89));
t8 = _mm256_add_epi64(addv8, z8);
t9 = _mm256_add_epi64(addv9, z9);
z8 = _mm256_unpacklo_epi32(t8, t9);
z9 = _mm256_unpackhi_epi32(t8, t9);
t8 = _mm256_unpacklo_epi32(z8, z9);
t9 = _mm256_unpackhi_epi32(z8, z9);
/* required because unpack* are intra-lane */
z8 = _mm256_permutevar8x32_epi32(t8, permute);
z9 = _mm256_permutevar8x32_epi32(t9, permute);
orig8 = z8;
orig9 = z9;
in89 += 8;
x[8] = in89 & 0xFFFFFFFF;
x[13] = (in89 >> 32) & 0xFFFFFFFF;
z5 = orig5;
z10 = orig10;
z15 = orig15;
z14 = orig14;
z3 = orig3;
z6 = orig6;
z11 = orig11;
z1 = orig1;
z7 = orig7;
z13 = orig13;
z2 = orig2;
z9 = orig9;
z0 = orig0;
z12 = orig12;
z4 = orig4;
z8 = orig8;
for (i = 0; i < ROUNDS; i += 2) {
/* the inner loop is a direct translation (regexp search/replace)
* from the amd64-xmm6 ASM */
__m256i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13,
r14, r15;
y4 = z12;
y4 = _mm256_add_epi32(y4, z0);
r4 = y4;
y4 = _mm256_slli_epi32(y4, 7);
z4 = _mm256_xor_si256(z4, y4);
r4 = _mm256_srli_epi32(r4, 25);
z4 = _mm256_xor_si256(z4, r4);
y9 = z1;
y9 = _mm256_add_epi32(y9, z5);
r9 = y9;
y9 = _mm256_slli_epi32(y9, 7);
z9 = _mm256_xor_si256(z9, y9);
r9 = _mm256_srli_epi32(r9, 25);
z9 = _mm256_xor_si256(z9, r9);
y8 = z0;
y8 = _mm256_add_epi32(y8, z4);
r8 = y8;
y8 = _mm256_slli_epi32(y8, 9);
z8 = _mm256_xor_si256(z8, y8);
r8 = _mm256_srli_epi32(r8, 23);
z8 = _mm256_xor_si256(z8, r8);
y13 = z5;
y13 = _mm256_add_epi32(y13, z9);
r13 = y13;
y13 = _mm256_slli_epi32(y13, 9);
z13 = _mm256_xor_si256(z13, y13);
r13 = _mm256_srli_epi32(r13, 23);
z13 = _mm256_xor_si256(z13, r13);
y12 = z4;
y12 = _mm256_add_epi32(y12, z8);
r12 = y12;
y12 = _mm256_slli_epi32(y12, 13);
z12 = _mm256_xor_si256(z12, y12);
r12 = _mm256_srli_epi32(r12, 19);
z12 = _mm256_xor_si256(z12, r12);
y1 = z9;
y1 = _mm256_add_epi32(y1, z13);
r1 = y1;
y1 = _mm256_slli_epi32(y1, 13);
z1 = _mm256_xor_si256(z1, y1);
r1 = _mm256_srli_epi32(r1, 19);
z1 = _mm256_xor_si256(z1, r1);
y0 = z8;
y0 = _mm256_add_epi32(y0, z12);
r0 = y0;
y0 = _mm256_slli_epi32(y0, 18);
z0 = _mm256_xor_si256(z0, y0);
r0 = _mm256_srli_epi32(r0, 14);
z0 = _mm256_xor_si256(z0, r0);
y5 = z13;
y5 = _mm256_add_epi32(y5, z1);
r5 = y5;
y5 = _mm256_slli_epi32(y5, 18);
z5 = _mm256_xor_si256(z5, y5);
r5 = _mm256_srli_epi32(r5, 14);
z5 = _mm256_xor_si256(z5, r5);
y14 = z6;
y14 = _mm256_add_epi32(y14, z10);
r14 = y14;
y14 = _mm256_slli_epi32(y14, 7);
z14 = _mm256_xor_si256(z14, y14);
r14 = _mm256_srli_epi32(r14, 25);
z14 = _mm256_xor_si256(z14, r14);
y3 = z11;
y3 = _mm256_add_epi32(y3, z15);
r3 = y3;
y3 = _mm256_slli_epi32(y3, 7);
z3 = _mm256_xor_si256(z3, y3);
r3 = _mm256_srli_epi32(r3, 25);
z3 = _mm256_xor_si256(z3, r3);
y2 = z10;
y2 = _mm256_add_epi32(y2, z14);
r2 = y2;
y2 = _mm256_slli_epi32(y2, 9);
z2 = _mm256_xor_si256(z2, y2);
r2 = _mm256_srli_epi32(r2, 23);
z2 = _mm256_xor_si256(z2, r2);
y7 = z15;
y7 = _mm256_add_epi32(y7, z3);
r7 = y7;
y7 = _mm256_slli_epi32(y7, 9);
z7 = _mm256_xor_si256(z7, y7);
r7 = _mm256_srli_epi32(r7, 23);
z7 = _mm256_xor_si256(z7, r7);
y6 = z14;
y6 = _mm256_add_epi32(y6, z2);
r6 = y6;
y6 = _mm256_slli_epi32(y6, 13);
z6 = _mm256_xor_si256(z6, y6);
r6 = _mm256_srli_epi32(r6, 19);
z6 = _mm256_xor_si256(z6, r6);
y11 = z3;
y11 = _mm256_add_epi32(y11, z7);
r11 = y11;
y11 = _mm256_slli_epi32(y11, 13);
z11 = _mm256_xor_si256(z11, y11);
r11 = _mm256_srli_epi32(r11, 19);
z11 = _mm256_xor_si256(z11, r11);
y10 = z2;
y10 = _mm256_add_epi32(y10, z6);
r10 = y10;
y10 = _mm256_slli_epi32(y10, 18);
z10 = _mm256_xor_si256(z10, y10);
r10 = _mm256_srli_epi32(r10, 14);
z10 = _mm256_xor_si256(z10, r10);
y1 = z3;
y1 = _mm256_add_epi32(y1, z0);
r1 = y1;
y1 = _mm256_slli_epi32(y1, 7);
z1 = _mm256_xor_si256(z1, y1);
r1 = _mm256_srli_epi32(r1, 25);
z1 = _mm256_xor_si256(z1, r1);
y15 = z7;
y15 = _mm256_add_epi32(y15, z11);
r15 = y15;
y15 = _mm256_slli_epi32(y15, 18);
z15 = _mm256_xor_si256(z15, y15);
r15 = _mm256_srli_epi32(r15, 14);
z15 = _mm256_xor_si256(z15, r15);
y6 = z4;
y6 = _mm256_add_epi32(y6, z5);
r6 = y6;
y6 = _mm256_slli_epi32(y6, 7);
z6 = _mm256_xor_si256(z6, y6);
r6 = _mm256_srli_epi32(r6, 25);
z6 = _mm256_xor_si256(z6, r6);
y2 = z0;
y2 = _mm256_add_epi32(y2, z1);
r2 = y2;
y2 = _mm256_slli_epi32(y2, 9);
z2 = _mm256_xor_si256(z2, y2);
r2 = _mm256_srli_epi32(r2, 23);
z2 = _mm256_xor_si256(z2, r2);
y7 = z5;
y7 = _mm256_add_epi32(y7, z6);
r7 = y7;
y7 = _mm256_slli_epi32(y7, 9);
z7 = _mm256_xor_si256(z7, y7);
r7 = _mm256_srli_epi32(r7, 23);
z7 = _mm256_xor_si256(z7, r7);
y3 = z1;
y3 = _mm256_add_epi32(y3, z2);
r3 = y3;
y3 = _mm256_slli_epi32(y3, 13);
z3 = _mm256_xor_si256(z3, y3);
r3 = _mm256_srli_epi32(r3, 19);
z3 = _mm256_xor_si256(z3, r3);
y4 = z6;
y4 = _mm256_add_epi32(y4, z7);
r4 = y4;
y4 = _mm256_slli_epi32(y4, 13);
z4 = _mm256_xor_si256(z4, y4);
r4 = _mm256_srli_epi32(r4, 19);
z4 = _mm256_xor_si256(z4, r4);
y0 = z2;
y0 = _mm256_add_epi32(y0, z3);
r0 = y0;
y0 = _mm256_slli_epi32(y0, 18);
z0 = _mm256_xor_si256(z0, y0);
r0 = _mm256_srli_epi32(r0, 14);
z0 = _mm256_xor_si256(z0, r0);
y5 = z7;
y5 = _mm256_add_epi32(y5, z4);
r5 = y5;
y5 = _mm256_slli_epi32(y5, 18);
z5 = _mm256_xor_si256(z5, y5);
r5 = _mm256_srli_epi32(r5, 14);
z5 = _mm256_xor_si256(z5, r5);
y11 = z9;
y11 = _mm256_add_epi32(y11, z10);
r11 = y11;
y11 = _mm256_slli_epi32(y11, 7);
z11 = _mm256_xor_si256(z11, y11);
r11 = _mm256_srli_epi32(r11, 25);
z11 = _mm256_xor_si256(z11, r11);
y12 = z14;
y12 = _mm256_add_epi32(y12, z15);
r12 = y12;
y12 = _mm256_slli_epi32(y12, 7);
z12 = _mm256_xor_si256(z12, y12);
r12 = _mm256_srli_epi32(r12, 25);
z12 = _mm256_xor_si256(z12, r12);
y8 = z10;
y8 = _mm256_add_epi32(y8, z11);
r8 = y8;
y8 = _mm256_slli_epi32(y8, 9);
z8 = _mm256_xor_si256(z8, y8);
r8 = _mm256_srli_epi32(r8, 23);
z8 = _mm256_xor_si256(z8, r8);
y13 = z15;
y13 = _mm256_add_epi32(y13, z12);
r13 = y13;
y13 = _mm256_slli_epi32(y13, 9);
z13 = _mm256_xor_si256(z13, y13);
r13 = _mm256_srli_epi32(r13, 23);
z13 = _mm256_xor_si256(z13, r13);
y9 = z11;
y9 = _mm256_add_epi32(y9, z8);
r9 = y9;
y9 = _mm256_slli_epi32(y9, 13);
z9 = _mm256_xor_si256(z9, y9);
r9 = _mm256_srli_epi32(r9, 19);
z9 = _mm256_xor_si256(z9, r9);
y14 = z12;
y14 = _mm256_add_epi32(y14, z13);
r14 = y14;
y14 = _mm256_slli_epi32(y14, 13);
z14 = _mm256_xor_si256(z14, y14);
r14 = _mm256_srli_epi32(r14, 19);
z14 = _mm256_xor_si256(z14, r14);
y10 = z8;
y10 = _mm256_add_epi32(y10, z9);
r10 = y10;
y10 = _mm256_slli_epi32(y10, 18);
z10 = _mm256_xor_si256(z10, y10);
r10 = _mm256_srli_epi32(r10, 14);
z10 = _mm256_xor_si256(z10, r10);
y15 = z13;
y15 = _mm256_add_epi32(y15, z14);
r15 = y15;
y15 = _mm256_slli_epi32(y15, 18);
z15 = _mm256_xor_si256(z15, y15);
r15 = _mm256_srli_epi32(r15, 14);
z15 = _mm256_xor_si256(z15, r15);
}
/* store data; this macro first transposes the data in registers, and then
* stores it to memory. much faster with icc. */
#define ONEQUAD_TRANSPOSE(A, B, C, D) \
{ \
__m128i t0, t1, t2, t3; \
z##A = _mm256_add_epi32(z##A, orig##A); \
z##B = _mm256_add_epi32(z##B, orig##B); \
z##C = _mm256_add_epi32(z##C, orig##C); \
z##D = _mm256_add_epi32(z##D, orig##D); \
y##A = _mm256_unpacklo_epi32(z##A, z##B); \
y##B = _mm256_unpacklo_epi32(z##C, z##D); \
y##C = _mm256_unpackhi_epi32(z##A, z##B); \
y##D = _mm256_unpackhi_epi32(z##C, z##D); \
z##A = _mm256_unpacklo_epi64(y##A, y##B); \
z##B = _mm256_unpackhi_epi64(y##A, y##B); \
z##C = _mm256_unpacklo_epi64(y##C, y##D); \
z##D = _mm256_unpackhi_epi64(y##C, y##D); \
t0 = _mm_xor_si128(_mm256_extracti128_si256(z##A, 0), \
_mm_loadu_si128((const __m128i*) (m + 0))); \
_mm_storeu_si128((__m128i*) (c + 0), t0); \
t1 = _mm_xor_si128(_mm256_extracti128_si256(z##B, 0), \
_mm_loadu_si128((const __m128i*) (m + 64))); \
_mm_storeu_si128((__m128i*) (c + 64), t1); \
t2 = _mm_xor_si128(_mm256_extracti128_si256(z##C, 0), \
_mm_loadu_si128((const __m128i*) (m + 128))); \
_mm_storeu_si128((__m128i*) (c + 128), t2); \
t3 = _mm_xor_si128(_mm256_extracti128_si256(z##D, 0), \
_mm_loadu_si128((const __m128i*) (m + 192))); \
_mm_storeu_si128((__m128i*) (c + 192), t3); \
t0 = _mm_xor_si128(_mm256_extracti128_si256(z##A, 1), \
_mm_loadu_si128((const __m128i*) (m + 256))); \
_mm_storeu_si128((__m128i*) (c + 256), t0); \
t1 = _mm_xor_si128(_mm256_extracti128_si256(z##B, 1), \
_mm_loadu_si128((const __m128i*) (m + 320))); \
_mm_storeu_si128((__m128i*) (c + 320), t1); \
t2 = _mm_xor_si128(_mm256_extracti128_si256(z##C, 1), \
_mm_loadu_si128((const __m128i*) (m + 384))); \
_mm_storeu_si128((__m128i*) (c + 384), t2); \
t3 = _mm_xor_si128(_mm256_extracti128_si256(z##D, 1), \
_mm_loadu_si128((const __m128i*) (m + 448))); \
_mm_storeu_si128((__m128i*) (c + 448), t3); \
}
#define ONEQUAD(A, B, C, D) ONEQUAD_TRANSPOSE(A, B, C, D)
#define ONEQUAD_UNPCK(A, B, C, D) \
{ \
z##A = _mm256_add_epi32(z##A, orig##A); \
z##B = _mm256_add_epi32(z##B, orig##B); \
z##C = _mm256_add_epi32(z##C, orig##C); \
z##D = _mm256_add_epi32(z##D, orig##D); \
y##A = _mm256_unpacklo_epi32(z##A, z##B); \
y##B = _mm256_unpacklo_epi32(z##C, z##D); \
y##C = _mm256_unpackhi_epi32(z##A, z##B); \
y##D = _mm256_unpackhi_epi32(z##C, z##D); \
z##A = _mm256_unpacklo_epi64(y##A, y##B); \
z##B = _mm256_unpackhi_epi64(y##A, y##B); \
z##C = _mm256_unpacklo_epi64(y##C, y##D); \
z##D = _mm256_unpackhi_epi64(y##C, y##D); \
}
#define ONEOCTO(A, B, C, D, A2, B2, C2, D2) \
{ \
ONEQUAD_UNPCK(A, B, C, D); \
ONEQUAD_UNPCK(A2, B2, C2, D2); \
y##A = _mm256_permute2x128_si256(z##A, z##A2, 0x20); \
y##A2 = _mm256_permute2x128_si256(z##A, z##A2, 0x31); \
y##B = _mm256_permute2x128_si256(z##B, z##B2, 0x20); \
y##B2 = _mm256_permute2x128_si256(z##B, z##B2, 0x31); \
y##C = _mm256_permute2x128_si256(z##C, z##C2, 0x20); \
y##C2 = _mm256_permute2x128_si256(z##C, z##C2, 0x31); \
y##D = _mm256_permute2x128_si256(z##D, z##D2, 0x20); \
y##D2 = _mm256_permute2x128_si256(z##D, z##D2, 0x31); \
y##A = _mm256_xor_si256(y##A, \
_mm256_loadu_si256((const __m256i*) (m + 0))); \
y##B = _mm256_xor_si256( \
y##B, _mm256_loadu_si256((const __m256i*) (m + 64))); \
y##C = _mm256_xor_si256( \
y##C, _mm256_loadu_si256((const __m256i*) (m + 128))); \
y##D = _mm256_xor_si256( \
y##D, _mm256_loadu_si256((const __m256i*) (m + 192))); \
y##A2 = _mm256_xor_si256( \
y##A2, _mm256_loadu_si256((const __m256i*) (m + 256))); \
y##B2 = _mm256_xor_si256( \
y##B2, _mm256_loadu_si256((const __m256i*) (m + 320))); \
y##C2 = _mm256_xor_si256( \
y##C2, _mm256_loadu_si256((const __m256i*) (m + 384))); \
y##D2 = _mm256_xor_si256( \
y##D2, _mm256_loadu_si256((const __m256i*) (m + 448))); \
_mm256_storeu_si256((__m256i*) (c + 0), y##A); \
_mm256_storeu_si256((__m256i*) (c + 64), y##B); \
_mm256_storeu_si256((__m256i*) (c + 128), y##C); \
_mm256_storeu_si256((__m256i*) (c + 192), y##D); \
_mm256_storeu_si256((__m256i*) (c + 256), y##A2); \
_mm256_storeu_si256((__m256i*) (c + 320), y##B2); \
_mm256_storeu_si256((__m256i*) (c + 384), y##C2); \
_mm256_storeu_si256((__m256i*) (c + 448), y##D2); \
}
ONEOCTO(0, 1, 2, 3, 4, 5, 6, 7);
m += 32;
c += 32;
ONEOCTO(8, 9, 10, 11, 12, 13, 14, 15);
m -= 32;
c -= 32;
#undef ONEQUAD
#undef ONEQUAD_TRANSPOSE
#undef ONEQUAD_UNPCK
#undef ONEOCTO
bytes -= 512;
c += 512;
m += 512;
}
}


@@ -35,11 +35,6 @@
#endif
#ifdef XMRIG_ALGO_ASTROBWT
# include "crypto/astrobwt/AstroBWT.h"
#endif
#define ADD_FN(algo) do { \
m_map[algo] = new cn_hash_fun_array{}; \
m_map[algo]->data[AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false, 0>; \
@@ -375,12 +370,6 @@ xmrig::CnHash::CnHash()
m_map[Algorithm::AR2_WRKZ]->data[AV_SINGLE_SOFT][Assembly::NONE] = argon2::single_hash<Algorithm::AR2_WRKZ>;
# endif
# ifdef XMRIG_ALGO_ASTROBWT
m_map[Algorithm::ASTROBWT_DERO_2] = new cn_hash_fun_array{};
m_map[Algorithm::ASTROBWT_DERO_2]->data[AV_SINGLE][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO_2>;
m_map[Algorithm::ASTROBWT_DERO_2]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO_2>;
# endif
# ifdef XMRIG_ALGO_GHOSTRIDER
ADD_FN(Algorithm::CN_GR_0);
ADD_FN(Algorithm::CN_GR_1);


@@ -432,23 +432,6 @@ const static uint8_t argon2_wrkz_test_out[256] = {
#endif
#ifdef XMRIG_ALGO_ASTROBWT
// "astrobwt/v2"
const static uint8_t astrobwt_dero_2_test_out[256] = {
0x48, 0x9E, 0xD2, 0x66, 0x14, 0x27, 0x98, 0x65, 0x03, 0xFB, 0x87, 0x25, 0xE1, 0xD3, 0x98, 0xDA,
0x27, 0xEE, 0x25, 0x3D, 0xB4, 0x37, 0x87, 0x98, 0xBF, 0x5A, 0x5C, 0x94, 0xEE, 0x0C, 0xE2, 0x2A,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
#endif
#ifdef XMRIG_ALGO_GHOSTRIDER
// "GhostRider"
const static uint8_t test_output_gr[256] = {


@@ -43,22 +43,22 @@ set(SOURCES
)
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_cubehash.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_echo.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_fugue.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_groestl.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_hamsi.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_keccak.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_shabal.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_shavite.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_simd.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_skein.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_whirlpool.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_cubehash.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_echo.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_fugue.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_groestl.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_hamsi.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_keccak.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_shabal.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_shavite.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_simd.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_skein.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
set_source_files_properties(sph_whirlpool.c PROPERTIES COMPILE_FLAGS_RELEASE "/O1 /Oi /Os")
elseif (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS "-Os")


@@ -28,13 +28,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
void randomx_set_huge_pages_jit(bool)
{
}
void randomx_set_optimized_dataset_init(int)
{
}