Include VS solution

And refactor code: tab, code style. and end of lines. Update the libcpuid lib.
2018-02-07 22:14:06 +01:00 · 2018-02-07 22:14:06 +01:00 · 86f0d9d944
commit 86f0d9d944
parent 98c151b190
106 changed files with 12665 additions and 6894 deletions
--- a/src/crypto/CryptoNight.cpp
+++ b/src/crypto/CryptoNight.cpp
@ -36,127 +36,143 @@
 #include "Options.h"


-void (*cryptonight_hash_ctx)(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = nullptr;
+void (*cryptonight_hash_ctx)(const void* input, size_t size, void* output, cryptonight_ctx* ctx) = nullptr;


-static void cryptonight_av1_aesni(const void *input, size_t size, void *output, struct cryptonight_ctx *ctx) {
+static void cryptonight_av1_aesni(const void* input, size_t size, void* output, struct cryptonight_ctx* ctx)
+{
 #   if !defined(XMRIG_ARMv7)
-    cryptonight_hash<0x80000, MEMORY, 0x1FFFF0, false>(input, size, output, ctx);
+	cryptonight_hash<0x80000, MEMORY, 0x1FFFF0, false>(input, size, output, ctx);
 #   endif
 }


-static void cryptonight_av2_aesni_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
+static void cryptonight_av2_aesni_double(const void* input, size_t size, void* output, cryptonight_ctx* ctx)
+{
 #   if !defined(XMRIG_ARMv7)
-    cryptonight_double_hash<0x80000, MEMORY, 0x1FFFF0, false>(input, size, output, ctx);
+	cryptonight_double_hash<0x80000, MEMORY, 0x1FFFF0, false>(input, size, output, ctx);
 #   endif
 }


-static void cryptonight_av3_softaes(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
-    cryptonight_hash<0x80000, MEMORY, 0x1FFFF0, true>(input, size, output, ctx);
+static void cryptonight_av3_softaes(const void* input, size_t size, void* output, cryptonight_ctx* ctx)
+{
+	cryptonight_hash<0x80000, MEMORY, 0x1FFFF0, true>(input, size, output, ctx);
 }


-static void cryptonight_av4_softaes_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
-    cryptonight_double_hash<0x80000, MEMORY, 0x1FFFF0, true>(input, size, output, ctx);
+static void cryptonight_av4_softaes_double(const void* input, size_t size, void* output, cryptonight_ctx* ctx)
+{
+	cryptonight_double_hash<0x80000, MEMORY, 0x1FFFF0, true>(input, size, output, ctx);
 }


 #ifndef XMRIG_NO_AEON
-static void cryptonight_lite_av1_aesni(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
-    #   if !defined(XMRIG_ARMv7)
-    cryptonight_hash<0x40000, MEMORY_LITE, 0xFFFF0, false>(input, size, output, ctx);
+static void cryptonight_lite_av1_aesni(const void* input, size_t size, void* output, cryptonight_ctx* ctx)
+{
+#   if !defined(XMRIG_ARMv7)
+	cryptonight_hash<0x40000, MEMORY_LITE, 0xFFFF0, false>(input, size, output, ctx);
 #endif
 }


-static void cryptonight_lite_av2_aesni_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
+static void cryptonight_lite_av2_aesni_double(const void* input, size_t size, void* output,
+        cryptonight_ctx* ctx)
+{
 #   if !defined(XMRIG_ARMv7)
-    cryptonight_double_hash<0x40000, MEMORY_LITE, 0xFFFF0, false>(input, size, output, ctx);
+	cryptonight_double_hash<0x40000, MEMORY_LITE, 0xFFFF0, false>(input, size, output, ctx);
 #   endif
 }


-static void cryptonight_lite_av3_softaes(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
-    cryptonight_hash<0x40000, MEMORY_LITE, 0xFFFF0, true>(input, size, output, ctx);
+static void cryptonight_lite_av3_softaes(const void* input, size_t size, void* output, cryptonight_ctx* ctx)
+{
+	cryptonight_hash<0x40000, MEMORY_LITE, 0xFFFF0, true>(input, size, output, ctx);
 }


-static void cryptonight_lite_av4_softaes_double(const void *input, size_t size, void *output, cryptonight_ctx *ctx) {
-    cryptonight_double_hash<0x40000, MEMORY_LITE, 0xFFFF0, true>(input, size, output, ctx);
+static void cryptonight_lite_av4_softaes_double(const void* input, size_t size, void* output,
+        cryptonight_ctx* ctx)
+{
+	cryptonight_double_hash<0x40000, MEMORY_LITE, 0xFFFF0, true>(input, size, output, ctx);
 }

-void (*cryptonight_variations[8])(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = {
-            cryptonight_av1_aesni,
-            cryptonight_av2_aesni_double,
-            cryptonight_av3_softaes,
-            cryptonight_av4_softaes_double,
-            cryptonight_lite_av1_aesni,
-            cryptonight_lite_av2_aesni_double,
-            cryptonight_lite_av3_softaes,
-            cryptonight_lite_av4_softaes_double
-        };
+void (*cryptonight_variations[8])(const void* input, size_t size, void* output, cryptonight_ctx* ctx) =
+{
+	cryptonight_av1_aesni,
+	cryptonight_av2_aesni_double,
+	cryptonight_av3_softaes,
+	cryptonight_av4_softaes_double,
+	cryptonight_lite_av1_aesni,
+	cryptonight_lite_av2_aesni_double,
+	cryptonight_lite_av3_softaes,
+	cryptonight_lite_av4_softaes_double
+};
 #else
-void (*cryptonight_variations[4])(const void *input, size_t size, void *output, cryptonight_ctx *ctx) = {
-            cryptonight_av1_aesni,
-            cryptonight_av2_aesni_double,
-            cryptonight_av3_softaes,
-            cryptonight_av4_softaes_double
-        };
+void (*cryptonight_variations[4])(const void* input, size_t size, void* output, cryptonight_ctx* ctx) =
+{
+	cryptonight_av1_aesni,
+	cryptonight_av2_aesni_double,
+	cryptonight_av3_softaes,
+	cryptonight_av4_softaes_double
+};
 #endif


-bool CryptoNight::hash(const Job &job, JobResult &result, cryptonight_ctx *ctx)
+bool CryptoNight::hash(const Job & job, JobResult & result, cryptonight_ctx* ctx)
 {
-    cryptonight_hash_ctx(job.blob(), job.size(), result.result, ctx);
+	cryptonight_hash_ctx(job.blob(), job.size(), result.result, ctx);

-    return *reinterpret_cast<uint64_t*>(result.result + 24) < job.target();
+	return *reinterpret_cast<uint64_t*>(result.result + 24) < job.target();
 }


 bool CryptoNight::init(int algo, int variant)
 {
-    if (variant < 1 || variant > 4) {
-        return false;
-    }
+	if(variant < 1 || variant > 4)
+	{
+		return false;
+	}

 #   ifndef XMRIG_NO_AEON
-    const int index = algo == Options::ALGO_CRYPTONIGHT_LITE ? (variant + 3) : (variant - 1);
+	const int index = algo == Options::ALGO_CRYPTONIGHT_LITE ? (variant + 3) : (variant - 1);
 #   else
-    const int index = variant - 1;
+	const int index = variant - 1;
 #   endif

-    cryptonight_hash_ctx = cryptonight_variations[index];
+	cryptonight_hash_ctx = cryptonight_variations[index];

-    return selfTest(algo);
+	return selfTest(algo);
 }


-void CryptoNight::hash(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx *ctx)
+void CryptoNight::hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx* ctx)
 {
-    cryptonight_hash_ctx(input, size, output, ctx);
+	cryptonight_hash_ctx(input, size, output, ctx);
 }


-bool CryptoNight::selfTest(int algo) {
-    if (cryptonight_hash_ctx == nullptr) {
-        return false;
-    }
+bool CryptoNight::selfTest(int algo)
+{
+	if(cryptonight_hash_ctx == nullptr)
+	{
+		return false;
+	}

-    char output[64];
+	char output[64];

-    struct cryptonight_ctx *ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
-    ctx->memory = (uint8_t *) _mm_malloc(MEMORY * 2, 16);
+	struct cryptonight_ctx* ctx = (struct cryptonight_ctx*) _mm_malloc(sizeof(struct cryptonight_ctx), 16);
+	ctx->memory = (uint8_t*) _mm_malloc(MEMORY * 2, 16);

-    cryptonight_hash_ctx(test_input, 76, output, ctx);
+	cryptonight_hash_ctx(test_input, 76, output, ctx);

-    _mm_free(ctx->memory);
-    _mm_free(ctx);
+	_mm_free(ctx->memory);
+	_mm_free(ctx);

 #   ifndef XMRIG_NO_AEON
-    return memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0, (Options::i()->doubleHash() ? 64 : 32)) == 0;
+	return memcmp(output, algo == Options::ALGO_CRYPTONIGHT_LITE ? test_output1 : test_output0,
+	              (Options::i()->doubleHash() ? 64 : 32)) == 0;
 #   else
-    return memcmp(output, test_output0, (Options::i()->doubleHash() ? 64 : 32)) == 0;
+	return memcmp(output, test_output0, (Options::i()->doubleHash() ? 64 : 32)) == 0;
 #   endif
 }
--- a/src/crypto/CryptoNight.h
+++ b/src/crypto/CryptoNight.h
@ -36,10 +36,11 @@
 #define MEMORY_LITE 1048576 /* 1 MiB */


-struct cryptonight_ctx {
-    VAR_ALIGN(16, uint8_t state0[200]);
-    VAR_ALIGN(16, uint8_t state1[200]);
-    VAR_ALIGN(16, uint8_t* memory);
+struct cryptonight_ctx
+{
+	VAR_ALIGN(16, uint8_t state0[200]);
+	VAR_ALIGN(16, uint8_t state1[200]);
+	VAR_ALIGN(16, uint8_t* memory);
 };


@ -50,12 +51,12 @@ class JobResult;
 class CryptoNight
 {
 public:
-    static bool hash(const Job &job, JobResult &result, cryptonight_ctx *ctx);
-    static bool init(int algo, int variant);
-    static void hash(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx *ctx);
+	static bool hash(const Job & job, JobResult & result, cryptonight_ctx* ctx);
+	static bool init(int algo, int variant);
+	static void hash(const uint8_t* input, size_t size, uint8_t* output, cryptonight_ctx* ctx);

 private:
-    static bool selfTest(int algo);
+	static bool selfTest(int algo);
 };

 #endif /* __CRYPTONIGHT_H__ */
--- a/src/crypto/CryptoNight_arm.h
+++ b/src/crypto/CryptoNight_arm.h
@ -47,39 +47,43 @@ extern "C"
 }


-static inline void do_blake_hash(const void* input, size_t len, char* output) {
-    blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len);
+static inline void do_blake_hash(const void* input, size_t len, char* output)
+{
+	blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len);
 }


-static inline void do_groestl_hash(const void* input, size_t len, char* output) {
-    groestl(static_cast<const uint8_t*>(input), len * 8, reinterpret_cast<uint8_t*>(output));
+static inline void do_groestl_hash(const void* input, size_t len, char* output)
+{
+	groestl(static_cast<const uint8_t*>(input), len * 8, reinterpret_cast<uint8_t*>(output));
 }


-static inline void do_jh_hash(const void* input, size_t len, char* output) {
-    jh_hash(32 * 8, static_cast<const uint8_t*>(input), 8 * len, reinterpret_cast<uint8_t*>(output));
+static inline void do_jh_hash(const void* input, size_t len, char* output)
+{
+	jh_hash(32 * 8, static_cast<const uint8_t*>(input), 8 * len, reinterpret_cast<uint8_t*>(output));
 }


-static inline void do_skein_hash(const void* input, size_t len, char* output) {
-    xmr_skein(static_cast<const uint8_t*>(input), reinterpret_cast<uint8_t*>(output));
+static inline void do_skein_hash(const void* input, size_t len, char* output)
+{
+	xmr_skein(static_cast<const uint8_t*>(input), reinterpret_cast<uint8_t*>(output));
 }


-void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
+void (* const extra_hashes[4])(const void*, size_t, char*) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};


 static inline __attribute__((always_inline)) __m128i _mm_set_epi64x(const uint64_t a, const uint64_t b)
 {
-    return vcombine_u64(vcreate_u64(b), vcreate_u64(a));
+	return vcombine_u64(vcreate_u64(b), vcreate_u64(a));
 }


 /* this one was not implemented yet so here it is */
 static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i a)
 {
-    return vgetq_lane_u64(a, 0);
+	return vgetq_lane_u64(a, 0);
 }


@ -89,34 +93,35 @@ static inline __attribute__((always_inline)) uint64_t _mm_cvtsi128_si64(__m128i
 #if defined(XMRIG_ARMv8)
 static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
 {
-    unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
-    *hi = r >> 64;
-    return (uint64_t) r;
+	unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
+	*hi = r >> 64;
+	return (uint64_t) r;
 }
 #else
-static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
-    // multiplier   = ab = a * 2^32 + b
-    // multiplicand = cd = c * 2^32 + d
-    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
-    uint64_t a = multiplier >> 32;
-    uint64_t b = multiplier & 0xFFFFFFFF;
-    uint64_t c = multiplicand >> 32;
-    uint64_t d = multiplicand & 0xFFFFFFFF;
+static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi)
+{
+	// multiplier   = ab = a * 2^32 + b
+	// multiplicand = cd = c * 2^32 + d
+	// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
+	uint64_t a = multiplier >> 32;
+	uint64_t b = multiplier & 0xFFFFFFFF;
+	uint64_t c = multiplicand >> 32;
+	uint64_t d = multiplicand & 0xFFFFFFFF;

-    //uint64_t ac = a * c;
-    uint64_t ad = a * d;
-    //uint64_t bc = b * c;
-    uint64_t bd = b * d;
+	//uint64_t ac = a * c;
+	uint64_t ad = a * d;
+	//uint64_t bc = b * c;
+	uint64_t bd = b * d;

-    uint64_t adbc = ad + (b * c);
-    uint64_t adbc_carry = adbc < ad ? 1 : 0;
+	uint64_t adbc = ad + (b * c);
+	uint64_t adbc_carry = adbc < ad ? 1 : 0;

-    // multiplier * multiplicand = product_hi * 2^64 + product_lo
-    uint64_t product_lo = bd + (adbc << 32);
-    uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
-    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
+	// multiplier * multiplicand = product_hi * 2^64 + product_lo
+	uint64_t product_lo = bd + (adbc << 32);
+	uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
+	*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;

-    return product_lo;
+	return product_lo;
 }
 #endif

@ -125,367 +130,386 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
 // sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
 static inline __m128i sl_xor(__m128i tmp1)
 {
-    __m128i tmp4;
-    tmp4 = _mm_slli_si128(tmp1, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    tmp4 = _mm_slli_si128(tmp4, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    tmp4 = _mm_slli_si128(tmp4, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    return tmp1;
+	__m128i tmp4;
+	tmp4 = _mm_slli_si128(tmp1, 0x04);
+	tmp1 = _mm_xor_si128(tmp1, tmp4);
+	tmp4 = _mm_slli_si128(tmp4, 0x04);
+	tmp1 = _mm_xor_si128(tmp1, tmp4);
+	tmp4 = _mm_slli_si128(tmp4, 0x04);
+	tmp1 = _mm_xor_si128(tmp1, tmp4);
+	return tmp1;
 }


 template<uint8_t rcon>
 static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
 {
-//    __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
-//    xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
-//    *xout0 = sl_xor(*xout0);
-//    *xout0 = _mm_xor_si128(*xout0, xout1);
-//    xout1  = _mm_aeskeygenassist_si128(*xout0, 0x00);
-//    xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
-//    *xout2 = sl_xor(*xout2);
-//    *xout2 = _mm_xor_si128(*xout2, xout1);
+	//    __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
+	//    xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
+	//    *xout0 = sl_xor(*xout0);
+	//    *xout0 = _mm_xor_si128(*xout0, xout1);
+	//    xout1  = _mm_aeskeygenassist_si128(*xout0, 0x00);
+	//    xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
+	//    *xout2 = sl_xor(*xout2);
+	//    *xout2 = _mm_xor_si128(*xout2, xout1);
 }


 template<uint8_t rcon>
 static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2)
 {
-    __m128i xout1 = soft_aeskeygenassist<rcon>(*xout2);
-    xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
-    *xout0 = sl_xor(*xout0);
-    *xout0 = _mm_xor_si128(*xout0, xout1);
-    xout1  = soft_aeskeygenassist<0x00>(*xout0);
-    xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
-    *xout2 = sl_xor(*xout2);
-    *xout2 = _mm_xor_si128(*xout2, xout1);
+	__m128i xout1 = soft_aeskeygenassist<rcon>(*xout2);
+	xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
+	*xout0 = sl_xor(*xout0);
+	*xout0 = _mm_xor_si128(*xout0, xout1);
+	xout1  = soft_aeskeygenassist<0x00>(*xout0);
+	xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
+	*xout2 = sl_xor(*xout2);
+	*xout2 = _mm_xor_si128(*xout2, xout1);
 }


 template<bool SOFT_AES>
-static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
+static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3,
+                              __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
 {
-    __m128i xout0 = _mm_load_si128(memory);
-    __m128i xout2 = _mm_load_si128(memory + 1);
-    *k0 = xout0;
-    *k1 = xout2;
+	__m128i xout0 = _mm_load_si128(memory);
+	__m128i xout2 = _mm_load_si128(memory + 1);
+	*k0 = xout0;
+	*k1 = xout2;

-    SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : soft_aes_genkey_sub<0x01>(&xout0, &xout2);
-    *k2 = xout0;
-    *k3 = xout2;
+	SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : soft_aes_genkey_sub<0x01>(&xout0, &xout2);
+	*k2 = xout0;
+	*k3 = xout2;

-    SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : soft_aes_genkey_sub<0x02>(&xout0, &xout2);
-    *k4 = xout0;
-    *k5 = xout2;
+	SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : soft_aes_genkey_sub<0x02>(&xout0, &xout2);
+	*k4 = xout0;
+	*k5 = xout2;

-    SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : soft_aes_genkey_sub<0x04>(&xout0, &xout2);
-    *k6 = xout0;
-    *k7 = xout2;
+	SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : soft_aes_genkey_sub<0x04>(&xout0, &xout2);
+	*k6 = xout0;
+	*k7 = xout2;

-    SOFT_AES ? soft_aes_genkey_sub<0x08>(&xout0, &xout2) : soft_aes_genkey_sub<0x08>(&xout0, &xout2);
-    *k8 = xout0;
-    *k9 = xout2;
+	SOFT_AES ? soft_aes_genkey_sub<0x08>(&xout0, &xout2) : soft_aes_genkey_sub<0x08>(&xout0, &xout2);
+	*k8 = xout0;
+	*k9 = xout2;
 }


 template<bool SOFT_AES>
-static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
+static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4,
+                             __m128i* x5, __m128i* x6, __m128i* x7)
 {
-    if (SOFT_AES) {
-        *x0 = soft_aesenc(*x0, key);
-        *x1 = soft_aesenc(*x1, key);
-        *x2 = soft_aesenc(*x2, key);
-        *x3 = soft_aesenc(*x3, key);
-        *x4 = soft_aesenc(*x4, key);
-        *x5 = soft_aesenc(*x5, key);
-        *x6 = soft_aesenc(*x6, key);
-        *x7 = soft_aesenc(*x7, key);
-    }
+	if(SOFT_AES)
+	{
+		*x0 = soft_aesenc(*x0, key);
+		*x1 = soft_aesenc(*x1, key);
+		*x2 = soft_aesenc(*x2, key);
+		*x3 = soft_aesenc(*x3, key);
+		*x4 = soft_aesenc(*x4, key);
+		*x5 = soft_aesenc(*x5, key);
+		*x6 = soft_aesenc(*x6, key);
+		*x7 = soft_aesenc(*x7, key);
+	}
 #   ifndef XMRIG_ARMv7
-    else {
-        *x0 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x0), key));
-        *x1 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x1), key));
-        *x2 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x2), key));
-        *x3 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x3), key));
-        *x4 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x4), key));
-        *x5 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x5), key));
-        *x6 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x6), key));
-        *x7 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t *) x7), key));
-    }
+	else
+	{
+		*x0 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x0), key));
+		*x1 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x1), key));
+		*x2 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x2), key));
+		*x3 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x3), key));
+		*x4 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x4), key));
+		*x5 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x5), key));
+		*x6 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x6), key));
+		*x7 = vaesmcq_u8(vaeseq_u8(*((uint8x16_t*) x7), key));
+	}
 #   endif
 }


 template<size_t MEM, bool SOFT_AES>
-static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
+static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
 {
-    __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
-    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
+	__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
+	__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;

-    aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
+	aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

-    xin0 = _mm_load_si128(input + 4);
-    xin1 = _mm_load_si128(input + 5);
-    xin2 = _mm_load_si128(input + 6);
-    xin3 = _mm_load_si128(input + 7);
-    xin4 = _mm_load_si128(input + 8);
-    xin5 = _mm_load_si128(input + 9);
-    xin6 = _mm_load_si128(input + 10);
-    xin7 = _mm_load_si128(input + 11);
+	xin0 = _mm_load_si128(input + 4);
+	xin1 = _mm_load_si128(input + 5);
+	xin2 = _mm_load_si128(input + 6);
+	xin3 = _mm_load_si128(input + 7);
+	xin4 = _mm_load_si128(input + 8);
+	xin5 = _mm_load_si128(input + 9);
+	xin6 = _mm_load_si128(input + 10);
+	xin7 = _mm_load_si128(input + 11);

-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
-        if (!SOFT_AES) {
-            aes_round<SOFT_AES>(_mm_setzero_si128(), &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        }
+	for(size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
+	{
+		if(!SOFT_AES)
+		{
+			aes_round<SOFT_AES>(_mm_setzero_si128(), &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		}

-        aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);

-        if (!SOFT_AES) {
-            xin0 ^= k9;
-            xin1 ^= k9;
-            xin2 ^= k9;
-            xin3 ^= k9;
-            xin4 ^= k9;
-            xin5 ^= k9;
-            xin6 ^= k9;
-            xin7 ^= k9;
-        }
-        else {
-            aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        }
+		if(!SOFT_AES)
+		{
+			xin0 ^= k9;
+			xin1 ^= k9;
+			xin2 ^= k9;
+			xin3 ^= k9;
+			xin4 ^= k9;
+			xin5 ^= k9;
+			xin6 ^= k9;
+			xin7 ^= k9;
+		}
+		else
+		{
+			aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		}

-        _mm_store_si128(output + i + 0, xin0);
-        _mm_store_si128(output + i + 1, xin1);
-        _mm_store_si128(output + i + 2, xin2);
-        _mm_store_si128(output + i + 3, xin3);
-        _mm_store_si128(output + i + 4, xin4);
-        _mm_store_si128(output + i + 5, xin5);
-        _mm_store_si128(output + i + 6, xin6);
-        _mm_store_si128(output + i + 7, xin7);
-    }
+		_mm_store_si128(output + i + 0, xin0);
+		_mm_store_si128(output + i + 1, xin1);
+		_mm_store_si128(output + i + 2, xin2);
+		_mm_store_si128(output + i + 3, xin3);
+		_mm_store_si128(output + i + 4, xin4);
+		_mm_store_si128(output + i + 5, xin5);
+		_mm_store_si128(output + i + 6, xin6);
+		_mm_store_si128(output + i + 7, xin7);
+	}
 }


 template<size_t MEM, bool SOFT_AES>
-static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
+static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
 {
-    __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
-    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
+	__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
+	__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;

-    aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
+	aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

-    xout0 = _mm_load_si128(output + 4);
-    xout1 = _mm_load_si128(output + 5);
-    xout2 = _mm_load_si128(output + 6);
-    xout3 = _mm_load_si128(output + 7);
-    xout4 = _mm_load_si128(output + 8);
-    xout5 = _mm_load_si128(output + 9);
-    xout6 = _mm_load_si128(output + 10);
-    xout7 = _mm_load_si128(output + 11);
+	xout0 = _mm_load_si128(output + 4);
+	xout1 = _mm_load_si128(output + 5);
+	xout2 = _mm_load_si128(output + 6);
+	xout3 = _mm_load_si128(output + 7);
+	xout4 = _mm_load_si128(output + 8);
+	xout5 = _mm_load_si128(output + 9);
+	xout6 = _mm_load_si128(output + 10);
+	xout7 = _mm_load_si128(output + 11);

-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
-    {
-        xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
-        xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
-        xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
-        xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
-        xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
-        xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
-        xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
-        xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
+	for(size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
+	{
+		xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
+		xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
+		xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
+		xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
+		xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
+		xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
+		xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
+		xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);

-        if (!SOFT_AES) {
-            aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        }
+		if(!SOFT_AES)
+		{
+			aes_round<SOFT_AES>(_mm_setzero_si128(), &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		}

-        aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);

-        if (!SOFT_AES) {
-            xout0 ^= k9;
-            xout1 ^= k9;
-            xout2 ^= k9;
-            xout3 ^= k9;
-            xout4 ^= k9;
-            xout5 ^= k9;
-            xout6 ^= k9;
-            xout7 ^= k9;
-        }
-        else {
-            aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        }
-    }
+		if(!SOFT_AES)
+		{
+			xout0 ^= k9;
+			xout1 ^= k9;
+			xout2 ^= k9;
+			xout3 ^= k9;
+			xout4 ^= k9;
+			xout5 ^= k9;
+			xout6 ^= k9;
+			xout7 ^= k9;
+		}
+		else
+		{
+			aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		}
+	}

-    _mm_store_si128(output + 4, xout0);
-    _mm_store_si128(output + 5, xout1);
-    _mm_store_si128(output + 6, xout2);
-    _mm_store_si128(output + 7, xout3);
-    _mm_store_si128(output + 8, xout4);
-    _mm_store_si128(output + 9, xout5);
-    _mm_store_si128(output + 10, xout6);
-    _mm_store_si128(output + 11, xout7);
+	_mm_store_si128(output + 4, xout0);
+	_mm_store_si128(output + 5, xout1);
+	_mm_store_si128(output + 6, xout2);
+	_mm_store_si128(output + 7, xout3);
+	_mm_store_si128(output + 8, xout4);
+	_mm_store_si128(output + 9, xout5);
+	_mm_store_si128(output + 10, xout6);
+	_mm_store_si128(output + 11, xout7);
 }


 template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
-inline void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_hash(const void* __restrict__ input, size_t size, void* __restrict__ output,
+                             cryptonight_ctx* __restrict__ ctx)
 {
-    keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state0, 200);
+	keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state0, 200);

-    cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
+	cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);

-    const uint8_t* l0 = ctx->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
+	const uint8_t* l0 = ctx->memory;
+	uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);

-    uint64_t al0 = h0[0] ^ h0[4];
-    uint64_t ah0 = h0[1] ^ h0[5];
-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+	uint64_t al0 = h0[0] ^ h0[4];
+	uint64_t ah0 = h0[1] ^ h0[5];
+	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);

-    uint64_t idx0 = h0[0] ^ h0[4];
+	uint64_t idx0 = h0[0] ^ h0[4];

-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
+	for(size_t i = 0; i < ITERATIONS; i++)
+	{
+		__m128i cx = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);

-        if (SOFT_AES) {
-            cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
-        }
-        else {
+		if(SOFT_AES)
+		{
+			cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
+		}
+		else
+		{
 #           ifndef XMRIG_ARMv7
-            cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
+			cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
 #           endif
-        }
+		}

-        _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-        idx0 = EXTRACT64(cx);
-        bx0 = cx;
+		_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+		idx0 = EXTRACT64(cx);
+		bx0 = cx;

-        uint64_t hi, lo, cl, ch;
-        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
-        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-        lo = __umul128(idx0, cl, &hi);
+		uint64_t hi, lo, cl, ch;
+		cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+		ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+		lo = __umul128(idx0, cl, &hi);

-        al0 += hi;
-        ah0 += lo;
+		al0 += hi;
+		ah0 += lo;

-        ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
-        ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
+		((uint64_t*)&l0[idx0 & MASK])[0] = al0;
+		((uint64_t*)&l0[idx0 & MASK])[1] = ah0;

-        ah0 ^= ch;
-        al0 ^= cl;
-        idx0 = al0;
-    }
+		ah0 ^= ch;
+		al0 ^= cl;
+		idx0 = al0;
+	}

-    cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
+	cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);

-    keccakf(h0, 24);
-    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
+	keccakf(h0, 24);
+	extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
 }


 template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
-inline void cryptonight_double_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_double_hash(const void* __restrict__ input, size_t size, void* __restrict__ output,
+                                    struct cryptonight_ctx* __restrict__ ctx)
 {
-    keccak((const uint8_t *) input,        (int) size, ctx->state0, 200);
-    keccak((const uint8_t *) input + size, (int) size, ctx->state1, 200);
+	keccak((const uint8_t*) input, (int) size, ctx->state0, 200);
+	keccak((const uint8_t*) input + size, (int) size, ctx->state1, 200);

-    const uint8_t* l0 = ctx->memory;
-    const uint8_t* l1 = ctx->memory + MEM;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
+	const uint8_t* l0 = ctx->memory;
+	const uint8_t* l1 = ctx->memory + MEM;
+	uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
+	uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);

-    cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
-    cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
+	cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
+	cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);

-    uint64_t al0 = h0[0] ^ h0[4];
-    uint64_t al1 = h1[0] ^ h1[4];
-    uint64_t ah0 = h0[1] ^ h0[5];
-    uint64_t ah1 = h1[1] ^ h1[5];
+	uint64_t al0 = h0[0] ^ h0[4];
+	uint64_t al1 = h1[0] ^ h1[4];
+	uint64_t ah0 = h0[1] ^ h0[5];
+	uint64_t ah1 = h1[1] ^ h1[5];

-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+	__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);

-    uint64_t idx0 = h0[0] ^ h0[4];
-    uint64_t idx1 = h1[0] ^ h1[4];
+	uint64_t idx0 = h0[0] ^ h0[4];
+	uint64_t idx1 = h1[0] ^ h1[4];

-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
+	for(size_t i = 0; i < ITERATIONS; i++)
+	{
+		__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+		__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);

-        if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
-        }
-        else {
+		if(SOFT_AES)
+		{
+			cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
+			cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
+		}
+		else
+		{
 #           ifndef XMRIG_ARMv7
-            cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
-            cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
+			cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
+			cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
 #           endif
-        }
+		}

-        _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
-        _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
+		_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
+		_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));

-        idx0 = EXTRACT64(cx0);
-        idx1 = EXTRACT64(cx1);
+		idx0 = EXTRACT64(cx0);
+		idx1 = EXTRACT64(cx1);

-        bx0 = cx0;
-        bx1 = cx1;
+		bx0 = cx0;
+		bx1 = cx1;

-        uint64_t hi, lo, cl, ch;
-        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
-        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-        lo = __umul128(idx0, cl, &hi);
+		uint64_t hi, lo, cl, ch;
+		cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+		ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+		lo = __umul128(idx0, cl, &hi);

-        al0 += hi;
-        ah0 += lo;
+		al0 += hi;
+		ah0 += lo;

-        ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
-        ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+		((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+		((uint64_t*) &l0[idx0 & MASK])[1] = ah0;

-        ah0 ^= ch;
-        al0 ^= cl;
-        idx0 = al0;
+		ah0 ^= ch;
+		al0 ^= cl;
+		idx0 = al0;

-        cl = ((uint64_t*) &l1[idx1 & MASK])[0];
-        ch = ((uint64_t*) &l1[idx1 & MASK])[1];
-        lo = __umul128(idx1, cl, &hi);
+		cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+		ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+		lo = __umul128(idx1, cl, &hi);

-        al1 += hi;
-        ah1 += lo;
+		al1 += hi;
+		ah1 += lo;

-        ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
-        ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+		((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+		((uint64_t*) &l1[idx1 & MASK])[1] = ah1;

-        ah1 ^= ch;
-        al1 ^= cl;
-        idx1 = al1;
-    }
+		ah1 ^= ch;
+		al1 ^= cl;
+		idx1 = al1;
+	}

-    cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
-    cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
+	cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
+	cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);

-    keccakf(h0, 24);
-    keccakf(h1, 24);
+	keccakf(h0, 24);
+	keccakf(h1, 24);

-    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
-    extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, static_cast<char*>(output) + 32);
+	extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
+	extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, static_cast<char*>(output) + 32);
 }

 #endif /* __CRYPTONIGHT_ARM_H__ */
--- a/src/crypto/CryptoNight_test.h
+++ b/src/crypto/CryptoNight_test.h
@ -25,34 +25,37 @@
 #define __CRYPTONIGHT_TEST_H__


-const static uint8_t test_input[152] = {
-    0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
-    0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
-    0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
-    0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
-    0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
-    0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
-    0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
-    0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
-    0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
-    0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
+const static uint8_t test_input[152] =
+{
+	0x01, 0x00, 0xFB, 0x8E, 0x8A, 0xC8, 0x05, 0x89, 0x93, 0x23, 0x37, 0x1B, 0xB7, 0x90, 0xDB, 0x19,
+	0x21, 0x8A, 0xFD, 0x8D, 0xB8, 0xE3, 0x75, 0x5D, 0x8B, 0x90, 0xF3, 0x9B, 0x3D, 0x55, 0x06, 0xA9,
+	0xAB, 0xCE, 0x4F, 0xA9, 0x12, 0x24, 0x45, 0x00, 0x00, 0x00, 0x00, 0xEE, 0x81, 0x46, 0xD4, 0x9F,
+	0xA9, 0x3E, 0xE7, 0x24, 0xDE, 0xB5, 0x7D, 0x12, 0xCB, 0xC6, 0xC6, 0xF3, 0xB9, 0x24, 0xD9, 0x46,
+	0x12, 0x7C, 0x7A, 0x97, 0x41, 0x8F, 0x93, 0x48, 0x82, 0x8F, 0x0F, 0x02,
+	0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
+	0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
+	0x30, 0x9A, 0xCE, 0x19, 0x23, 0xA0, 0x96, 0x4B, 0x00, 0x00, 0x00, 0x08, 0xBA, 0x93, 0x9A, 0x62,
+	0x72, 0x4C, 0x0D, 0x75, 0x81, 0xFC, 0xE5, 0x76, 0x1E, 0x9D, 0x8A, 0x0E, 0x6A, 0x1C, 0x3F, 0x92,
+	0x4F, 0xDD, 0x84, 0x93, 0xD1, 0x11, 0x56, 0x49, 0xC0, 0x5E, 0xB6, 0x01
 };


-const static uint8_t test_output0[64] = {
-    0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
-    0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
-    0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
-    0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
+const static uint8_t test_output0[64] =
+{
+	0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
+	0x96, 0xB6, 0x1C, 0x8A, 0xE9, 0x82, 0xF6, 0x1A, 0x90, 0x16, 0x0F, 0x4E, 0x52, 0x82, 0x8A, 0x7F,
+	0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
+	0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00
 };


 #ifndef XMRIG_NO_AEON
-const static uint8_t test_output1[64] = {
-    0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
-    0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
-    0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
-    0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
+const static uint8_t test_output1[64] =
+{
+	0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
+	0x75, 0xF2, 0x1D, 0x05, 0x3C, 0x8C, 0xE5, 0xB3, 0xAF, 0x10, 0x5A, 0x57, 0x71, 0x3E, 0x21, 0xDD,
+	0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
+	0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
 };
 #endif

--- a/src/crypto/CryptoNight_x86.h
+++ b/src/crypto/CryptoNight_x86.h
@ -47,27 +47,31 @@ extern "C"
 }


-static inline void do_blake_hash(const void* input, size_t len, char* output) {
-    blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len);
+static inline void do_blake_hash(const void* input, size_t len, char* output)
+{
+	blake256_hash(reinterpret_cast<uint8_t*>(output), static_cast<const uint8_t*>(input), len);
 }


-static inline void do_groestl_hash(const void* input, size_t len, char* output) {
-    groestl(static_cast<const uint8_t*>(input), len * 8, reinterpret_cast<uint8_t*>(output));
+static inline void do_groestl_hash(const void* input, size_t len, char* output)
+{
+	groestl(static_cast<const uint8_t*>(input), len * 8, reinterpret_cast<uint8_t*>(output));
 }


-static inline void do_jh_hash(const void* input, size_t len, char* output) {
-    jh_hash(32 * 8, static_cast<const uint8_t*>(input), 8 * len, reinterpret_cast<uint8_t*>(output));
+static inline void do_jh_hash(const void* input, size_t len, char* output)
+{
+	jh_hash(32 * 8, static_cast<const uint8_t*>(input), 8 * len, reinterpret_cast<uint8_t*>(output));
 }


-static inline void do_skein_hash(const void* input, size_t len, char* output) {
-    xmr_skein(static_cast<const uint8_t*>(input), reinterpret_cast<uint8_t*>(output));
+static inline void do_skein_hash(const void* input, size_t len, char* output)
+{
+	xmr_skein(static_cast<const uint8_t*>(input), reinterpret_cast<uint8_t*>(output));
 }


-void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};
+void (* const extra_hashes[4])(const void*, size_t, char*) = {do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash};



@ -77,45 +81,46 @@ void (* const extra_hashes[4])(const void *, size_t, char *) = {do_blake_hash, d
 #   ifdef __GNUC__
 static inline uint64_t __umul128(uint64_t a, uint64_t b, uint64_t* hi)
 {
-    unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
-    *hi = r >> 64;
-    return (uint64_t) r;
+	unsigned __int128 r = (unsigned __int128) a * (unsigned __int128) b;
+	*hi = r >> 64;
+	return (uint64_t) r;
 }
 #   else
-    #define __umul128 _umul128
+#define __umul128 _umul128
 #   endif
 #elif defined(__i386__) || defined(_M_IX86)
 #   define HI32(X) \
-    _mm_srli_si128((X), 4)
+	_mm_srli_si128((X), 4)


 #   define EXTRACT64(X) \
-    ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
-    ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))
+	((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \
+	 ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32))

-static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t *product_hi) {
-    // multiplier   = ab = a * 2^32 + b
-    // multiplicand = cd = c * 2^32 + d
-    // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
-    uint64_t a = multiplier >> 32;
-    uint64_t b = multiplier & 0xFFFFFFFF;
-    uint64_t c = multiplicand >> 32;
-    uint64_t d = multiplicand & 0xFFFFFFFF;
+static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi)
+{
+	// multiplier   = ab = a * 2^32 + b
+	// multiplicand = cd = c * 2^32 + d
+	// ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d
+	uint64_t a = multiplier >> 32;
+	uint64_t b = multiplier & 0xFFFFFFFF;
+	uint64_t c = multiplicand >> 32;
+	uint64_t d = multiplicand & 0xFFFFFFFF;

-    //uint64_t ac = a * c;
-    uint64_t ad = a * d;
-    //uint64_t bc = b * c;
-    uint64_t bd = b * d;
+	//uint64_t ac = a * c;
+	uint64_t ad = a * d;
+	//uint64_t bc = b * c;
+	uint64_t bd = b * d;

-    uint64_t adbc = ad + (b * c);
-    uint64_t adbc_carry = adbc < ad ? 1 : 0;
+	uint64_t adbc = ad + (b * c);
+	uint64_t adbc_carry = adbc < ad ? 1 : 0;

-    // multiplier * multiplicand = product_hi * 2^64 + product_lo
-    uint64_t product_lo = bd + (adbc << 32);
-    uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
-    *product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;
+	// multiplier * multiplicand = product_hi * 2^64 + product_lo
+	uint64_t product_lo = bd + (adbc << 32);
+	uint64_t product_lo_carry = product_lo < bd ? 1 : 0;
+	*product_hi = (a * c) + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry;

-    return product_lo;
+	return product_lo;
 }
 #endif

@ -124,328 +129,341 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
 // sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
 static inline __m128i sl_xor(__m128i tmp1)
 {
-    __m128i tmp4;
-    tmp4 = _mm_slli_si128(tmp1, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    tmp4 = _mm_slli_si128(tmp4, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    tmp4 = _mm_slli_si128(tmp4, 0x04);
-    tmp1 = _mm_xor_si128(tmp1, tmp4);
-    return tmp1;
+	__m128i tmp4;
+	tmp4 = _mm_slli_si128(tmp1, 0x04);
+	tmp1 = _mm_xor_si128(tmp1, tmp4);
+	tmp4 = _mm_slli_si128(tmp4, 0x04);
+	tmp1 = _mm_xor_si128(tmp1, tmp4);
+	tmp4 = _mm_slli_si128(tmp4, 0x04);
+	tmp1 = _mm_xor_si128(tmp1, tmp4);
+	return tmp1;
 }


 template<uint8_t rcon>
 static inline void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
 {
-    __m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
-    xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
-    *xout0 = sl_xor(*xout0);
-    *xout0 = _mm_xor_si128(*xout0, xout1);
-    xout1  = _mm_aeskeygenassist_si128(*xout0, 0x00);
-    xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
-    *xout2 = sl_xor(*xout2);
-    *xout2 = _mm_xor_si128(*xout2, xout1);
+	__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
+	xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
+	*xout0 = sl_xor(*xout0);
+	*xout0 = _mm_xor_si128(*xout0, xout1);
+	xout1  = _mm_aeskeygenassist_si128(*xout0, 0x00);
+	xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
+	*xout2 = sl_xor(*xout2);
+	*xout2 = _mm_xor_si128(*xout2, xout1);
 }


 template<uint8_t rcon>
 static inline void soft_aes_genkey_sub(__m128i* xout0, __m128i* xout2)
 {
-    __m128i xout1 = soft_aeskeygenassist<rcon>(*xout2);
-    xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
-    *xout0 = sl_xor(*xout0);
-    *xout0 = _mm_xor_si128(*xout0, xout1);
-    xout1  = soft_aeskeygenassist<0x00>(*xout0);
-    xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
-    *xout2 = sl_xor(*xout2);
-    *xout2 = _mm_xor_si128(*xout2, xout1);
+	__m128i xout1 = soft_aeskeygenassist<rcon>(*xout2);
+	xout1  = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
+	*xout0 = sl_xor(*xout0);
+	*xout0 = _mm_xor_si128(*xout0, xout1);
+	xout1  = soft_aeskeygenassist<0x00>(*xout0);
+	xout1  = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
+	*xout2 = sl_xor(*xout2);
+	*xout2 = _mm_xor_si128(*xout2, xout1);
 }


 template<bool SOFT_AES>
-static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3, __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
+static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, __m128i* k2, __m128i* k3,
+                              __m128i* k4, __m128i* k5, __m128i* k6, __m128i* k7, __m128i* k8, __m128i* k9)
 {
-    __m128i xout0 = _mm_load_si128(memory);
-    __m128i xout2 = _mm_load_si128(memory + 1);
-    *k0 = xout0;
-    *k1 = xout2;
+	__m128i xout0 = _mm_load_si128(memory);
+	__m128i xout2 = _mm_load_si128(memory + 1);
+	*k0 = xout0;
+	*k1 = xout2;

-    SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : aes_genkey_sub<0x01>(&xout0, &xout2);
-    *k2 = xout0;
-    *k3 = xout2;
+	SOFT_AES ? soft_aes_genkey_sub<0x01>(&xout0, &xout2) : aes_genkey_sub<0x01>(&xout0, &xout2);
+	*k2 = xout0;
+	*k3 = xout2;

-    SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : aes_genkey_sub<0x02>(&xout0, &xout2);
-    *k4 = xout0;
-    *k5 = xout2;
+	SOFT_AES ? soft_aes_genkey_sub<0x02>(&xout0, &xout2) : aes_genkey_sub<0x02>(&xout0, &xout2);
+	*k4 = xout0;
+	*k5 = xout2;

-    SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : aes_genkey_sub<0x04>(&xout0, &xout2);
-    *k6 = xout0;
-    *k7 = xout2;
+	SOFT_AES ? soft_aes_genkey_sub<0x04>(&xout0, &xout2) : aes_genkey_sub<0x04>(&xout0, &xout2);
+	*k6 = xout0;
+	*k7 = xout2;

-    SOFT_AES ? soft_aes_genkey_sub<0x08>(&xout0, &xout2) : aes_genkey_sub<0x08>(&xout0, &xout2);
-    *k8 = xout0;
-    *k9 = xout2;
+	SOFT_AES ? soft_aes_genkey_sub<0x08>(&xout0, &xout2) : aes_genkey_sub<0x08>(&xout0, &xout2);
+	*k8 = xout0;
+	*k9 = xout2;
 }


 template<bool SOFT_AES>
-static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7)
+static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4,
+                             __m128i* x5, __m128i* x6, __m128i* x7)
 {
-    if (SOFT_AES) {
-        *x0 = soft_aesenc(*x0, key);
-        *x1 = soft_aesenc(*x1, key);
-        *x2 = soft_aesenc(*x2, key);
-        *x3 = soft_aesenc(*x3, key);
-        *x4 = soft_aesenc(*x4, key);
-        *x5 = soft_aesenc(*x5, key);
-        *x6 = soft_aesenc(*x6, key);
-        *x7 = soft_aesenc(*x7, key);
-    }
-    else {
-        *x0 = _mm_aesenc_si128(*x0, key);
-        *x1 = _mm_aesenc_si128(*x1, key);
-        *x2 = _mm_aesenc_si128(*x2, key);
-        *x3 = _mm_aesenc_si128(*x3, key);
-        *x4 = _mm_aesenc_si128(*x4, key);
-        *x5 = _mm_aesenc_si128(*x5, key);
-        *x6 = _mm_aesenc_si128(*x6, key);
-        *x7 = _mm_aesenc_si128(*x7, key);
-    }
+	if(SOFT_AES)
+	{
+		*x0 = soft_aesenc(*x0, key);
+		*x1 = soft_aesenc(*x1, key);
+		*x2 = soft_aesenc(*x2, key);
+		*x3 = soft_aesenc(*x3, key);
+		*x4 = soft_aesenc(*x4, key);
+		*x5 = soft_aesenc(*x5, key);
+		*x6 = soft_aesenc(*x6, key);
+		*x7 = soft_aesenc(*x7, key);
+	}
+	else
+	{
+		*x0 = _mm_aesenc_si128(*x0, key);
+		*x1 = _mm_aesenc_si128(*x1, key);
+		*x2 = _mm_aesenc_si128(*x2, key);
+		*x3 = _mm_aesenc_si128(*x3, key);
+		*x4 = _mm_aesenc_si128(*x4, key);
+		*x5 = _mm_aesenc_si128(*x5, key);
+		*x6 = _mm_aesenc_si128(*x6, key);
+		*x7 = _mm_aesenc_si128(*x7, key);
+	}
 }


 template<size_t MEM, bool SOFT_AES>
-static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
+static inline void cn_explode_scratchpad(const __m128i* input, __m128i* output)
 {
-    __m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
-    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
+	__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
+	__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;

-    aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
+	aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

-    xin0 = _mm_load_si128(input + 4);
-    xin1 = _mm_load_si128(input + 5);
-    xin2 = _mm_load_si128(input + 6);
-    xin3 = _mm_load_si128(input + 7);
-    xin4 = _mm_load_si128(input + 8);
-    xin5 = _mm_load_si128(input + 9);
-    xin6 = _mm_load_si128(input + 10);
-    xin7 = _mm_load_si128(input + 11);
+	xin0 = _mm_load_si128(input + 4);
+	xin1 = _mm_load_si128(input + 5);
+	xin2 = _mm_load_si128(input + 6);
+	xin3 = _mm_load_si128(input + 7);
+	xin4 = _mm_load_si128(input + 8);
+	xin5 = _mm_load_si128(input + 9);
+	xin6 = _mm_load_si128(input + 10);
+	xin7 = _mm_load_si128(input + 11);

-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) {
-        aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
-        aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+	for(size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
+	{
+		aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
+		aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);

-        _mm_store_si128(output + i + 0, xin0);
-        _mm_store_si128(output + i + 1, xin1);
-        _mm_store_si128(output + i + 2, xin2);
-        _mm_store_si128(output + i + 3, xin3);
-        _mm_store_si128(output + i + 4, xin4);
-        _mm_store_si128(output + i + 5, xin5);
-        _mm_store_si128(output + i + 6, xin6);
-        _mm_store_si128(output + i + 7, xin7);
-    }
+		_mm_store_si128(output + i + 0, xin0);
+		_mm_store_si128(output + i + 1, xin1);
+		_mm_store_si128(output + i + 2, xin2);
+		_mm_store_si128(output + i + 3, xin3);
+		_mm_store_si128(output + i + 4, xin4);
+		_mm_store_si128(output + i + 5, xin5);
+		_mm_store_si128(output + i + 6, xin6);
+		_mm_store_si128(output + i + 7, xin7);
+	}
 }


 template<size_t MEM, bool SOFT_AES>
-static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
+static inline void cn_implode_scratchpad(const __m128i* input, __m128i* output)
 {
-    __m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
-    __m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
+	__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
+	__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;

-    aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
+	aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

-    xout0 = _mm_load_si128(output + 4);
-    xout1 = _mm_load_si128(output + 5);
-    xout2 = _mm_load_si128(output + 6);
-    xout3 = _mm_load_si128(output + 7);
-    xout4 = _mm_load_si128(output + 8);
-    xout5 = _mm_load_si128(output + 9);
-    xout6 = _mm_load_si128(output + 10);
-    xout7 = _mm_load_si128(output + 11);
+	xout0 = _mm_load_si128(output + 4);
+	xout1 = _mm_load_si128(output + 5);
+	xout2 = _mm_load_si128(output + 6);
+	xout3 = _mm_load_si128(output + 7);
+	xout4 = _mm_load_si128(output + 8);
+	xout5 = _mm_load_si128(output + 9);
+	xout6 = _mm_load_si128(output + 10);
+	xout7 = _mm_load_si128(output + 11);

-    for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
-    {
-        xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
-        xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
-        xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
-        xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
-        xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
-        xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
-        xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
-        xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
+	for(size_t i = 0; i < MEM / sizeof(__m128i); i += 8)
+	{
+		xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
+		xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
+		xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
+		xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
+		xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
+		xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
+		xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
+		xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);

-        aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-        aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
-    }
+		aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+		aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
+	}

-    _mm_store_si128(output + 4, xout0);
-    _mm_store_si128(output + 5, xout1);
-    _mm_store_si128(output + 6, xout2);
-    _mm_store_si128(output + 7, xout3);
-    _mm_store_si128(output + 8, xout4);
-    _mm_store_si128(output + 9, xout5);
-    _mm_store_si128(output + 10, xout6);
-    _mm_store_si128(output + 11, xout7);
+	_mm_store_si128(output + 4, xout0);
+	_mm_store_si128(output + 5, xout1);
+	_mm_store_si128(output + 6, xout2);
+	_mm_store_si128(output + 7, xout3);
+	_mm_store_si128(output + 8, xout4);
+	_mm_store_si128(output + 9, xout5);
+	_mm_store_si128(output + 10, xout6);
+	_mm_store_si128(output + 11, xout7);
 }


 template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
-inline void cryptonight_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_hash(const void* __restrict__ input, size_t size, void* __restrict__ output,
+                             cryptonight_ctx* __restrict__ ctx)
 {
-    keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state0, 200);
+	keccak(static_cast<const uint8_t*>(input), (int) size, ctx->state0, 200);

-    cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);
+	cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->state0, (__m128i*) ctx->memory);

-    const uint8_t* l0 = ctx->memory;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
+	const uint8_t* l0 = ctx->memory;
+	uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);

-    uint64_t al0 = h0[0] ^ h0[4];
-    uint64_t ah0 = h0[1] ^ h0[5];
-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+	uint64_t al0 = h0[0] ^ h0[4];
+	uint64_t ah0 = h0[1] ^ h0[5];
+	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);

-    uint64_t idx0 = h0[0] ^ h0[4];
+	uint64_t idx0 = h0[0] ^ h0[4];

-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx;
-        cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
+	for(size_t i = 0; i < ITERATIONS; i++)
+	{
+		__m128i cx;
+		cx = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);

-        if (SOFT_AES) {
-            cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
-        }
-        else {
-            cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
-        }
+		if(SOFT_AES)
+		{
+			cx = soft_aesenc(cx, _mm_set_epi64x(ah0, al0));
+		}
+		else
+		{
+			cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0));
+		}

-        _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
-        idx0 = EXTRACT64(cx);
-        bx0 = cx;
+		_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx));
+		idx0 = EXTRACT64(cx);
+		bx0 = cx;

-        uint64_t hi, lo, cl, ch;
-        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
-        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-        lo = __umul128(idx0, cl, &hi);
+		uint64_t hi, lo, cl, ch;
+		cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+		ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+		lo = __umul128(idx0, cl, &hi);

-        al0 += hi;
-        ah0 += lo;
+		al0 += hi;
+		ah0 += lo;

-        ((uint64_t*)&l0[idx0 & MASK])[0] = al0;
-        ((uint64_t*)&l0[idx0 & MASK])[1] = ah0;
+		((uint64_t*)&l0[idx0 & MASK])[0] = al0;
+		((uint64_t*)&l0[idx0 & MASK])[1] = ah0;

-        ah0 ^= ch;
-        al0 ^= cl;
-        idx0 = al0;
-    }
+		ah0 ^= ch;
+		al0 ^= cl;
+		idx0 = al0;
+	}

-    cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);
+	cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) ctx->memory, (__m128i*) ctx->state0);

-    keccakf(h0, 24);
-    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
+	keccakf(h0, 24);
+	extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
 }


 template<size_t ITERATIONS, size_t MEM, size_t MASK, bool SOFT_AES>
-inline void cryptonight_double_hash(const void *__restrict__ input, size_t size, void *__restrict__ output, struct cryptonight_ctx *__restrict__ ctx)
+inline void cryptonight_double_hash(const void* __restrict__ input, size_t size, void* __restrict__ output,
+                                    struct cryptonight_ctx* __restrict__ ctx)
 {
-    keccak((const uint8_t *) input,        (int) size, ctx->state0, 200);
-    keccak((const uint8_t *) input + size, (int) size, ctx->state1, 200);
+	keccak((const uint8_t*) input, (int) size, ctx->state0, 200);
+	keccak((const uint8_t*) input + size, (int) size, ctx->state1, 200);

-    const uint8_t* l0 = ctx->memory;
-    const uint8_t* l1 = ctx->memory + MEM;
-    uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
-    uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);
+	const uint8_t* l0 = ctx->memory;
+	const uint8_t* l1 = ctx->memory + MEM;
+	uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx->state0);
+	uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx->state1);

-    cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
-    cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
+	cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
+	cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);

-    uint64_t al0 = h0[0] ^ h0[4];
-    uint64_t al1 = h1[0] ^ h1[4];
-    uint64_t ah0 = h0[1] ^ h0[5];
-    uint64_t ah1 = h1[1] ^ h1[5];
+	uint64_t al0 = h0[0] ^ h0[4];
+	uint64_t al1 = h1[0] ^ h1[4];
+	uint64_t ah0 = h0[1] ^ h0[5];
+	uint64_t ah1 = h1[1] ^ h1[5];

-    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
-    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+	__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+	__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);

-    uint64_t idx0 = h0[0] ^ h0[4];
-    uint64_t idx1 = h1[0] ^ h1[4];
+	uint64_t idx0 = h0[0] ^ h0[4];
+	uint64_t idx1 = h1[0] ^ h1[4];

-    for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
+	for(size_t i = 0; i < ITERATIONS; i++)
+	{
+		__m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+		__m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);

-        if (SOFT_AES) {
-            cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
-        }
-        else {
-            cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
-            cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
-        }
+		if(SOFT_AES)
+		{
+			cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
+			cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
+		}
+		else
+		{
+			cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
+			cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
+		}

-        _mm_store_si128((__m128i *) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
-        _mm_store_si128((__m128i *) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));
+		_mm_store_si128((__m128i*) &l0[idx0 & MASK], _mm_xor_si128(bx0, cx0));
+		_mm_store_si128((__m128i*) &l1[idx1 & MASK], _mm_xor_si128(bx1, cx1));

-        idx0 = EXTRACT64(cx0);
-        idx1 = EXTRACT64(cx1);
+		idx0 = EXTRACT64(cx0);
+		idx1 = EXTRACT64(cx1);

-        bx0 = cx0;
-        bx1 = cx1;
+		bx0 = cx0;
+		bx1 = cx1;

-        uint64_t hi, lo, cl, ch;
-        cl = ((uint64_t*) &l0[idx0 & MASK])[0];
-        ch = ((uint64_t*) &l0[idx0 & MASK])[1];
-        lo = __umul128(idx0, cl, &hi);
+		uint64_t hi, lo, cl, ch;
+		cl = ((uint64_t*) &l0[idx0 & MASK])[0];
+		ch = ((uint64_t*) &l0[idx0 & MASK])[1];
+		lo = __umul128(idx0, cl, &hi);

-        al0 += hi;
-        ah0 += lo;
+		al0 += hi;
+		ah0 += lo;

-        ((uint64_t*) &l0[idx0 & MASK])[0] = al0;
-        ((uint64_t*) &l0[idx0 & MASK])[1] = ah0;
+		((uint64_t*) &l0[idx0 & MASK])[0] = al0;
+		((uint64_t*) &l0[idx0 & MASK])[1] = ah0;

-        ah0 ^= ch;
-        al0 ^= cl;
-        idx0 = al0;
+		ah0 ^= ch;
+		al0 ^= cl;
+		idx0 = al0;

-        cl = ((uint64_t*) &l1[idx1 & MASK])[0];
-        ch = ((uint64_t*) &l1[idx1 & MASK])[1];
-        lo = __umul128(idx1, cl, &hi);
+		cl = ((uint64_t*) &l1[idx1 & MASK])[0];
+		ch = ((uint64_t*) &l1[idx1 & MASK])[1];
+		lo = __umul128(idx1, cl, &hi);

-        al1 += hi;
-        ah1 += lo;
+		al1 += hi;
+		ah1 += lo;

-        ((uint64_t*) &l1[idx1 & MASK])[0] = al1;
-        ((uint64_t*) &l1[idx1 & MASK])[1] = ah1;
+		((uint64_t*) &l1[idx1 & MASK])[0] = al1;
+		((uint64_t*) &l1[idx1 & MASK])[1] = ah1;

-        ah1 ^= ch;
-        al1 ^= cl;
-        idx1 = al1;
-    }
+		ah1 ^= ch;
+		al1 ^= cl;
+		idx1 = al1;
+	}

-    cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
-    cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
+	cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
+	cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);

-    keccakf(h0, 24);
-    keccakf(h1, 24);
+	keccakf(h0, 24);
+	keccakf(h1, 24);

-    extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
-    extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, static_cast<char*>(output) + 32);
+	extra_hashes[ctx->state0[0] & 3](ctx->state0, 200, static_cast<char*>(output));
+	extra_hashes[ctx->state1[0] & 3](ctx->state1, 200, static_cast<char*>(output) + 32);
 }

 #endif /* __CRYPTONIGHT_X86_H__ */
--- a/src/crypto/SSE2NEON.h
+++ b/src/crypto/SSE2NEON.h
@ -49,7 +49,7 @@
 // A struct is now defined in this header file called 'SIMDVec' which can be used by applications which
 // attempt to access the contents of an _m128 struct directly.  It is important to note that accessing the __m128
 // struct directly is bad coding practice by Microsoft: @see: https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx
-// 
+//
 // However, some legacy source code may try to access the contents of an __m128 struct directly so the developer
 // can use the SIMDVec as an alias for it.  Any casting must be done manually by the developer, as you cannot
 // cast or otherwise alias the base NEON data type for intrinsic operations.
@ -66,7 +66,7 @@
 //
 // Support for a number of new intrinsics was added, however, none of them yet have unit-tests to 100% confirm they are
 // producing the correct results on NEON.  These unit tests will be added as soon as possible.
-// 
+//
 // Here is the list of new instrinsics which have been added:
 //
 // _mm_cvtss_f32     :  extracts the lower order floating point value from the parameter
@ -338,9 +338,9 @@ FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x)
 }

 // Sets the four single-precision, floating-point values to the four inputs in reverse order. https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_setr_ps(float w, float z , float y , float x ) 
+FORCE_INLINE __m128 _mm_setr_ps(float w, float z , float y , float x)
 {
-	float __attribute__ ((aligned (16))) data[4] = { w, z, y, x };
+	float __attribute__((aligned(16))) data[4] = { w, z, y, x };
 	return vreinterpretq_m128_f32(vld1q_f32(data));
 }

@ -358,25 +358,25 @@ FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
 }

 // Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx
-FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
+FORCE_INLINE void _mm_store_ps(float* p, __m128 a)
 {
 	vst1q_f32(p, vreinterpretq_f32_m128(a));
 }

 // Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx
-FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
+FORCE_INLINE void _mm_storeu_ps(float* p, __m128 a)
 {
 	vst1q_f32(p, vreinterpretq_f32_m128(a));
 }

 // Stores four 32-bit integer values as (as a __m128i value) at the address p. https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx
-FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
+FORCE_INLINE void _mm_store_si128(__m128i* p, __m128i a)
 {
 	vst1q_s32((int32_t*) p, vreinterpretq_s32_m128i(a));
 }

 // Stores the lower single - precision, floating - point value. https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx
-FORCE_INLINE void _mm_store_ss(float *p, __m128 a)
+FORCE_INLINE void _mm_store_ss(float* p, __m128 a)
 {
 	vst1q_lane_f32(p, vreinterpretq_f32_m128(a), 0);
 }
@ -390,26 +390,26 @@ FORCE_INLINE void _mm_storel_epi64(__m128i* a, __m128i b)
 }

 // Loads a single single-precision, floating-point value, copying it into all four words https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_load1_ps(const float * p)
+FORCE_INLINE __m128 _mm_load1_ps(const float* p)
 {
 	return vreinterpretq_m128_f32(vld1q_dup_f32(p));
 }

 // Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx
-FORCE_INLINE __m128 _mm_load_ps(const float * p)
+FORCE_INLINE __m128 _mm_load_ps(const float* p)
 {
 	return vreinterpretq_m128_f32(vld1q_f32(p));
 }

 // Loads four single-precision, floating-point values.  https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx
-FORCE_INLINE __m128 _mm_loadu_ps(const float * p)
+FORCE_INLINE __m128 _mm_loadu_ps(const float* p)
 {
 	// for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are equivalent for neon
 	return vreinterpretq_m128_f32(vld1q_f32(p));
 }

 // Loads an single - precision, floating - point value into the low word and clears the upper three words.  https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx
-FORCE_INLINE __m128 _mm_load_ss(const float * p)
+FORCE_INLINE __m128 _mm_load_ss(const float* p)
 {
 	return vreinterpretq_m128_f32(vsetq_lane_f32(*p, vdupq_n_f32(0), 0));
 }
@ -422,55 +422,57 @@ FORCE_INLINE __m128 _mm_load_ss(const float * p)
 // Compares for inequality.  https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx
 FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b)
 {
-	return vreinterpretq_m128_u32( vmvnq_u32( vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)) ) );
+	return vreinterpretq_m128_u32(vmvnq_u32(vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))));
 }

 // Computes the bitwise AND-NOT of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx
 FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b)
 {
-	return vreinterpretq_m128_s32( vbicq_s32(vreinterpretq_s32_m128(b), vreinterpretq_s32_m128(a)) ); // *NOTE* argument swap
+	return vreinterpretq_m128_s32(vbicq_s32(vreinterpretq_s32_m128(b),
+	                                        vreinterpretq_s32_m128(a)));   // *NOTE* argument swap
 }

 // Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the 128-bit value in a. https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx
 FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b)
 {
-	return vreinterpretq_m128i_s32( vbicq_s32(vreinterpretq_s32_m128i(b), vreinterpretq_s32_m128i(a)) ); // *NOTE* argument swap
+	return vreinterpretq_m128i_s32(vbicq_s32(vreinterpretq_s32_m128i(b),
+	                               vreinterpretq_s32_m128i(a)));   // *NOTE* argument swap
 }

 // Computes the bitwise AND of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx
 FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b)
 {
-	return vreinterpretq_m128i_s32( vandq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) );
+	return vreinterpretq_m128i_s32(vandq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
 }

 // Computes the bitwise AND of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx
 FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
 {
-	return vreinterpretq_m128_s32( vandq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) );
+	return vreinterpretq_m128_s32(vandq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)));
 }

 // Computes the bitwise OR of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx
 FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b)
 {
-	return vreinterpretq_m128_s32( vorrq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) );
+	return vreinterpretq_m128_s32(vorrq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)));
 }

 // Computes bitwise EXOR (exclusive-or) of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx
 FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b)
 {
-	return vreinterpretq_m128_s32( veorq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)) );
+	return vreinterpretq_m128_s32(veorq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b)));
 }

 // Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx
 FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b)
 {
-	return vreinterpretq_m128i_s32( vorrq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) );
+	return vreinterpretq_m128i_s32(vorrq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
 }

 // Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in b.  https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx
 FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b)
 {
-	return vreinterpretq_m128i_s32( veorq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)) );
+	return vreinterpretq_m128i_s32(veorq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
 }

 // NEON does not provide this method
@ -478,7 +480,7 @@ FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b)
 FORCE_INLINE int _mm_movemask_ps(__m128 a)
 {
 #if ENABLE_CPP_VERSION // I am not yet convinced that the NEON version is faster than the C version of this
-	uint32x4_t &ia = *(uint32x4_t *)&a;
+	uint32x4_t & ia = *(uint32x4_t*)&a;
 	return (ia[0] >> 31) | ((ia[1] >> 30) & 2) | ((ia[2] >> 29) & 4) | ((ia[3] >> 28) & 8);
 #else
 	static const uint32x4_t movemask = { 1, 2, 4, 8 };
@ -622,7 +624,7 @@ FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b)
 // The same is true on SSE as well.
 // Selects four specific single-precision, floating-point values from a and b, based on the mask i. https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx
 #if ENABLE_CPP_VERSION // I am not convinced that the NEON version is faster than the C version yet.
-FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, __constrange(0,255) int imm)
+FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, __constrange(0, 255) int imm)
 {
 	__m128 ret;
 	ret[0] = a[imm & 0x3];
@ -633,22 +635,22 @@ FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, __constrange(0,25
 }
 #else
 #define _mm_shuffle_ps_default(a, b, imm) \
-({ \
-	float32x4_t ret; \
-	ret = vmovq_n_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), (imm) & 0x3)); \
-	ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), ((imm) >> 2) & 0x3), ret, 1); \
-	ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 4) & 0x3), ret, 2); \
-	ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 6) & 0x3), ret, 3); \
-	vreinterpretq_m128_f32(ret); \
-})
+	({ \
+		float32x4_t ret; \
+		ret = vmovq_n_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), (imm) & 0x3)); \
+		ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(a), ((imm) >> 2) & 0x3), ret, 1); \
+		ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 4) & 0x3), ret, 2); \
+		ret = vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 6) & 0x3), ret, 3); \
+		vreinterpretq_m128_f32(ret); \
+	})
 #endif

 //FORCE_INLINE __m128 _mm_shuffle_ps(__m128 a, __m128 b, __constrange(0,255) int imm)
 #define _mm_shuffle_ps(a, b, imm) \
-({ \
-	__m128 ret; \
-	switch (imm) \
-	{ \
+	({ \
+		__m128 ret; \
+		switch (imm) \
+		{ \
 		case _MM_SHUFFLE(1, 0, 3, 2): ret = _mm_shuffle_ps_1032((a), (b)); break; \
 		case _MM_SHUFFLE(2, 3, 0, 1): ret = _mm_shuffle_ps_2301((a), (b)); break; \
 		case _MM_SHUFFLE(0, 3, 2, 1): ret = _mm_shuffle_ps_0321((a), (b)); break; \
@ -666,9 +668,9 @@ FORCE_INLINE __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, __constrange(0,25
 		case _MM_SHUFFLE(2, 0, 0, 1): ret = _mm_shuffle_ps_2001((a), (b)); break; \
 		case _MM_SHUFFLE(2, 0, 3, 2): ret = _mm_shuffle_ps_2032((a), (b)); break; \
 		default: ret = _mm_shuffle_ps_default((a), (b), (imm)); break; \
-	} \
-	ret; \
-})
+		} \
+		ret; \
+	})

 // Takes the upper 64 bits of a and places it in the low end of the result
 // Takes the lower 64 bits of a and places it into the high end of the result.
@ -748,7 +750,7 @@ FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a)

 //FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) int imm)
 #if ENABLE_CPP_VERSION
-FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) int imm)
+FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0, 255) int imm)
 {
 	__m128i ret;
 	ret[0] = a[imm & 0x3];
@ -759,36 +761,36 @@ FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) in
 }
 #else
 #define _mm_shuffle_epi32_default(a, imm) \
-({ \
-	int32x4_t ret; \
-	ret = vmovq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm) & 0x3)); \
-	ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 2) & 0x3), ret, 1); \
-	ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 4) & 0x3), ret, 2); \
-	ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 6) & 0x3), ret, 3); \
-	vreinterpretq_m128i_s32(ret); \
-})
+	({ \
+		int32x4_t ret; \
+		ret = vmovq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm) & 0x3)); \
+		ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 2) & 0x3), ret, 1); \
+		ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 4) & 0x3), ret, 2); \
+		ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 6) & 0x3), ret, 3); \
+		vreinterpretq_m128i_s32(ret); \
+	})
 #endif

 //FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a, __constrange(0,255) int imm)
 #if defined(__aarch64__)
 #define _mm_shuffle_epi32_splat(a, imm) \
-({ \
-	vreinterpretq_m128i_s32(vdupq_laneq_s32(vreinterpretq_s32_m128i(a), (imm))); \
-})
+	({ \
+		vreinterpretq_m128i_s32(vdupq_laneq_s32(vreinterpretq_s32_m128i(a), (imm))); \
+	})
 #else
 #define _mm_shuffle_epi32_splat(a, imm) \
-({ \
-	vreinterpretq_m128i_s32(vdupq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)))); \
-})
+	({ \
+		vreinterpretq_m128i_s32(vdupq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)))); \
+	})
 #endif

 // Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm.	https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx
 //FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i a, __constrange(0,255) int imm)
 #define _mm_shuffle_epi32(a, imm) \
-({ \
-	__m128i ret; \
-	switch (imm) \
-	{ \
+	({ \
+		__m128i ret; \
+		switch (imm) \
+		{ \
 		case _MM_SHUFFLE(1, 0, 3, 2): ret = _mm_shuffle_epi_1032((a)); break; \
 		case _MM_SHUFFLE(2, 3, 0, 1): ret = _mm_shuffle_epi_2301((a)); break; \
 		case _MM_SHUFFLE(0, 3, 2, 1): ret = _mm_shuffle_epi_0321((a)); break; \
@ -804,22 +806,22 @@ FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) in
 		case _MM_SHUFFLE(2, 2, 2, 2): ret = _mm_shuffle_epi32_splat((a),2); break; \
 		case _MM_SHUFFLE(3, 3, 3, 3): ret = _mm_shuffle_epi32_splat((a),3); break; \
 		default: ret = _mm_shuffle_epi32_default((a), (imm)); break; \
-	} \
-	ret; \
-})
+		} \
+		ret; \
+	})

 // Shuffles the upper 4 signed or unsigned 16 - bit integers in a as specified by imm.  https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx
 //FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a, __constrange(0,255) int imm)
 #define _mm_shufflehi_epi16_function(a, imm) \
-({ \
-	int16x8_t ret = vreinterpretq_s16_s32(a); \
-	int16x4_t highBits = vget_high_s16(ret); \
-	ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm) & 0x3), ret, 4); \
-	ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 2) & 0x3), ret, 5); \
-	ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 4) & 0x3), ret, 6); \
-	ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 6) & 0x3), ret, 7); \
-	vreinterpretq_s32_s16(ret); \
-})
+	({ \
+		int16x8_t ret = vreinterpretq_s16_s32(a); \
+		int16x4_t highBits = vget_high_s16(ret); \
+		ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm) & 0x3), ret, 4); \
+		ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 2) & 0x3), ret, 5); \
+		ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 4) & 0x3), ret, 6); \
+		ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 6) & 0x3), ret, 7); \
+		vreinterpretq_s32_s16(ret); \
+	})

 //FORCE_INLINE __m128i _mm_shufflehi_epi16(__m128i a, __constrange(0,255) int imm)
 #define _mm_shufflehi_epi16(a, imm) \
@ -829,88 +831,88 @@ FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __constrange(0,255) in
 // Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while shifting in zeros. : https://msdn.microsoft.com/en-us/library/z2k3bbtb%28v=vs.90%29.aspx
 //FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, __constrange(0,255) int imm)
 #define _mm_slli_epi32(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) {\
-		ret = a; \
-	} \
-	else if ((imm) > 31) { \
-		ret = _mm_setzero_si128(); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_s32(vshlq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \
-	} \
-	ret; \
-})
+	({ \
+		__m128i ret; \
+		if ((imm) <= 0) {\
+			ret = a; \
+		} \
+		else if ((imm) > 31) { \
+			ret = _mm_setzero_si128(); \
+		} \
+		else { \
+			ret = vreinterpretq_m128i_s32(vshlq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \
+		} \
+		ret; \
+	})

 //Shifts the 4 signed or unsigned 32-bit integers in a right by count bits while shifting in zeros.  https://msdn.microsoft.com/en-us/library/w486zcfa(v=vs.100).aspx
 //FORCE_INLINE __m128i _mm_srli_epi32(__m128i a, __constrange(0,255) int imm)
 #define _mm_srli_epi32(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) { \
-		ret = a; \
-	} \
-	else if ((imm)> 31) { \
-		ret = _mm_setzero_si128(); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_u32(vshrq_n_u32(vreinterpretq_u32_m128i(a), (imm))); \
-	} \
-	ret; \
-})
+	({ \
+		__m128i ret; \
+		if ((imm) <= 0) { \
+			ret = a; \
+		} \
+		else if ((imm)> 31) { \
+			ret = _mm_setzero_si128(); \
+		} \
+		else { \
+			ret = vreinterpretq_m128i_u32(vshrq_n_u32(vreinterpretq_u32_m128i(a), (imm))); \
+		} \
+		ret; \
+	})

 // Shifts the 4 signed 32 - bit integers in a right by count bits while shifting in the sign bit.  https://msdn.microsoft.com/en-us/library/z1939387(v=vs.100).aspx
 //FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, __constrange(0,255) int imm)
 #define _mm_srai_epi32(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) { \
-		ret = a; \
-	} \
-	else if ((imm) > 31) { \
-		ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), 16)); \
-		ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(ret), 16)); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \
-	} \
-	ret; \
-})
+	({ \
+		__m128i ret; \
+		if ((imm) <= 0) { \
+			ret = a; \
+		} \
+		else if ((imm) > 31) { \
+			ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), 16)); \
+			ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(ret), 16)); \
+		} \
+		else { \
+			ret = vreinterpretq_m128i_s32(vshrq_n_s32(vreinterpretq_s32_m128i(a), (imm))); \
+		} \
+		ret; \
+	})

 // Shifts the 128 - bit value in a right by imm bytes while shifting in zeros.imm must be an immediate. https://msdn.microsoft.com/en-us/library/305w28yz(v=vs.100).aspx
 //FORCE_INLINE _mm_srli_si128(__m128i a, __constrange(0,255) int imm)
 #define _mm_srli_si128(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) { \
-		ret = a; \
-	} \
-	else if ((imm) > 15) { \
-		ret = _mm_setzero_si128(); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_s8(vextq_s8(vreinterpretq_s8_m128i(a), vdupq_n_s8(0), (imm))); \
-	} \
-	ret; \
-})
+	({ \
+		__m128i ret; \
+		if ((imm) <= 0) { \
+			ret = a; \
+		} \
+		else if ((imm) > 15) { \
+			ret = _mm_setzero_si128(); \
+		} \
+		else { \
+			ret = vreinterpretq_m128i_s8(vextq_s8(vreinterpretq_s8_m128i(a), vdupq_n_s8(0), (imm))); \
+		} \
+		ret; \
+	})

 // Shifts the 128-bit value in a left by imm bytes while shifting in zeros. imm must be an immediate.  https://msdn.microsoft.com/en-us/library/34d3k2kt(v=vs.100).aspx
 //FORCE_INLINE __m128i _mm_slli_si128(__m128i a, __constrange(0,255) int imm)
 #define _mm_slli_si128(a, imm) \
-({ \
-	__m128i ret; \
-	if ((imm) <= 0) { \
-		ret = a; \
-	} \
-	else if ((imm) > 15) { \
-		ret = _mm_setzero_si128(); \
-	} \
-	else { \
-		ret = vreinterpretq_m128i_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_m128i(a), 16 - (imm))); \
-	} \
-	ret; \
-})
+	({ \
+		__m128i ret; \
+		if ((imm) <= 0) { \
+			ret = a; \
+		} \
+		else if ((imm) > 15) { \
+			ret = _mm_setzero_si128(); \
+		} \
+		else { \
+			ret = vreinterpretq_m128i_s8(vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_m128i(a), 16 - (imm))); \
+		} \
+		ret; \
+	})

 // NEON does not provide a version of this function, here is an article about some ways to repro the results.
 // http://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon
@ -1000,7 +1002,7 @@ FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b)
 // Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or unsigned 32-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx
 FORCE_INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b)
 {
-	return vreinterpretq_m128i_s32(vmulq_s32(vreinterpretq_s32_m128i(a),vreinterpretq_s32_m128i(b)));
+	return vreinterpretq_m128i_s32(vmulq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
 }

 // Multiplies the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx
@ -1030,7 +1032,7 @@ FORCE_INLINE __m128 recipq_newton(__m128 in, int n)
 {
 	int i;
 	float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in));
-	for (i = 0; i < n; ++i)
+	for(i = 0; i < n; ++i)
 	{
 		recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in)));
 	}
@ -1128,9 +1130,9 @@ FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b)
 	return vreinterpretq_m128i_u16(r.val[1]);
 }

-// Computes pairwise add of each argument as single-precision, floating-point values a and b. 
+// Computes pairwise add of each argument as single-precision, floating-point values a and b.
 //https://msdn.microsoft.com/en-us/library/yd9wecaa.aspx
-FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b ) 
+FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b)
 {
 #if defined(__aarch64__)
 	return vreinterpretq_m128_f32(vpaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); //AArch64
@ -1193,7 +1195,7 @@ FORCE_INLINE __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
 // see also:
 // http://stackoverflow.com/questions/8627331/what-does-ordered-unordered-comparison-mean
 // http://stackoverflow.com/questions/29349621/neon-isnanval-intrinsics
-FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b ) 
+FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b)
 {
 	// Note: NEON does not have ordered compare builtin
 	// Need to compare a eq a and b eq b to check for NaN
@ -1309,22 +1311,25 @@ FORCE_INLINE __m128i _mm_cvtepi16_epi32(__m128i a)
 }

 // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/xdc42k5e(v=vs.100).aspx
-// *NOTE*. The default rounding mode on SSE is 'round to even', which ArmV7 does not support!  
+// *NOTE*. The default rounding mode on SSE is 'round to even', which ArmV7 does not support!
 // It is supported on ARMv8 however.
 FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a)
 {
 #if defined(__aarch64__)
 	return vcvtnq_s32_f32(a);
 #else
-    uint32x4_t signmask = vdupq_n_u32(0x80000000);
-    float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a), vdupq_n_f32(0.5f)); /* +/- 0.5 */
-    int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(vreinterpretq_f32_m128(a), half)); /* round to integer: [a + 0.5]*/
-    int32x4_t r_trunc = vcvtq_s32_f32(vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */
-    int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 31)); /* 1 or 0 */
-    int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */
-    float32x4_t delta = vsubq_f32(vreinterpretq_f32_m128(a), vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */
-    uint32x4_t is_delta_half = vceqq_f32(delta, half); /* delta == +/- 0.5 */
-    return vreinterpretq_m128i_s32(vbslq_s32(is_delta_half, r_even, r_normal));
+	uint32x4_t signmask = vdupq_n_u32(0x80000000);
+	float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a), vdupq_n_f32(0.5f)); /* +/- 0.5 */
+	int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(vreinterpretq_f32_m128(a),
+	                                   half)); /* round to integer: [a + 0.5]*/
+	int32x4_t r_trunc = vcvtq_s32_f32(vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */
+	int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(vnegq_s32(r_trunc)),
+	                    31)); /* 1 or 0 */
+	int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */
+	float32x4_t delta = vsubq_f32(vreinterpretq_f32_m128(a),
+	                              vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */
+	uint32x4_t is_delta_half = vceqq_f32(delta, half); /* delta == +/- 0.5 */
+	return vreinterpretq_m128i_s32(vbslq_s32(is_delta_half, r_even, r_normal));
 #endif
 }

@ -1354,9 +1359,9 @@ FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a)
 }

 // Loads 128-bit value. : https://msdn.microsoft.com/en-us/library/atzzad1h(v=vs.80).aspx
-FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
+FORCE_INLINE __m128i _mm_load_si128(const __m128i* p)
 {
-	return vreinterpretq_m128i_s32(vld1q_s32((int32_t *)p));
+	return vreinterpretq_m128i_s32(vld1q_s32((int32_t*)p));
 }

 // ******************************************
@ -1366,19 +1371,22 @@ FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
 // Packs the 16 signed 16-bit integers from a and b into 8-bit integers and saturates. https://msdn.microsoft.com/en-us/library/k4y4f7w5%28v=vs.90%29.aspx
 FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b)
 {
-	return vreinterpretq_m128i_s8(vcombine_s8(vqmovn_s16(vreinterpretq_s16_m128i(a)), vqmovn_s16(vreinterpretq_s16_m128i(b))));
+	return vreinterpretq_m128i_s8(vcombine_s8(vqmovn_s16(vreinterpretq_s16_m128i(a)),
+	                              vqmovn_s16(vreinterpretq_s16_m128i(b))));
 }

 // Packs the 16 signed 16 - bit integers from a and b into 8 - bit unsigned integers and saturates. https://msdn.microsoft.com/en-us/library/07ad1wx4(v=vs.100).aspx
 FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b)
 {
-	return vreinterpretq_m128i_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_m128i(a)), vqmovun_s16(vreinterpretq_s16_m128i(b))));
+	return vreinterpretq_m128i_u8(vcombine_u8(vqmovun_s16(vreinterpretq_s16_m128i(a)),
+	                              vqmovun_s16(vreinterpretq_s16_m128i(b))));
 }

 // Packs the 8 signed 32-bit integers from a and b into signed 16-bit integers and saturates. https://msdn.microsoft.com/en-us/library/393t56f9%28v=vs.90%29.aspx
 FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b)
 {
-	return vreinterpretq_m128i_s16(vcombine_s16(vqmovn_s32(vreinterpretq_s32_m128i(a)), vqmovn_s32(vreinterpretq_s32_m128i(b))));
+	return vreinterpretq_m128i_s16(vcombine_s16(vqmovn_s32(vreinterpretq_s32_m128i(a)),
+	                               vqmovn_s32(vreinterpretq_s32_m128i(b))));
 }

 // Interleaves the lower 8 signed or unsigned 8-bit integers in a with the lower 8 signed or unsigned 8-bit integers in b.  https://msdn.microsoft.com/en-us/library/xf7k860c%28v=vs.90%29.aspx
@ -1456,16 +1464,16 @@ FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
 // Extracts the selected signed or unsigned 16-bit integer from a and zero extends.  https://msdn.microsoft.com/en-us/library/6dceta0c(v=vs.100).aspx
 //FORCE_INLINE int _mm_extract_epi16(__m128i a, __constrange(0,8) int imm)
 #define _mm_extract_epi16(a, imm) \
-({ \
-	(vgetq_lane_s16(vreinterpretq_s16_m128i(a), (imm)) & 0x0000ffffUL); \
-})
+	({ \
+		(vgetq_lane_s16(vreinterpretq_s16_m128i(a), (imm)) & 0x0000ffffUL); \
+	})

 // Inserts the least significant 16 bits of b into the selected 16-bit integer of a. https://msdn.microsoft.com/en-us/library/kaze8hz1%28v=vs.100%29.aspx
 //FORCE_INLINE __m128i _mm_insert_epi16(__m128i a, const int b, __constrange(0,8) int imm)
 #define _mm_insert_epi16(a, b, imm) \
-({ \
-	vreinterpretq_m128i_s16(vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \
-})
+	({ \
+		vreinterpretq_m128i_s16(vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \
+	})

 // ******************************************
 // Streaming Extensions
@ -1478,13 +1486,13 @@ FORCE_INLINE void _mm_sfence(void)
 }

 // Stores the data in a to the address p without polluting the caches.  If the cache line containing address p is already in the cache, the cache will be updated.Address p must be 16 - byte aligned.  https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx
-FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a)
+FORCE_INLINE void _mm_stream_si128(__m128i* p, __m128i a)
 {
 	*p = a;
 }

 // Cache line containing p is flushed and invalidated from all caches in the coherency domain. : https://msdn.microsoft.com/en-us/library/ba08y07y(v=vs.100).aspx
-FORCE_INLINE void _mm_clflush(void const*p) 
+FORCE_INLINE void _mm_clflush(void const* p)
 {
 	// no corollary for Neon?
 }
--- a/src/crypto/c_blake256.c
+++ b/src/crypto/c_blake256.c
@ -14,313 +14,377 @@
 #include "c_blake256.h"

 #define U8TO32(p) \
-    (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) |    \
-     ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))
+	(((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) |    \
+	 ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))
 #define U32TO8(p, v) \
-    (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
-    (p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );
+	(p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
+	(p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );

-const uint8_t sigma[][16] = {
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
-    {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
-    {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
-    { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8},
-    { 9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13},
-    { 2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9},
-    {12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11},
-    {13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10},
-    { 6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5},
-    {10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0},
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
-    {14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3},
-    {11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4},
-    { 7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8}
+const uint8_t sigma[][16] =
+{
+	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+	{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+	{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+	{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+	{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+	{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+	{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+	{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+	{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}
 };

-const uint32_t cst[16] = {
-    0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
-    0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
-    0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
-    0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
+const uint32_t cst[16] =
+{
+	0x243F6A88, 0x85A308D3, 0x13198A2E, 0x03707344,
+	0xA4093822, 0x299F31D0, 0x082EFA98, 0xEC4E6C89,
+	0x452821E6, 0x38D01377, 0xBE5466CF, 0x34E90C6C,
+	0xC0AC29B7, 0xC97C50DD, 0x3F84D5B5, 0xB5470917
 };

-static const uint8_t padding[] = {
-    0x80,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+static const uint8_t padding[] =
+{
+	0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };


-void blake256_compress(state *S, const uint8_t *block) {
-    uint32_t v[16], m[16], i;
+void blake256_compress(state* S, const uint8_t* block)
+{
+	uint32_t v[16], m[16], i;

 #define ROT(x,n) (((x)<<(32-n))|((x)>>(n)))
 #define G(a,b,c,d,e)                                      \
-    v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \
-    v[d] = ROT(v[d] ^ v[a],16);                           \
-    v[c] += v[d];                                         \
-    v[b] = ROT(v[b] ^ v[c],12);                           \
-    v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b];   \
-    v[d] = ROT(v[d] ^ v[a], 8);                           \
-    v[c] += v[d];                                         \
-    v[b] = ROT(v[b] ^ v[c], 7);
+	v[a] += (m[sigma[i][e]] ^ cst[sigma[i][e+1]]) + v[b]; \
+	v[d] = ROT(v[d] ^ v[a],16);                           \
+	v[c] += v[d];                                         \
+	v[b] = ROT(v[b] ^ v[c],12);                           \
+	v[a] += (m[sigma[i][e+1]] ^ cst[sigma[i][e]])+v[b];   \
+	v[d] = ROT(v[d] ^ v[a], 8);                           \
+	v[c] += v[d];                                         \
+	v[b] = ROT(v[b] ^ v[c], 7);

-    for (i = 0; i < 16; ++i) m[i] = U8TO32(block + i * 4);
-    for (i = 0; i < 8;  ++i) v[i] = S->h[i];
-    v[ 8] = S->s[0] ^ 0x243F6A88;
-    v[ 9] = S->s[1] ^ 0x85A308D3;
-    v[10] = S->s[2] ^ 0x13198A2E;
-    v[11] = S->s[3] ^ 0x03707344;
-    v[12] = 0xA4093822;
-    v[13] = 0x299F31D0;
-    v[14] = 0x082EFA98;
-    v[15] = 0xEC4E6C89;
+	for(i = 0; i < 16; ++i)
+	{
+		m[i] = U8TO32(block + i * 4);
+	}
+	for(i = 0; i < 8;  ++i)
+	{
+		v[i] = S->h[i];
+	}
+	v[ 8] = S->s[0] ^ 0x243F6A88;
+	v[ 9] = S->s[1] ^ 0x85A308D3;
+	v[10] = S->s[2] ^ 0x13198A2E;
+	v[11] = S->s[3] ^ 0x03707344;
+	v[12] = 0xA4093822;
+	v[13] = 0x299F31D0;
+	v[14] = 0x082EFA98;
+	v[15] = 0xEC4E6C89;

-    if (S->nullt == 0) {
-        v[12] ^= S->t[0];
-        v[13] ^= S->t[0];
-        v[14] ^= S->t[1];
-        v[15] ^= S->t[1];
-    }
+	if(S->nullt == 0)
+	{
+		v[12] ^= S->t[0];
+		v[13] ^= S->t[0];
+		v[14] ^= S->t[1];
+		v[15] ^= S->t[1];
+	}

-    for (i = 0; i < 14; ++i) {
-        G(0, 4,  8, 12,  0);
-        G(1, 5,  9, 13,  2);
-        G(2, 6, 10, 14,  4);
-        G(3, 7, 11, 15,  6);
-        G(3, 4,  9, 14, 14);
-        G(2, 7,  8, 13, 12);
-        G(0, 5, 10, 15,  8);
-        G(1, 6, 11, 12, 10);
-    }
+	for(i = 0; i < 14; ++i)
+	{
+		G(0, 4,  8, 12,  0);
+		G(1, 5,  9, 13,  2);
+		G(2, 6, 10, 14,  4);
+		G(3, 7, 11, 15,  6);
+		G(3, 4,  9, 14, 14);
+		G(2, 7,  8, 13, 12);
+		G(0, 5, 10, 15,  8);
+		G(1, 6, 11, 12, 10);
+	}

-    for (i = 0; i < 16; ++i) S->h[i % 8] ^= v[i];
-    for (i = 0; i < 8;  ++i) S->h[i] ^= S->s[i % 4];
+	for(i = 0; i < 16; ++i)
+	{
+		S->h[i % 8] ^= v[i];
+	}
+	for(i = 0; i < 8;  ++i)
+	{
+		S->h[i] ^= S->s[i % 4];
+	}
 }

-void blake256_init(state *S) {
-    S->h[0] = 0x6A09E667;
-    S->h[1] = 0xBB67AE85;
-    S->h[2] = 0x3C6EF372;
-    S->h[3] = 0xA54FF53A;
-    S->h[4] = 0x510E527F;
-    S->h[5] = 0x9B05688C;
-    S->h[6] = 0x1F83D9AB;
-    S->h[7] = 0x5BE0CD19;
-    S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
-    S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
+void blake256_init(state* S)
+{
+	S->h[0] = 0x6A09E667;
+	S->h[1] = 0xBB67AE85;
+	S->h[2] = 0x3C6EF372;
+	S->h[3] = 0xA54FF53A;
+	S->h[4] = 0x510E527F;
+	S->h[5] = 0x9B05688C;
+	S->h[6] = 0x1F83D9AB;
+	S->h[7] = 0x5BE0CD19;
+	S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
+	S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
 }

-void blake224_init(state *S) {
-    S->h[0] = 0xC1059ED8;
-    S->h[1] = 0x367CD507;
-    S->h[2] = 0x3070DD17;
-    S->h[3] = 0xF70E5939;
-    S->h[4] = 0xFFC00B31;
-    S->h[5] = 0x68581511;
-    S->h[6] = 0x64F98FA7;
-    S->h[7] = 0xBEFA4FA4;
-    S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
-    S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
+void blake224_init(state* S)
+{
+	S->h[0] = 0xC1059ED8;
+	S->h[1] = 0x367CD507;
+	S->h[2] = 0x3070DD17;
+	S->h[3] = 0xF70E5939;
+	S->h[4] = 0xFFC00B31;
+	S->h[5] = 0x68581511;
+	S->h[6] = 0x64F98FA7;
+	S->h[7] = 0xBEFA4FA4;
+	S->t[0] = S->t[1] = S->buflen = S->nullt = 0;
+	S->s[0] = S->s[1] = S->s[2] = S->s[3] = 0;
 }

 // datalen = number of bits
-void blake256_update(state *S, const uint8_t *data, uint64_t datalen) {
-    int left = S->buflen >> 3;
-    int fill = 64 - left;
+void blake256_update(state* S, const uint8_t* data, uint64_t datalen)
+{
+	int left = S->buflen >> 3;
+	int fill = 64 - left;

-    if (left && (((datalen >> 3) & 0x3F) >= (unsigned) fill)) {
-        memcpy((void *) (S->buf + left), (void *) data, fill);
-        S->t[0] += 512;
-        if (S->t[0] == 0) S->t[1]++;
-        blake256_compress(S, S->buf);
-        data += fill;
-        datalen -= (fill << 3);
-        left = 0;
-    }
+	if(left && (((datalen >> 3) & 0x3F) >= (unsigned) fill))
+	{
+		memcpy((void*)(S->buf + left), (void*) data, fill);
+		S->t[0] += 512;
+		if(S->t[0] == 0)
+		{
+			S->t[1]++;
+		}
+		blake256_compress(S, S->buf);
+		data += fill;
+		datalen -= (fill << 3);
+		left = 0;
+	}

-    while (datalen >= 512) {
-        S->t[0] += 512;
-        if (S->t[0] == 0) S->t[1]++;
-        blake256_compress(S, data);
-        data += 64;
-        datalen -= 512;
-    }
+	while(datalen >= 512)
+	{
+		S->t[0] += 512;
+		if(S->t[0] == 0)
+		{
+			S->t[1]++;
+		}
+		blake256_compress(S, data);
+		data += 64;
+		datalen -= 512;
+	}

-    if (datalen > 0) {
-        memcpy((void *) (S->buf + left), (void *) data, datalen >> 3);
-        S->buflen = (left << 3) + (int) datalen;
-    } else {
-        S->buflen = 0;
-    }
+	if(datalen > 0)
+	{
+		memcpy((void*)(S->buf + left), (void*) data, datalen >> 3);
+		S->buflen = (left << 3) + (int) datalen;
+	}
+	else
+	{
+		S->buflen = 0;
+	}
 }

 // datalen = number of bits
-void blake224_update(state *S, const uint8_t *data, uint64_t datalen) {
-    blake256_update(S, data, datalen);
+void blake224_update(state* S, const uint8_t* data, uint64_t datalen)
+{
+	blake256_update(S, data, datalen);
 }

-void blake256_final_h(state *S, uint8_t *digest, uint8_t pa, uint8_t pb) {
-    uint8_t msglen[8];
-    uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
-    if (lo < (unsigned) S->buflen) hi++;
-    U32TO8(msglen + 0, hi);
-    U32TO8(msglen + 4, lo);
+void blake256_final_h(state* S, uint8_t* digest, uint8_t pa, uint8_t pb)
+{
+	uint8_t msglen[8];
+	uint32_t lo = S->t[0] + S->buflen, hi = S->t[1];
+	if(lo < (unsigned) S->buflen)
+	{
+		hi++;
+	}
+	U32TO8(msglen + 0, hi);
+	U32TO8(msglen + 4, lo);

-    if (S->buflen == 440) { /* one padding byte */
-        S->t[0] -= 8;
-        blake256_update(S, &pa, 8);
-    } else {
-        if (S->buflen < 440) { /* enough space to fill the block  */
-            if (S->buflen == 0) S->nullt = 1;
-            S->t[0] -= 440 - S->buflen;
-            blake256_update(S, padding, 440 - S->buflen);
-        } else { /* need 2 compressions */
-            S->t[0] -= 512 - S->buflen;
-            blake256_update(S, padding, 512 - S->buflen);
-            S->t[0] -= 440;
-            blake256_update(S, padding + 1, 440);
-            S->nullt = 1;
-        }
-        blake256_update(S, &pb, 8);
-        S->t[0] -= 8;
-    }
-    S->t[0] -= 64;
-    blake256_update(S, msglen, 64);
+	if(S->buflen == 440)    /* one padding byte */
+	{
+		S->t[0] -= 8;
+		blake256_update(S, &pa, 8);
+	}
+	else
+	{
+		if(S->buflen < 440)    /* enough space to fill the block  */
+		{
+			if(S->buflen == 0)
+			{
+				S->nullt = 1;
+			}
+			S->t[0] -= 440 - S->buflen;
+			blake256_update(S, padding, 440 - S->buflen);
+		}
+		else     /* need 2 compressions */
+		{
+			S->t[0] -= 512 - S->buflen;
+			blake256_update(S, padding, 512 - S->buflen);
+			S->t[0] -= 440;
+			blake256_update(S, padding + 1, 440);
+			S->nullt = 1;
+		}
+		blake256_update(S, &pb, 8);
+		S->t[0] -= 8;
+	}
+	S->t[0] -= 64;
+	blake256_update(S, msglen, 64);

-    U32TO8(digest +  0, S->h[0]);
-    U32TO8(digest +  4, S->h[1]);
-    U32TO8(digest +  8, S->h[2]);
-    U32TO8(digest + 12, S->h[3]);
-    U32TO8(digest + 16, S->h[4]);
-    U32TO8(digest + 20, S->h[5]);
-    U32TO8(digest + 24, S->h[6]);
-    U32TO8(digest + 28, S->h[7]);
+	U32TO8(digest +  0, S->h[0]);
+	U32TO8(digest +  4, S->h[1]);
+	U32TO8(digest +  8, S->h[2]);
+	U32TO8(digest + 12, S->h[3]);
+	U32TO8(digest + 16, S->h[4]);
+	U32TO8(digest + 20, S->h[5]);
+	U32TO8(digest + 24, S->h[6]);
+	U32TO8(digest + 28, S->h[7]);
 }

-void blake256_final(state *S, uint8_t *digest) {
-    blake256_final_h(S, digest, 0x81, 0x01);
+void blake256_final(state* S, uint8_t* digest)
+{
+	blake256_final_h(S, digest, 0x81, 0x01);
 }

-void blake224_final(state *S, uint8_t *digest) {
-    blake256_final_h(S, digest, 0x80, 0x00);
+void blake224_final(state* S, uint8_t* digest)
+{
+	blake256_final_h(S, digest, 0x80, 0x00);
 }

 // inlen = number of bytes
-void blake256_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
-    state S;
-    blake256_init(&S);
-    blake256_update(&S, in, inlen * 8);
-    blake256_final(&S, out);
+void blake256_hash(uint8_t* out, const uint8_t* in, uint64_t inlen)
+{
+	state S;
+	blake256_init(&S);
+	blake256_update(&S, in, inlen * 8);
+	blake256_final(&S, out);
 }

 // inlen = number of bytes
-void blake224_hash(uint8_t *out, const uint8_t *in, uint64_t inlen) {
-    state S;
-    blake224_init(&S);
-    blake224_update(&S, in, inlen * 8);
-    blake224_final(&S, out);
+void blake224_hash(uint8_t* out, const uint8_t* in, uint64_t inlen)
+{
+	state S;
+	blake224_init(&S);
+	blake224_update(&S, in, inlen * 8);
+	blake224_final(&S, out);
 }

 // keylen = number of bytes
-void hmac_blake256_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
-    const uint8_t *key = _key;
-    uint8_t keyhash[32];
-    uint8_t pad[64];
-    uint64_t i;
+void hmac_blake256_init(hmac_state* S, const uint8_t* _key, uint64_t keylen)
+{
+	const uint8_t* key = _key;
+	uint8_t keyhash[32];
+	uint8_t pad[64];
+	uint64_t i;

-    if (keylen > 64) {
-        blake256_hash(keyhash, key, keylen);
-        key = keyhash;
-        keylen = 32;
-    }
+	if(keylen > 64)
+	{
+		blake256_hash(keyhash, key, keylen);
+		key = keyhash;
+		keylen = 32;
+	}

-    blake256_init(&S->inner);
-    memset(pad, 0x36, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake256_update(&S->inner, pad, 512);
+	blake256_init(&S->inner);
+	memset(pad, 0x36, 64);
+	for(i = 0; i < keylen; ++i)
+	{
+		pad[i] ^= key[i];
+	}
+	blake256_update(&S->inner, pad, 512);

-    blake256_init(&S->outer);
-    memset(pad, 0x5c, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake256_update(&S->outer, pad, 512);
+	blake256_init(&S->outer);
+	memset(pad, 0x5c, 64);
+	for(i = 0; i < keylen; ++i)
+	{
+		pad[i] ^= key[i];
+	}
+	blake256_update(&S->outer, pad, 512);

-    memset(keyhash, 0, 32);
+	memset(keyhash, 0, 32);
 }

 // keylen = number of bytes
-void hmac_blake224_init(hmac_state *S, const uint8_t *_key, uint64_t keylen) {
-    const uint8_t *key = _key;
-    uint8_t keyhash[32];
-    uint8_t pad[64];
-    uint64_t i;
+void hmac_blake224_init(hmac_state* S, const uint8_t* _key, uint64_t keylen)
+{
+	const uint8_t* key = _key;
+	uint8_t keyhash[32];
+	uint8_t pad[64];
+	uint64_t i;

-    if (keylen > 64) {
-        blake256_hash(keyhash, key, keylen);
-        key = keyhash;
-        keylen = 28;
-    }
+	if(keylen > 64)
+	{
+		blake256_hash(keyhash, key, keylen);
+		key = keyhash;
+		keylen = 28;
+	}

-    blake224_init(&S->inner);
-    memset(pad, 0x36, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake224_update(&S->inner, pad, 512);
+	blake224_init(&S->inner);
+	memset(pad, 0x36, 64);
+	for(i = 0; i < keylen; ++i)
+	{
+		pad[i] ^= key[i];
+	}
+	blake224_update(&S->inner, pad, 512);

-    blake224_init(&S->outer);
-    memset(pad, 0x5c, 64);
-    for (i = 0; i < keylen; ++i) {
-        pad[i] ^= key[i];
-    }
-    blake224_update(&S->outer, pad, 512);
+	blake224_init(&S->outer);
+	memset(pad, 0x5c, 64);
+	for(i = 0; i < keylen; ++i)
+	{
+		pad[i] ^= key[i];
+	}
+	blake224_update(&S->outer, pad, 512);

-    memset(keyhash, 0, 32);
+	memset(keyhash, 0, 32);
 }

 // datalen = number of bits
-void hmac_blake256_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
-  // update the inner state
-  blake256_update(&S->inner, data, datalen);
+void hmac_blake256_update(hmac_state* S, const uint8_t* data, uint64_t datalen)
+{
+	// update the inner state
+	blake256_update(&S->inner, data, datalen);
 }

 // datalen = number of bits
-void hmac_blake224_update(hmac_state *S, const uint8_t *data, uint64_t datalen) {
-  // update the inner state
-  blake224_update(&S->inner, data, datalen);
+void hmac_blake224_update(hmac_state* S, const uint8_t* data, uint64_t datalen)
+{
+	// update the inner state
+	blake224_update(&S->inner, data, datalen);
 }

-void hmac_blake256_final(hmac_state *S, uint8_t *digest) {
-    uint8_t ihash[32];
-    blake256_final(&S->inner, ihash);
-    blake256_update(&S->outer, ihash, 256);
-    blake256_final(&S->outer, digest);
-    memset(ihash, 0, 32);
+void hmac_blake256_final(hmac_state* S, uint8_t* digest)
+{
+	uint8_t ihash[32];
+	blake256_final(&S->inner, ihash);
+	blake256_update(&S->outer, ihash, 256);
+	blake256_final(&S->outer, digest);
+	memset(ihash, 0, 32);
 }

-void hmac_blake224_final(hmac_state *S, uint8_t *digest) {
-    uint8_t ihash[32];
-    blake224_final(&S->inner, ihash);
-    blake224_update(&S->outer, ihash, 224);
-    blake224_final(&S->outer, digest);
-    memset(ihash, 0, 32);
+void hmac_blake224_final(hmac_state* S, uint8_t* digest)
+{
+	uint8_t ihash[32];
+	blake224_final(&S->inner, ihash);
+	blake224_update(&S->outer, ihash, 224);
+	blake224_final(&S->outer, digest);
+	memset(ihash, 0, 32);
 }

 // keylen = number of bytes; inlen = number of bytes
-void hmac_blake256_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
-    hmac_state S;
-    hmac_blake256_init(&S, key, keylen);
-    hmac_blake256_update(&S, in, inlen * 8);
-    hmac_blake256_final(&S, out);
+void hmac_blake256_hash(uint8_t* out, const uint8_t* key, uint64_t keylen, const uint8_t* in, uint64_t inlen)
+{
+	hmac_state S;
+	hmac_blake256_init(&S, key, keylen);
+	hmac_blake256_update(&S, in, inlen * 8);
+	hmac_blake256_final(&S, out);
 }

 // keylen = number of bytes; inlen = number of bytes
-void hmac_blake224_hash(uint8_t *out, const uint8_t *key, uint64_t keylen, const uint8_t *in, uint64_t inlen) {
-    hmac_state S;
-    hmac_blake224_init(&S, key, keylen);
-    hmac_blake224_update(&S, in, inlen * 8);
-    hmac_blake224_final(&S, out);
+void hmac_blake224_hash(uint8_t* out, const uint8_t* key, uint64_t keylen, const uint8_t* in, uint64_t inlen)
+{
+	hmac_state S;
+	hmac_blake224_init(&S, key, keylen);
+	hmac_blake224_update(&S, in, inlen * 8);
+	hmac_blake224_final(&S, out);
 }
--- a/src/crypto/c_blake256.h
+++ b/src/crypto/c_blake256.h
@ -3,41 +3,43 @@

 #include <stdint.h>

-typedef struct {
-  uint32_t h[8], s[4], t[2];
-  int buflen, nullt;
-  uint8_t buf[64];
+typedef struct
+{
+	uint32_t h[8], s[4], t[2];
+	int buflen, nullt;
+	uint8_t buf[64];
 } state;

-typedef struct {
-  state inner;
-  state outer;
+typedef struct
+{
+	state inner;
+	state outer;
 } hmac_state;

-void blake256_init(state *);
-void blake224_init(state *);
+void blake256_init(state*);
+void blake224_init(state*);

-void blake256_update(state *, const uint8_t *, uint64_t);
-void blake224_update(state *, const uint8_t *, uint64_t);
+void blake256_update(state*, const uint8_t*, uint64_t);
+void blake224_update(state*, const uint8_t*, uint64_t);

-void blake256_final(state *, uint8_t *);
-void blake224_final(state *, uint8_t *);
+void blake256_final(state*, uint8_t*);
+void blake224_final(state*, uint8_t*);

-void blake256_hash(uint8_t *, const uint8_t *, uint64_t);
-void blake224_hash(uint8_t *, const uint8_t *, uint64_t);
+void blake256_hash(uint8_t*, const uint8_t*, uint64_t);
+void blake224_hash(uint8_t*, const uint8_t*, uint64_t);

 /* HMAC functions: */

-void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t);
-void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t);
+void hmac_blake256_init(hmac_state*, const uint8_t*, uint64_t);
+void hmac_blake224_init(hmac_state*, const uint8_t*, uint64_t);

-void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t);
-void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t);
+void hmac_blake256_update(hmac_state*, const uint8_t*, uint64_t);
+void hmac_blake224_update(hmac_state*, const uint8_t*, uint64_t);

-void hmac_blake256_final(hmac_state *, uint8_t *);
-void hmac_blake224_final(hmac_state *, uint8_t *);
+void hmac_blake256_final(hmac_state*, uint8_t*);
+void hmac_blake224_final(hmac_state*, uint8_t*);

-void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
-void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t);
+void hmac_blake256_hash(uint8_t*, const uint8_t*, uint64_t, const uint8_t*, uint64_t);
+void hmac_blake224_hash(uint8_t*, const uint8_t*, uint64_t, const uint8_t*, uint64_t);

 #endif /* _BLAKE256_H_ */
--- a/src/crypto/c_groestl.c
+++ b/src/crypto/c_groestl.c
@ -4,7 +4,7 @@
 *
 *  This work is based on the implementation of
 *          Soeren S. Thomsen and Krystian Matusiewicz
- *          
+ *
 *
 */

@ -14,178 +14,190 @@
 #define P_TYPE 0
 #define Q_TYPE 1

-const uint8_t shift_Values[2][8] = {{0,1,2,3,4,5,6,7},{1,3,5,7,0,2,4,6}};
+const uint8_t shift_Values[2][8] = {{0, 1, 2, 3, 4, 5, 6, 7}, {1, 3, 5, 7, 0, 2, 4, 6}};

-const uint8_t indices_cyclic[15] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6};
+const uint8_t indices_cyclic[15] = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6};


 #define ROTATE_COLUMN_DOWN(v1, v2, amount_bytes, temp_var) {temp_var = (v1<<(8*amount_bytes))|(v2>>(8*(4-amount_bytes))); \
-															v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
-															v1 = temp_var;}
-  
+		v2 = (v2<<(8*amount_bytes))|(v1>>(8*(4-amount_bytes))); \
+		v1 = temp_var;}
+

 #define COLUMN(x,y,i,c0,c1,c2,c3,c4,c5,c6,c7,tv1,tv2,tu,tl,t)				\
-   tu = T[2*(uint32_t)x[4*c0+0]];			    \
-   tl = T[2*(uint32_t)x[4*c0+0]+1];		    \
-   tv1 = T[2*(uint32_t)x[4*c1+1]];			\
-   tv2 = T[2*(uint32_t)x[4*c1+1]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;						\
-   tv1 = T[2*(uint32_t)x[4*c2+2]];			\
-   tv2 = T[2*(uint32_t)x[4*c2+2]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;   					\
-   tv1 = T[2*(uint32_t)x[4*c3+3]];			\
-   tv2 = T[2*(uint32_t)x[4*c3+3]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
-   tu ^= tv1;						\
-   tl ^= tv2;						\
-   tl ^= T[2*(uint32_t)x[4*c4+0]];			\
-   tu ^= T[2*(uint32_t)x[4*c4+0]+1];			\
-   tv1 = T[2*(uint32_t)x[4*c5+1]];			\
-   tv2 = T[2*(uint32_t)x[4*c5+1]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;						\
-   tv1 = T[2*(uint32_t)x[4*c6+2]];			\
-   tv2 = T[2*(uint32_t)x[4*c6+2]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;   					\
-   tv1 = T[2*(uint32_t)x[4*c7+3]];			\
-   tv2 = T[2*(uint32_t)x[4*c7+3]+1];			\
-   ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
-   tl ^= tv1;						\
-   tu ^= tv2;						\
-   y[i] = tu;						\
-   y[i+1] = tl;
+	tu = T[2*(uint32_t)x[4*c0+0]];			    \
+	tl = T[2*(uint32_t)x[4*c0+0]+1];		    \
+	tv1 = T[2*(uint32_t)x[4*c1+1]];			\
+	tv2 = T[2*(uint32_t)x[4*c1+1]+1];			\
+	ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
+	tu ^= tv1;						\
+	tl ^= tv2;						\
+	tv1 = T[2*(uint32_t)x[4*c2+2]];			\
+	tv2 = T[2*(uint32_t)x[4*c2+2]+1];			\
+	ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
+	tu ^= tv1;						\
+	tl ^= tv2;   					\
+	tv1 = T[2*(uint32_t)x[4*c3+3]];			\
+	tv2 = T[2*(uint32_t)x[4*c3+3]+1];			\
+	ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
+	tu ^= tv1;						\
+	tl ^= tv2;						\
+	tl ^= T[2*(uint32_t)x[4*c4+0]];			\
+	tu ^= T[2*(uint32_t)x[4*c4+0]+1];			\
+	tv1 = T[2*(uint32_t)x[4*c5+1]];			\
+	tv2 = T[2*(uint32_t)x[4*c5+1]+1];			\
+	ROTATE_COLUMN_DOWN(tv1,tv2,1,t)	\
+	tl ^= tv1;						\
+	tu ^= tv2;						\
+	tv1 = T[2*(uint32_t)x[4*c6+2]];			\
+	tv2 = T[2*(uint32_t)x[4*c6+2]+1];			\
+	ROTATE_COLUMN_DOWN(tv1,tv2,2,t)	\
+	tl ^= tv1;						\
+	tu ^= tv2;   					\
+	tv1 = T[2*(uint32_t)x[4*c7+3]];			\
+	tv2 = T[2*(uint32_t)x[4*c7+3]+1];			\
+	ROTATE_COLUMN_DOWN(tv1,tv2,3,t)	\
+	tl ^= tv1;						\
+	tu ^= tv2;						\
+	y[i] = tu;						\
+	y[i+1] = tl;


 /* compute one round of P (short variants) */
-static void RND512P(uint8_t *x, uint32_t *y, uint32_t r) {
-  uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
-  uint32_t* x32 = (uint32_t*)x;
-  x32[ 0] ^= 0x00000000^r;
-  x32[ 2] ^= 0x00000010^r;
-  x32[ 4] ^= 0x00000020^r;
-  x32[ 6] ^= 0x00000030^r;
-  x32[ 8] ^= 0x00000040^r;
-  x32[10] ^= 0x00000050^r;
-  x32[12] ^= 0x00000060^r;
-  x32[14] ^= 0x00000070^r;
-  COLUMN(x,y, 0,  0,  2,  4,  6,  9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 2,  2,  4,  6,  8, 11, 13, 15,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 4,  4,  6,  8, 10, 13, 15,  1,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 6,  6,  8, 10, 12, 15,  1,  3,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 8,  8, 10, 12, 14,  1,  3,  5,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,10, 10, 12, 14,  0,  3,  5,  7,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,12, 12, 14,  0,  2,  5,  7,  9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,14, 14,  0,  2,  4,  7,  9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+static void RND512P(uint8_t* x, uint32_t* y, uint32_t r)
+{
+	uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
+	uint32_t* x32 = (uint32_t*)x;
+	x32[ 0] ^= 0x00000000 ^ r;
+	x32[ 2] ^= 0x00000010 ^ r;
+	x32[ 4] ^= 0x00000020 ^ r;
+	x32[ 6] ^= 0x00000030 ^ r;
+	x32[ 8] ^= 0x00000040 ^ r;
+	x32[10] ^= 0x00000050 ^ r;
+	x32[12] ^= 0x00000060 ^ r;
+	x32[14] ^= 0x00000070 ^ r;
+	COLUMN(x, y, 0,  0,  2,  4,  6,  9, 11, 13, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 2,  2,  4,  6,  8, 11, 13, 15,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 4,  4,  6,  8, 10, 13, 15,  1,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 6,  6,  8, 10, 12, 15,  1,  3,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 8,  8, 10, 12, 14,  1,  3,  5,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 10, 10, 12, 14,  0,  3,  5,  7,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 12, 12, 14,  0,  2,  5,  7,  9, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 14, 14,  0,  2,  4,  7,  9, 11, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
 }

 /* compute one round of Q (short variants) */
-static void RND512Q(uint8_t *x, uint32_t *y, uint32_t r) {
-  uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
-  uint32_t* x32 = (uint32_t*)x;
-  x32[ 0] = ~x32[ 0];
-  x32[ 1] ^= 0xffffffff^r;
-  x32[ 2] = ~x32[ 2];
-  x32[ 3] ^= 0xefffffff^r;
-  x32[ 4] = ~x32[ 4];
-  x32[ 5] ^= 0xdfffffff^r;
-  x32[ 6] = ~x32[ 6];
-  x32[ 7] ^= 0xcfffffff^r;
-  x32[ 8] = ~x32[ 8];
-  x32[ 9] ^= 0xbfffffff^r;
-  x32[10] = ~x32[10];
-  x32[11] ^= 0xafffffff^r;
-  x32[12] = ~x32[12];
-  x32[13] ^= 0x9fffffff^r;
-  x32[14] = ~x32[14];
-  x32[15] ^= 0x8fffffff^r;
-  COLUMN(x,y, 0,  2,  6, 10, 14,  1,  5,  9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 2,  4,  8, 12,  0,  3,  7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 4,  6, 10, 14,  2,  5,  9, 13,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 6,  8, 12,  0,  4,  7, 11, 15,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y, 8, 10, 14,  2,  6,  9, 13,  1,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,10, 12,  0,  4,  8, 11, 15,  3,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,12, 14,  2,  6, 10, 13,  1,  5,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
-  COLUMN(x,y,14,  0,  4,  8, 12, 15,  3,  7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+static void RND512Q(uint8_t* x, uint32_t* y, uint32_t r)
+{
+	uint32_t temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp;
+	uint32_t* x32 = (uint32_t*)x;
+	x32[ 0] = ~x32[ 0];
+	x32[ 1] ^= 0xffffffff ^ r;
+	x32[ 2] = ~x32[ 2];
+	x32[ 3] ^= 0xefffffff ^ r;
+	x32[ 4] = ~x32[ 4];
+	x32[ 5] ^= 0xdfffffff ^ r;
+	x32[ 6] = ~x32[ 6];
+	x32[ 7] ^= 0xcfffffff ^ r;
+	x32[ 8] = ~x32[ 8];
+	x32[ 9] ^= 0xbfffffff ^ r;
+	x32[10] = ~x32[10];
+	x32[11] ^= 0xafffffff ^ r;
+	x32[12] = ~x32[12];
+	x32[13] ^= 0x9fffffff ^ r;
+	x32[14] = ~x32[14];
+	x32[15] ^= 0x8fffffff ^ r;
+	COLUMN(x, y, 0,  2,  6, 10, 14,  1,  5,  9, 13, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 2,  4,  8, 12,  0,  3,  7, 11, 15, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 4,  6, 10, 14,  2,  5,  9, 13,  1, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 6,  8, 12,  0,  4,  7, 11, 15,  3, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 8, 10, 14,  2,  6,  9, 13,  1,  5, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 10, 12,  0,  4,  8, 11, 15,  3,  7, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 12, 14,  2,  6, 10, 13,  1,  5,  9, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
+	COLUMN(x, y, 14,  0,  4,  8, 12, 15,  3,  7, 11, temp_v1, temp_v2, temp_upper_value, temp_lower_value, temp);
 }

 /* compute compression function (short variants) */
-static void F512(uint32_t *h, const uint32_t *m) {
-  int i;
-  uint32_t Ptmp[2*COLS512];
-  uint32_t Qtmp[2*COLS512];
-  uint32_t y[2*COLS512];
-  uint32_t z[2*COLS512];
+static void F512(uint32_t* h, const uint32_t* m)
+{
+	int i;
+	uint32_t Ptmp[2 * COLS512];
+	uint32_t Qtmp[2 * COLS512];
+	uint32_t y[2 * COLS512];
+	uint32_t z[2 * COLS512];

-  for (i = 0; i < 2*COLS512; i++) {
-    z[i] = m[i];
-    Ptmp[i] = h[i]^m[i];
-  }
+	for(i = 0; i < 2 * COLS512; i++)
+	{
+		z[i] = m[i];
+		Ptmp[i] = h[i] ^ m[i];
+	}

-  /* compute Q(m) */
-  RND512Q((uint8_t*)z, y, 0x00000000);
-  RND512Q((uint8_t*)y, z, 0x01000000);
-  RND512Q((uint8_t*)z, y, 0x02000000);
-  RND512Q((uint8_t*)y, z, 0x03000000);
-  RND512Q((uint8_t*)z, y, 0x04000000);
-  RND512Q((uint8_t*)y, z, 0x05000000);
-  RND512Q((uint8_t*)z, y, 0x06000000);
-  RND512Q((uint8_t*)y, z, 0x07000000);
-  RND512Q((uint8_t*)z, y, 0x08000000);
-  RND512Q((uint8_t*)y, Qtmp, 0x09000000);
+	/* compute Q(m) */
+	RND512Q((uint8_t*)z, y, 0x00000000);
+	RND512Q((uint8_t*)y, z, 0x01000000);
+	RND512Q((uint8_t*)z, y, 0x02000000);
+	RND512Q((uint8_t*)y, z, 0x03000000);
+	RND512Q((uint8_t*)z, y, 0x04000000);
+	RND512Q((uint8_t*)y, z, 0x05000000);
+	RND512Q((uint8_t*)z, y, 0x06000000);
+	RND512Q((uint8_t*)y, z, 0x07000000);
+	RND512Q((uint8_t*)z, y, 0x08000000);
+	RND512Q((uint8_t*)y, Qtmp, 0x09000000);

-  /* compute P(h+m) */
-  RND512P((uint8_t*)Ptmp, y, 0x00000000);
-  RND512P((uint8_t*)y, z, 0x00000001);
-  RND512P((uint8_t*)z, y, 0x00000002);
-  RND512P((uint8_t*)y, z, 0x00000003);
-  RND512P((uint8_t*)z, y, 0x00000004);
-  RND512P((uint8_t*)y, z, 0x00000005);
-  RND512P((uint8_t*)z, y, 0x00000006);
-  RND512P((uint8_t*)y, z, 0x00000007);
-  RND512P((uint8_t*)z, y, 0x00000008);
-  RND512P((uint8_t*)y, Ptmp, 0x00000009);
+	/* compute P(h+m) */
+	RND512P((uint8_t*)Ptmp, y, 0x00000000);
+	RND512P((uint8_t*)y, z, 0x00000001);
+	RND512P((uint8_t*)z, y, 0x00000002);
+	RND512P((uint8_t*)y, z, 0x00000003);
+	RND512P((uint8_t*)z, y, 0x00000004);
+	RND512P((uint8_t*)y, z, 0x00000005);
+	RND512P((uint8_t*)z, y, 0x00000006);
+	RND512P((uint8_t*)y, z, 0x00000007);
+	RND512P((uint8_t*)z, y, 0x00000008);
+	RND512P((uint8_t*)y, Ptmp, 0x00000009);

-  /* compute P(h+m) + Q(m) + h */
-  for (i = 0; i < 2*COLS512; i++) {
-    h[i] ^= Ptmp[i]^Qtmp[i];
-  }
+	/* compute P(h+m) + Q(m) + h */
+	for(i = 0; i < 2 * COLS512; i++)
+	{
+		h[i] ^= Ptmp[i] ^ Qtmp[i];
+	}
 }


 /* digest up to msglen bytes of input (full blocks only) */
-static void Transform(groestlHashState *ctx,
-	       const uint8_t *input, 
-	       int msglen) {
+static void Transform(groestlHashState* ctx,
+                      const uint8_t* input,
+                      int msglen)
+{

-  /* digest message, one block at a time */
-  for (; msglen >= SIZE512; 
-       msglen -= SIZE512, input += SIZE512) {
-    F512(ctx->chaining,(uint32_t*)input);
+	/* digest message, one block at a time */
+	for(; msglen >= SIZE512;
+	        msglen -= SIZE512, input += SIZE512)
+	{
+		F512(ctx->chaining, (uint32_t*)input);

-    /* increment block counter */
-    ctx->block_counter1++;
-    if (ctx->block_counter1 == 0) ctx->block_counter2++;
-  }
+		/* increment block counter */
+		ctx->block_counter1++;
+		if(ctx->block_counter1 == 0)
+		{
+			ctx->block_counter2++;
+		}
+	}
 }

 /* given state h, do h <- P(h)+h */
-static void OutputTransformation(groestlHashState *ctx) {
-  int j;
-  uint32_t temp[2*COLS512];
-  uint32_t y[2*COLS512];
-  uint32_t z[2*COLS512];
+static void OutputTransformation(groestlHashState* ctx)
+{
+	int j;
+	uint32_t temp[2 * COLS512];
+	uint32_t y[2 * COLS512];
+	uint32_t z[2 * COLS512];



-	for (j = 0; j < 2*COLS512; j++) {
-	  temp[j] = ctx->chaining[j];
+	for(j = 0; j < 2 * COLS512; j++)
+	{
+		temp[j] = ctx->chaining[j];
 	}
 	RND512P((uint8_t*)temp, y, 0x00000000);
 	RND512P((uint8_t*)y, z, 0x00000001);
@ -197,75 +209,84 @@ static void OutputTransformation(groestlHashState *ctx) {
 	RND512P((uint8_t*)y, z, 0x00000007);
 	RND512P((uint8_t*)z, y, 0x00000008);
 	RND512P((uint8_t*)y, temp, 0x00000009);
-	for (j = 0; j < 2*COLS512; j++) {
-	  ctx->chaining[j] ^= temp[j];
-	}									  
+	for(j = 0; j < 2 * COLS512; j++)
+	{
+		ctx->chaining[j] ^= temp[j];
+	}
 }

 /* initialise context */
-static void Init(groestlHashState* ctx) {
-  int i = 0;
-  /* allocate memory for state and data buffer */
+static void Init(groestlHashState* ctx)
+{
+	int i = 0;
+	/* allocate memory for state and data buffer */

-  for(;i<(SIZE512/sizeof(uint32_t));i++)
-  {
-	ctx->chaining[i] = 0;
-  }
+	for(; i < (SIZE512 / sizeof(uint32_t)); i++)
+	{
+		ctx->chaining[i] = 0;
+	}

-  /* set initial value */
-  ctx->chaining[2*COLS512-1] = u32BIG((uint32_t)HASH_BIT_LEN);
+	/* set initial value */
+	ctx->chaining[2 * COLS512 - 1] = u32BIG((uint32_t)HASH_BIT_LEN);

-  /* set other variables */
-  ctx->buf_ptr = 0;
-  ctx->block_counter1 = 0;
-  ctx->block_counter2 = 0;
-  ctx->bits_in_last_byte = 0;
+	/* set other variables */
+	ctx->buf_ptr = 0;
+	ctx->block_counter1 = 0;
+	ctx->block_counter2 = 0;
+	ctx->bits_in_last_byte = 0;
 }

 /* update state with databitlen bits of input */
 static void Update(groestlHashState* ctx,
-		  const BitSequence* input,
-		  DataLength databitlen) {
-  int index = 0;
-  int msglen = (int)(databitlen/8);
-  int rem = (int)(databitlen%8);
+                   const BitSequence* input,
+                   DataLength databitlen)
+{
+	int index = 0;
+	int msglen = (int)(databitlen / 8);
+	int rem = (int)(databitlen % 8);

-  /* if the buffer contains data that has not yet been digested, first
-     add data to buffer until full */
-  if (ctx->buf_ptr) {
-    while (ctx->buf_ptr < SIZE512 && index < msglen) {
-      ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
-    }
-    if (ctx->buf_ptr < SIZE512) {
-      /* buffer still not full, return */
-      if (rem) {
-	ctx->bits_in_last_byte = rem;
-	ctx->buffer[(int)ctx->buf_ptr++] = input[index];
-      }
-      return;
-    }
+	/* if the buffer contains data that has not yet been digested, first
+	   add data to buffer until full */
+	if(ctx->buf_ptr)
+	{
+		while(ctx->buf_ptr < SIZE512 && index < msglen)
+		{
+			ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
+		}
+		if(ctx->buf_ptr < SIZE512)
+		{
+			/* buffer still not full, return */
+			if(rem)
+			{
+				ctx->bits_in_last_byte = rem;
+				ctx->buffer[(int)ctx->buf_ptr++] = input[index];
+			}
+			return;
+		}

-    /* digest buffer */
-    ctx->buf_ptr = 0;
-    Transform(ctx, ctx->buffer, SIZE512);
-  }
+		/* digest buffer */
+		ctx->buf_ptr = 0;
+		Transform(ctx, ctx->buffer, SIZE512);
+	}

-  /* digest bulk of message */
-  Transform(ctx, input+index, msglen-index);
-  index += ((msglen-index)/SIZE512)*SIZE512;
+	/* digest bulk of message */
+	Transform(ctx, input + index, msglen - index);
+	index += ((msglen - index) / SIZE512) * SIZE512;

-  /* store remaining data in buffer */
-  while (index < msglen) {
-    ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
-  }
+	/* store remaining data in buffer */
+	while(index < msglen)
+	{
+		ctx->buffer[(int)ctx->buf_ptr++] = input[index++];
+	}

-  /* if non-integral number of bytes have been supplied, store
-     remaining bits in last byte, together with information about
-     number of bits */
-  if (rem) {
-    ctx->bits_in_last_byte = rem;
-    ctx->buffer[(int)ctx->buf_ptr++] = input[index];
-  }
+	/* if non-integral number of bytes have been supplied, store
+	   remaining bits in last byte, together with information about
+	   number of bits */
+	if(rem)
+	{
+		ctx->bits_in_last_byte = rem;
+		ctx->buffer[(int)ctx->buf_ptr++] = input[index];
+	}
 }

 #define BILB ctx->bits_in_last_byte
@ -273,80 +294,97 @@ static void Update(groestlHashState* ctx,
 /* finalise: process remaining data (including padding), perform
   output transformation, and write hash result to 'output' */
 static void Final(groestlHashState* ctx,
-		 BitSequence* output) {
-  int i, j = 0, hashbytelen = HASH_BIT_LEN/8;
-  uint8_t *s = (BitSequence*)ctx->chaining;
+                  BitSequence* output)
+{
+	int i, j = 0, hashbytelen = HASH_BIT_LEN / 8;
+	uint8_t* s = (BitSequence*)ctx->chaining;

-  /* pad with '1'-bit and first few '0'-bits */
-  if (BILB) {
-    ctx->buffer[(int)ctx->buf_ptr-1] &= ((1<<BILB)-1)<<(8-BILB);
-    ctx->buffer[(int)ctx->buf_ptr-1] ^= 0x1<<(7-BILB);
-    BILB = 0;
-  }
-  else ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
+	/* pad with '1'-bit and first few '0'-bits */
+	if(BILB)
+	{
+		ctx->buffer[(int)ctx->buf_ptr - 1] &= ((1 << BILB) - 1) << (8 - BILB);
+		ctx->buffer[(int)ctx->buf_ptr - 1] ^= 0x1 << (7 - BILB);
+		BILB = 0;
+	}
+	else
+	{
+		ctx->buffer[(int)ctx->buf_ptr++] = 0x80;
+	}

-  /* pad with '0'-bits */
-  if (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
-    /* padding requires two blocks */
-    while (ctx->buf_ptr < SIZE512) {
-      ctx->buffer[(int)ctx->buf_ptr++] = 0;
-    }
-    /* digest first padding block */
-    Transform(ctx, ctx->buffer, SIZE512);
-    ctx->buf_ptr = 0;
-  }
-  while (ctx->buf_ptr < SIZE512-LENGTHFIELDLEN) {
-    ctx->buffer[(int)ctx->buf_ptr++] = 0;
-  }
+	/* pad with '0'-bits */
+	if(ctx->buf_ptr > SIZE512 - LENGTHFIELDLEN)
+	{
+		/* padding requires two blocks */
+		while(ctx->buf_ptr < SIZE512)
+		{
+			ctx->buffer[(int)ctx->buf_ptr++] = 0;
+		}
+		/* digest first padding block */
+		Transform(ctx, ctx->buffer, SIZE512);
+		ctx->buf_ptr = 0;
+	}
+	while(ctx->buf_ptr < SIZE512 - LENGTHFIELDLEN)
+	{
+		ctx->buffer[(int)ctx->buf_ptr++] = 0;
+	}

-  /* length padding */
-  ctx->block_counter1++;
-  if (ctx->block_counter1 == 0) ctx->block_counter2++;
-  ctx->buf_ptr = SIZE512;
+	/* length padding */
+	ctx->block_counter1++;
+	if(ctx->block_counter1 == 0)
+	{
+		ctx->block_counter2++;
+	}
+	ctx->buf_ptr = SIZE512;

-  while (ctx->buf_ptr > SIZE512-(int)sizeof(uint32_t)) {
-    ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
-    ctx->block_counter1 >>= 8;
-  }
-  while (ctx->buf_ptr > SIZE512-LENGTHFIELDLEN) {
-    ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
-    ctx->block_counter2 >>= 8;
-  }
-  /* digest final padding block */
-  Transform(ctx, ctx->buffer, SIZE512); 
-  /* perform output transformation */
-  OutputTransformation(ctx);
+	while(ctx->buf_ptr > SIZE512 - (int)sizeof(uint32_t))
+	{
+		ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter1;
+		ctx->block_counter1 >>= 8;
+	}
+	while(ctx->buf_ptr > SIZE512 - LENGTHFIELDLEN)
+	{
+		ctx->buffer[(int)--ctx->buf_ptr] = (uint8_t)ctx->block_counter2;
+		ctx->block_counter2 >>= 8;
+	}
+	/* digest final padding block */
+	Transform(ctx, ctx->buffer, SIZE512);
+	/* perform output transformation */
+	OutputTransformation(ctx);

-  /* store hash result in output */
-  for (i = SIZE512-hashbytelen; i < SIZE512; i++,j++) {
-    output[j] = s[i];
-  }
+	/* store hash result in output */
+	for(i = SIZE512 - hashbytelen; i < SIZE512; i++, j++)
+	{
+		output[j] = s[i];
+	}

-  /* zeroise relevant variables and deallocate memory */
-  for (i = 0; i < COLS512; i++) {
-    ctx->chaining[i] = 0;
-  }
-  for (i = 0; i < SIZE512; i++) {
-    ctx->buffer[i] = 0;
-  }
+	/* zeroise relevant variables and deallocate memory */
+	for(i = 0; i < COLS512; i++)
+	{
+		ctx->chaining[i] = 0;
+	}
+	for(i = 0; i < SIZE512; i++)
+	{
+		ctx->buffer[i] = 0;
+	}
 }

 /* hash bit sequence */
-void groestl(const BitSequence* data, 
-		DataLength databitlen,
-		BitSequence* hashval) {
+void groestl(const BitSequence* data,
+             DataLength databitlen,
+             BitSequence* hashval)
+{

-  groestlHashState context;
+	groestlHashState context;

-  /* initialise */
-    Init(&context);
+	/* initialise */
+	Init(&context);


-  /* process message */
-  Update(&context, data, databitlen);
+	/* process message */
+	Update(&context, data, databitlen);

-  /* finalise */
-  Final(&context, hashval);
+	/* finalise */
+	Final(&context, hashval);
 }
 /*
 static int crypto_hash(unsigned char *out,
--- a/src/crypto/c_groestl.h
+++ b/src/crypto/c_groestl.h
@ -4,10 +4,10 @@
 #include "crypto_uint8.h"
 #include "crypto_uint32.h"
 #include "crypto_uint64.h"
-#include "crypto_hash.h" 
+#include "crypto_hash.h"

-typedef crypto_uint8 uint8_t; 
-typedef crypto_uint32 uint32_t; 
+typedef crypto_uint8 uint8_t;
+typedef crypto_uint32 uint32_t;
 typedef crypto_uint64 uint64_t;
 */
 #include <stdint.h>
@ -30,18 +30,19 @@ typedef crypto_uint64 uint64_t;
 #define li_32(h) 0x##h##u
 #define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n)))
 #define u32BIG(a)				\
-  ((ROTL32(a,8) & li_32(00FF00FF)) |		\
-   (ROTL32(a,24) & li_32(FF00FF00)))
+	((ROTL32(a,8) & li_32(00FF00FF)) |		\
+	 (ROTL32(a,24) & li_32(FF00FF00)))


 /* NIST API begin */
-typedef struct {
-  uint32_t chaining[SIZE512/sizeof(uint32_t)];            /* actual state */
-  uint32_t block_counter1,
-  block_counter2;         /* message block counter(s) */
-  BitSequence buffer[SIZE512];      /* data buffer */
-  int buf_ptr;              /* data buffer pointer */
-  int bits_in_last_byte;    /* no. of message bits in last byte of
+typedef struct
+{
+	uint32_t chaining[SIZE512 / sizeof(uint32_t)];          /* actual state */
+	uint32_t block_counter1,
+	         block_counter2;         /* message block counter(s) */
+	BitSequence buffer[SIZE512];      /* data buffer */
+	int buf_ptr;              /* data buffer pointer */
+	int bits_in_last_byte;    /* no. of message bits in last byte of
 			       data buffer */
 } groestlHashState;

--- a/src/crypto/c_jh.c
+++ b/src/crypto/c_jh.c
@ -23,81 +23,86 @@ typedef uint64_t uint64;

 /*define data alignment for different C compilers*/
 #if defined(__GNUC__)
-      #define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
+#define DATA_ALIGN16(x) x __attribute__ ((aligned(16)))
 #else
-      #define DATA_ALIGN16(x) __declspec(align(16)) x
+#define DATA_ALIGN16(x) __declspec(align(16)) x
 #endif


-typedef struct {
+typedef struct
+{
 	int hashbitlen;	   	              /*the message digest size*/
 	unsigned long long databitlen;    /*the message size in bits*/
-	unsigned long long datasize_in_buffer;      /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/
-	DATA_ALIGN16(uint64 x[8][2]);     /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/
+	unsigned long long
+	datasize_in_buffer;      /*the size of the message remained in buffer; assumed to be multiple of 8bits except for the last partial block at the end of the message*/
+	DATA_ALIGN16(uint64
+	             x[8][2]);     /*the 1024-bit state, ( x[i][0] || x[i][1] ) is the ith row of the state in the pseudocode*/
 	unsigned char buffer[64];         /*the 512-bit message block to be hashed;*/
 } hashState;


 /*The initial hash value H(0)*/
-const unsigned char JH224_H0[128]={0x2d,0xfe,0xdd,0x62,0xf9,0x9a,0x98,0xac,0xae,0x7c,0xac,0xd6,0x19,0xd6,0x34,0xe7,0xa4,0x83,0x10,0x5,0xbc,0x30,0x12,0x16,0xb8,0x60,0x38,0xc6,0xc9,0x66,0x14,0x94,0x66,0xd9,0x89,0x9f,0x25,0x80,0x70,0x6f,0xce,0x9e,0xa3,0x1b,0x1d,0x9b,0x1a,0xdc,0x11,0xe8,0x32,0x5f,0x7b,0x36,0x6e,0x10,0xf9,0x94,0x85,0x7f,0x2,0xfa,0x6,0xc1,0x1b,0x4f,0x1b,0x5c,0xd8,0xc8,0x40,0xb3,0x97,0xf6,0xa1,0x7f,0x6e,0x73,0x80,0x99,0xdc,0xdf,0x93,0xa5,0xad,0xea,0xa3,0xd3,0xa4,0x31,0xe8,0xde,0xc9,0x53,0x9a,0x68,0x22,0xb4,0xa9,0x8a,0xec,0x86,0xa1,0xe4,0xd5,0x74,0xac,0x95,0x9c,0xe5,0x6c,0xf0,0x15,0x96,0xd,0xea,0xb5,0xab,0x2b,0xbf,0x96,0x11,0xdc,0xf0,0xdd,0x64,0xea,0x6e};
-const unsigned char JH256_H0[128]={0xeb,0x98,0xa3,0x41,0x2c,0x20,0xd3,0xeb,0x92,0xcd,0xbe,0x7b,0x9c,0xb2,0x45,0xc1,0x1c,0x93,0x51,0x91,0x60,0xd4,0xc7,0xfa,0x26,0x0,0x82,0xd6,0x7e,0x50,0x8a,0x3,0xa4,0x23,0x9e,0x26,0x77,0x26,0xb9,0x45,0xe0,0xfb,0x1a,0x48,0xd4,0x1a,0x94,0x77,0xcd,0xb5,0xab,0x26,0x2,0x6b,0x17,0x7a,0x56,0xf0,0x24,0x42,0xf,0xff,0x2f,0xa8,0x71,0xa3,0x96,0x89,0x7f,0x2e,0x4d,0x75,0x1d,0x14,0x49,0x8,0xf7,0x7d,0xe2,0x62,0x27,0x76,0x95,0xf7,0x76,0x24,0x8f,0x94,0x87,0xd5,0xb6,0x57,0x47,0x80,0x29,0x6c,0x5c,0x5e,0x27,0x2d,0xac,0x8e,0xd,0x6c,0x51,0x84,0x50,0xc6,0x57,0x5,0x7a,0xf,0x7b,0xe4,0xd3,0x67,0x70,0x24,0x12,0xea,0x89,0xe3,0xab,0x13,0xd3,0x1c,0xd7,0x69};
-const unsigned char JH384_H0[128]={0x48,0x1e,0x3b,0xc6,0xd8,0x13,0x39,0x8a,0x6d,0x3b,0x5e,0x89,0x4a,0xde,0x87,0x9b,0x63,0xfa,0xea,0x68,0xd4,0x80,0xad,0x2e,0x33,0x2c,0xcb,0x21,0x48,0xf,0x82,0x67,0x98,0xae,0xc8,0x4d,0x90,0x82,0xb9,0x28,0xd4,0x55,0xea,0x30,0x41,0x11,0x42,0x49,0x36,0xf5,0x55,0xb2,0x92,0x48,0x47,0xec,0xc7,0x25,0xa,0x93,0xba,0xf4,0x3c,0xe1,0x56,0x9b,0x7f,0x8a,0x27,0xdb,0x45,0x4c,0x9e,0xfc,0xbd,0x49,0x63,0x97,0xaf,0xe,0x58,0x9f,0xc2,0x7d,0x26,0xaa,0x80,0xcd,0x80,0xc0,0x8b,0x8c,0x9d,0xeb,0x2e,0xda,0x8a,0x79,0x81,0xe8,0xf8,0xd5,0x37,0x3a,0xf4,0x39,0x67,0xad,0xdd,0xd1,0x7a,0x71,0xa9,0xb4,0xd3,0xbd,0xa4,0x75,0xd3,0x94,0x97,0x6c,0x3f,0xba,0x98,0x42,0x73,0x7f};
-const unsigned char JH512_H0[128]={0x6f,0xd1,0x4b,0x96,0x3e,0x0,0xaa,0x17,0x63,0x6a,0x2e,0x5,0x7a,0x15,0xd5,0x43,0x8a,0x22,0x5e,0x8d,0xc,0x97,0xef,0xb,0xe9,0x34,0x12,0x59,0xf2,0xb3,0xc3,0x61,0x89,0x1d,0xa0,0xc1,0x53,0x6f,0x80,0x1e,0x2a,0xa9,0x5,0x6b,0xea,0x2b,0x6d,0x80,0x58,0x8e,0xcc,0xdb,0x20,0x75,0xba,0xa6,0xa9,0xf,0x3a,0x76,0xba,0xf8,0x3b,0xf7,0x1,0x69,0xe6,0x5,0x41,0xe3,0x4a,0x69,0x46,0xb5,0x8a,0x8e,0x2e,0x6f,0xe6,0x5a,0x10,0x47,0xa7,0xd0,0xc1,0x84,0x3c,0x24,0x3b,0x6e,0x71,0xb1,0x2d,0x5a,0xc1,0x99,0xcf,0x57,0xf6,0xec,0x9d,0xb1,0xf8,0x56,0xa7,0x6,0x88,0x7c,0x57,0x16,0xb1,0x56,0xe3,0xc2,0xfc,0xdf,0xe6,0x85,0x17,0xfb,0x54,0x5a,0x46,0x78,0xcc,0x8c,0xdd,0x4b};
+const unsigned char JH224_H0[128] = {0x2d, 0xfe, 0xdd, 0x62, 0xf9, 0x9a, 0x98, 0xac, 0xae, 0x7c, 0xac, 0xd6, 0x19, 0xd6, 0x34, 0xe7, 0xa4, 0x83, 0x10, 0x5, 0xbc, 0x30, 0x12, 0x16, 0xb8, 0x60, 0x38, 0xc6, 0xc9, 0x66, 0x14, 0x94, 0x66, 0xd9, 0x89, 0x9f, 0x25, 0x80, 0x70, 0x6f, 0xce, 0x9e, 0xa3, 0x1b, 0x1d, 0x9b, 0x1a, 0xdc, 0x11, 0xe8, 0x32, 0x5f, 0x7b, 0x36, 0x6e, 0x10, 0xf9, 0x94, 0x85, 0x7f, 0x2, 0xfa, 0x6, 0xc1, 0x1b, 0x4f, 0x1b, 0x5c, 0xd8, 0xc8, 0x40, 0xb3, 0x97, 0xf6, 0xa1, 0x7f, 0x6e, 0x73, 0x80, 0x99, 0xdc, 0xdf, 0x93, 0xa5, 0xad, 0xea, 0xa3, 0xd3, 0xa4, 0x31, 0xe8, 0xde, 0xc9, 0x53, 0x9a, 0x68, 0x22, 0xb4, 0xa9, 0x8a, 0xec, 0x86, 0xa1, 0xe4, 0xd5, 0x74, 0xac, 0x95, 0x9c, 0xe5, 0x6c, 0xf0, 0x15, 0x96, 0xd, 0xea, 0xb5, 0xab, 0x2b, 0xbf, 0x96, 0x11, 0xdc, 0xf0, 0xdd, 0x64, 0xea, 0x6e};
+const unsigned char JH256_H0[128] = {0xeb, 0x98, 0xa3, 0x41, 0x2c, 0x20, 0xd3, 0xeb, 0x92, 0xcd, 0xbe, 0x7b, 0x9c, 0xb2, 0x45, 0xc1, 0x1c, 0x93, 0x51, 0x91, 0x60, 0xd4, 0xc7, 0xfa, 0x26, 0x0, 0x82, 0xd6, 0x7e, 0x50, 0x8a, 0x3, 0xa4, 0x23, 0x9e, 0x26, 0x77, 0x26, 0xb9, 0x45, 0xe0, 0xfb, 0x1a, 0x48, 0xd4, 0x1a, 0x94, 0x77, 0xcd, 0xb5, 0xab, 0x26, 0x2, 0x6b, 0x17, 0x7a, 0x56, 0xf0, 0x24, 0x42, 0xf, 0xff, 0x2f, 0xa8, 0x71, 0xa3, 0x96, 0x89, 0x7f, 0x2e, 0x4d, 0x75, 0x1d, 0x14, 0x49, 0x8, 0xf7, 0x7d, 0xe2, 0x62, 0x27, 0x76, 0x95, 0xf7, 0x76, 0x24, 0x8f, 0x94, 0x87, 0xd5, 0xb6, 0x57, 0x47, 0x80, 0x29, 0x6c, 0x5c, 0x5e, 0x27, 0x2d, 0xac, 0x8e, 0xd, 0x6c, 0x51, 0x84, 0x50, 0xc6, 0x57, 0x5, 0x7a, 0xf, 0x7b, 0xe4, 0xd3, 0x67, 0x70, 0x24, 0x12, 0xea, 0x89, 0xe3, 0xab, 0x13, 0xd3, 0x1c, 0xd7, 0x69};
+const unsigned char JH384_H0[128] = {0x48, 0x1e, 0x3b, 0xc6, 0xd8, 0x13, 0x39, 0x8a, 0x6d, 0x3b, 0x5e, 0x89, 0x4a, 0xde, 0x87, 0x9b, 0x63, 0xfa, 0xea, 0x68, 0xd4, 0x80, 0xad, 0x2e, 0x33, 0x2c, 0xcb, 0x21, 0x48, 0xf, 0x82, 0x67, 0x98, 0xae, 0xc8, 0x4d, 0x90, 0x82, 0xb9, 0x28, 0xd4, 0x55, 0xea, 0x30, 0x41, 0x11, 0x42, 0x49, 0x36, 0xf5, 0x55, 0xb2, 0x92, 0x48, 0x47, 0xec, 0xc7, 0x25, 0xa, 0x93, 0xba, 0xf4, 0x3c, 0xe1, 0x56, 0x9b, 0x7f, 0x8a, 0x27, 0xdb, 0x45, 0x4c, 0x9e, 0xfc, 0xbd, 0x49, 0x63, 0x97, 0xaf, 0xe, 0x58, 0x9f, 0xc2, 0x7d, 0x26, 0xaa, 0x80, 0xcd, 0x80, 0xc0, 0x8b, 0x8c, 0x9d, 0xeb, 0x2e, 0xda, 0x8a, 0x79, 0x81, 0xe8, 0xf8, 0xd5, 0x37, 0x3a, 0xf4, 0x39, 0x67, 0xad, 0xdd, 0xd1, 0x7a, 0x71, 0xa9, 0xb4, 0xd3, 0xbd, 0xa4, 0x75, 0xd3, 0x94, 0x97, 0x6c, 0x3f, 0xba, 0x98, 0x42, 0x73, 0x7f};
+const unsigned char JH512_H0[128] = {0x6f, 0xd1, 0x4b, 0x96, 0x3e, 0x0, 0xaa, 0x17, 0x63, 0x6a, 0x2e, 0x5, 0x7a, 0x15, 0xd5, 0x43, 0x8a, 0x22, 0x5e, 0x8d, 0xc, 0x97, 0xef, 0xb, 0xe9, 0x34, 0x12, 0x59, 0xf2, 0xb3, 0xc3, 0x61, 0x89, 0x1d, 0xa0, 0xc1, 0x53, 0x6f, 0x80, 0x1e, 0x2a, 0xa9, 0x5, 0x6b, 0xea, 0x2b, 0x6d, 0x80, 0x58, 0x8e, 0xcc, 0xdb, 0x20, 0x75, 0xba, 0xa6, 0xa9, 0xf, 0x3a, 0x76, 0xba, 0xf8, 0x3b, 0xf7, 0x1, 0x69, 0xe6, 0x5, 0x41, 0xe3, 0x4a, 0x69, 0x46, 0xb5, 0x8a, 0x8e, 0x2e, 0x6f, 0xe6, 0x5a, 0x10, 0x47, 0xa7, 0xd0, 0xc1, 0x84, 0x3c, 0x24, 0x3b, 0x6e, 0x71, 0xb1, 0x2d, 0x5a, 0xc1, 0x99, 0xcf, 0x57, 0xf6, 0xec, 0x9d, 0xb1, 0xf8, 0x56, 0xa7, 0x6, 0x88, 0x7c, 0x57, 0x16, 0xb1, 0x56, 0xe3, 0xc2, 0xfc, 0xdf, 0xe6, 0x85, 0x17, 0xfb, 0x54, 0x5a, 0x46, 0x78, 0xcc, 0x8c, 0xdd, 0x4b};

 /*42 round constants, each round constant is 32-byte (256-bit)*/
-const unsigned char E8_bitslice_roundconstant[42][32]={
-{0x72,0xd5,0xde,0xa2,0xdf,0x15,0xf8,0x67,0x7b,0x84,0x15,0xa,0xb7,0x23,0x15,0x57,0x81,0xab,0xd6,0x90,0x4d,0x5a,0x87,0xf6,0x4e,0x9f,0x4f,0xc5,0xc3,0xd1,0x2b,0x40},
-{0xea,0x98,0x3a,0xe0,0x5c,0x45,0xfa,0x9c,0x3,0xc5,0xd2,0x99,0x66,0xb2,0x99,0x9a,0x66,0x2,0x96,0xb4,0xf2,0xbb,0x53,0x8a,0xb5,0x56,0x14,0x1a,0x88,0xdb,0xa2,0x31},
-{0x3,0xa3,0x5a,0x5c,0x9a,0x19,0xe,0xdb,0x40,0x3f,0xb2,0xa,0x87,0xc1,0x44,0x10,0x1c,0x5,0x19,0x80,0x84,0x9e,0x95,0x1d,0x6f,0x33,0xeb,0xad,0x5e,0xe7,0xcd,0xdc},
-{0x10,0xba,0x13,0x92,0x2,0xbf,0x6b,0x41,0xdc,0x78,0x65,0x15,0xf7,0xbb,0x27,0xd0,0xa,0x2c,0x81,0x39,0x37,0xaa,0x78,0x50,0x3f,0x1a,0xbf,0xd2,0x41,0x0,0x91,0xd3},
-{0x42,0x2d,0x5a,0xd,0xf6,0xcc,0x7e,0x90,0xdd,0x62,0x9f,0x9c,0x92,0xc0,0x97,0xce,0x18,0x5c,0xa7,0xb,0xc7,0x2b,0x44,0xac,0xd1,0xdf,0x65,0xd6,0x63,0xc6,0xfc,0x23},
-{0x97,0x6e,0x6c,0x3,0x9e,0xe0,0xb8,0x1a,0x21,0x5,0x45,0x7e,0x44,0x6c,0xec,0xa8,0xee,0xf1,0x3,0xbb,0x5d,0x8e,0x61,0xfa,0xfd,0x96,0x97,0xb2,0x94,0x83,0x81,0x97},
-{0x4a,0x8e,0x85,0x37,0xdb,0x3,0x30,0x2f,0x2a,0x67,0x8d,0x2d,0xfb,0x9f,0x6a,0x95,0x8a,0xfe,0x73,0x81,0xf8,0xb8,0x69,0x6c,0x8a,0xc7,0x72,0x46,0xc0,0x7f,0x42,0x14},
-{0xc5,0xf4,0x15,0x8f,0xbd,0xc7,0x5e,0xc4,0x75,0x44,0x6f,0xa7,0x8f,0x11,0xbb,0x80,0x52,0xde,0x75,0xb7,0xae,0xe4,0x88,0xbc,0x82,0xb8,0x0,0x1e,0x98,0xa6,0xa3,0xf4},
-{0x8e,0xf4,0x8f,0x33,0xa9,0xa3,0x63,0x15,0xaa,0x5f,0x56,0x24,0xd5,0xb7,0xf9,0x89,0xb6,0xf1,0xed,0x20,0x7c,0x5a,0xe0,0xfd,0x36,0xca,0xe9,0x5a,0x6,0x42,0x2c,0x36},
-{0xce,0x29,0x35,0x43,0x4e,0xfe,0x98,0x3d,0x53,0x3a,0xf9,0x74,0x73,0x9a,0x4b,0xa7,0xd0,0xf5,0x1f,0x59,0x6f,0x4e,0x81,0x86,0xe,0x9d,0xad,0x81,0xaf,0xd8,0x5a,0x9f},
-{0xa7,0x5,0x6,0x67,0xee,0x34,0x62,0x6a,0x8b,0xb,0x28,0xbe,0x6e,0xb9,0x17,0x27,0x47,0x74,0x7,0x26,0xc6,0x80,0x10,0x3f,0xe0,0xa0,0x7e,0x6f,0xc6,0x7e,0x48,0x7b},
-{0xd,0x55,0xa,0xa5,0x4a,0xf8,0xa4,0xc0,0x91,0xe3,0xe7,0x9f,0x97,0x8e,0xf1,0x9e,0x86,0x76,0x72,0x81,0x50,0x60,0x8d,0xd4,0x7e,0x9e,0x5a,0x41,0xf3,0xe5,0xb0,0x62},
-{0xfc,0x9f,0x1f,0xec,0x40,0x54,0x20,0x7a,0xe3,0xe4,0x1a,0x0,0xce,0xf4,0xc9,0x84,0x4f,0xd7,0x94,0xf5,0x9d,0xfa,0x95,0xd8,0x55,0x2e,0x7e,0x11,0x24,0xc3,0x54,0xa5},
-{0x5b,0xdf,0x72,0x28,0xbd,0xfe,0x6e,0x28,0x78,0xf5,0x7f,0xe2,0xf,0xa5,0xc4,0xb2,0x5,0x89,0x7c,0xef,0xee,0x49,0xd3,0x2e,0x44,0x7e,0x93,0x85,0xeb,0x28,0x59,0x7f},
-{0x70,0x5f,0x69,0x37,0xb3,0x24,0x31,0x4a,0x5e,0x86,0x28,0xf1,0x1d,0xd6,0xe4,0x65,0xc7,0x1b,0x77,0x4,0x51,0xb9,0x20,0xe7,0x74,0xfe,0x43,0xe8,0x23,0xd4,0x87,0x8a},
-{0x7d,0x29,0xe8,0xa3,0x92,0x76,0x94,0xf2,0xdd,0xcb,0x7a,0x9,0x9b,0x30,0xd9,0xc1,0x1d,0x1b,0x30,0xfb,0x5b,0xdc,0x1b,0xe0,0xda,0x24,0x49,0x4f,0xf2,0x9c,0x82,0xbf},
-{0xa4,0xe7,0xba,0x31,0xb4,0x70,0xbf,0xff,0xd,0x32,0x44,0x5,0xde,0xf8,0xbc,0x48,0x3b,0xae,0xfc,0x32,0x53,0xbb,0xd3,0x39,0x45,0x9f,0xc3,0xc1,0xe0,0x29,0x8b,0xa0},
-{0xe5,0xc9,0x5,0xfd,0xf7,0xae,0x9,0xf,0x94,0x70,0x34,0x12,0x42,0x90,0xf1,0x34,0xa2,0x71,0xb7,0x1,0xe3,0x44,0xed,0x95,0xe9,0x3b,0x8e,0x36,0x4f,0x2f,0x98,0x4a},
-{0x88,0x40,0x1d,0x63,0xa0,0x6c,0xf6,0x15,0x47,0xc1,0x44,0x4b,0x87,0x52,0xaf,0xff,0x7e,0xbb,0x4a,0xf1,0xe2,0xa,0xc6,0x30,0x46,0x70,0xb6,0xc5,0xcc,0x6e,0x8c,0xe6},
-{0xa4,0xd5,0xa4,0x56,0xbd,0x4f,0xca,0x0,0xda,0x9d,0x84,0x4b,0xc8,0x3e,0x18,0xae,0x73,0x57,0xce,0x45,0x30,0x64,0xd1,0xad,0xe8,0xa6,0xce,0x68,0x14,0x5c,0x25,0x67},
-{0xa3,0xda,0x8c,0xf2,0xcb,0xe,0xe1,0x16,0x33,0xe9,0x6,0x58,0x9a,0x94,0x99,0x9a,0x1f,0x60,0xb2,0x20,0xc2,0x6f,0x84,0x7b,0xd1,0xce,0xac,0x7f,0xa0,0xd1,0x85,0x18},
-{0x32,0x59,0x5b,0xa1,0x8d,0xdd,0x19,0xd3,0x50,0x9a,0x1c,0xc0,0xaa,0xa5,0xb4,0x46,0x9f,0x3d,0x63,0x67,0xe4,0x4,0x6b,0xba,0xf6,0xca,0x19,0xab,0xb,0x56,0xee,0x7e},
-{0x1f,0xb1,0x79,0xea,0xa9,0x28,0x21,0x74,0xe9,0xbd,0xf7,0x35,0x3b,0x36,0x51,0xee,0x1d,0x57,0xac,0x5a,0x75,0x50,0xd3,0x76,0x3a,0x46,0xc2,0xfe,0xa3,0x7d,0x70,0x1},
-{0xf7,0x35,0xc1,0xaf,0x98,0xa4,0xd8,0x42,0x78,0xed,0xec,0x20,0x9e,0x6b,0x67,0x79,0x41,0x83,0x63,0x15,0xea,0x3a,0xdb,0xa8,0xfa,0xc3,0x3b,0x4d,0x32,0x83,0x2c,0x83},
-{0xa7,0x40,0x3b,0x1f,0x1c,0x27,0x47,0xf3,0x59,0x40,0xf0,0x34,0xb7,0x2d,0x76,0x9a,0xe7,0x3e,0x4e,0x6c,0xd2,0x21,0x4f,0xfd,0xb8,0xfd,0x8d,0x39,0xdc,0x57,0x59,0xef},
-{0x8d,0x9b,0xc,0x49,0x2b,0x49,0xeb,0xda,0x5b,0xa2,0xd7,0x49,0x68,0xf3,0x70,0xd,0x7d,0x3b,0xae,0xd0,0x7a,0x8d,0x55,0x84,0xf5,0xa5,0xe9,0xf0,0xe4,0xf8,0x8e,0x65},
-{0xa0,0xb8,0xa2,0xf4,0x36,0x10,0x3b,0x53,0xc,0xa8,0x7,0x9e,0x75,0x3e,0xec,0x5a,0x91,0x68,0x94,0x92,0x56,0xe8,0x88,0x4f,0x5b,0xb0,0x5c,0x55,0xf8,0xba,0xbc,0x4c},
-{0xe3,0xbb,0x3b,0x99,0xf3,0x87,0x94,0x7b,0x75,0xda,0xf4,0xd6,0x72,0x6b,0x1c,0x5d,0x64,0xae,0xac,0x28,0xdc,0x34,0xb3,0x6d,0x6c,0x34,0xa5,0x50,0xb8,0x28,0xdb,0x71},
-{0xf8,0x61,0xe2,0xf2,0x10,0x8d,0x51,0x2a,0xe3,0xdb,0x64,0x33,0x59,0xdd,0x75,0xfc,0x1c,0xac,0xbc,0xf1,0x43,0xce,0x3f,0xa2,0x67,0xbb,0xd1,0x3c,0x2,0xe8,0x43,0xb0},
-{0x33,0xa,0x5b,0xca,0x88,0x29,0xa1,0x75,0x7f,0x34,0x19,0x4d,0xb4,0x16,0x53,0x5c,0x92,0x3b,0x94,0xc3,0xe,0x79,0x4d,0x1e,0x79,0x74,0x75,0xd7,0xb6,0xee,0xaf,0x3f},
-{0xea,0xa8,0xd4,0xf7,0xbe,0x1a,0x39,0x21,0x5c,0xf4,0x7e,0x9,0x4c,0x23,0x27,0x51,0x26,0xa3,0x24,0x53,0xba,0x32,0x3c,0xd2,0x44,0xa3,0x17,0x4a,0x6d,0xa6,0xd5,0xad},
-{0xb5,0x1d,0x3e,0xa6,0xaf,0xf2,0xc9,0x8,0x83,0x59,0x3d,0x98,0x91,0x6b,0x3c,0x56,0x4c,0xf8,0x7c,0xa1,0x72,0x86,0x60,0x4d,0x46,0xe2,0x3e,0xcc,0x8,0x6e,0xc7,0xf6},
-{0x2f,0x98,0x33,0xb3,0xb1,0xbc,0x76,0x5e,0x2b,0xd6,0x66,0xa5,0xef,0xc4,0xe6,0x2a,0x6,0xf4,0xb6,0xe8,0xbe,0xc1,0xd4,0x36,0x74,0xee,0x82,0x15,0xbc,0xef,0x21,0x63},
-{0xfd,0xc1,0x4e,0xd,0xf4,0x53,0xc9,0x69,0xa7,0x7d,0x5a,0xc4,0x6,0x58,0x58,0x26,0x7e,0xc1,0x14,0x16,0x6,0xe0,0xfa,0x16,0x7e,0x90,0xaf,0x3d,0x28,0x63,0x9d,0x3f},
-{0xd2,0xc9,0xf2,0xe3,0x0,0x9b,0xd2,0xc,0x5f,0xaa,0xce,0x30,0xb7,0xd4,0xc,0x30,0x74,0x2a,0x51,0x16,0xf2,0xe0,0x32,0x98,0xd,0xeb,0x30,0xd8,0xe3,0xce,0xf8,0x9a},
-{0x4b,0xc5,0x9e,0x7b,0xb5,0xf1,0x79,0x92,0xff,0x51,0xe6,0x6e,0x4,0x86,0x68,0xd3,0x9b,0x23,0x4d,0x57,0xe6,0x96,0x67,0x31,0xcc,0xe6,0xa6,0xf3,0x17,0xa,0x75,0x5},
-{0xb1,0x76,0x81,0xd9,0x13,0x32,0x6c,0xce,0x3c,0x17,0x52,0x84,0xf8,0x5,0xa2,0x62,0xf4,0x2b,0xcb,0xb3,0x78,0x47,0x15,0x47,0xff,0x46,0x54,0x82,0x23,0x93,0x6a,0x48},
-{0x38,0xdf,0x58,0x7,0x4e,0x5e,0x65,0x65,0xf2,0xfc,0x7c,0x89,0xfc,0x86,0x50,0x8e,0x31,0x70,0x2e,0x44,0xd0,0xb,0xca,0x86,0xf0,0x40,0x9,0xa2,0x30,0x78,0x47,0x4e},
-{0x65,0xa0,0xee,0x39,0xd1,0xf7,0x38,0x83,0xf7,0x5e,0xe9,0x37,0xe4,0x2c,0x3a,0xbd,0x21,0x97,0xb2,0x26,0x1,0x13,0xf8,0x6f,0xa3,0x44,0xed,0xd1,0xef,0x9f,0xde,0xe7},
-{0x8b,0xa0,0xdf,0x15,0x76,0x25,0x92,0xd9,0x3c,0x85,0xf7,0xf6,0x12,0xdc,0x42,0xbe,0xd8,0xa7,0xec,0x7c,0xab,0x27,0xb0,0x7e,0x53,0x8d,0x7d,0xda,0xaa,0x3e,0xa8,0xde},
-{0xaa,0x25,0xce,0x93,0xbd,0x2,0x69,0xd8,0x5a,0xf6,0x43,0xfd,0x1a,0x73,0x8,0xf9,0xc0,0x5f,0xef,0xda,0x17,0x4a,0x19,0xa5,0x97,0x4d,0x66,0x33,0x4c,0xfd,0x21,0x6a},
-{0x35,0xb4,0x98,0x31,0xdb,0x41,0x15,0x70,0xea,0x1e,0xf,0xbb,0xed,0xcd,0x54,0x9b,0x9a,0xd0,0x63,0xa1,0x51,0x97,0x40,0x72,0xf6,0x75,0x9d,0xbf,0x91,0x47,0x6f,0xe2}};
+const unsigned char E8_bitslice_roundconstant[42][32] =
+{
+	{0x72, 0xd5, 0xde, 0xa2, 0xdf, 0x15, 0xf8, 0x67, 0x7b, 0x84, 0x15, 0xa, 0xb7, 0x23, 0x15, 0x57, 0x81, 0xab, 0xd6, 0x90, 0x4d, 0x5a, 0x87, 0xf6, 0x4e, 0x9f, 0x4f, 0xc5, 0xc3, 0xd1, 0x2b, 0x40},
+	{0xea, 0x98, 0x3a, 0xe0, 0x5c, 0x45, 0xfa, 0x9c, 0x3, 0xc5, 0xd2, 0x99, 0x66, 0xb2, 0x99, 0x9a, 0x66, 0x2, 0x96, 0xb4, 0xf2, 0xbb, 0x53, 0x8a, 0xb5, 0x56, 0x14, 0x1a, 0x88, 0xdb, 0xa2, 0x31},
+	{0x3, 0xa3, 0x5a, 0x5c, 0x9a, 0x19, 0xe, 0xdb, 0x40, 0x3f, 0xb2, 0xa, 0x87, 0xc1, 0x44, 0x10, 0x1c, 0x5, 0x19, 0x80, 0x84, 0x9e, 0x95, 0x1d, 0x6f, 0x33, 0xeb, 0xad, 0x5e, 0xe7, 0xcd, 0xdc},
+	{0x10, 0xba, 0x13, 0x92, 0x2, 0xbf, 0x6b, 0x41, 0xdc, 0x78, 0x65, 0x15, 0xf7, 0xbb, 0x27, 0xd0, 0xa, 0x2c, 0x81, 0x39, 0x37, 0xaa, 0x78, 0x50, 0x3f, 0x1a, 0xbf, 0xd2, 0x41, 0x0, 0x91, 0xd3},
+	{0x42, 0x2d, 0x5a, 0xd, 0xf6, 0xcc, 0x7e, 0x90, 0xdd, 0x62, 0x9f, 0x9c, 0x92, 0xc0, 0x97, 0xce, 0x18, 0x5c, 0xa7, 0xb, 0xc7, 0x2b, 0x44, 0xac, 0xd1, 0xdf, 0x65, 0xd6, 0x63, 0xc6, 0xfc, 0x23},
+	{0x97, 0x6e, 0x6c, 0x3, 0x9e, 0xe0, 0xb8, 0x1a, 0x21, 0x5, 0x45, 0x7e, 0x44, 0x6c, 0xec, 0xa8, 0xee, 0xf1, 0x3, 0xbb, 0x5d, 0x8e, 0x61, 0xfa, 0xfd, 0x96, 0x97, 0xb2, 0x94, 0x83, 0x81, 0x97},
+	{0x4a, 0x8e, 0x85, 0x37, 0xdb, 0x3, 0x30, 0x2f, 0x2a, 0x67, 0x8d, 0x2d, 0xfb, 0x9f, 0x6a, 0x95, 0x8a, 0xfe, 0x73, 0x81, 0xf8, 0xb8, 0x69, 0x6c, 0x8a, 0xc7, 0x72, 0x46, 0xc0, 0x7f, 0x42, 0x14},
+	{0xc5, 0xf4, 0x15, 0x8f, 0xbd, 0xc7, 0x5e, 0xc4, 0x75, 0x44, 0x6f, 0xa7, 0x8f, 0x11, 0xbb, 0x80, 0x52, 0xde, 0x75, 0xb7, 0xae, 0xe4, 0x88, 0xbc, 0x82, 0xb8, 0x0, 0x1e, 0x98, 0xa6, 0xa3, 0xf4},
+	{0x8e, 0xf4, 0x8f, 0x33, 0xa9, 0xa3, 0x63, 0x15, 0xaa, 0x5f, 0x56, 0x24, 0xd5, 0xb7, 0xf9, 0x89, 0xb6, 0xf1, 0xed, 0x20, 0x7c, 0x5a, 0xe0, 0xfd, 0x36, 0xca, 0xe9, 0x5a, 0x6, 0x42, 0x2c, 0x36},
+	{0xce, 0x29, 0x35, 0x43, 0x4e, 0xfe, 0x98, 0x3d, 0x53, 0x3a, 0xf9, 0x74, 0x73, 0x9a, 0x4b, 0xa7, 0xd0, 0xf5, 0x1f, 0x59, 0x6f, 0x4e, 0x81, 0x86, 0xe, 0x9d, 0xad, 0x81, 0xaf, 0xd8, 0x5a, 0x9f},
+	{0xa7, 0x5, 0x6, 0x67, 0xee, 0x34, 0x62, 0x6a, 0x8b, 0xb, 0x28, 0xbe, 0x6e, 0xb9, 0x17, 0x27, 0x47, 0x74, 0x7, 0x26, 0xc6, 0x80, 0x10, 0x3f, 0xe0, 0xa0, 0x7e, 0x6f, 0xc6, 0x7e, 0x48, 0x7b},
+	{0xd, 0x55, 0xa, 0xa5, 0x4a, 0xf8, 0xa4, 0xc0, 0x91, 0xe3, 0xe7, 0x9f, 0x97, 0x8e, 0xf1, 0x9e, 0x86, 0x76, 0x72, 0x81, 0x50, 0x60, 0x8d, 0xd4, 0x7e, 0x9e, 0x5a, 0x41, 0xf3, 0xe5, 0xb0, 0x62},
+	{0xfc, 0x9f, 0x1f, 0xec, 0x40, 0x54, 0x20, 0x7a, 0xe3, 0xe4, 0x1a, 0x0, 0xce, 0xf4, 0xc9, 0x84, 0x4f, 0xd7, 0x94, 0xf5, 0x9d, 0xfa, 0x95, 0xd8, 0x55, 0x2e, 0x7e, 0x11, 0x24, 0xc3, 0x54, 0xa5},
+	{0x5b, 0xdf, 0x72, 0x28, 0xbd, 0xfe, 0x6e, 0x28, 0x78, 0xf5, 0x7f, 0xe2, 0xf, 0xa5, 0xc4, 0xb2, 0x5, 0x89, 0x7c, 0xef, 0xee, 0x49, 0xd3, 0x2e, 0x44, 0x7e, 0x93, 0x85, 0xeb, 0x28, 0x59, 0x7f},
+	{0x70, 0x5f, 0x69, 0x37, 0xb3, 0x24, 0x31, 0x4a, 0x5e, 0x86, 0x28, 0xf1, 0x1d, 0xd6, 0xe4, 0x65, 0xc7, 0x1b, 0x77, 0x4, 0x51, 0xb9, 0x20, 0xe7, 0x74, 0xfe, 0x43, 0xe8, 0x23, 0xd4, 0x87, 0x8a},
+	{0x7d, 0x29, 0xe8, 0xa3, 0x92, 0x76, 0x94, 0xf2, 0xdd, 0xcb, 0x7a, 0x9, 0x9b, 0x30, 0xd9, 0xc1, 0x1d, 0x1b, 0x30, 0xfb, 0x5b, 0xdc, 0x1b, 0xe0, 0xda, 0x24, 0x49, 0x4f, 0xf2, 0x9c, 0x82, 0xbf},
+	{0xa4, 0xe7, 0xba, 0x31, 0xb4, 0x70, 0xbf, 0xff, 0xd, 0x32, 0x44, 0x5, 0xde, 0xf8, 0xbc, 0x48, 0x3b, 0xae, 0xfc, 0x32, 0x53, 0xbb, 0xd3, 0x39, 0x45, 0x9f, 0xc3, 0xc1, 0xe0, 0x29, 0x8b, 0xa0},
+	{0xe5, 0xc9, 0x5, 0xfd, 0xf7, 0xae, 0x9, 0xf, 0x94, 0x70, 0x34, 0x12, 0x42, 0x90, 0xf1, 0x34, 0xa2, 0x71, 0xb7, 0x1, 0xe3, 0x44, 0xed, 0x95, 0xe9, 0x3b, 0x8e, 0x36, 0x4f, 0x2f, 0x98, 0x4a},
+	{0x88, 0x40, 0x1d, 0x63, 0xa0, 0x6c, 0xf6, 0x15, 0x47, 0xc1, 0x44, 0x4b, 0x87, 0x52, 0xaf, 0xff, 0x7e, 0xbb, 0x4a, 0xf1, 0xe2, 0xa, 0xc6, 0x30, 0x46, 0x70, 0xb6, 0xc5, 0xcc, 0x6e, 0x8c, 0xe6},
+	{0xa4, 0xd5, 0xa4, 0x56, 0xbd, 0x4f, 0xca, 0x0, 0xda, 0x9d, 0x84, 0x4b, 0xc8, 0x3e, 0x18, 0xae, 0x73, 0x57, 0xce, 0x45, 0x30, 0x64, 0xd1, 0xad, 0xe8, 0xa6, 0xce, 0x68, 0x14, 0x5c, 0x25, 0x67},
+	{0xa3, 0xda, 0x8c, 0xf2, 0xcb, 0xe, 0xe1, 0x16, 0x33, 0xe9, 0x6, 0x58, 0x9a, 0x94, 0x99, 0x9a, 0x1f, 0x60, 0xb2, 0x20, 0xc2, 0x6f, 0x84, 0x7b, 0xd1, 0xce, 0xac, 0x7f, 0xa0, 0xd1, 0x85, 0x18},
+	{0x32, 0x59, 0x5b, 0xa1, 0x8d, 0xdd, 0x19, 0xd3, 0x50, 0x9a, 0x1c, 0xc0, 0xaa, 0xa5, 0xb4, 0x46, 0x9f, 0x3d, 0x63, 0x67, 0xe4, 0x4, 0x6b, 0xba, 0xf6, 0xca, 0x19, 0xab, 0xb, 0x56, 0xee, 0x7e},
+	{0x1f, 0xb1, 0x79, 0xea, 0xa9, 0x28, 0x21, 0x74, 0xe9, 0xbd, 0xf7, 0x35, 0x3b, 0x36, 0x51, 0xee, 0x1d, 0x57, 0xac, 0x5a, 0x75, 0x50, 0xd3, 0x76, 0x3a, 0x46, 0xc2, 0xfe, 0xa3, 0x7d, 0x70, 0x1},
+	{0xf7, 0x35, 0xc1, 0xaf, 0x98, 0xa4, 0xd8, 0x42, 0x78, 0xed, 0xec, 0x20, 0x9e, 0x6b, 0x67, 0x79, 0x41, 0x83, 0x63, 0x15, 0xea, 0x3a, 0xdb, 0xa8, 0xfa, 0xc3, 0x3b, 0x4d, 0x32, 0x83, 0x2c, 0x83},
+	{0xa7, 0x40, 0x3b, 0x1f, 0x1c, 0x27, 0x47, 0xf3, 0x59, 0x40, 0xf0, 0x34, 0xb7, 0x2d, 0x76, 0x9a, 0xe7, 0x3e, 0x4e, 0x6c, 0xd2, 0x21, 0x4f, 0xfd, 0xb8, 0xfd, 0x8d, 0x39, 0xdc, 0x57, 0x59, 0xef},
+	{0x8d, 0x9b, 0xc, 0x49, 0x2b, 0x49, 0xeb, 0xda, 0x5b, 0xa2, 0xd7, 0x49, 0x68, 0xf3, 0x70, 0xd, 0x7d, 0x3b, 0xae, 0xd0, 0x7a, 0x8d, 0x55, 0x84, 0xf5, 0xa5, 0xe9, 0xf0, 0xe4, 0xf8, 0x8e, 0x65},
+	{0xa0, 0xb8, 0xa2, 0xf4, 0x36, 0x10, 0x3b, 0x53, 0xc, 0xa8, 0x7, 0x9e, 0x75, 0x3e, 0xec, 0x5a, 0x91, 0x68, 0x94, 0x92, 0x56, 0xe8, 0x88, 0x4f, 0x5b, 0xb0, 0x5c, 0x55, 0xf8, 0xba, 0xbc, 0x4c},
+	{0xe3, 0xbb, 0x3b, 0x99, 0xf3, 0x87, 0x94, 0x7b, 0x75, 0xda, 0xf4, 0xd6, 0x72, 0x6b, 0x1c, 0x5d, 0x64, 0xae, 0xac, 0x28, 0xdc, 0x34, 0xb3, 0x6d, 0x6c, 0x34, 0xa5, 0x50, 0xb8, 0x28, 0xdb, 0x71},
+	{0xf8, 0x61, 0xe2, 0xf2, 0x10, 0x8d, 0x51, 0x2a, 0xe3, 0xdb, 0x64, 0x33, 0x59, 0xdd, 0x75, 0xfc, 0x1c, 0xac, 0xbc, 0xf1, 0x43, 0xce, 0x3f, 0xa2, 0x67, 0xbb, 0xd1, 0x3c, 0x2, 0xe8, 0x43, 0xb0},
+	{0x33, 0xa, 0x5b, 0xca, 0x88, 0x29, 0xa1, 0x75, 0x7f, 0x34, 0x19, 0x4d, 0xb4, 0x16, 0x53, 0x5c, 0x92, 0x3b, 0x94, 0xc3, 0xe, 0x79, 0x4d, 0x1e, 0x79, 0x74, 0x75, 0xd7, 0xb6, 0xee, 0xaf, 0x3f},
+	{0xea, 0xa8, 0xd4, 0xf7, 0xbe, 0x1a, 0x39, 0x21, 0x5c, 0xf4, 0x7e, 0x9, 0x4c, 0x23, 0x27, 0x51, 0x26, 0xa3, 0x24, 0x53, 0xba, 0x32, 0x3c, 0xd2, 0x44, 0xa3, 0x17, 0x4a, 0x6d, 0xa6, 0xd5, 0xad},
+	{0xb5, 0x1d, 0x3e, 0xa6, 0xaf, 0xf2, 0xc9, 0x8, 0x83, 0x59, 0x3d, 0x98, 0x91, 0x6b, 0x3c, 0x56, 0x4c, 0xf8, 0x7c, 0xa1, 0x72, 0x86, 0x60, 0x4d, 0x46, 0xe2, 0x3e, 0xcc, 0x8, 0x6e, 0xc7, 0xf6},
+	{0x2f, 0x98, 0x33, 0xb3, 0xb1, 0xbc, 0x76, 0x5e, 0x2b, 0xd6, 0x66, 0xa5, 0xef, 0xc4, 0xe6, 0x2a, 0x6, 0xf4, 0xb6, 0xe8, 0xbe, 0xc1, 0xd4, 0x36, 0x74, 0xee, 0x82, 0x15, 0xbc, 0xef, 0x21, 0x63},
+	{0xfd, 0xc1, 0x4e, 0xd, 0xf4, 0x53, 0xc9, 0x69, 0xa7, 0x7d, 0x5a, 0xc4, 0x6, 0x58, 0x58, 0x26, 0x7e, 0xc1, 0x14, 0x16, 0x6, 0xe0, 0xfa, 0x16, 0x7e, 0x90, 0xaf, 0x3d, 0x28, 0x63, 0x9d, 0x3f},
+	{0xd2, 0xc9, 0xf2, 0xe3, 0x0, 0x9b, 0xd2, 0xc, 0x5f, 0xaa, 0xce, 0x30, 0xb7, 0xd4, 0xc, 0x30, 0x74, 0x2a, 0x51, 0x16, 0xf2, 0xe0, 0x32, 0x98, 0xd, 0xeb, 0x30, 0xd8, 0xe3, 0xce, 0xf8, 0x9a},
+	{0x4b, 0xc5, 0x9e, 0x7b, 0xb5, 0xf1, 0x79, 0x92, 0xff, 0x51, 0xe6, 0x6e, 0x4, 0x86, 0x68, 0xd3, 0x9b, 0x23, 0x4d, 0x57, 0xe6, 0x96, 0x67, 0x31, 0xcc, 0xe6, 0xa6, 0xf3, 0x17, 0xa, 0x75, 0x5},
+	{0xb1, 0x76, 0x81, 0xd9, 0x13, 0x32, 0x6c, 0xce, 0x3c, 0x17, 0x52, 0x84, 0xf8, 0x5, 0xa2, 0x62, 0xf4, 0x2b, 0xcb, 0xb3, 0x78, 0x47, 0x15, 0x47, 0xff, 0x46, 0x54, 0x82, 0x23, 0x93, 0x6a, 0x48},
+	{0x38, 0xdf, 0x58, 0x7, 0x4e, 0x5e, 0x65, 0x65, 0xf2, 0xfc, 0x7c, 0x89, 0xfc, 0x86, 0x50, 0x8e, 0x31, 0x70, 0x2e, 0x44, 0xd0, 0xb, 0xca, 0x86, 0xf0, 0x40, 0x9, 0xa2, 0x30, 0x78, 0x47, 0x4e},
+	{0x65, 0xa0, 0xee, 0x39, 0xd1, 0xf7, 0x38, 0x83, 0xf7, 0x5e, 0xe9, 0x37, 0xe4, 0x2c, 0x3a, 0xbd, 0x21, 0x97, 0xb2, 0x26, 0x1, 0x13, 0xf8, 0x6f, 0xa3, 0x44, 0xed, 0xd1, 0xef, 0x9f, 0xde, 0xe7},
+	{0x8b, 0xa0, 0xdf, 0x15, 0x76, 0x25, 0x92, 0xd9, 0x3c, 0x85, 0xf7, 0xf6, 0x12, 0xdc, 0x42, 0xbe, 0xd8, 0xa7, 0xec, 0x7c, 0xab, 0x27, 0xb0, 0x7e, 0x53, 0x8d, 0x7d, 0xda, 0xaa, 0x3e, 0xa8, 0xde},
+	{0xaa, 0x25, 0xce, 0x93, 0xbd, 0x2, 0x69, 0xd8, 0x5a, 0xf6, 0x43, 0xfd, 0x1a, 0x73, 0x8, 0xf9, 0xc0, 0x5f, 0xef, 0xda, 0x17, 0x4a, 0x19, 0xa5, 0x97, 0x4d, 0x66, 0x33, 0x4c, 0xfd, 0x21, 0x6a},
+	{0x35, 0xb4, 0x98, 0x31, 0xdb, 0x41, 0x15, 0x70, 0xea, 0x1e, 0xf, 0xbb, 0xed, 0xcd, 0x54, 0x9b, 0x9a, 0xd0, 0x63, 0xa1, 0x51, 0x97, 0x40, 0x72, 0xf6, 0x75, 0x9d, 0xbf, 0x91, 0x47, 0x6f, 0xe2}
+};


-static void E8(hashState *state);  /*The bijective function E8, in bitslice form*/
-static void F8(hashState *state);  /*The compression function F8 */
+static void E8(hashState* state);  /*The bijective function E8, in bitslice form*/
+static void F8(hashState* state);  /*The compression function F8 */

 /*The API functions*/
-static HashReturn Init(hashState *state, int hashbitlen);
-static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
-static HashReturn Final(hashState *state, BitSequence *hashval);
-HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval);
+static HashReturn Init(hashState* state, int hashbitlen);
+static HashReturn Update(hashState* state, const BitSequence* data, DataLength databitlen);
+static HashReturn Final(hashState* state, BitSequence* hashval);
+HashReturn jh_hash(int hashbitlen, const BitSequence* data, DataLength databitlen, BitSequence* hashval);

 /*swapping bit 2i with bit 2i+1 of 64-bit x*/
 #define SWAP1(x)   (x) = ((((x) & 0x5555555555555555ULL) << 1) | (((x) & 0xaaaaaaaaaaaaaaaaULL) >> 1));
@ -114,254 +119,350 @@ HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen

 /*The MDS transform*/
 #define L(m0,m1,m2,m3,m4,m5,m6,m7) \
-      (m4) ^= (m1);                \
-      (m5) ^= (m2);                \
-      (m6) ^= (m0) ^ (m3);         \
-      (m7) ^= (m0);                \
-      (m0) ^= (m5);                \
-      (m1) ^= (m6);                \
-      (m2) ^= (m4) ^ (m7);         \
-      (m3) ^= (m4);
+	(m4) ^= (m1);                \
+	(m5) ^= (m2);                \
+	(m6) ^= (m0) ^ (m3);         \
+	(m7) ^= (m0);                \
+	(m0) ^= (m5);                \
+	(m1) ^= (m6);                \
+	(m2) ^= (m4) ^ (m7);         \
+	(m3) ^= (m4);

 /*Two Sboxes are computed in parallel, each Sbox implements S0 and S1, selected by a constant bit*/
 /*The reason to compute two Sboxes in parallel is to try to fully utilize the parallel processing power*/
 #define SS(m0,m1,m2,m3,m4,m5,m6,m7,cc0,cc1)   \
-      m3  = ~(m3);                  \
-      m7  = ~(m7);                  \
-      m0 ^= ((~(m2)) & (cc0));      \
-      m4 ^= ((~(m6)) & (cc1));      \
-      temp0 = (cc0) ^ ((m0) & (m1));\
-      temp1 = (cc1) ^ ((m4) & (m5));\
-      m0 ^= ((m2) & (m3));          \
-      m4 ^= ((m6) & (m7));          \
-      m3 ^= ((~(m1)) & (m2));       \
-      m7 ^= ((~(m5)) & (m6));       \
-      m1 ^= ((m0) & (m2));          \
-      m5 ^= ((m4) & (m6));          \
-      m2 ^= ((m0) & (~(m3)));       \
-      m6 ^= ((m4) & (~(m7)));       \
-      m0 ^= ((m1) | (m3));          \
-      m4 ^= ((m5) | (m7));          \
-      m3 ^= ((m1) & (m2));          \
-      m7 ^= ((m5) & (m6));          \
-      m1 ^= (temp0 & (m0));         \
-      m5 ^= (temp1 & (m4));         \
-      m2 ^= temp0;                  \
-      m6 ^= temp1;
+	m3  = ~(m3);                  \
+	m7  = ~(m7);                  \
+	m0 ^= ((~(m2)) & (cc0));      \
+	m4 ^= ((~(m6)) & (cc1));      \
+	temp0 = (cc0) ^ ((m0) & (m1));\
+	temp1 = (cc1) ^ ((m4) & (m5));\
+	m0 ^= ((m2) & (m3));          \
+	m4 ^= ((m6) & (m7));          \
+	m3 ^= ((~(m1)) & (m2));       \
+	m7 ^= ((~(m5)) & (m6));       \
+	m1 ^= ((m0) & (m2));          \
+	m5 ^= ((m4) & (m6));          \
+	m2 ^= ((m0) & (~(m3)));       \
+	m6 ^= ((m4) & (~(m7)));       \
+	m0 ^= ((m1) | (m3));          \
+	m4 ^= ((m5) | (m7));          \
+	m3 ^= ((m1) & (m2));          \
+	m7 ^= ((m5) & (m6));          \
+	m1 ^= (temp0 & (m0));         \
+	m5 ^= (temp1 & (m4));         \
+	m2 ^= temp0;                  \
+	m6 ^= temp1;

 /*The bijective function E8, in bitslice form*/
-static void E8(hashState *state)
+static void E8(hashState* state)
 {
-      uint64 i,roundnumber,temp0,temp1;
+	uint64 i, roundnumber, temp0, temp1;

-      for (roundnumber = 0; roundnumber < 42; roundnumber = roundnumber+7) {
-            /*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+0])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP1(state->x[1][i]); SWAP1(state->x[3][i]); SWAP1(state->x[5][i]); SWAP1(state->x[7][i]);
-            }
+	for(roundnumber = 0; roundnumber < 42; roundnumber = roundnumber + 7)
+	{
+		/*round 7*roundnumber+0: Sbox, MDS and Swapping layers*/
+		for(i = 0; i < 2; i++)
+		{
+			SS(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			   state->x[5][i], state->x[7][i], ((uint64*)E8_bitslice_roundconstant[roundnumber + 0])[i],
+			   ((uint64*)E8_bitslice_roundconstant[roundnumber + 0])[i + 2]);
+			L(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			  state->x[5][i], state->x[7][i]);
+			SWAP1(state->x[1][i]);
+			SWAP1(state->x[3][i]);
+			SWAP1(state->x[5][i]);
+			SWAP1(state->x[7][i]);
+		}

-            /*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+1])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP2(state->x[1][i]); SWAP2(state->x[3][i]); SWAP2(state->x[5][i]); SWAP2(state->x[7][i]);
-            }
+		/*round 7*roundnumber+1: Sbox, MDS and Swapping layers*/
+		for(i = 0; i < 2; i++)
+		{
+			SS(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			   state->x[5][i], state->x[7][i], ((uint64*)E8_bitslice_roundconstant[roundnumber + 1])[i],
+			   ((uint64*)E8_bitslice_roundconstant[roundnumber + 1])[i + 2]);
+			L(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			  state->x[5][i], state->x[7][i]);
+			SWAP2(state->x[1][i]);
+			SWAP2(state->x[3][i]);
+			SWAP2(state->x[5][i]);
+			SWAP2(state->x[7][i]);
+		}

-            /*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+2])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP4(state->x[1][i]); SWAP4(state->x[3][i]); SWAP4(state->x[5][i]); SWAP4(state->x[7][i]);
-            }
+		/*round 7*roundnumber+2: Sbox, MDS and Swapping layers*/
+		for(i = 0; i < 2; i++)
+		{
+			SS(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			   state->x[5][i], state->x[7][i], ((uint64*)E8_bitslice_roundconstant[roundnumber + 2])[i],
+			   ((uint64*)E8_bitslice_roundconstant[roundnumber + 2])[i + 2]);
+			L(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			  state->x[5][i], state->x[7][i]);
+			SWAP4(state->x[1][i]);
+			SWAP4(state->x[3][i]);
+			SWAP4(state->x[5][i]);
+			SWAP4(state->x[7][i]);
+		}

-            /*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+3])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP8(state->x[1][i]); SWAP8(state->x[3][i]); SWAP8(state->x[5][i]); SWAP8(state->x[7][i]);
-            }
+		/*round 7*roundnumber+3: Sbox, MDS and Swapping layers*/
+		for(i = 0; i < 2; i++)
+		{
+			SS(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			   state->x[5][i], state->x[7][i], ((uint64*)E8_bitslice_roundconstant[roundnumber + 3])[i],
+			   ((uint64*)E8_bitslice_roundconstant[roundnumber + 3])[i + 2]);
+			L(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			  state->x[5][i], state->x[7][i]);
+			SWAP8(state->x[1][i]);
+			SWAP8(state->x[3][i]);
+			SWAP8(state->x[5][i]);
+			SWAP8(state->x[7][i]);
+		}

-            /*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+4])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP16(state->x[1][i]); SWAP16(state->x[3][i]); SWAP16(state->x[5][i]); SWAP16(state->x[7][i]);
-            }
+		/*round 7*roundnumber+4: Sbox, MDS and Swapping layers*/
+		for(i = 0; i < 2; i++)
+		{
+			SS(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			   state->x[5][i], state->x[7][i], ((uint64*)E8_bitslice_roundconstant[roundnumber + 4])[i],
+			   ((uint64*)E8_bitslice_roundconstant[roundnumber + 4])[i + 2]);
+			L(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			  state->x[5][i], state->x[7][i]);
+			SWAP16(state->x[1][i]);
+			SWAP16(state->x[3][i]);
+			SWAP16(state->x[5][i]);
+			SWAP16(state->x[7][i]);
+		}

-            /*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+5])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-                  SWAP32(state->x[1][i]); SWAP32(state->x[3][i]); SWAP32(state->x[5][i]); SWAP32(state->x[7][i]);
-            }
+		/*round 7*roundnumber+5: Sbox, MDS and Swapping layers*/
+		for(i = 0; i < 2; i++)
+		{
+			SS(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			   state->x[5][i], state->x[7][i], ((uint64*)E8_bitslice_roundconstant[roundnumber + 5])[i],
+			   ((uint64*)E8_bitslice_roundconstant[roundnumber + 5])[i + 2]);
+			L(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			  state->x[5][i], state->x[7][i]);
+			SWAP32(state->x[1][i]);
+			SWAP32(state->x[3][i]);
+			SWAP32(state->x[5][i]);
+			SWAP32(state->x[7][i]);
+		}

-            /*round 7*roundnumber+6: Sbox and MDS layers*/
-            for (i = 0; i < 2; i++) {
-                  SS(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i],((uint64*)E8_bitslice_roundconstant[roundnumber+6])[i+2] );
-                  L(state->x[0][i],state->x[2][i],state->x[4][i],state->x[6][i],state->x[1][i],state->x[3][i],state->x[5][i],state->x[7][i]);
-            }
-            /*round 7*roundnumber+6: swapping layer*/
-            for (i = 1; i < 8; i = i+2) {
-                  temp0 = state->x[i][0]; state->x[i][0] = state->x[i][1]; state->x[i][1] = temp0;
-            }
-      }
+		/*round 7*roundnumber+6: Sbox and MDS layers*/
+		for(i = 0; i < 2; i++)
+		{
+			SS(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			   state->x[5][i], state->x[7][i], ((uint64*)E8_bitslice_roundconstant[roundnumber + 6])[i],
+			   ((uint64*)E8_bitslice_roundconstant[roundnumber + 6])[i + 2]);
+			L(state->x[0][i], state->x[2][i], state->x[4][i], state->x[6][i], state->x[1][i], state->x[3][i],
+			  state->x[5][i], state->x[7][i]);
+		}
+		/*round 7*roundnumber+6: swapping layer*/
+		for(i = 1; i < 8; i = i + 2)
+		{
+			temp0 = state->x[i][0];
+			state->x[i][0] = state->x[i][1];
+			state->x[i][1] = temp0;
+		}
+	}

 }

 /*The compression function F8 */
-static void F8(hashState *state)
+static void F8(hashState* state)
 {
-      uint64  i;
+	uint64  i;

-      /*xor the 512-bit message with the fist half of the 1024-bit hash state*/
-      for (i = 0; i < 8; i++)  state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
+	/*xor the 512-bit message with the fist half of the 1024-bit hash state*/
+	for(i = 0; i < 8; i++)
+	{
+		state->x[i >> 1][i & 1] ^= ((uint64*)state->buffer)[i];
+	}

-      /*the bijective function E8 */
-      E8(state);
+	/*the bijective function E8 */
+	E8(state);

-      /*xor the 512-bit message with the second half of the 1024-bit hash state*/
-      for (i = 0; i < 8; i++)  state->x[(8+i) >> 1][(8+i) & 1] ^= ((uint64*)state->buffer)[i];
+	/*xor the 512-bit message with the second half of the 1024-bit hash state*/
+	for(i = 0; i < 8; i++)
+	{
+		state->x[(8 + i) >> 1][(8 + i) & 1] ^= ((uint64*)state->buffer)[i];
+	}
 }

 /*before hashing a message, initialize the hash state as H0 */
-static HashReturn Init(hashState *state, int hashbitlen)
+static HashReturn Init(hashState* state, int hashbitlen)
 {
-	  state->databitlen = 0;
-	  state->datasize_in_buffer = 0;
+	state->databitlen = 0;
+	state->datasize_in_buffer = 0;

-      /*initialize the initial hash value of JH*/
-      state->hashbitlen = hashbitlen;
+	/*initialize the initial hash value of JH*/
+	state->hashbitlen = hashbitlen;

-      /*load the intital hash value into state*/
-      switch (hashbitlen)
-      {
-            case 224: memcpy(state->x,JH224_H0,128); break;
-            case 256: memcpy(state->x,JH256_H0,128); break;
-            case 384: memcpy(state->x,JH384_H0,128); break;
-            case 512: memcpy(state->x,JH512_H0,128); break;
-      }
+	/*load the intital hash value into state*/
+	switch(hashbitlen)
+	{
+	case 224:
+		memcpy(state->x, JH224_H0, 128);
+		break;
+	case 256:
+		memcpy(state->x, JH256_H0, 128);
+		break;
+	case 384:
+		memcpy(state->x, JH384_H0, 128);
+		break;
+	case 512:
+		memcpy(state->x, JH512_H0, 128);
+		break;
+	}

-      return(SUCCESS);
+	return(SUCCESS);
 }


 /*hash each 512-bit message block, except the last partial block*/
-static HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
+static HashReturn Update(hashState* state, const BitSequence* data, DataLength databitlen)
 {
-      DataLength index; /*the starting address of the data to be compressed*/
+	DataLength index; /*the starting address of the data to be compressed*/

-      state->databitlen += databitlen;
-      index = 0;
+	state->databitlen += databitlen;
+	index = 0;

-      /*if there is remaining data in the buffer, fill it to a full message block first*/
-      /*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/
+	/*if there is remaining data in the buffer, fill it to a full message block first*/
+	/*we assume that the size of the data in the buffer is the multiple of 8 bits if it is not at the end of a message*/

-      /*There is data in the buffer, but the incoming data is insufficient for a full block*/
-      if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) < 512)  ) {
-            if ( (databitlen & 7) == 0 ) {
-                 memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)) ;
-		    }
-            else memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3)+1) ;
-            state->datasize_in_buffer += databitlen;
-            databitlen = 0;
-      }
+	/*There is data in the buffer, but the incoming data is insufficient for a full block*/
+	if((state->datasize_in_buffer > 0) && ((state->datasize_in_buffer + databitlen) < 512))
+	{
+		if((databitlen & 7) == 0)
+		{
+			memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64 - (state->datasize_in_buffer >> 3)) ;
+		}
+		else
+		{
+			memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64 - (state->datasize_in_buffer >> 3) + 1) ;
+		}
+		state->datasize_in_buffer += databitlen;
+		databitlen = 0;
+	}

-      /*There is data in the buffer, and the incoming data is sufficient for a full block*/
-      if ( (state->datasize_in_buffer > 0 ) && (( state->datasize_in_buffer + databitlen) >= 512)  ) {
-	        memcpy( state->buffer + (state->datasize_in_buffer >> 3), data, 64-(state->datasize_in_buffer >> 3) ) ;
-	        index = 64-(state->datasize_in_buffer >> 3);
-	        databitlen = databitlen - (512 - state->datasize_in_buffer);
-	        F8(state);
-	        state->datasize_in_buffer = 0;
-      }
+	/*There is data in the buffer, and the incoming data is sufficient for a full block*/
+	if((state->datasize_in_buffer > 0) && ((state->datasize_in_buffer + databitlen) >= 512))
+	{
+		memcpy(state->buffer + (state->datasize_in_buffer >> 3), data, 64 - (state->datasize_in_buffer >> 3)) ;
+		index = 64 - (state->datasize_in_buffer >> 3);
+		databitlen = databitlen - (512 - state->datasize_in_buffer);
+		F8(state);
+		state->datasize_in_buffer = 0;
+	}

-      /*hash the remaining full message blocks*/
-      for ( ; databitlen >= 512; index = index+64, databitlen = databitlen - 512) {
-            memcpy(state->buffer, data+index, 64);
-            F8(state);
-      }
+	/*hash the remaining full message blocks*/
+	for(; databitlen >= 512; index = index + 64, databitlen = databitlen - 512)
+	{
+		memcpy(state->buffer, data + index, 64);
+		F8(state);
+	}

-      /*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/
-      if ( databitlen > 0) {
-            if ((databitlen & 7) == 0)
-                  memcpy(state->buffer, data+index, (databitlen & 0x1ff) >> 3);
-            else
-                  memcpy(state->buffer, data+index, ((databitlen & 0x1ff) >> 3)+1);
-            state->datasize_in_buffer = databitlen;
-      }
+	/*store the partial block into buffer, assume that -- if part of the last byte is not part of the message, then that part consists of 0 bits*/
+	if(databitlen > 0)
+	{
+		if((databitlen & 7) == 0)
+		{
+			memcpy(state->buffer, data + index, (databitlen & 0x1ff) >> 3);
+		}
+		else
+		{
+			memcpy(state->buffer, data + index, ((databitlen & 0x1ff) >> 3) + 1);
+		}
+		state->datasize_in_buffer = databitlen;
+	}

-      return(SUCCESS);
+	return(SUCCESS);
 }

 /*pad the message, process the padded block(s), truncate the hash value H to obtain the message digest*/
-static HashReturn Final(hashState *state, BitSequence *hashval)
+static HashReturn Final(hashState* state, BitSequence* hashval)
 {
-      unsigned int i;
+	unsigned int i;

-      if ( (state->databitlen & 0x1ff) == 0 ) {
-            /*pad the message when databitlen is multiple of 512 bits, then process the padded block*/
-            memset(state->buffer, 0, 64);
-            state->buffer[0]  = 0x80;
-            state->buffer[63] = state->databitlen & 0xff;
-            state->buffer[62] = (state->databitlen >> 8)  & 0xff;
-            state->buffer[61] = (state->databitlen >> 16) & 0xff;
-            state->buffer[60] = (state->databitlen >> 24) & 0xff;
-            state->buffer[59] = (state->databitlen >> 32) & 0xff;
-            state->buffer[58] = (state->databitlen >> 40) & 0xff;
-            state->buffer[57] = (state->databitlen >> 48) & 0xff;
-            state->buffer[56] = (state->databitlen >> 56) & 0xff;
-            F8(state);
-      }
-      else {
-		    /*set the rest of the bytes in the buffer to 0*/
-            if ( (state->datasize_in_buffer & 7) == 0)
-                  for (i = (state->databitlen & 0x1ff) >> 3; i < 64; i++)  state->buffer[i] = 0;
-            else
-                  for (i = ((state->databitlen & 0x1ff) >> 3)+1; i < 64; i++)  state->buffer[i] = 0;
+	if((state->databitlen & 0x1ff) == 0)
+	{
+		/*pad the message when databitlen is multiple of 512 bits, then process the padded block*/
+		memset(state->buffer, 0, 64);
+		state->buffer[0]  = 0x80;
+		state->buffer[63] = state->databitlen & 0xff;
+		state->buffer[62] = (state->databitlen >> 8)  & 0xff;
+		state->buffer[61] = (state->databitlen >> 16) & 0xff;
+		state->buffer[60] = (state->databitlen >> 24) & 0xff;
+		state->buffer[59] = (state->databitlen >> 32) & 0xff;
+		state->buffer[58] = (state->databitlen >> 40) & 0xff;
+		state->buffer[57] = (state->databitlen >> 48) & 0xff;
+		state->buffer[56] = (state->databitlen >> 56) & 0xff;
+		F8(state);
+	}
+	else
+	{
+		/*set the rest of the bytes in the buffer to 0*/
+		if((state->datasize_in_buffer & 7) == 0)
+			for(i = (state->databitlen & 0x1ff) >> 3; i < 64; i++)
+			{
+				state->buffer[i] = 0;
+			}
+		else
+			for(i = ((state->databitlen & 0x1ff) >> 3) + 1; i < 64; i++)
+			{
+				state->buffer[i] = 0;
+			}

-            /*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
-            state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7- (state->databitlen & 7));
+		/*pad and process the partial block when databitlen is not multiple of 512 bits, then hash the padded blocks*/
+		state->buffer[((state->databitlen & 0x1ff) >> 3)] |= 1 << (7 - (state->databitlen & 7));

-            F8(state);
-            memset(state->buffer, 0, 64);
-            state->buffer[63] = state->databitlen & 0xff;
-            state->buffer[62] = (state->databitlen >> 8) & 0xff;
-            state->buffer[61] = (state->databitlen >> 16) & 0xff;
-            state->buffer[60] = (state->databitlen >> 24) & 0xff;
-            state->buffer[59] = (state->databitlen >> 32) & 0xff;
-            state->buffer[58] = (state->databitlen >> 40) & 0xff;
-            state->buffer[57] = (state->databitlen >> 48) & 0xff;
-            state->buffer[56] = (state->databitlen >> 56) & 0xff;
-            F8(state);
-      }
+		F8(state);
+		memset(state->buffer, 0, 64);
+		state->buffer[63] = state->databitlen & 0xff;
+		state->buffer[62] = (state->databitlen >> 8) & 0xff;
+		state->buffer[61] = (state->databitlen >> 16) & 0xff;
+		state->buffer[60] = (state->databitlen >> 24) & 0xff;
+		state->buffer[59] = (state->databitlen >> 32) & 0xff;
+		state->buffer[58] = (state->databitlen >> 40) & 0xff;
+		state->buffer[57] = (state->databitlen >> 48) & 0xff;
+		state->buffer[56] = (state->databitlen >> 56) & 0xff;
+		F8(state);
+	}

-      /*truncating the final hash value to generate the message digest*/
-      switch(state->hashbitlen) {
-            case 224: memcpy(hashval,(unsigned char*)state->x+64+36,28);  break;
-            case 256: memcpy(hashval,(unsigned char*)state->x+64+32,32);  break;
-            case 384: memcpy(hashval,(unsigned char*)state->x+64+16,48);  break;
-            case 512: memcpy(hashval,(unsigned char*)state->x+64,64);     break;
-      }
+	/*truncating the final hash value to generate the message digest*/
+	switch(state->hashbitlen)
+	{
+	case 224:
+		memcpy(hashval, (unsigned char*)state->x + 64 + 36, 28);
+		break;
+	case 256:
+		memcpy(hashval, (unsigned char*)state->x + 64 + 32, 32);
+		break;
+	case 384:
+		memcpy(hashval, (unsigned char*)state->x + 64 + 16, 48);
+		break;
+	case 512:
+		memcpy(hashval, (unsigned char*)state->x + 64, 64);
+		break;
+	}

-      return(SUCCESS);
+	return(SUCCESS);
 }

 /* hash a message,
   three inputs: message digest size in bits (hashbitlen); message (data); message length in bits (databitlen)
   one output:   message digest (hashval)
 */
-HashReturn jh_hash(int hashbitlen, const BitSequence *data,DataLength databitlen, BitSequence *hashval)
+HashReturn jh_hash(int hashbitlen, const BitSequence* data, DataLength databitlen, BitSequence* hashval)
 {
-      hashState state;
+	hashState state;

-      if ( hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512 ) {
-            Init(&state, hashbitlen);
-            Update(&state, data, databitlen);
-            Final(&state, hashval);
-            return SUCCESS;
-      }
-      else
-            return(BAD_HASHLEN);
+	if(hashbitlen == 224 || hashbitlen == 256 || hashbitlen == 384 || hashbitlen == 512)
+	{
+		Init(&state, hashbitlen);
+		Update(&state, data, databitlen);
+		Final(&state, hashval);
+		return SUCCESS;
+	}
+	else
+	{
+		return(BAD_HASHLEN);
+	}
 }
--- a/src/crypto/c_jh.h
+++ b/src/crypto/c_jh.h
@ -16,4 +16,4 @@

 #include "hash.h"

-HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
+HashReturn jh_hash(int hashbitlen, const BitSequence* data, DataLength databitlen, BitSequence* hashval);
--- a/src/crypto/c_keccak.c
+++ b/src/crypto/c_keccak.c
@ -12,165 +12,172 @@
 #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
 #endif

-const uint64_t keccakf_rndc[24] = 
+const uint64_t keccakf_rndc[24] =
 {
-    0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-    0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-    0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-    0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-    0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-    0x8000000000008003, 0x8000000000008002, 0x8000000000000080, 
-    0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-    0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+	0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+	0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+	0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+	0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+	0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
 };

 // update the state with given number of rounds

 void keccakf(uint64_t st[25], int rounds)
 {
-    int i, j, round;
-    uint64_t t, bc[5];
+	int i, j, round;
+	uint64_t t, bc[5];

-    for (round = 0; round < rounds; ++round) {
+	for(round = 0; round < rounds; ++round)
+	{

-        // Theta
-        bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
-        bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
-        bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
-        bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
-        bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
+		// Theta
+		bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
+		bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
+		bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
+		bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
+		bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];

-        for (i = 0; i < 5; ++i) {
-            t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
-            st[i     ] ^= t;
-            st[i +  5] ^= t;
-            st[i + 10] ^= t;
-            st[i + 15] ^= t;
-            st[i + 20] ^= t;
-        }
+		for(i = 0; i < 5; ++i)
+		{
+			t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
+			st[i     ] ^= t;
+			st[i +  5] ^= t;
+			st[i + 10] ^= t;
+			st[i + 15] ^= t;
+			st[i + 20] ^= t;
+		}

-        // Rho Pi
-        t = st[1];
-        st[ 1] = ROTL64(st[ 6], 44);
-        st[ 6] = ROTL64(st[ 9], 20);
-        st[ 9] = ROTL64(st[22], 61);
-        st[22] = ROTL64(st[14], 39);
-        st[14] = ROTL64(st[20], 18);
-        st[20] = ROTL64(st[ 2], 62);
-        st[ 2] = ROTL64(st[12], 43);
-        st[12] = ROTL64(st[13], 25);
-        st[13] = ROTL64(st[19],  8);
-        st[19] = ROTL64(st[23], 56);
-        st[23] = ROTL64(st[15], 41);
-        st[15] = ROTL64(st[ 4], 27);
-        st[ 4] = ROTL64(st[24], 14);
-        st[24] = ROTL64(st[21],  2);
-        st[21] = ROTL64(st[ 8], 55);
-        st[ 8] = ROTL64(st[16], 45);
-        st[16] = ROTL64(st[ 5], 36);
-        st[ 5] = ROTL64(st[ 3], 28);
-        st[ 3] = ROTL64(st[18], 21);
-        st[18] = ROTL64(st[17], 15);
-        st[17] = ROTL64(st[11], 10);
-        st[11] = ROTL64(st[ 7],  6);
-        st[ 7] = ROTL64(st[10],  3);
-        st[10] = ROTL64(t, 1);
+		// Rho Pi
+		t = st[1];
+		st[ 1] = ROTL64(st[ 6], 44);
+		st[ 6] = ROTL64(st[ 9], 20);
+		st[ 9] = ROTL64(st[22], 61);
+		st[22] = ROTL64(st[14], 39);
+		st[14] = ROTL64(st[20], 18);
+		st[20] = ROTL64(st[ 2], 62);
+		st[ 2] = ROTL64(st[12], 43);
+		st[12] = ROTL64(st[13], 25);
+		st[13] = ROTL64(st[19],  8);
+		st[19] = ROTL64(st[23], 56);
+		st[23] = ROTL64(st[15], 41);
+		st[15] = ROTL64(st[ 4], 27);
+		st[ 4] = ROTL64(st[24], 14);
+		st[24] = ROTL64(st[21],  2);
+		st[21] = ROTL64(st[ 8], 55);
+		st[ 8] = ROTL64(st[16], 45);
+		st[16] = ROTL64(st[ 5], 36);
+		st[ 5] = ROTL64(st[ 3], 28);
+		st[ 3] = ROTL64(st[18], 21);
+		st[18] = ROTL64(st[17], 15);
+		st[17] = ROTL64(st[11], 10);
+		st[11] = ROTL64(st[ 7],  6);
+		st[ 7] = ROTL64(st[10],  3);
+		st[10] = ROTL64(t, 1);

-        //  Chi
-        // unrolled loop, where only last iteration is different
-        j = 0;
-        bc[0] = st[j    ];
-        bc[1] = st[j + 1];
+		//  Chi
+		// unrolled loop, where only last iteration is different
+		j = 0;
+		bc[0] = st[j    ];
+		bc[1] = st[j + 1];

-        st[j    ] ^= (~st[j + 1]) & st[j + 2];
-        st[j + 1] ^= (~st[j + 2]) & st[j + 3];
-        st[j + 2] ^= (~st[j + 3]) & st[j + 4];
-        st[j + 3] ^= (~st[j + 4]) & bc[0];
-        st[j + 4] ^= (~bc[0]) & bc[1];
+		st[j    ] ^= (~st[j + 1]) & st[j + 2];
+		st[j + 1] ^= (~st[j + 2]) & st[j + 3];
+		st[j + 2] ^= (~st[j + 3]) & st[j + 4];
+		st[j + 3] ^= (~st[j + 4]) & bc[0];
+		st[j + 4] ^= (~bc[0]) & bc[1];

-        j = 5;
-        bc[0] = st[j    ];
-        bc[1] = st[j + 1];
+		j = 5;
+		bc[0] = st[j    ];
+		bc[1] = st[j + 1];

-        st[j    ] ^= (~st[j + 1]) & st[j + 2];
-        st[j + 1] ^= (~st[j + 2]) & st[j + 3];
-        st[j + 2] ^= (~st[j + 3]) & st[j + 4];
-        st[j + 3] ^= (~st[j + 4]) & bc[0];
-        st[j + 4] ^= (~bc[0]) & bc[1];
+		st[j    ] ^= (~st[j + 1]) & st[j + 2];
+		st[j + 1] ^= (~st[j + 2]) & st[j + 3];
+		st[j + 2] ^= (~st[j + 3]) & st[j + 4];
+		st[j + 3] ^= (~st[j + 4]) & bc[0];
+		st[j + 4] ^= (~bc[0]) & bc[1];

-        j = 10;
-        bc[0] = st[j    ];
-        bc[1] = st[j + 1];
+		j = 10;
+		bc[0] = st[j    ];
+		bc[1] = st[j + 1];

-        st[j    ] ^= (~st[j + 1]) & st[j + 2];
-        st[j + 1] ^= (~st[j + 2]) & st[j + 3];
-        st[j + 2] ^= (~st[j + 3]) & st[j + 4];
-        st[j + 3] ^= (~st[j + 4]) & bc[0];
-        st[j + 4] ^= (~bc[0]) & bc[1];
+		st[j    ] ^= (~st[j + 1]) & st[j + 2];
+		st[j + 1] ^= (~st[j + 2]) & st[j + 3];
+		st[j + 2] ^= (~st[j + 3]) & st[j + 4];
+		st[j + 3] ^= (~st[j + 4]) & bc[0];
+		st[j + 4] ^= (~bc[0]) & bc[1];

-        j = 15;
-        bc[0] = st[j    ];
-        bc[1] = st[j + 1];
+		j = 15;
+		bc[0] = st[j    ];
+		bc[1] = st[j + 1];

-        st[j    ] ^= (~st[j + 1]) & st[j + 2];
-        st[j + 1] ^= (~st[j + 2]) & st[j + 3];
-        st[j + 2] ^= (~st[j + 3]) & st[j + 4];
-        st[j + 3] ^= (~st[j + 4]) & bc[0];
-        st[j + 4] ^= (~bc[0]) & bc[1];
+		st[j    ] ^= (~st[j + 1]) & st[j + 2];
+		st[j + 1] ^= (~st[j + 2]) & st[j + 3];
+		st[j + 2] ^= (~st[j + 3]) & st[j + 4];
+		st[j + 3] ^= (~st[j + 4]) & bc[0];
+		st[j + 4] ^= (~bc[0]) & bc[1];

-        j = 20;
-        bc[0] = st[j    ];
-        bc[1] = st[j + 1];
-        bc[2] = st[j + 2];
-        bc[3] = st[j + 3];
-        bc[4] = st[j + 4];
+		j = 20;
+		bc[0] = st[j    ];
+		bc[1] = st[j + 1];
+		bc[2] = st[j + 2];
+		bc[3] = st[j + 3];
+		bc[4] = st[j + 4];

-        st[j    ] ^= (~bc[1]) & bc[2];
-        st[j + 1] ^= (~bc[2]) & bc[3];
-        st[j + 2] ^= (~bc[3]) & bc[4];
-        st[j + 3] ^= (~bc[4]) & bc[0];
-        st[j + 4] ^= (~bc[0]) & bc[1];
-        
-        //  Iota
-        st[0] ^= keccakf_rndc[round];
-    }
+		st[j    ] ^= (~bc[1]) & bc[2];
+		st[j + 1] ^= (~bc[2]) & bc[3];
+		st[j + 2] ^= (~bc[3]) & bc[4];
+		st[j + 3] ^= (~bc[4]) & bc[0];
+		st[j + 4] ^= (~bc[0]) & bc[1];
+
+		//  Iota
+		st[0] ^= keccakf_rndc[round];
+	}
 }

 // compute a keccak hash (md) of given byte length from "in"
 typedef uint64_t state_t[25];

-void keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen)
+void keccak(const uint8_t* in, int inlen, uint8_t* md, int mdlen)
 {
-    state_t st;
-    uint8_t temp[144];
-    int i, rsiz, rsizw;
+	state_t st;
+	uint8_t temp[144];
+	int i, rsiz, rsizw;

-    rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
-    rsizw = rsiz / 8;
-    
-    memset(st, 0, sizeof(st));
+	rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen;
+	rsizw = rsiz / 8;

-    for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) {
-        for (i = 0; i < rsizw; i++)
-            st[i] ^= ((uint64_t *) in)[i];
-        keccakf(st, KECCAK_ROUNDS);
-    }
-    
-    // last block and padding
-    memcpy(temp, in, inlen);
-    temp[inlen++] = 1;
-    memset(temp + inlen, 0, rsiz - inlen);
-    temp[rsiz - 1] |= 0x80;
+	memset(st, 0, sizeof(st));

-    for (i = 0; i < rsizw; i++)
-        st[i] ^= ((uint64_t *) temp)[i];
+	for(; inlen >= rsiz; inlen -= rsiz, in += rsiz)
+	{
+		for(i = 0; i < rsizw; i++)
+		{
+			st[i] ^= ((uint64_t*) in)[i];
+		}
+		keccakf(st, KECCAK_ROUNDS);
+	}

-    keccakf(st, KECCAK_ROUNDS);
+	// last block and padding
+	memcpy(temp, in, inlen);
+	temp[inlen++] = 1;
+	memset(temp + inlen, 0, rsiz - inlen);
+	temp[rsiz - 1] |= 0x80;

-    memcpy(md, st, mdlen);
+	for(i = 0; i < rsizw; i++)
+	{
+		st[i] ^= ((uint64_t*) temp)[i];
+	}
+
+	keccakf(st, KECCAK_ROUNDS);
+
+	memcpy(md, st, mdlen);
 }

-void keccak1600(const uint8_t *in, int inlen, uint8_t *md)
+void keccak1600(const uint8_t* in, int inlen, uint8_t* md)
 {
-    keccak(in, inlen, md, sizeof(state_t));
+	keccak(in, inlen, md, sizeof(state_t));
 }
--- a/src/crypto/c_keccak.h
+++ b/src/crypto/c_keccak.h
@ -16,11 +16,11 @@
 #endif

 // compute a keccak hash (md) of given byte length from "in"
-int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen);
+int keccak(const uint8_t* in, int inlen, uint8_t* md, int mdlen);

 // update the state
 void keccakf(uint64_t st[25], int norounds);

-void keccak1600(const uint8_t *in, int inlen, uint8_t *md);
+void keccak1600(const uint8_t* in, int inlen, uint8_t* md);

 #endif
--- a/src/crypto/c_skein.c
+++ b/src/crypto/c_skein.c
@ -5,7 +5,7 @@
 ** Source code author: Doug Whiting, 2008.
 **
 ** This algorithm and source code is released to the public domain.
-** 
+**
 ************************************************************************/

 #define  SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
@ -34,22 +34,22 @@

 typedef struct
 {
-  size_t  hashBitLen;                      /* size of hash result, in bits */
-  size_t  bCnt;                            /* current byte count in buffer b[] */
-  u64b_t  T[SKEIN_MODIFIER_WORDS];         /* tweak words: T[0]=byte cnt, T[1]=flags */
+	size_t  hashBitLen;                      /* size of hash result, in bits */
+	size_t  bCnt;                            /* current byte count in buffer b[] */
+	u64b_t  T[SKEIN_MODIFIER_WORDS];         /* tweak words: T[0]=byte cnt, T[1]=flags */
 } Skein_Ctxt_Hdr_t;

 typedef struct                               /*  512-bit Skein hash context structure */
 {
-  Skein_Ctxt_Hdr_t h;                      /* common header context variables */
-  u64b_t  X[SKEIN_512_STATE_WORDS];        /* chaining variables */
-  u08b_t  b[SKEIN_512_BLOCK_BYTES];        /* partial block buffer (8-byte aligned) */
+	Skein_Ctxt_Hdr_t h;                      /* common header context variables */
+	u64b_t  X[SKEIN_512_STATE_WORDS];        /* chaining variables */
+	u08b_t  b[SKEIN_512_BLOCK_BYTES];        /* partial block buffer (8-byte aligned) */
 } Skein_512_Ctxt_t;

 /*   Skein APIs for (incremental) "straight hashing" */
-static int  Skein_512_Init  (Skein_512_Ctxt_t *ctx, size_t hashBitLen);
-static int  Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt);
-static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
+static int  Skein_512_Init(Skein_512_Ctxt_t* ctx, size_t hashBitLen);
+static int  Skein_512_Update(Skein_512_Ctxt_t* ctx, const u08b_t* msg, size_t msgByteCnt);
+static int  Skein_512_Final(Skein_512_Ctxt_t* ctx, u08b_t* hashVal);

 #ifndef SKEIN_TREE_HASH
 #define SKEIN_TREE_HASH (1)
@ -57,7 +57,7 @@ static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);

 /*****************************************************************
 ** "Internal" Skein definitions
-**    -- not needed for sequential hashing API, but will be 
+**    -- not needed for sequential hashing API, but will be
 **           helpful for other uses of Skein (e.g., tree hash mode).
 **    -- included here so that they can be shared between
 **           reference and optimized code.
@ -128,9 +128,9 @@ static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
 #define SKEIN_CFG_TREE_MAX_LEVEL_MSK  (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)

 #define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl)                   \
-  ( (((u64b_t)(leaf  )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) |    \
-  (((u64b_t)(node  )) << SKEIN_CFG_TREE_NODE_SIZE_POS) |    \
-  (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) )
+	( (((u64b_t)(leaf  )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) |    \
+	  (((u64b_t)(node  )) << SKEIN_CFG_TREE_NODE_SIZE_POS) |    \
+	  (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) )

 #define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */

@ -148,17 +148,17 @@ static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);

 /* set both tweak words at once */
 #define Skein_Set_T0_T1(ctxPtr,T0,T1)           \
-{                                           \
-  Skein_Set_T0(ctxPtr,(T0));                  \
-  Skein_Set_T1(ctxPtr,(T1));                  \
-}
+	{                                           \
+		Skein_Set_T0(ctxPtr,(T0));                  \
+		Skein_Set_T1(ctxPtr,(T1));                  \
+	}

 #define Skein_Set_Type(ctxPtr,BLK_TYPE)         \
-  Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE)
+	Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE)

 /* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */
 #define Skein_Start_New_Type(ctxPtr,BLK_TYPE)   \
-{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }
+	{ Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; }

 #define Skein_Clear_First_Flag(hdr)      { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST;       }
 #define Skein_Set_Bit_Pad_Flag(hdr)      { (hdr).T[1] |=  SKEIN_T1_FLAG_BIT_PAD;     }
@ -179,11 +179,11 @@ static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
 #define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */
 #define Skein_assert(x)
 #elif   defined(SKEIN_ASSERT)
-#include <assert.h>     
-#define Skein_Assert(x,retCode) assert(x) 
-#define Skein_assert(x)         assert(x) 
+#include <assert.h>
+#define Skein_Assert(x,retCode) assert(x)
+#define Skein_assert(x)         assert(x)
 #else
-#include <assert.h>     
+#include <assert.h>
 #define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /*  caller  error */
 #define Skein_assert(x)         assert(x)                     /* internal error */
 #endif
@ -191,17 +191,17 @@ static int  Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal);
 /*****************************************************************
 ** Skein block function constants (shared across Ref and Opt code)
 ******************************************************************/
-enum    
-{   
-  /* Skein_512 round rotation constants */
-  R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
-  R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42,
-  R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39,
-  R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56,
-  R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24,
-  R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17,
-  R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43,
-  R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22,
+enum
+{
+	/* Skein_512 round rotation constants */
+	R_512_0_0 = 46, R_512_0_1 = 36, R_512_0_2 = 19, R_512_0_3 = 37,
+	R_512_1_0 = 33, R_512_1_1 = 27, R_512_1_2 = 14, R_512_1_3 = 42,
+	R_512_2_0 = 17, R_512_2_1 = 49, R_512_2_2 = 36, R_512_2_3 = 39,
+	R_512_3_0 = 44, R_512_3_1 = 9, R_512_3_2 = 54, R_512_3_3 = 56,
+	R_512_4_0 = 39, R_512_4_1 = 30, R_512_4_2 = 34, R_512_4_3 = 24,
+	R_512_5_0 = 13, R_512_5_1 = 50, R_512_5_2 = 10, R_512_5_3 = 17,
+	R_512_6_0 = 25, R_512_6_1 = 29, R_512_6_2 = 39, R_512_6_3 = 43,
+	R_512_7_0 = 8, R_512_7_1 = 35, R_512_7_2 = 56, R_512_7_3 = 22,
 };

 #ifndef SKEIN_ROUNDS
@ -229,16 +229,16 @@ enum

 /* blkSize =  512 bits. hashSize =  256 bits */
 const u64b_t SKEIN_512_IV_256[] =
-    {
-    MK_64(0xCCD044A1,0x2FDB3E13),
-    MK_64(0xE8359030,0x1A79A9EB),
-    MK_64(0x55AEA061,0x4F816E6F),
-    MK_64(0x2A2767A4,0xAE9B94DB),
-    MK_64(0xEC06025E,0x74DD7683),
-    MK_64(0xE7A436CD,0xC4746251),
-    MK_64(0xC36FBAF9,0x393AD185),
-    MK_64(0x3EEDBA18,0x33EDFC13)
-    };
+{
+	MK_64(0xCCD044A1, 0x2FDB3E13),
+	MK_64(0xE8359030, 0x1A79A9EB),
+	MK_64(0x55AEA061, 0x4F816E6F),
+	MK_64(0x2A2767A4, 0xAE9B94DB),
+	MK_64(0xEC06025E, 0x74DD7683),
+	MK_64(0xE7A436CD, 0xC4746251),
+	MK_64(0xC36FBAF9, 0x393AD185),
+	MK_64(0x3EEDBA18, 0x33EDFC13)
+};

 #ifndef SKEIN_USE_ASM
 #define SKEIN_USE_ASM   (0)                     /* default is all C code (no ASM) */
@ -251,7 +251,7 @@ const u64b_t SKEIN_512_IV_256[] =
 #define BLK_BITS        (WCNT*64)               /* some useful definitions for code here */
 #define KW_TWK_BASE     (0)
 #define KW_KEY_BASE     (3)
-#define ks              (kw + KW_KEY_BASE)                
+#define ks              (kw + KW_KEY_BASE)
 #define ts              (kw + KW_TWK_BASE)

 #ifdef SKEIN_DEBUG
@ -262,12 +262,14 @@ const u64b_t SKEIN_512_IV_256[] =

 /*****************************  Skein_512 ******************************/
 #if !(SKEIN_USE_ASM & 512)
-static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd)
-    { /* do it in C */
-    enum
-        {
-        WCNT = SKEIN_512_STATE_WORDS
-        };
+static void Skein_512_Process_Block(Skein_512_Ctxt_t* ctx, const u08b_t* blkPtr, size_t blkCnt,
+                                    size_t byteCntAdd)
+{
+	/* do it in C */
+	enum
+	{
+		WCNT = SKEIN_512_STATE_WORDS
+	};
 #undef  RCNT
 #define RCNT  (SKEIN_512_ROUNDS_TOTAL/8)

@ -281,179 +283,186 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s
 #if (RCNT % SKEIN_UNROLL_512)
 #error "Invalid SKEIN_UNROLL_512"               /* sanity check on unroll count */
 #endif
-    size_t  r;
-    u64b_t  kw[WCNT+4+RCNT*2];                  /* key schedule words : chaining vars + tweak + "rotation"*/
+	size_t  r;
+	u64b_t  kw[WCNT + 4 + RCNT * 2];            /* key schedule words : chaining vars + tweak + "rotation"*/
 #else
-    u64b_t  kw[WCNT+4];                         /* key schedule words : chaining vars + tweak */
+	u64b_t  kw[WCNT + 4];                       /* key schedule words : chaining vars + tweak */
 #endif
-    u64b_t  X0,X1,X2,X3,X4,X5,X6,X7;            /* local copy of vars, for speed */
-    u64b_t  w [WCNT];                           /* local copy of input block */
+	u64b_t  X0, X1, X2, X3, X4, X5, X6, X7;     /* local copy of vars, for speed */
+	u64b_t  w [WCNT];                           /* local copy of input block */
 #ifdef SKEIN_DEBUG
-    const u64b_t *Xptr[8];                      /* use for debugging (help compiler put Xn in registers) */
-    Xptr[0] = &X0;  Xptr[1] = &X1;  Xptr[2] = &X2;  Xptr[3] = &X3;
-    Xptr[4] = &X4;  Xptr[5] = &X5;  Xptr[6] = &X6;  Xptr[7] = &X7;
+	const u64b_t* Xptr[8];                      /* use for debugging (help compiler put Xn in registers) */
+	Xptr[0] = &X0;
+	Xptr[1] = &X1;
+	Xptr[2] = &X2;
+	Xptr[3] = &X3;
+	Xptr[4] = &X4;
+	Xptr[5] = &X5;
+	Xptr[6] = &X6;
+	Xptr[7] = &X7;
 #endif

-    Skein_assert(blkCnt != 0);                  /* never call with blkCnt == 0! */
-    ts[0] = ctx->h.T[0];
-    ts[1] = ctx->h.T[1];
-    do  {
-        /* this implementation only supports 2**64 input bytes (no carry out here) */
-        ts[0] += byteCntAdd;                    /* update processed length */
+	Skein_assert(blkCnt != 0);                  /* never call with blkCnt == 0! */
+	ts[0] = ctx->h.T[0];
+	ts[1] = ctx->h.T[1];
+	do
+	{
+		/* this implementation only supports 2**64 input bytes (no carry out here) */
+		ts[0] += byteCntAdd;                    /* update processed length */

-        /* precompute the key schedule for this block */
-        ks[0] = ctx->X[0];
-        ks[1] = ctx->X[1];
-        ks[2] = ctx->X[2];
-        ks[3] = ctx->X[3];
-        ks[4] = ctx->X[4];
-        ks[5] = ctx->X[5];
-        ks[6] = ctx->X[6];
-        ks[7] = ctx->X[7];
-        ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ 
-                ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
+		/* precompute the key schedule for this block */
+		ks[0] = ctx->X[0];
+		ks[1] = ctx->X[1];
+		ks[2] = ctx->X[2];
+		ks[3] = ctx->X[3];
+		ks[4] = ctx->X[4];
+		ks[5] = ctx->X[5];
+		ks[6] = ctx->X[6];
+		ks[7] = ctx->X[7];
+		ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+		        ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;

-        ts[2] = ts[0] ^ ts[1];
+		ts[2] = ts[0] ^ ts[1];

-        Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */
-        DebugSaveTweak(ctx);
-        Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts);
+		Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */
+		DebugSaveTweak(ctx);
+		Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);

-        X0   = w[0] + ks[0];                    /* do the first full key injection */
-        X1   = w[1] + ks[1];
-        X2   = w[2] + ks[2];
-        X3   = w[3] + ks[3];
-        X4   = w[4] + ks[4];
-        X5   = w[5] + ks[5] + ts[0];
-        X6   = w[6] + ks[6] + ts[1];
-        X7   = w[7] + ks[7];
+		X0   = w[0] + ks[0];                    /* do the first full key injection */
+		X1   = w[1] + ks[1];
+		X2   = w[2] + ks[2];
+		X3   = w[3] + ks[3];
+		X4   = w[4] + ks[4];
+		X5   = w[5] + ks[5] + ts[0];
+		X6   = w[6] + ks[6] + ts[1];
+		X7   = w[7] + ks[7];

-        blkPtr += SKEIN_512_BLOCK_BYTES;
+		blkPtr += SKEIN_512_BLOCK_BYTES;

-        Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr);
-        /* run the rounds */
+		Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr);
+		/* run the rounds */
 #define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                  \
-    X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
-    X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
-    X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
-    X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
-
-#if SKEIN_UNROLL_512 == 0                       
+	X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \
+	X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \
+	X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \
+	X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \
+	 
+#if SKEIN_UNROLL_512 == 0
 #define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)      /* unrolled */  \
-    Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
-    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);
+	Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
+	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr);

 #define I512(R)                                                     \
-    X0   += ks[((R)+1) % 9];   /* inject the key schedule value */  \
-    X1   += ks[((R)+2) % 9];                                        \
-    X2   += ks[((R)+3) % 9];                                        \
-    X3   += ks[((R)+4) % 9];                                        \
-    X4   += ks[((R)+5) % 9];                                        \
-    X5   += ks[((R)+6) % 9] + ts[((R)+1) % 3];                      \
-    X6   += ks[((R)+7) % 9] + ts[((R)+2) % 3];                      \
-    X7   += ks[((R)+8) % 9] +     (R)+1;                            \
-    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+	X0   += ks[((R)+1) % 9];   /* inject the key schedule value */  \
+	X1   += ks[((R)+2) % 9];                                        \
+	X2   += ks[((R)+3) % 9];                                        \
+	X3   += ks[((R)+4) % 9];                                        \
+	X4   += ks[((R)+5) % 9];                                        \
+	X5   += ks[((R)+6) % 9] + ts[((R)+1) % 3];                      \
+	X6   += ks[((R)+7) % 9] + ts[((R)+2) % 3];                      \
+	X7   += ks[((R)+8) % 9] +     (R)+1;                            \
+	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
 #else                                       /* looping version */
 #define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
-    Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
-    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);
+	Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum)                      \
+	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr);

 #define I512(R)                                                     \
-    X0   += ks[r+(R)+0];        /* inject the key schedule value */ \
-    X1   += ks[r+(R)+1];                                            \
-    X2   += ks[r+(R)+2];                                            \
-    X3   += ks[r+(R)+3];                                            \
-    X4   += ks[r+(R)+4];                                            \
-    X5   += ks[r+(R)+5] + ts[r+(R)+0];                              \
-    X6   += ks[r+(R)+6] + ts[r+(R)+1];                              \
-    X7   += ks[r+(R)+7] +    r+(R)   ;                              \
-    ks[r +       (R)+8] = ks[r+(R)-1];  /* rotate key schedule */   \
-    ts[r +       (R)+2] = ts[r+(R)-1];                              \
-    Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);
+	X0   += ks[r+(R)+0];        /* inject the key schedule value */ \
+	X1   += ks[r+(R)+1];                                            \
+	X2   += ks[r+(R)+2];                                            \
+	X3   += ks[r+(R)+3];                                            \
+	X4   += ks[r+(R)+4];                                            \
+	X5   += ks[r+(R)+5] + ts[r+(R)+0];                              \
+	X6   += ks[r+(R)+6] + ts[r+(R)+1];                              \
+	X7   += ks[r+(R)+7] +    r+(R)   ;                              \
+	ks[r +       (R)+8] = ks[r+(R)-1];  /* rotate key schedule */   \
+	ts[r +       (R)+2] = ts[r+(R)-1];                              \
+	Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr);

-    for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512)   /* loop thru it */
+		for(r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) /* loop thru it */
 #endif                         /* end of looped code definitions */
-        {
+		{
 #define R512_8_rounds(R)  /* do 8 full rounds */  \
-        R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1);   \
-        R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2);   \
-        R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3);   \
-        R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4);   \
-        I512(2*(R));                              \
-        R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5);   \
-        R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6);   \
-        R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7);   \
-        R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8);   \
-        I512(2*(R)+1);        /* and key injection */
+	R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1);   \
+	R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2);   \
+	R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3);   \
+	R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4);   \
+	I512(2*(R));                              \
+	R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5);   \
+	R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6);   \
+	R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7);   \
+	R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8);   \
+	I512(2*(R)+1);        /* and key injection */

-        R512_8_rounds( 0);
+			R512_8_rounds(0);

 #define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))

-  #if   R512_Unroll_R( 1)
-        R512_8_rounds( 1);
-  #endif
-  #if   R512_Unroll_R( 2)
-        R512_8_rounds( 2);
-  #endif
-  #if   R512_Unroll_R( 3)
-        R512_8_rounds( 3);
-  #endif
-  #if   R512_Unroll_R( 4)
-        R512_8_rounds( 4);
-  #endif
-  #if   R512_Unroll_R( 5)
-        R512_8_rounds( 5);
-  #endif
-  #if   R512_Unroll_R( 6)
-        R512_8_rounds( 6);
-  #endif
-  #if   R512_Unroll_R( 7)
-        R512_8_rounds( 7);
-  #endif
-  #if   R512_Unroll_R( 8)
-        R512_8_rounds( 8);
-  #endif
-  #if   R512_Unroll_R( 9)
-        R512_8_rounds( 9);
-  #endif
-  #if   R512_Unroll_R(10)
-        R512_8_rounds(10);
-  #endif
-  #if   R512_Unroll_R(11)
-        R512_8_rounds(11);
-  #endif
-  #if   R512_Unroll_R(12)
-        R512_8_rounds(12);
-  #endif
-  #if   R512_Unroll_R(13)
-        R512_8_rounds(13);
-  #endif
-  #if   R512_Unroll_R(14)
-        R512_8_rounds(14);
-  #endif
-  #if  (SKEIN_UNROLL_512 > 14)
+#if   R512_Unroll_R( 1)
+			R512_8_rounds(1);
+#endif
+#if   R512_Unroll_R( 2)
+			R512_8_rounds(2);
+#endif
+#if   R512_Unroll_R( 3)
+			R512_8_rounds(3);
+#endif
+#if   R512_Unroll_R( 4)
+			R512_8_rounds(4);
+#endif
+#if   R512_Unroll_R( 5)
+			R512_8_rounds(5);
+#endif
+#if   R512_Unroll_R( 6)
+			R512_8_rounds(6);
+#endif
+#if   R512_Unroll_R( 7)
+			R512_8_rounds(7);
+#endif
+#if   R512_Unroll_R( 8)
+			R512_8_rounds(8);
+#endif
+#if   R512_Unroll_R( 9)
+			R512_8_rounds(9);
+#endif
+#if   R512_Unroll_R(10)
+			R512_8_rounds(10);
+#endif
+#if   R512_Unroll_R(11)
+			R512_8_rounds(11);
+#endif
+#if   R512_Unroll_R(12)
+			R512_8_rounds(12);
+#endif
+#if   R512_Unroll_R(13)
+			R512_8_rounds(13);
+#endif
+#if   R512_Unroll_R(14)
+			R512_8_rounds(14);
+#endif
+#if  (SKEIN_UNROLL_512 > 14)
 #error  "need more unrolling in Skein_512_Process_Block"
-  #endif
-        }
+#endif
+		}

-        /* do the final "feedforward" xor, update context chaining vars */
-        ctx->X[0] = X0 ^ w[0];
-        ctx->X[1] = X1 ^ w[1];
-        ctx->X[2] = X2 ^ w[2];
-        ctx->X[3] = X3 ^ w[3];
-        ctx->X[4] = X4 ^ w[4];
-        ctx->X[5] = X5 ^ w[5];
-        ctx->X[6] = X6 ^ w[6];
-        ctx->X[7] = X7 ^ w[7];
-        Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+		/* do the final "feedforward" xor, update context chaining vars */
+		ctx->X[0] = X0 ^ w[0];
+		ctx->X[1] = X1 ^ w[1];
+		ctx->X[2] = X2 ^ w[2];
+		ctx->X[3] = X3 ^ w[3];
+		ctx->X[4] = X4 ^ w[4];
+		ctx->X[5] = X5 ^ w[5];
+		ctx->X[6] = X6 ^ w[6];
+		ctx->X[7] = X7 ^ w[7];
+		Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);

-        ts[1] &= ~SKEIN_T1_FLAG_FIRST;
-        }
-    while (--blkCnt);
-    ctx->h.T[0] = ts[0];
-    ctx->h.T[1] = ts[1];
-    }
+		ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+	}
+	while(--blkCnt);
+	ctx->h.T[0] = ts[0];
+	ctx->h.T[1] = ts[1];
+}
 #endif

 /*****************************************************************/
@ -462,240 +471,252 @@ static void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,s

 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* init the context for a straight hashing operation  */
-static int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
-    {
-    union
-        {
-        u08b_t  b[SKEIN_512_STATE_BYTES];
-        u64b_t  w[SKEIN_512_STATE_WORDS];
-        } cfg;                              /* config block */
-        
-    Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN);
-    ctx->h.hashBitLen = hashBitLen;         /* output hash bit count */
+static int Skein_512_Init(Skein_512_Ctxt_t* ctx, size_t hashBitLen)
+{
+	union
+	{
+		u08b_t  b[SKEIN_512_STATE_BYTES];
+		u64b_t  w[SKEIN_512_STATE_WORDS];
+	} cfg;                              /* config block */

-    switch (hashBitLen)
-        {             /* use pre-computed values, where available */
+	Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+	ctx->h.hashBitLen = hashBitLen;         /* output hash bit count */
+
+	switch(hashBitLen)
+	{
+		/* use pre-computed values, where available */
 #ifndef SKEIN_NO_PRECOMP
-        case  256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X));  break;
+	case  256:
+		memcpy(ctx->X, SKEIN_512_IV_256, sizeof(ctx->X));
+		break;
 #endif
-        default:
-            /* here if there is no precomputed IV value available */
-            /* build/process the config block, type == CONFIG (could be precomputed) */
-            Skein_Start_New_Type(ctx,CFG_FINAL);        /* set tweaks: T0=0; T1=CFG | FINAL */
+	default:
+		/* here if there is no precomputed IV value available */
+		/* build/process the config block, type == CONFIG (could be precomputed) */
+		Skein_Start_New_Type(ctx, CFG_FINAL);       /* set tweaks: T0=0; T1=CFG | FINAL */

-            cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);  /* set the schema, version */
-            cfg.w[1] = Skein_Swap64(hashBitLen);        /* hash result length in bits */
-            cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
-            memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */
+		cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);  /* set the schema, version */
+		cfg.w[1] = Skein_Swap64(hashBitLen);        /* hash result length in bits */
+		cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+		memset(&cfg.w[3], 0, sizeof(cfg) - 3 * sizeof(cfg.w[0])); /* zero pad config block */

-            /* compute the initial chaining values from config block */
-            memset(ctx->X,0,sizeof(ctx->X));            /* zero the chaining variables */
-            Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN);
-            break;
-        }
+		/* compute the initial chaining values from config block */
+		memset(ctx->X, 0, sizeof(ctx->X));          /* zero the chaining variables */
+		Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+		break;
+	}

-    /* The chaining vars ctx->X are now initialized for the given hashBitLen. */
-    /* Set up to process the data message portion of the hash (default) */
-    Skein_Start_New_Type(ctx,MSG);              /* T0=0, T1= MSG type */
+	/* The chaining vars ctx->X are now initialized for the given hashBitLen. */
+	/* Set up to process the data message portion of the hash (default) */
+	Skein_Start_New_Type(ctx, MSG);             /* T0=0, T1= MSG type */

-    return SKEIN_SUCCESS;
-    }
+	return SKEIN_SUCCESS;
+}

 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* process the input bytes */
-static int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt)
-    {
-    size_t n;
+static int Skein_512_Update(Skein_512_Ctxt_t* ctx, const u08b_t* msg, size_t msgByteCnt)
+{
+	size_t n;

-    Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
+	Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);   /* catch uninitialized context */

-    /* process full blocks, if any */
-    if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
-        {
-        if (ctx->h.bCnt)                              /* finish up any buffered message data */
-            {
-            n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;  /* # bytes free in buffer b[] */
-            if (n)
-                {
-                Skein_assert(n < msgByteCnt);         /* check on our logic here */
-                memcpy(&ctx->b[ctx->h.bCnt],msg,n);
-                msgByteCnt  -= n;
-                msg         += n;
-                ctx->h.bCnt += n;
-                }
-            Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
-            Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES);
-            ctx->h.bCnt = 0;
-            }
-        /* now process any remaining full blocks, directly from input message data */
-        if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
-            {
-            n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES;   /* number of full blocks to process */
-            Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
-            msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
-            msg        += n * SKEIN_512_BLOCK_BYTES;
-            }
-        Skein_assert(ctx->h.bCnt == 0);
-        }
+	/* process full blocks, if any */
+	if(msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
+	{
+		if(ctx->h.bCnt)                               /* finish up any buffered message data */
+		{
+			n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;  /* # bytes free in buffer b[] */
+			if(n)
+			{
+				Skein_assert(n < msgByteCnt);         /* check on our logic here */
+				memcpy(&ctx->b[ctx->h.bCnt], msg, n);
+				msgByteCnt  -= n;
+				msg         += n;
+				ctx->h.bCnt += n;
+			}
+			Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
+			Skein_512_Process_Block(ctx, ctx->b, 1, SKEIN_512_BLOCK_BYTES);
+			ctx->h.bCnt = 0;
+		}
+		/* now process any remaining full blocks, directly from input message data */
+		if(msgByteCnt > SKEIN_512_BLOCK_BYTES)
+		{
+			n = (msgByteCnt - 1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */
+			Skein_512_Process_Block(ctx, msg, n, SKEIN_512_BLOCK_BYTES);
+			msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
+			msg        += n * SKEIN_512_BLOCK_BYTES;
+		}
+		Skein_assert(ctx->h.bCnt == 0);
+	}

-    /* copy any remaining source message data bytes into b[] */
-    if (msgByteCnt)
-        {
-        Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
-        memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt);
-        ctx->h.bCnt += msgByteCnt;
-        }
+	/* copy any remaining source message data bytes into b[] */
+	if(msgByteCnt)
+	{
+		Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
+		memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt);
+		ctx->h.bCnt += msgByteCnt;
+	}
+
+	return SKEIN_SUCCESS;
+}

-    return SKEIN_SUCCESS;
-    }
-   
 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* finalize the hash computation and output the result */
-static int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal)
-    {
-    size_t i,n,byteCnt;
-    u64b_t X[SKEIN_512_STATE_WORDS];
-    Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL);    /* catch uninitialized context */
+static int Skein_512_Final(Skein_512_Ctxt_t* ctx, u08b_t* hashVal)
+{
+	size_t i, n, byteCnt;
+	u64b_t X[SKEIN_512_STATE_WORDS];
+	Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);   /* catch uninitialized context */

-    ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;                 /* tag as the final block */
-    if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)            /* zero pad b[] if necessary */
-        memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+	ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;                 /* tag as the final block */
+	if(ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)             /* zero pad b[] if necessary */
+	{
+		memset(&ctx->b[ctx->h.bCnt], 0, SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+	}

-    Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt);  /* process the final block */
-    
-    /* now output the result */
-    byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
+	Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); /* process the final block */

-    /* run Threefish in "counter mode" to generate output */
-    memset(ctx->b,0,sizeof(ctx->b));  /* zero out b[], so it can hold the counter */
-    memcpy(X,ctx->X,sizeof(X));       /* keep a local copy of counter mode "key" */
-    for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++)
-        {
-        ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */
-        Skein_Start_New_Type(ctx,OUT_FINAL);
-        Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */
-        n = byteCnt - i*SKEIN_512_BLOCK_BYTES;   /* number of output bytes left to go */
-        if (n >= SKEIN_512_BLOCK_BYTES)
-            n  = SKEIN_512_BLOCK_BYTES;
-        Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n);   /* "output" the ctr mode bytes */
-        Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES);
-        memcpy(ctx->X,X,sizeof(X));   /* restore the counter mode key for next time */
-        }
-    return SKEIN_SUCCESS;
-    }
+	/* now output the result */
+	byteCnt = (ctx->h.hashBitLen + 7) >> 3;             /* total number of output bytes */
+
+	/* run Threefish in "counter mode" to generate output */
+	memset(ctx->b, 0, sizeof(ctx->b)); /* zero out b[], so it can hold the counter */
+	memcpy(X, ctx->X, sizeof(X));     /* keep a local copy of counter mode "key" */
+	for(i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++)
+	{
+		((u64b_t*)ctx->b)[0] = Skein_Swap64((u64b_t) i); /* build the counter block */
+		Skein_Start_New_Type(ctx, OUT_FINAL);
+		Skein_512_Process_Block(ctx, ctx->b, 1, sizeof(u64b_t)); /* run "counter mode" */
+		n = byteCnt - i * SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */
+		if(n >= SKEIN_512_BLOCK_BYTES)
+		{
+			n  = SKEIN_512_BLOCK_BYTES;
+		}
+		Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES, ctx->X, n); /* "output" the ctr mode bytes */
+		Skein_Show_Final(512, &ctx->h, n, hashVal + i * SKEIN_512_BLOCK_BYTES);
+		memcpy(ctx->X, X, sizeof(X)); /* restore the counter mode key for next time */
+	}
+	return SKEIN_SUCCESS;
+}

 #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
 static size_t Skein_512_API_CodeSize(void)
-    {
-    return ((u08b_t *) Skein_512_API_CodeSize) -
-           ((u08b_t *) Skein_512_Init);
-    }
+{
+	return ((u08b_t*) Skein_512_API_CodeSize) -
+	       ((u08b_t*) Skein_512_Init);
+}
 #endif

 typedef struct
 {
-  uint_t  statebits;                      /* 256, 512, or 1024 */
-  union
-  {
-    Skein_Ctxt_Hdr_t h;                 /* common header "overlay" */
-    Skein_512_Ctxt_t ctx_512;
-  } u;
+	uint_t  statebits;                      /* 256, 512, or 1024 */
+	union
+	{
+		Skein_Ctxt_Hdr_t h;                 /* common header "overlay" */
+		Skein_512_Ctxt_t ctx_512;
+	} u;
 }
 hashState;

 /* "incremental" hashing API */
-static SkeinHashReturn Init  (hashState *state, int hashbitlen);
-static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen);
-static SkeinHashReturn Final (hashState *state,       SkeinBitSequence *hashval);
+static SkeinHashReturn Init(hashState* state, int hashbitlen);
+static SkeinHashReturn Update(hashState* state, const SkeinBitSequence* data, SkeinDataLength databitlen);
+static SkeinHashReturn Final(hashState* state,       SkeinBitSequence* hashval);

 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* select the context size and init the context */
-static SkeinHashReturn Init(hashState *state, int hashbitlen)
+static SkeinHashReturn Init(hashState* state, int hashbitlen)
 {
-    state->statebits = 64*SKEIN_512_STATE_WORDS;
-    return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen);
+	state->statebits = 64 * SKEIN_512_STATE_WORDS;
+	return Skein_512_Init(&state->u.ctx_512, (size_t) hashbitlen);
 }

 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* process data to be hashed */
-static SkeinHashReturn Update(hashState *state, const SkeinBitSequence *data, SkeinDataLength databitlen)
+static SkeinHashReturn Update(hashState* state, const SkeinBitSequence* data, SkeinDataLength databitlen)
 {
-  /* only the final Update() call is allowed do partial bytes, else assert an error */
-  Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, SKEIN_FAIL);
+	/* only the final Update() call is allowed do partial bytes, else assert an error */
+	Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, SKEIN_FAIL);

-  Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,SKEIN_FAIL);
-  if ((databitlen & 7) == 0)  /* partial bytes? */
-  {
-    return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3);
-  }
-  else
-  {   /* handle partial final byte */
-    size_t bCnt = (databitlen >> 3) + 1;                  /* number of bytes to handle (nonzero here!) */
-    u08b_t b,mask;
+	Skein_Assert(state->statebits % 256 == 0 && (state->statebits - 256) < 1024, SKEIN_FAIL);
+	if((databitlen & 7) == 0)   /* partial bytes? */
+	{
+		return Skein_512_Update(&state->u.ctx_512, data, databitlen >> 3);
+	}
+	else
+	{
+		/* handle partial final byte */
+		size_t bCnt = (databitlen >> 3) + 1;                  /* number of bytes to handle (nonzero here!) */
+		u08b_t b, mask;

-    mask = (u08b_t) (1u << (7 - (databitlen & 7)));       /* partial byte bit mask */
-    b    = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask);   /* apply bit padding on final byte */
+		mask = (u08b_t)(1u << (7 - (databitlen & 7)));        /* partial byte bit mask */
+		b    = (u08b_t)((data[bCnt - 1] & (0 - mask)) | mask); /* apply bit padding on final byte */

-    Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte    */
-    Skein_512_Update(&state->u.ctx_512,&b  ,  1   ); /* process the (masked) partial byte */
-    Skein_Set_Bit_Pad_Flag(state->u.h);                    /* set tweak flag for the final call */
+		Skein_512_Update(&state->u.ctx_512, data, bCnt - 1); /* process all but the final byte    */
+		Skein_512_Update(&state->u.ctx_512, &b  ,  1);   /* process the (masked) partial byte */
+		Skein_Set_Bit_Pad_Flag(state->u.h);                    /* set tweak flag for the final call */

-    return SKEIN_SUCCESS;
-  }
+		return SKEIN_SUCCESS;
+	}
 }

 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* finalize hash computation and output the result (hashbitlen bits) */
-static SkeinHashReturn Final(hashState *state, SkeinBitSequence *hashval)
+static SkeinHashReturn Final(hashState* state, SkeinBitSequence* hashval)
 {
-  Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL);
-  return Skein_512_Final(&state->u.ctx_512,hashval);
+	Skein_Assert(state->statebits % 256 == 0 && (state->statebits - 256) < 1024, FAIL);
+	return Skein_512_Final(&state->u.ctx_512, hashval);
 }

 /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 /* all-in-one hash function */
-SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence *data, /* all-in-one call */
-                SkeinDataLength databitlen,SkeinBitSequence *hashval)
+SkeinHashReturn skein_hash(int hashbitlen, const SkeinBitSequence* data, /* all-in-one call */
+                           SkeinDataLength databitlen, SkeinBitSequence* hashval)
 {
-  hashState  state;
-  SkeinHashReturn r = Init(&state,hashbitlen);
-  if (r == SKEIN_SUCCESS)
-  { /* these calls do not fail when called properly */
-    r = Update(&state,data,databitlen);
-    Final(&state,hashval);
-  }
-  return r;
+	hashState  state;
+	SkeinHashReturn r = Init(&state, hashbitlen);
+	if(r == SKEIN_SUCCESS)
+	{
+		/* these calls do not fail when called properly */
+		r = Update(&state, data, databitlen);
+		Final(&state, hashval);
+	}
+	return r;
 }

-void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval){
-  #define XMR_HASHBITLEN 256
-  #define XMR_DATABITLEN 1600
+void xmr_skein(const SkeinBitSequence* data, SkeinBitSequence* hashval)
+{
+#define XMR_HASHBITLEN 256
+#define XMR_DATABITLEN 1600

-  // Init
-  hashState  state;
-  state.statebits = 64*SKEIN_512_STATE_WORDS;
+	// Init
+	hashState  state;
+	state.statebits = 64 * SKEIN_512_STATE_WORDS;

-  // Skein_512_Init(&state.u.ctx_512, (size_t)XMR_HASHBITLEN);
-  state.u.ctx_512.h.hashBitLen = XMR_HASHBITLEN;
-  memcpy(state.u.ctx_512.X,SKEIN_512_IV_256,sizeof(state.u.ctx_512.X));
-  Skein_512_Ctxt_t* ctx = &(state.u.ctx_512);
-  Skein_Start_New_Type(ctx,MSG);
+	// Skein_512_Init(&state.u.ctx_512, (size_t)XMR_HASHBITLEN);
+	state.u.ctx_512.h.hashBitLen = XMR_HASHBITLEN;
+	memcpy(state.u.ctx_512.X, SKEIN_512_IV_256, sizeof(state.u.ctx_512.X));
+	Skein_Start_New_Type(&state.u.ctx_512, MSG);

-  // Update
-  if ((XMR_DATABITLEN & 7) == 0){  /* partial bytes? */
-    Skein_512_Update(&state.u.ctx_512,data,XMR_DATABITLEN >> 3);
-  }else{   /* handle partial final byte */
-    size_t bCnt = (XMR_DATABITLEN >> 3) + 1;                  /* number of bytes to handle (nonzero here!) */
-    u08b_t b,mask;
+	// Update
+	if((XMR_DATABITLEN & 7) == 0)    // partial bytes?
+	{
+		Skein_512_Update(&state.u.ctx_512, data, XMR_DATABITLEN >> 3);
+	}
+	else     // handle partial final byte
+	{
+		size_t bCnt = (XMR_DATABITLEN >> 3) + 1;                  // number of bytes to handle (nonzero here!)
+		u08b_t b, mask;

-    mask = (u08b_t) (1u << (7 - (XMR_DATABITLEN & 7)));       /* partial byte bit mask */
-    b    = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask);   /* apply bit padding on final byte */
+		mask = (u08b_t)(1u << (7 - (XMR_DATABITLEN & 7)));        // partial byte bit mask
+		b    = (u08b_t)((data[bCnt - 1] & (0 - mask)) | mask); // apply bit padding on final byte

-    Skein_512_Update(&state.u.ctx_512,data,bCnt-1); /* process all but the final byte    */
-    Skein_512_Update(&state.u.ctx_512,&b  ,  1   ); /* process the (masked) partial byte */
-    Skein_Set_Bit_Pad_Flag(state.u.h);                    /* set tweak flag for the final call */
-  }
+		Skein_512_Update(&state.u.ctx_512, data, bCnt - 1); // process all but the final byte
+		Skein_512_Update(&state.u.ctx_512, &b  ,  1);   //process the (masked) partial byte
+		Skein_Set_Bit_Pad_Flag(state.u.h);                    // set tweak flag for the final call
+	}

-  // Finalize
-  Skein_512_Final(&state.u.ctx_512, hashval);
+	// Finalize
+	Skein_512_Final(&state.u.ctx_512, hashval);
 }
--- a/src/crypto/c_skein.h
+++ b/src/crypto/c_skein.h
@ -9,7 +9,7 @@
 ** This algorithm and source code is released to the public domain.
 **
 ***************************************************************************
-** 
+**
 ** The following compile-time switches may be defined to control some
 ** tradeoffs between speed, code size, error checking, and security.
 **
@ -20,8 +20,8 @@
 **                            [default: no callouts (no overhead)]
 **
 **  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
-**                            code. If not defined, most error checking 
-**                            is disabled (for performance). Otherwise, 
+**                            code. If not defined, most error checking
+**                            is disabled (for performance). Otherwise,
 **                            the switch value is interpreted as:
 **                                0: use assert()      to flag errors
 **                                1: return SKEIN_FAIL to flag errors
@ -31,9 +31,9 @@

 typedef enum
 {
-  SKEIN_SUCCESS         =      0,          /* return codes from Skein calls */
-  SKEIN_FAIL            =      1,
-  SKEIN_BAD_HASHLEN     =      2
+	SKEIN_SUCCESS         =      0,          /* return codes from Skein calls */
+	SKEIN_FAIL            =      1,
+	SKEIN_BAD_HASHLEN     =      2
 }
 SkeinHashReturn;

@ -41,9 +41,9 @@ typedef size_t   SkeinDataLength;                /* bit count  type */
 typedef u08b_t   SkeinBitSequence;               /* bit stream type */

 /* "all-in-one" call */
-SkeinHashReturn skein_hash(int hashbitlen,   const SkeinBitSequence *data,
-        SkeinDataLength databitlen, SkeinBitSequence *hashval);
+SkeinHashReturn skein_hash(int hashbitlen,   const SkeinBitSequence* data,
+                           SkeinDataLength databitlen, SkeinBitSequence* hashval);

-void xmr_skein(const SkeinBitSequence *data, SkeinBitSequence *hashval);
+void xmr_skein(const SkeinBitSequence* data, SkeinBitSequence* hashval);

 #endif  /* ifndef _SKEIN_H_ */
--- a/src/crypto/groestl_tables.h
+++ b/src/crypto/groestl_tables.h
@ -3,36 +3,37 @@


 const uint32_t T[512] = {0xa5f432c6, 0xc6a597f4, 0x84976ff8, 0xf884eb97, 0x99b05eee, 0xee99c7b0, 0x8d8c7af6, 0xf68df78c, 0xd17e8ff, 0xff0de517, 0xbddc0ad6, 0xd6bdb7dc, 0xb1c816de, 0xdeb1a7c8, 0x54fc6d91, 0x915439fc
-, 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5
-, 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d
-, 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded
-, 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1
-, 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441
-, 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4
-, 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba
-, 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616
-, 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2
-, 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c
-, 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de
-, 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7
-, 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e
-, 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c
-, 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7
-, 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b
-, 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4
-, 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e
-, 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a
-, 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37
-, 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86
-, 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b
-, 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028
-, 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3
-, 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94
-, 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836
-, 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0
-, 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2
-, 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e
-, 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3
-, 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e};
+                         , 0x50f09060, 0x6050c0f0, 0x3050702, 0x2030405, 0xa9e02ece, 0xcea987e0, 0x7d87d156, 0x567dac87, 0x192bcce7, 0xe719d52b, 0x62a613b5, 0xb56271a6, 0xe6317c4d, 0x4de69a31, 0x9ab559ec, 0xec9ac3b5
+                         , 0x45cf408f, 0x8f4505cf, 0x9dbca31f, 0x1f9d3ebc, 0x40c04989, 0x894009c0, 0x879268fa, 0xfa87ef92, 0x153fd0ef, 0xef15c53f, 0xeb2694b2, 0xb2eb7f26, 0xc940ce8e, 0x8ec90740, 0xb1de6fb, 0xfb0bed1d
+                         , 0xec2f6e41, 0x41ec822f, 0x67a91ab3, 0xb3677da9, 0xfd1c435f, 0x5ffdbe1c, 0xea256045, 0x45ea8a25, 0xbfdaf923, 0x23bf46da, 0xf7025153, 0x53f7a602, 0x96a145e4, 0xe496d3a1, 0x5bed769b, 0x9b5b2ded
+                         , 0xc25d2875, 0x75c2ea5d, 0x1c24c5e1, 0xe11cd924, 0xaee9d43d, 0x3dae7ae9, 0x6abef24c, 0x4c6a98be, 0x5aee826c, 0x6c5ad8ee, 0x41c3bd7e, 0x7e41fcc3, 0x206f3f5, 0xf502f106, 0x4fd15283, 0x834f1dd1
+                         , 0x5ce48c68, 0x685cd0e4, 0xf4075651, 0x51f4a207, 0x345c8dd1, 0xd134b95c, 0x818e1f9, 0xf908e918, 0x93ae4ce2, 0xe293dfae, 0x73953eab, 0xab734d95, 0x53f59762, 0x6253c4f5, 0x3f416b2a, 0x2a3f5441
+                         , 0xc141c08, 0x80c1014, 0x52f66395, 0x955231f6, 0x65afe946, 0x46658caf, 0x5ee27f9d, 0x9d5e21e2, 0x28784830, 0x30286078, 0xa1f8cf37, 0x37a16ef8, 0xf111b0a, 0xa0f1411, 0xb5c4eb2f, 0x2fb55ec4
+                         , 0x91b150e, 0xe091c1b, 0x365a7e24, 0x2436485a, 0x9bb6ad1b, 0x1b9b36b6, 0x3d4798df, 0xdf3da547, 0x266aa7cd, 0xcd26816a, 0x69bbf54e, 0x4e699cbb, 0xcd4c337f, 0x7fcdfe4c, 0x9fba50ea, 0xea9fcfba
+                         , 0x1b2d3f12, 0x121b242d, 0x9eb9a41d, 0x1d9e3ab9, 0x749cc458, 0x5874b09c, 0x2e724634, 0x342e6872, 0x2d774136, 0x362d6c77, 0xb2cd11dc, 0xdcb2a3cd, 0xee299db4, 0xb4ee7329, 0xfb164d5b, 0x5bfbb616
+                         , 0xf601a5a4, 0xa4f65301, 0x4dd7a176, 0x764decd7, 0x61a314b7, 0xb76175a3, 0xce49347d, 0x7dcefa49, 0x7b8ddf52, 0x527ba48d, 0x3e429fdd, 0xdd3ea142, 0x7193cd5e, 0x5e71bc93, 0x97a2b113, 0x139726a2
+                         , 0xf504a2a6, 0xa6f55704, 0x68b801b9, 0xb96869b8, 0x0, 0x0, 0x2c74b5c1, 0xc12c9974, 0x60a0e040, 0x406080a0, 0x1f21c2e3, 0xe31fdd21, 0xc8433a79, 0x79c8f243, 0xed2c9ab6, 0xb6ed772c
+                         , 0xbed90dd4, 0xd4beb3d9, 0x46ca478d, 0x8d4601ca, 0xd9701767, 0x67d9ce70, 0x4bddaf72, 0x724be4dd, 0xde79ed94, 0x94de3379, 0xd467ff98, 0x98d42b67, 0xe82393b0, 0xb0e87b23, 0x4ade5b85, 0x854a11de
+                         , 0x6bbd06bb, 0xbb6b6dbd, 0x2a7ebbc5, 0xc52a917e, 0xe5347b4f, 0x4fe59e34, 0x163ad7ed, 0xed16c13a, 0xc554d286, 0x86c51754, 0xd762f89a, 0x9ad72f62, 0x55ff9966, 0x6655ccff, 0x94a7b611, 0x119422a7
+                         , 0xcf4ac08a, 0x8acf0f4a, 0x1030d9e9, 0xe910c930, 0x60a0e04, 0x406080a, 0x819866fe, 0xfe81e798, 0xf00baba0, 0xa0f05b0b, 0x44ccb478, 0x7844f0cc, 0xbad5f025, 0x25ba4ad5, 0xe33e754b, 0x4be3963e
+                         , 0xf30eaca2, 0xa2f35f0e, 0xfe19445d, 0x5dfeba19, 0xc05bdb80, 0x80c01b5b, 0x8a858005, 0x58a0a85, 0xadecd33f, 0x3fad7eec, 0xbcdffe21, 0x21bc42df, 0x48d8a870, 0x7048e0d8, 0x40cfdf1, 0xf104f90c
+                         , 0xdf7a1963, 0x63dfc67a, 0xc1582f77, 0x77c1ee58, 0x759f30af, 0xaf75459f, 0x63a5e742, 0x426384a5, 0x30507020, 0x20304050, 0x1a2ecbe5, 0xe51ad12e, 0xe12effd, 0xfd0ee112, 0x6db708bf, 0xbf6d65b7
+                         , 0x4cd45581, 0x814c19d4, 0x143c2418, 0x1814303c, 0x355f7926, 0x26354c5f, 0x2f71b2c3, 0xc32f9d71, 0xe13886be, 0xbee16738, 0xa2fdc835, 0x35a26afd, 0xcc4fc788, 0x88cc0b4f, 0x394b652e, 0x2e395c4b
+                         , 0x57f96a93, 0x93573df9, 0xf20d5855, 0x55f2aa0d, 0x829d61fc, 0xfc82e39d, 0x47c9b37a, 0x7a47f4c9, 0xacef27c8, 0xc8ac8bef, 0xe73288ba, 0xbae76f32, 0x2b7d4f32, 0x322b647d, 0x95a442e6, 0xe695d7a4
+                         , 0xa0fb3bc0, 0xc0a09bfb, 0x98b3aa19, 0x199832b3, 0xd168f69e, 0x9ed12768, 0x7f8122a3, 0xa37f5d81, 0x66aaee44, 0x446688aa, 0x7e82d654, 0x547ea882, 0xabe6dd3b, 0x3bab76e6, 0x839e950b, 0xb83169e
+                         , 0xca45c98c, 0x8cca0345, 0x297bbcc7, 0xc729957b, 0xd36e056b, 0x6bd3d66e, 0x3c446c28, 0x283c5044, 0x798b2ca7, 0xa779558b, 0xe23d81bc, 0xbce2633d, 0x1d273116, 0x161d2c27, 0x769a37ad, 0xad76419a
+                         , 0x3b4d96db, 0xdb3bad4d, 0x56fa9e64, 0x6456c8fa, 0x4ed2a674, 0x744ee8d2, 0x1e223614, 0x141e2822, 0xdb76e492, 0x92db3f76, 0xa1e120c, 0xc0a181e, 0x6cb4fc48, 0x486c90b4, 0xe4378fb8, 0xb8e46b37
+                         , 0x5de7789f, 0x9f5d25e7, 0x6eb20fbd, 0xbd6e61b2, 0xef2a6943, 0x43ef862a, 0xa6f135c4, 0xc4a693f1, 0xa8e3da39, 0x39a872e3, 0xa4f7c631, 0x31a462f7, 0x37598ad3, 0xd337bd59, 0x8b8674f2, 0xf28bff86
+                         , 0x325683d5, 0xd532b156, 0x43c54e8b, 0x8b430dc5, 0x59eb856e, 0x6e59dceb, 0xb7c218da, 0xdab7afc2, 0x8c8f8e01, 0x18c028f, 0x64ac1db1, 0xb16479ac, 0xd26df19c, 0x9cd2236d, 0xe03b7249, 0x49e0923b
+                         , 0xb4c71fd8, 0xd8b4abc7, 0xfa15b9ac, 0xacfa4315, 0x709faf3, 0xf307fd09, 0x256fa0cf, 0xcf25856f, 0xafea20ca, 0xcaaf8fea, 0x8e897df4, 0xf48ef389, 0xe9206747, 0x47e98e20, 0x18283810, 0x10182028
+                         , 0xd5640b6f, 0x6fd5de64, 0x888373f0, 0xf088fb83, 0x6fb1fb4a, 0x4a6f94b1, 0x7296ca5c, 0x5c72b896, 0x246c5438, 0x3824706c, 0xf1085f57, 0x57f1ae08, 0xc7522173, 0x73c7e652, 0x51f36497, 0x975135f3
+                         , 0x2365aecb, 0xcb238d65, 0x7c8425a1, 0xa17c5984, 0x9cbf57e8, 0xe89ccbbf, 0x21635d3e, 0x3e217c63, 0xdd7cea96, 0x96dd377c, 0xdc7f1e61, 0x61dcc27f, 0x86919c0d, 0xd861a91, 0x85949b0f, 0xf851e94
+                         , 0x90ab4be0, 0xe090dbab, 0x42c6ba7c, 0x7c42f8c6, 0xc4572671, 0x71c4e257, 0xaae529cc, 0xccaa83e5, 0xd873e390, 0x90d83b73, 0x50f0906, 0x6050c0f, 0x103f4f7, 0xf701f503, 0x12362a1c, 0x1c123836
+                         , 0xa3fe3cc2, 0xc2a39ffe, 0x5fe18b6a, 0x6a5fd4e1, 0xf910beae, 0xaef94710, 0xd06b0269, 0x69d0d26b, 0x91a8bf17, 0x17912ea8, 0x58e87199, 0x995829e8, 0x2769533a, 0x3a277469, 0xb9d0f727, 0x27b94ed0
+                         , 0x384891d9, 0xd938a948, 0x1335deeb, 0xeb13cd35, 0xb3cee52b, 0x2bb356ce, 0x33557722, 0x22334455, 0xbbd604d2, 0xd2bbbfd6, 0x709039a9, 0xa9704990, 0x89808707, 0x7890e80, 0xa7f2c133, 0x33a766f2
+                         , 0xb6c1ec2d, 0x2db65ac1, 0x22665a3c, 0x3c227866, 0x92adb815, 0x15922aad, 0x2060a9c9, 0xc9208960, 0x49db5c87, 0x874915db, 0xff1ab0aa, 0xaaff4f1a, 0x7888d850, 0x5078a088, 0x7a8e2ba5, 0xa57a518e
+                         , 0x8f8a8903, 0x38f068a, 0xf8134a59, 0x59f8b213, 0x809b9209, 0x980129b, 0x1739231a, 0x1a173439, 0xda751065, 0x65daca75, 0x315384d7, 0xd731b553, 0xc651d584, 0x84c61351, 0xb8d303d0, 0xd0b8bbd3
+                         , 0xc35edc82, 0x82c31f5e, 0xb0cbe229, 0x29b052cb, 0x7799c35a, 0x5a77b499, 0x11332d1e, 0x1e113c33, 0xcb463d7b, 0x7bcbf646, 0xfc1fb7a8, 0xa8fc4b1f, 0xd6610c6d, 0x6dd6da61, 0x3a4e622c, 0x2c3a584e
+                        };

 #endif /* __tables_h */
--- a/src/crypto/skein_port.h
+++ b/src/crypto/skein_port.h
@ -111,10 +111,10 @@ typedef uint64_t        u64b_t;             /* 64-bit unsigned integer */


 #if   PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
-    /* here for big-endian CPUs */
+/* here for big-endian CPUs */
 #define SKEIN_NEED_SWAP   (1)
 #elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
-    /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
+/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
 #define SKEIN_NEED_SWAP   (0)
 #if   PLATFORM_MUST_ALIGN == 0              /* ok to use "fast" versions? */
 #define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt)
@ -134,14 +134,14 @@ typedef uint64_t        u64b_t;             /* 64-bit unsigned integer */
 #ifndef Skein_Swap64  /* swap for big-endian, nop for little-endian */
 #if     SKEIN_NEED_SWAP
 #define Skein_Swap64(w64)                       \
-  ( (( ((u64b_t)(w64))       & 0xFF) << 56) |   \
-    (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) |   \
-    (((((u64b_t)(w64)) >>16) & 0xFF) << 40) |   \
-    (((((u64b_t)(w64)) >>24) & 0xFF) << 32) |   \
-    (((((u64b_t)(w64)) >>32) & 0xFF) << 24) |   \
-    (((((u64b_t)(w64)) >>40) & 0xFF) << 16) |   \
-    (((((u64b_t)(w64)) >>48) & 0xFF) <<  8) |   \
-    (((((u64b_t)(w64)) >>56) & 0xFF)      ) )
+	( (( ((u64b_t)(w64))       & 0xFF) << 56) |   \
+	  (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) |   \
+	  (((((u64b_t)(w64)) >>16) & 0xFF) << 40) |   \
+	  (((((u64b_t)(w64)) >>24) & 0xFF) << 32) |   \
+	  (((((u64b_t)(w64)) >>32) & 0xFF) << 24) |   \
+	  (((((u64b_t)(w64)) >>40) & 0xFF) << 16) |   \
+	  (((((u64b_t)(w64)) >>48) & 0xFF) <<  8) |   \
+	  (((((u64b_t)(w64)) >>56) & 0xFF)      ) )
 #else
 #define Skein_Swap64(w64)  (w64)
 #endif
@ -149,38 +149,42 @@ typedef uint64_t        u64b_t;             /* 64-bit unsigned integer */


 #ifndef Skein_Put64_LSB_First
-void    Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
+void    Skein_Put64_LSB_First(u08b_t* dst, const u64b_t* src, size_t bCnt)
 #ifdef  SKEIN_PORT_CODE /* instantiate the function code here? */
-    { /* this version is fully portable (big-endian or little-endian), but slow */
-    size_t n;
+{
+	/* this version is fully portable (big-endian or little-endian), but slow */
+	size_t n;

-    for (n=0;n<bCnt;n++)
-        dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7)));
-    }
+	for(n = 0; n < bCnt; n++)
+	{
+		dst[n] = (u08b_t)(src[n >> 3] >> (8 * (n & 7)));
+	}
+}
 #else
-    ;    /* output only the function prototype */
+;    /* output only the function prototype */
 #endif
 #endif   /* ifndef Skein_Put64_LSB_First */


 #ifndef Skein_Get64_LSB_First
-void    Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
+void    Skein_Get64_LSB_First(u64b_t* dst, const u08b_t* src, size_t wCnt)
 #ifdef  SKEIN_PORT_CODE /* instantiate the function code here? */
-    { /* this version is fully portable (big-endian or little-endian), but slow */
-    size_t n;
+{
+	/* this version is fully portable (big-endian or little-endian), but slow */
+	size_t n;

-    for (n=0;n<8*wCnt;n+=8)
-        dst[n/8] = (((u64b_t) src[n  ])      ) +
-                   (((u64b_t) src[n+1]) <<  8) +
-                   (((u64b_t) src[n+2]) << 16) +
-                   (((u64b_t) src[n+3]) << 24) +
-                   (((u64b_t) src[n+4]) << 32) +
-                   (((u64b_t) src[n+5]) << 40) +
-                   (((u64b_t) src[n+6]) << 48) +
-                   (((u64b_t) src[n+7]) << 56) ;
-    }
+	for(n = 0; n < 8 * wCnt; n += 8)
+		dst[n / 8] = (((u64b_t) src[n  ])) +
+		             (((u64b_t) src[n + 1]) <<  8) +
+		             (((u64b_t) src[n + 2]) << 16) +
+		             (((u64b_t) src[n + 3]) << 24) +
+		             (((u64b_t) src[n + 4]) << 32) +
+		             (((u64b_t) src[n + 5]) << 40) +
+		             (((u64b_t) src[n + 6]) << 48) +
+		             (((u64b_t) src[n + 7]) << 56) ;
+}
 #else
-    ;    /* output only the function prototype */
+;    /* output only the function prototype */
 #endif
 #endif   /* ifndef Skein_Get64_LSB_First */

--- a/src/crypto/soft_aes.h
+++ b/src/crypto/soft_aes.h
@ -39,43 +39,43 @@


 #define saes_data(w) {\
-    w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
-    w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
-    w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
-    w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
-    w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
-    w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
-    w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
-    w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
-    w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
-    w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
-    w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
-    w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
-    w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
-    w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
-    w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
-    w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
-    w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
-    w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
-    w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
-    w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
-    w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
-    w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
-    w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
-    w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
-    w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
-    w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
-    w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
-    w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
-    w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
-    w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
-    w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
-    w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
+		w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
+		w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
+		w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
+		w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
+		w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
+		w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
+		w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
+		w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
+		w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
+		w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
+		w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
+		w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
+		w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
+		w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
+		w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
+		w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
+		w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
+		w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
+		w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
+		w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
+		w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
+		w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
+		w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
+		w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
+		w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
+		w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
+		w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
+		w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
+		w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
+		w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
+		w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
+		w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }

 #define SAES_WPOLY           0x011b

 #define saes_b2w(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
-    ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
+                                  ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))

 #define saes_f2(x)   ((x<<1) ^ (((x>>7) & 1) * SAES_WPOLY))
 #define saes_f3(x)   (saes_f2(x) ^ x)
@ -86,31 +86,35 @@
 #define saes_u2(p)   saes_b2w(         p, saes_f3(p), saes_f2(p),          p)
 #define saes_u3(p)   saes_b2w(         p,          p, saes_f3(p), saes_f2(p))

-alignas(16) const uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) };
-alignas(16) const uint8_t  saes_sbox[256] = saes_data(saes_h0);
+const uint32_t saes_table[4][256] = { saes_data(saes_u0), saes_data(saes_u1), saes_data(saes_u2), saes_data(saes_u3) };
+const uint8_t  saes_sbox[256] = saes_data(saes_h0);

 static inline __m128i soft_aesenc(__m128i in, __m128i key)
 {
-    const uint32_t x0 = _mm_cvtsi128_si32(in);
-    const uint32_t x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
-    const uint32_t x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
-    const uint32_t x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));
+	const uint32_t x0 = _mm_cvtsi128_si32(in);
+	const uint32_t x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0x55));
+	const uint32_t x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xAA));
+	const uint32_t x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(in, 0xFF));

 	__m128i out = _mm_set_epi32(
-		(saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^ saes_table[3][x2 >> 24]),
-		(saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^ saes_table[3][x1 >> 24]),
-		(saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^ saes_table[3][x0 >> 24]),
-		(saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^ saes_table[3][x3 >> 24]));
+	                  (saes_table[0][x3 & 0xff] ^ saes_table[1][(x0 >> 8) & 0xff] ^ saes_table[2][(x1 >> 16) & 0xff] ^
+	                   saes_table[3][x2 >> 24]),
+	                  (saes_table[0][x2 & 0xff] ^ saes_table[1][(x3 >> 8) & 0xff] ^ saes_table[2][(x0 >> 16) & 0xff] ^
+	                   saes_table[3][x1 >> 24]),
+	                  (saes_table[0][x1 & 0xff] ^ saes_table[1][(x2 >> 8) & 0xff] ^ saes_table[2][(x3 >> 16) & 0xff] ^
+	                   saes_table[3][x0 >> 24]),
+	                  (saes_table[0][x0 & 0xff] ^ saes_table[1][(x1 >> 8) & 0xff] ^ saes_table[2][(x2 >> 16) & 0xff] ^
+	                   saes_table[3][x3 >> 24]));

 	return _mm_xor_si128(out, key);
 }

 static inline uint32_t sub_word(uint32_t key)
 {
-	return (saes_sbox[key >> 24 ] << 24)   | 
-		(saes_sbox[(key >> 16) & 0xff] << 16 ) | 
-		(saes_sbox[(key >> 8)  & 0xff] << 8  ) | 
-		 saes_sbox[key & 0xff];
+	return (saes_sbox[key >> 24 ] << 24)   |
+	       (saes_sbox[(key >> 16) & 0xff] << 16) |
+	       (saes_sbox[(key >> 8)  & 0xff] << 8) |
+	       saes_sbox[key & 0xff];
 }

 #if defined(__clang__) || defined(XMRIG_ARM)
@ -123,7 +127,7 @@ static inline uint32_t _rotr(uint32_t value, uint32_t amount)
 template<uint8_t rcon>
 static inline __m128i soft_aeskeygenassist(__m128i key)
 {
-    const uint32_t X1 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)));
-    const uint32_t X3 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)));
-    return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3, _rotr(X1, 8) ^ rcon, X1);
+	const uint32_t X1 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)));
+	const uint32_t X3 = sub_word(_mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)));
+	return _mm_set_epi32(_rotr(X3, 8) ^ rcon, X3, _rotr(X1, 8) ^ rcon, X1);
 }