Integrated CN-Conceal (CCX) algo (#259)

* Integrated embedded config parsing #245

* cleanup

* Cleanup in remotelog

* Fixed MS Visual Studio 2019 compatibility

* Embedded config parsing only for miner not server

* wip

* Finished delete template

* WIP

* Integrated Argon2id/chukwa algo

* Added chukwa-wrkz algo variant

* Integrated cn-conceal/ccx for x86 arch
This commit is contained in:
Ben Gräf 2019-07-24 14:26:23 -07:00 committed by GitHub
parent dea9b975f8
commit 161856b2b1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 301 additions and 18 deletions

44
config.json Normal file
View file

@ -0,0 +1,44 @@
{
"algo": "cryptonight", // cryptonight (default), cryptonight-lite, cryptonight-ultralite, cryptonight-extremelite or cryptonight-heavy
"aesni": 0, // selection of AES-NI mode (0 auto, 1 on, 2 off)
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), '0', '1', '2', 'ipbc', 'xao', 'xtl', 'rto', 'xfh', 'upx', 'turtle', 'hosp', 'r', 'wow', 'double (xcash)', 'zls' (zelerius), 'rwz' (graft), 'upx2'
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
"asm-optimization" : "auto", // specificy the ASM optimization to use: -> auto (default), intel, ryzen, bulldozer, off
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
"colors": true, // false to disable colored output
"cpu-affinity": null, // set process affinity to CPU core(s), mask "0x3" for cores 0 and 1
"cpu-priority": null, // set process priority (0 idle, 2 normal to 5 highest)
"donate-level": 5, // donate level, mininum 1%
"log-file": null, // log all output to a file, example: "c:/some/path/xmrig.log"
"max-cpu-usage": 100, // maximum CPU usage for automatic mode, usually limiting factor is CPU cache not this option.
"print-time": 60, // print hashrate report every N seconds
"retries": 5, // number of times to retry before switch to backup server
"retry-pause": 5, // time to pause between retries
"safe": false, // true to safe adjust threads and av settings for current CPU
"syslog": false, // use system log for output messages
"reboot-cmd" : "", // command to execute to reboot the OS
"force-pow-variant" : false, // force pow variant, dont parse pow/variant from pool job
"skip-self-check" : false, // skip the self check on startup
"pools": [
{
"url": "donate2.graef.in:80", // URL of mining server
"user": "YOUR_WALLET_ID", // username for mining server
"pass": "x", // password for mining server
"use-tls" : false, // enable tls for pool communication (need pool support)
"keepalive": true, // send keepalived for prevent timeout (need pool support)
"nicehash": false // enable nicehash/xmrig-proxy support
}
],
"cc-client": {
"url": "localhost:3344", // url of the CC Server (ip:port)
"use-tls" : false, // enable tls for CC communication (needs to be enabled on CC Server too)
"access-token": "mySecret", // access token for CC Server (has to be the same in config_cc.json)
"worker-id": null, // custom worker-id for CC Server (otherwise hostname is used)
"update-interval-s": 10, // status update interval in seconds (default: 10 min: 1)
"use-remote-logging" : true, // enable remote logging on CC Server
"upload-config-on-startup" : true // upload current miner config to CC Server on startup
}
}

View file

@ -348,6 +348,7 @@ constexpr static const char *pow_variant_names[] = {
"zls", "zls",
"graft", "graft",
"upx2", "upx2",
"conceal",
"chukwa", "chukwa",
"wrkz" "wrkz"
}; };
@ -1305,7 +1306,17 @@ bool Options::parsePowVariant(const char *powVariant)
break; break;
} }
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "chukwa"))) { if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "conceal") || !strcmp(powVariant, "ccx"))) {
m_powVariant = POW_CONCEAL;
break;
}
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "trtl-chukwa") || !strcmp(powVariant, "trtl_chukwa") || !strcmp(powVariant, "chuckwa"))) {
m_powVariant = POW_ARGON2_CHUKWA;
break;
}
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "chukwa-wrkz") || !strcmp(powVariant, "chukwa_wrkz") || !strcmp(powVariant, "trtl-wrkz"))) {
m_powVariant = POW_ARGON2_CHUKWA; m_powVariant = POW_ARGON2_CHUKWA;
break; break;
} }

View file

@ -45,6 +45,7 @@ enum PowVariant
POW_ZELERIUS, POW_ZELERIUS,
POW_RWZ, POW_RWZ,
POW_UPX2, POW_UPX2,
POW_CONCEAL,
POW_ARGON2_CHUKWA, POW_ARGON2_CHUKWA,
POW_ARGON2_WRKZ, POW_ARGON2_WRKZ,
LAST_ITEM LAST_ITEM
@ -94,6 +95,8 @@ inline std::string getPowVariantName(PowVariant powVariant)
return "rwz"; return "rwz";
case POW_UPX2: case POW_UPX2:
return "upx2"; return "upx2";
case POW_CONCEAL:
return "conceal";
case POW_ARGON2_CHUKWA: case POW_ARGON2_CHUKWA:
return "chukwa"; return "chukwa";
case POW_ARGON2_WRKZ: case POW_ARGON2_WRKZ:
@ -185,6 +188,8 @@ inline PowVariant parseVariant(const std::string variant)
powVariant = PowVariant::POW_RWZ; powVariant = PowVariant::POW_RWZ;
} else if (variant == "upx2") { } else if (variant == "upx2") {
powVariant = PowVariant::POW_UPX2; powVariant = PowVariant::POW_UPX2;
} else if (variant == "conceal" || variant == "ccx") {
powVariant = PowVariant::POW_CONCEAL;
} else if (variant == "chukwa" || variant == "trtl-chukwa" || variant == "argon2-chukwa") { } else if (variant == "chukwa" || variant == "trtl-chukwa" || variant == "argon2-chukwa") {
powVariant = PowVariant::POW_ARGON2_CHUKWA; powVariant = PowVariant::POW_ARGON2_CHUKWA;
} else if (variant == "chukwa_wrkz" || variant == "wrkz" || variant == "argon2-wrkz") { } else if (variant == "chukwa_wrkz" || variant == "wrkz" || variant == "argon2-wrkz") {

View file

@ -69,6 +69,21 @@ const static uint8_t test_output_v0[160] = {
}; };
// CN CONCEAL
// Reference self-test vector for the cn-conceal (CCX) PoW variant:
// 160 bytes = five consecutive 32-byte hashes of the shared test input,
// compared in 32-byte increments by the startup self-check so that
// 1..5-way multihash configurations can each be validated.
const static uint8_t test_output_conceal[160] = {
0xB3, 0xA1, 0x67, 0x86, 0xD2, 0xC9, 0x85, 0xEC, 0xAD, 0xC4, 0x5F, 0x91, 0x05, 0x27, 0xC7, 0xA1,
0x96, 0xF0, 0xE1, 0xE9, 0x7C, 0x87, 0x09, 0x38, 0x1D, 0x7D, 0x41, 0x93, 0x35, 0xF8, 0x16, 0x72,
0xC3, 0xBD, 0x8D, 0xE8, 0xD5, 0xAE, 0xB8, 0x59, 0x0A, 0x6C, 0xCB, 0x7B, 0x41, 0x30, 0xF7, 0x04,
0xA5, 0x7C, 0xF9, 0xCA, 0x20, 0x49, 0x9C, 0xFD, 0xE8, 0x43, 0xCF, 0x66, 0x78, 0xEA, 0x76, 0xDD,
0x91, 0x0C, 0xDE, 0x29, 0x2A, 0xE0, 0xA8, 0xCA, 0xBC, 0xAA, 0x53, 0x4C, 0x93, 0x3E, 0x7B, 0x2C,
0xF1, 0xF9, 0xE1, 0x98, 0xB2, 0x92, 0x1E, 0x19, 0x93, 0x2A, 0x74, 0x9D, 0xDB, 0x10, 0x0F, 0x16,
0xD5, 0x3D, 0xE4, 0xC4, 0x23, 0xD9, 0x2E, 0xFD, 0x79, 0x8D, 0x1E, 0x48, 0x4E, 0x46, 0x08, 0x6C,
0xFF, 0x8A, 0x49, 0xFA, 0x1E, 0xB0, 0xB6, 0x9A, 0x47, 0x1C, 0xC6, 0x30, 0x36, 0x5D, 0xFD, 0x76,
0x10, 0x07, 0x44, 0xE6, 0xC8, 0x20, 0x2A, 0x84, 0x9D, 0x70, 0x22, 0x00, 0x8B, 0x9B, 0xBD, 0x8D,
0x27, 0x49, 0xA6, 0x06, 0xDC, 0xF0, 0xA1, 0x4B, 0x50, 0xA0, 0x12, 0xCD, 0x77, 0x01, 0x4C, 0x28
};
// CN v7 // CN v7
const static uint8_t test_output_v1[160] = { const static uint8_t test_output_v1[160] = {
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9, 0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,

View file

@ -191,11 +191,14 @@ static inline uint64_t __umul128(uint64_t multiplier, uint64_t multiplicand, uin
#ifdef _MSC_VER #ifdef _MSC_VER
# define SET_ROUNDING_MODE_UP() _control87(RC_UP, MCW_RC); # define SET_ROUNDING_MODE_UP() _control87(RC_UP, MCW_RC);
# define SET_ROUNDING_MODE_DOWN() _control87(RC_DOWN, MCW_RC); # define SET_ROUNDING_MODE_DOWN() _control87(RC_DOWN, MCW_RC);
# define SET_ROUNDING_MODE_NEAREST() _control87(RC_NEAR, MCW_RC);
#else #else
# define SET_ROUNDING_MODE_UP() std::fesetround(FE_UPWARD); # define SET_ROUNDING_MODE_UP() std::fesetround(FE_UPWARD);
# define SET_ROUNDING_MODE_DOWN() fesetround(FE_DOWNWARD); # define SET_ROUNDING_MODE_DOWN() fesetround(FE_DOWNWARD);
# define SET_ROUNDING_MODE_NEAREST() fesetround(FE_TONEAREST);
#endif #endif
# define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax, reverse) \ # define SHUFFLE_PHASE_1(l, idx, bx0, bx1, ax, reverse) \
{ \ { \
const __m128i chunk1 = _mm_load_si128((__m128i *)((l) + ((idx) ^ (reverse ? 0x30 : 0x10)))); \ const __m128i chunk1 = _mm_load_si128((__m128i *)((l) + ((idx) ^ (reverse ? 0x30 : 0x10)))); \
@ -663,7 +666,27 @@ int_sqrt_v2_fixup(r, n0);
return r; return r;
} }
// Broadcast a 32-bit integer constant into all four lanes of an __m128,
// reinterpreting the bits as floats (a bit-cast, not an int->float
// conversion). Used to build the sign/mantissa and exponent masks for
// the cn-conceal tweak.
inline __m128 _mm_set1_ps_epi32(uint32_t x)
{
    return _mm_castsi128_ps(_mm_set1_epi32(x));
}
// CN-Conceal (CCX) per-round tweak: mixes the floating-point "conceal"
// state (conc_var) into the AES input block cx before encryption, and
// advances conc_var. Callers set round-to-nearest first via
// SET_ROUNDING_MODE_NEAREST(); results depend on that rounding mode.
// NOTE: statement order and the exact FP operations are part of the PoW
// definition — do not reorder.
inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
{
__m128 r = _mm_cvtepi32_ps(cx); // convert the four int32 lanes of cx to floats
__m128 c_old = conc_var; // keep the previous state for the final mix below
r = _mm_add_ps(r, conc_var);
r = _mm_mul_ps(r, _mm_mul_ps(r, r)); // r = r^3
r = _mm_and_ps(_mm_set1_ps_epi32(0x807FFFFF), r); // keep only sign + mantissa bits
r = _mm_or_ps(_mm_set1_ps_epi32(0x40000000), r); // force the exponent of 2.0f, so |r| is in [2,4)
conc_var = _mm_add_ps(conc_var, r); // accumulate into the running conceal state
c_old = _mm_and_ps(_mm_set1_ps_epi32(0x807FFFFF), c_old); // same sign+mantissa normalization of the old state
c_old = _mm_or_ps(_mm_set1_ps_epi32(0x40000000), c_old);
// 536870880.0f is the fixed scale constant from the conceal reference implementation
__m128 nc = _mm_mul_ps(c_old, _mm_set1_ps(536870880.0f));
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc)); // truncate to int32 and xor into the AES input
}
template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT, size_t NUM_HASH_BLOCKS> template<size_t ITERATIONS, size_t INDEX_SHIFT, size_t MEM, size_t MASK, bool SOFT_AES, PowVariant VARIANT, size_t NUM_HASH_BLOCKS>
class CryptoNightMultiHash class CryptoNightMultiHash
{ {
@ -768,8 +791,14 @@ public:
uint64_t* h; uint64_t* h;
uint64_t al; uint64_t al;
uint64_t ah; uint64_t ah;
__m128i bx;
uint64_t idx; uint64_t idx;
__m128i bx;
__m128 conc_var;
if (VARIANT == POW_CONCEAL) {
SET_ROUNDING_MODE_NEAREST()
conc_var = _mm_setzero_ps();
}
keccak(static_cast<const uint8_t*>(input), (int) size, scratchPad[0]->state, 200); keccak(static_cast<const uint8_t*>(input), (int) size, scratchPad[0]->state, 200);
@ -787,9 +816,19 @@ public:
__m128i cx; __m128i cx;
if (SOFT_AES) { if (SOFT_AES) {
if (VARIANT == POW_CONCEAL) {
cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
cryptonight_conceal_tweak(cx, conc_var);
cx = soft_aesenc(cx, _mm_set_epi64x(ah, al));
} else {
cx = soft_aesenc((uint32_t*)&l[idx & MASK], _mm_set_epi64x(ah, al)); cx = soft_aesenc((uint32_t*)&l[idx & MASK], _mm_set_epi64x(ah, al));
}
} else { } else {
cx = _mm_load_si128((__m128i*) &l[idx & MASK]); cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
if (VARIANT == POW_CONCEAL)
cryptonight_conceal_tweak(cx, conc_var);
cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah, al)); cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah, al));
} }
@ -1552,6 +1591,15 @@ public:
__m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128 conc_var0;
__m128 conc_var1;
if (VARIANT == POW_CONCEAL) {
SET_ROUNDING_MODE_NEAREST()
conc_var0 = _mm_setzero_ps();
conc_var1 = _mm_setzero_ps();
}
uint64_t idx0 = h0[0] ^h0[4]; uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4]; uint64_t idx1 = h1[0] ^h1[4];
@ -1560,12 +1608,28 @@ public:
__m128i cx1; __m128i cx1;
if (SOFT_AES) { if (SOFT_AES) {
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); if (VARIANT == POW_CONCEAL) {
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
} else {
cx0 = soft_aesenc((uint32_t *) &l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc((uint32_t *) &l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
}
} else { } else {
cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]); cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]); cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
if (VARIANT == POW_CONCEAL) {
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
}
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
} }
@ -2572,6 +2636,17 @@ public:
__m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]); __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128 conc_var0;
__m128 conc_var1;
__m128 conc_var2;
if (VARIANT == POW_CONCEAL) {
SET_ROUNDING_MODE_NEAREST()
conc_var0 = _mm_setzero_ps();
conc_var1 = _mm_setzero_ps();
conc_var2 = _mm_setzero_ps();
}
uint64_t idx0 = h0[0] ^h0[4]; uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4]; uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4]; uint64_t idx2 = h2[0] ^h2[4];
@ -2582,14 +2657,34 @@ public:
__m128i cx2; __m128i cx2;
if (SOFT_AES) { if (SOFT_AES) {
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); if (VARIANT == POW_CONCEAL) {
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2)); cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cryptonight_conceal_tweak(cx2, conc_var2);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
} else {
cx0 = soft_aesenc((uint32_t *) &l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc((uint32_t *) &l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc((uint32_t *) &l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
}
} else { } else {
cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]); cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]); cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]); cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
if (VARIANT == POW_CONCEAL) {
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cryptonight_conceal_tweak(cx2, conc_var2);
}
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2)); cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
@ -3771,6 +3866,19 @@ public:
__m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]); __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]); __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
__m128 conc_var0;
__m128 conc_var1;
__m128 conc_var2;
__m128 conc_var3;
if (VARIANT == POW_CONCEAL) {
SET_ROUNDING_MODE_NEAREST()
conc_var0 = _mm_setzero_ps();
conc_var1 = _mm_setzero_ps();
conc_var2 = _mm_setzero_ps();
conc_var3 = _mm_setzero_ps();
}
uint64_t idx0 = h0[0] ^h0[4]; uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4]; uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4]; uint64_t idx2 = h2[0] ^h2[4];
@ -3783,16 +3891,40 @@ public:
__m128i cx3; __m128i cx3;
if (SOFT_AES) { if (SOFT_AES) {
if (VARIANT == POW_CONCEAL) {
cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cryptonight_conceal_tweak(cx2, conc_var2);
cryptonight_conceal_tweak(cx3, conc_var3);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
} else {
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2)); cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3)); cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3));
}
} else { } else {
cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]); cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]); cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]); cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]); cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
if (VARIANT == POW_CONCEAL) {
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cryptonight_conceal_tweak(cx2, conc_var2);
cryptonight_conceal_tweak(cx3, conc_var3);
}
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2)); cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
@ -4821,6 +4953,21 @@ public:
__m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]); __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
__m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]); __m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
__m128 conc_var0;
__m128 conc_var1;
__m128 conc_var2;
__m128 conc_var3;
__m128 conc_var4;
if (VARIANT == POW_CONCEAL) {
SET_ROUNDING_MODE_NEAREST()
conc_var0 = _mm_setzero_ps();
conc_var1 = _mm_setzero_ps();
conc_var2 = _mm_setzero_ps();
conc_var3 = _mm_setzero_ps();
conc_var4 = _mm_setzero_ps();
}
uint64_t idx0 = h0[0] ^h0[4]; uint64_t idx0 = h0[0] ^h0[4];
uint64_t idx1 = h1[0] ^h1[4]; uint64_t idx1 = h1[0] ^h1[4];
uint64_t idx2 = h2[0] ^h2[4]; uint64_t idx2 = h2[0] ^h2[4];
@ -4835,11 +4982,31 @@ public:
__m128i cx4; __m128i cx4;
if (SOFT_AES) { if (SOFT_AES) {
if (VARIANT == POW_CONCEAL) {
cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cryptonight_conceal_tweak(cx2, conc_var2);
cryptonight_conceal_tweak(cx3, conc_var3);
cryptonight_conceal_tweak(cx4, conc_var4);
cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
cx4 = soft_aesenc(cx4, _mm_set_epi64x(ah4, al4));
} else {
cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0)); cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1)); cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2)); cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3)); cx3 = soft_aesenc((uint32_t*)&l3[idx3 & MASK], _mm_set_epi64x(ah3, al3));
cx4 = soft_aesenc((uint32_t*)&l4[idx4 & MASK], _mm_set_epi64x(ah4, al4)); cx4 = soft_aesenc((uint32_t*)&l4[idx4 & MASK], _mm_set_epi64x(ah4, al4));
}
} else { } else {
cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]); cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]); cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
@ -4847,6 +5014,14 @@ public:
cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]); cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]); cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
if (VARIANT == POW_CONCEAL) {
cryptonight_conceal_tweak(cx0, conc_var0);
cryptonight_conceal_tweak(cx1, conc_var1);
cryptonight_conceal_tweak(cx2, conc_var2);
cryptonight_conceal_tweak(cx3, conc_var3);
cryptonight_conceal_tweak(cx4, conc_var4);
}
cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0)); cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1)); cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2)); cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));

View file

@ -173,6 +173,8 @@ static void cryptonight_aesni(AsmOptimization asmOptimization, uint64_t height,
#endif #endif
} else if (variant == PowVariant::POW_XFH) { } else if (variant == PowVariant::POW_XFH) {
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad); CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
} else if (variant == PowVariant::POW_CONCEAL) {
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_CONCEAL, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
} else { } else {
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad); CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
} }
@ -297,6 +299,8 @@ static void cryptonight_softaes(AsmOptimization asmOptimization, uint64_t height
#endif #endif
} else if (variant == PowVariant::POW_XFH) { } else if (variant == PowVariant::POW_XFH) {
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad); CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_XFH, NUM_HASH_BLOCKS>::hashHeavy(input, size, output, scratchPad);
} else if (variant == PowVariant::POW_CONCEAL) {
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_CONCEAL, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
} else { } else {
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad); CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, POW_V0, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
} }
@ -844,6 +848,35 @@ bool HashSelector::selfCheck(Options::Algo algo)
result = result && memcmp(output, test_output_v2, 160) == 0; result = result && memcmp(output, test_output_v2, 160) == 0;
#endif #endif
// cn conceal
#if !defined(XMRIG_ARM)
hash_ctx[0](asmOptimization, 0, PowVariant::POW_CONCEAL, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_conceal, 32) == 0;
#if MAX_NUM_HASH_BLOCKS > 1
hash_ctx[1](asmOptimization, 0, PowVariant::POW_CONCEAL, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_conceal, 64) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 2
hash_ctx[2](asmOptimization, 0, PowVariant::POW_CONCEAL, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_conceal, 96) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 3
hash_ctx[3](asmOptimization, 0, PowVariant::POW_CONCEAL, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_conceal, 128) == 0;
#endif
#if MAX_NUM_HASH_BLOCKS > 4
hash_ctx[4](asmOptimization, 0, PowVariant::POW_CONCEAL, test_input, 76, output, scratchPads);
result = result && memcmp(output, test_output_conceal, 160) == 0;
#endif
#endif
// cn xfh aka cn-heavy-superfast // cn xfh aka cn-heavy-superfast
hash_ctx[0](asmOptimization, 0, PowVariant::POW_XFH, test_input, 76, output, scratchPads); hash_ctx[0](asmOptimization, 0, PowVariant::POW_XFH, test_input, 76, output, scratchPads);