Manual merge

BenDroid committed 2018-03-02 21:29:53 +01:00
parent e9edd366fe
commit 4f6200f057
4 changed files with 75 additions and 54 deletions

View file

@@ -147,13 +147,6 @@ int App::start()
     Summary::print();
 
-    if (m_options->dryRun()) {
-        LOG_NOTICE("OK");
-        release();
-
-        return 0;
-    }
-
 #   ifndef XMRIG_NO_API
     Api::start();
 #   endif
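The deleted block was the --dry-run early exit: after printing the summary, the miner logged OK, released its resources, and returned before the API and the workers started. A minimal standalone sketch of that control flow (the Options/App scaffolding here is hypothetical; only dryRun(), release(), and the OK log appear in the hunk itself):

#include <cstdio>

// Stand-ins for the real xmrig classes; only the removed control flow matters.
struct Options {
    bool dryRun() const { return m_dryRun; }
    bool m_dryRun = true;          // as if --dry-run had been passed
};

struct App {
    Options *m_options;

    void release() { /* free scratchpads, close handles, ... */ }

    int start() {
        // Summary::print();       // config summary, kept by the commit
        if (m_options->dryRun()) { // the branch this commit removes
            std::puts("OK");       // stands in for LOG_NOTICE("OK")
            release();
            return 0;              // exit before Api::start() and mining
        }
        // Api::start(); start workers, connect to the pool, ...
        return 0;
    }
};

int main() {
    Options options;
    App app{&options};
    return app.start();
}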

View file

@@ -136,7 +136,6 @@ static struct option const options[] = {
     { "cpu-affinity",  1, nullptr, 1020 },
     { "cpu-priority",  1, nullptr, 1021 },
     { "donate-level",  1, nullptr, 1003 },
-    { "dry-run",       0, nullptr, 5000 },
     { "help",          0, nullptr, 'h' },
     { "keepalive",     0, nullptr, 'k' },
     { "log-file",      1, nullptr, 'l' },
@@ -185,7 +184,6 @@ static struct option const config_options[] = {
     { "cpu-affinity",  1, nullptr, 1020 },
     { "cpu-priority",  1, nullptr, 1021 },
     { "donate-level",  1, nullptr, 1003 },
-    { "dry-run",       0, nullptr, 5000 },
     { "huge-pages",    0, nullptr, 1009 },
     { "log-file",      1, nullptr, 'l' },
     { "max-cpu-usage", 1, nullptr, 1004 },
@@ -510,8 +508,6 @@ bool Options::parseArg(int key, const char *arg)
     case 'S':  /* --syslog */
     case 1005: /* --safe */
     case 1006: /* --nicehash */
-    case 5000: /* --dry-run */
-        return parseBoolean(key, true);
 
     case 1002: /* --no-color */
     case 1009: /* --no-huge-pages */
@@ -725,10 +721,6 @@ bool Options::parseBoolean(int key, bool enable)
        m_colors = enable;
        break;
 
-    case 5000: /* --dry-run */
-        m_dryRun = enable;
-        break;
-
    default:
        break;
    }
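All three removals are layers of one getopt_long wiring: the option tables map --dry-run to key 5000, parseArg forwards key 5000 to parseBoolean, and parseBoolean stores the flag. A compilable sketch of that pattern under the same names (free functions and a global stand in for the real Options class):

#include <getopt.h>
#include <cstdio>

static bool m_dryRun = false;            // member of Options in the real code

static struct option const options[] = {
    { "dry-run", 0, nullptr, 5000 },     // the entry this commit deletes
    { nullptr,   0, nullptr, 0    }
};

static bool parseBoolean(int key, bool enable)
{
    switch (key) {
    case 5000: /* --dry-run */
        m_dryRun = enable;
        break;

    default:
        break;
    }

    return true;
}

static bool parseArg(int key, const char *arg)
{
    (void) arg;                          // boolean flags carry no argument

    switch (key) {
    case 5000: /* --dry-run */
        return parseBoolean(key, true);

    default:
        return false;
    }
}

int main(int argc, char **argv)
{
    int key;
    while ((key = getopt_long(argc, argv, "", options, nullptr)) != -1) {
        parseArg(key, optarg);
    }

    std::printf("dry run: %s\n", m_dryRun ? "on" : "off");
    return 0;
}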

View file

@@ -392,11 +392,11 @@ public:
         for (size_t i = 0; i < ITERATIONS; i++) {
             for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
                 __m128i cx;
-                cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
 
                 if (SOFT_AES) {
                     cx = soft_aesenc(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
                 } else {
+                    cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
                     cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
                 }
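This is the pattern every following hunk repeats: the 16-byte scratchpad load moves out of the shared path and into the hardware-AES branch, since the software path can consume the scratchpad line directly (the pointer-taking soft_aesenc form visible in later hunks). A sketch of the resulting round shape with SSE intrinsics (soft_aesenc is declared, not implemented, here; it is xmrig's table-based fallback):

#include <emmintrin.h>   // SSE2: _mm_load_si128, _mm_set_epi64x
#include <wmmintrin.h>   // AES-NI: _mm_aesenc_si128
#include <cstdint>

// xmrig's software AES round; it reads the 16-byte state straight from memory.
__m128i soft_aesenc(const uint32_t *in, __m128i key);

template<bool SOFT_AES>
inline __m128i aes_round(uint8_t *l, uint64_t idx, uint64_t ah, uint64_t al,
                         uint64_t MASK)
{
    __m128i cx;
    const __m128i key = _mm_set_epi64x(ah, al);

    if (SOFT_AES) {
        // software path: no explicit load, soft_aesenc reads the line itself
        cx = soft_aesenc(reinterpret_cast<const uint32_t *>(&l[idx & MASK]), key);
    }
    else {
        // hardware path: load into a register, then one AES-NI round
        cx = _mm_load_si128(reinterpret_cast<__m128i *>(&l[idx & MASK]));
        cx = _mm_aesenc_si128(cx, key);
    }

    return cx;
}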
@@ -459,18 +459,18 @@ public:
     bx = _mm_set_epi64x(h[3] ^ h[7], h[2] ^ h[6]);
     idx = h[0] ^ h[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
         __m128i cx;
 
         if (SOFT_AES) {
             cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
-        }
-        else {
-            cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
-#   ifndef XMRIG_ARMv7
-            cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
-#   endif
         }
+        else {
+            cx = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
+#   ifndef XMRIG_ARMv7
+            cx = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
+#   endif
+        }
 
         _mm_store_si128((__m128i*) &l[idx & MASK], _mm_xor_si128(bx, cx));
         idx = EXTRACT64(cx);
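The ARM branch emulates one x86 AES round with two NEON crypto instructions plus an XOR. AESE applies the round key before SubBytes/ShiftRows, whereas _mm_aesenc_si128 applies it after MixColumns; feeding AESE an all-zero key and XOR-ing the real key after AESMC therefore reproduces the x86 semantics exactly, which is what the vaeseq_u8/vaesmcq_u8 line above does. Restated on plain NEON types (ARMv8 Crypto extensions assumed, hence the XMRIG_ARMv7 guard in the diff; vreinterpretq_m128i_u8 is xmrig's NEON-to-__m128i shim):

#include <arm_neon.h>

// Equivalent of x86 _mm_aesenc_si128(state, key):
//   x86:  AddRoundKey happens last:  MixColumns(ShiftRows(SubBytes(s))) ^ key
//   ARM:  AESE(s, k) = ShiftRows(SubBytes(s ^ k)); AESMC = MixColumns
// So run AESE with a zero key and XOR the real round key afterwards.
static inline uint8x16_t aesenc_neon(uint8x16_t state, uint8x16_t key)
{
    state = vaeseq_u8(state, vdupq_n_u8(0)); // SubBytes + ShiftRows, zero key
    state = vaesmcq_u8(state);               // MixColumns
    return veorq_u8(state, key);             // AddRoundKey last, as on x86
}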
@@ -530,13 +530,16 @@ public:
     uint64_t idx1 = h1[0] ^ h1[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
 
         if (SOFT_AES) {
             cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
 #   ifndef XMRIG_ARMv7
             cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
             cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
@@ -633,18 +636,23 @@ public:
     uint64_t idx2 = h2[0] ^ h2[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0, cx1;
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
 
         if (SOFT_AES) {
             cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], _mm_set_epi64x(ah0, al0));
             cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], _mm_set_epi64x(ah1, al1));
+            cx2 = soft_aesenc((uint32_t*)&l2[idx2 & MASK], _mm_set_epi64x(ah2, al2));
         }
         else {
             cx0 = _mm_load_si128((__m128i *) &l0[idx0 & MASK]);
             cx1 = _mm_load_si128((__m128i *) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i *) &l2[idx2 & MASK]);
 #   ifndef XMRIG_ARMv7
             cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
             cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
+            cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
 #   endif
         }
@@ -769,10 +777,10 @@ public:
     uint64_t idx3 = h3[0] ^ h3[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-        __m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+        __m128i cx3;
 
         if (SOFT_AES) {
             cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
@@ -781,6 +789,11 @@ public:
             cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
         } else {
 #   ifndef XMRIG_ARMv7
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+            cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
             cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
             cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
             cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
@@ -938,11 +951,11 @@ public:
     uint64_t idx4 = h4[0] ^ h4[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-        __m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
-        __m128i cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+        __m128i cx3;
+        __m128i cx4;
 
         if (SOFT_AES) {
             cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
@@ -952,6 +965,12 @@ public:
             cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
             cx4 = soft_aesenc(cx4, _mm_set_epi64x(ah4, al4));
         } else {
 #   ifndef XMRIG_ARMv7
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+            cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+            cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
             cx0 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx0, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah0, al0);
             cx1 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx1, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah1, al1);
             cx2 = vreinterpretq_m128i_u8(vaesmcq_u8(vaeseq_u8(cx2, vdupq_n_u8(0)))) ^ _mm_set_epi64x(ah2, al2);
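All the multi-hash hunks in this file (and the next) unroll one design: N independent CryptoNight lanes advance in lock-step inside a single loop, so the cache-missing scratchpad loads of different lanes overlap instead of serializing. A schematic of that interleaving, using arrays in place of the unrolled cx0..cx4 locals, in the spirit of the hashBlock loop of the first hunk (one_round is a hypothetical per-lane step combining the sketches above):

#include <cstddef>
#include <cstdint>

// Per-lane round: load/encrypt/store one 16-byte line, return the next index.
uint64_t one_round(uint8_t *scratchpad, uint64_t idx, uint64_t mask);

// N-way interleaving as used by the DoubleHash..PentaHash variants.
template<size_t N>
void interleaved_rounds(uint8_t *scratchpad[N], uint64_t idx[N],
                        size_t iterations, uint64_t mask)
{
    for (size_t i = 0; i < iterations; i++) {
        for (size_t lane = 0; lane < N; ++lane) {
            // each lane issues its slow memory access before the previous
            // lane's result is needed again, hiding DRAM latency
            idx[lane] = one_round(scratchpad[lane], idx[lane], mask);
        }
    }
}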

View file

@@ -354,11 +354,11 @@ public:
         for (size_t i = 0; i < ITERATIONS; i++) {
             for (size_t hashBlock = 0; hashBlock < NUM_HASH_BLOCKS; ++hashBlock) {
                 __m128i cx;
-                cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
 
                 if (SOFT_AES) {
                     cx = soft_aesenc(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
                 } else {
+                    cx = _mm_load_si128((__m128i*) &l[hashBlock][idx[hashBlock] & MASK]);
                     cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah[hashBlock], al[hashBlock]));
                 }
@@ -422,11 +422,12 @@ public:
     idx = h[0] ^ h[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
+        __m128i cx;
 
         if (SOFT_AES) {
             cx = soft_aesenc(cx, _mm_set_epi64x(ah, al));
         } else {
+            cx = _mm_load_si128((__m128i*) &l[idx & MASK]);
             cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah, al));
         }
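For reference, the round does not end at the AES step: the encrypted block is XOR-ed with the previous one and stored back, and the low 64 bits of cx select the next scratchpad line, visible as the _mm_store_si128/EXTRACT64 pair in the previous file. A sketch of that tail (EXTRACT64 is assumed to be _mm_cvtsi128_si64, as in xmrig's CryptoNight headers; in the real loop the bx update happens after the multiply step that follows):

#include <emmintrin.h>
#include <cstdint>

// Tail of one round: write back bx ^ cx, remember cx as the new "previous
// block", and derive the next scratchpad index from cx's low quadword.
static inline uint64_t round_tail(uint8_t *l, uint64_t idx, uint64_t MASK,
                                  __m128i &bx, __m128i cx)
{
    _mm_store_si128(reinterpret_cast<__m128i *>(&l[idx & MASK]),
                    _mm_xor_si128(bx, cx));
    bx = cx;
    return static_cast<uint64_t>(_mm_cvtsi128_si64(cx));
}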
@@ -488,13 +489,15 @@ public:
     uint64_t idx1 = h1[0] ^ h1[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
 
         if (SOFT_AES) {
             cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
             cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
         }
@@ -589,15 +592,18 @@ public:
     uint64_t idx2 = h2[0] ^ h2[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
 
         if (SOFT_AES) {
             cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = soft_aesenc(cx1, _mm_set_epi64x(ah1, al1));
             cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
             cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
             cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
@@ -724,10 +730,10 @@ public:
     uint64_t idx3 = h3[0] ^ h3[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-        __m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+        __m128i cx3;
 
         if (SOFT_AES) {
             cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
@@ -735,6 +741,11 @@ public:
             cx2 = soft_aesenc(cx2, _mm_set_epi64x(ah2, al2));
             cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+            cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
             cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
             cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
             cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
@@ -891,11 +902,11 @@ public:
     uint64_t idx4 = h4[0] ^ h4[4];
 
     for (size_t i = 0; i < ITERATIONS; i++) {
-        __m128i cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
-        __m128i cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
-        __m128i cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
-        __m128i cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
-        __m128i cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
+        __m128i cx0;
+        __m128i cx1;
+        __m128i cx2;
+        __m128i cx3;
+        __m128i cx4;
 
         if (SOFT_AES) {
             cx0 = soft_aesenc(cx0, _mm_set_epi64x(ah0, al0));
@@ -904,6 +915,12 @@ public:
             cx3 = soft_aesenc(cx3, _mm_set_epi64x(ah3, al3));
             cx4 = soft_aesenc(cx4, _mm_set_epi64x(ah4, al4));
         } else {
+            cx0 = _mm_load_si128((__m128i*) &l0[idx0 & MASK]);
+            cx1 = _mm_load_si128((__m128i*) &l1[idx1 & MASK]);
+            cx2 = _mm_load_si128((__m128i*) &l2[idx2 & MASK]);
+            cx3 = _mm_load_si128((__m128i*) &l3[idx3 & MASK]);
+            cx4 = _mm_load_si128((__m128i*) &l4[idx4 & MASK]);
            cx0 = _mm_aesenc_si128(cx0, _mm_set_epi64x(ah0, al0));
            cx1 = _mm_aesenc_si128(cx1, _mm_set_epi64x(ah1, al1));
            cx2 = _mm_aesenc_si128(cx2, _mm_set_epi64x(ah2, al2));
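Finally, every load and store above addresses the scratchpad as &lN[idxN & MASK]. The mask both bounds the 64-bit index to the scratchpad and clears the low four bits, so each access is a 16-byte-aligned line, which is what makes the aligned _mm_load_si128/_mm_store_si128 intrinsics safe. A toy check (2 MB scratchpad assumed, as in classic CryptoNight):

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t MEMORY = 2 * 1024 * 1024;          // classic CryptoNight
    const uint64_t MASK   = (MEMORY - 1) & ~0xFULL;   // 0x1FFFF0

    const uint64_t idx = 0xDEADBEEFCAFEF00DULL;       // arbitrary round state
    const uint64_t off = idx & MASK;

    std::printf("offset %llu, in range: %s, 16-byte aligned: %s\n",
                (unsigned long long) off,
                off < MEMORY ? "yes" : "no",
                off % 16 == 0 ? "yes" : "no");
    return 0;
}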