Integrated new Algos (#224)
- Added XTL v5/9 with autodetect (algo: "cryptonight", variant: "xtl" (autodetect), "xtlv9" (force v9)); - Added cn-lite variant UPX/uPlexa (algo: "cryptonight-lite", variant "upx"); - Added force-pow-variant parameter to force usage of the variant from the config and skip parsing of pow/variant from job/pool
This commit is contained in:
parent
d36797d696
commit
1273e45e46
30 changed files with 3372 additions and 52 deletions
|
@ -1,3 +1,7 @@
|
||||||
|
# 1.8.8
|
||||||
|
- Added XTL v5/9 with autodetect (algo: "cryptonight", variant: "xtl" (autodetect), "xtlv9" (force v9))
|
||||||
|
- Added cn-lite variant UPX/uPlexa (algo: "cryptonight-lite", variant "upx")
|
||||||
|
- Added force-pow-variant parameter to force usage of the variant from the config and skip parsing of pow/variant from job/pool
|
||||||
# 1.8.7
|
# 1.8.7
|
||||||
- Implemented Template based mass config editor to simply swap configs on your rigs
|
- Implemented Template based mass config editor to simply swap configs on your rigs
|
||||||
# 1.8.6
|
# 1.8.6
|
||||||
|
|
|
@ -73,7 +73,7 @@ Options:\n"
|
||||||
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
|
-k, --keepalive send keepalived for prevent timeout (need pool support)\n\
|
||||||
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
|
-r, --retries=N number of times to retry before switch to backup server (default: 5)\n\
|
||||||
-R, --retry-pause=N time to pause between retries (default: 5)\n\
|
-R, --retry-pause=N time to pause between retries (default: 5)\n\
|
||||||
--pow-variant=V specificy the PoW variat to use: -> 'auto' (default), '0' (v0), '1' (v1, aka cnv7), '2' (v2, aka cnv8), 'ipbc' (tube), 'xao', 'xtl' (including autodetect for v5), 'rto', 'xfh'\n\
|
--pow-variant=V specificy the PoW variat to use: -> 'auto' (default), '0' (v0), '1' (v1, aka cnv7), '2' (v2, aka cnv8), 'ipbc' (tube), 'xao', 'xtl' (including autodetect for > v5), 'rto', 'xfh', 'upx'\n\
|
||||||
for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations\n\
|
for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations\n\
|
||||||
--asm-optimization=V specificy the ASM optimization to use: -> 'auto' (default), 'intel', 'ryzen', 'bulldozer', 'off' \n\
|
--asm-optimization=V specificy the ASM optimization to use: -> 'auto' (default), 'intel', 'ryzen', 'bulldozer', 'off' \n\
|
||||||
--multihash-factor=N number of hash blocks to process at a time (don't set or 0 enables automatic selection of optimal number of hash blocks)\n\
|
--multihash-factor=N number of hash blocks to process at a time (don't set or 0 enables automatic selection of optimal number of hash blocks)\n\
|
||||||
|
@ -90,7 +90,9 @@ Options:\n"
|
||||||
--print-time=N print hashrate report every N seconds\n\
|
--print-time=N print hashrate report every N seconds\n\
|
||||||
--api-port=N port for the miner API\n\
|
--api-port=N port for the miner API\n\
|
||||||
--api-access-token=T access token for API\n\
|
--api-access-token=T access token for API\n\
|
||||||
--api-worker-id=ID custom worker-id for API\n"
|
--api-worker-id=ID custom worker-id for API\n\
|
||||||
|
--reboot-cmd command/bat to execute to Reboot miner\n\
|
||||||
|
--force-pow-variant disable pow/variant parsing from pool\n"
|
||||||
# ifndef XMRIG_NO_CC
|
# ifndef XMRIG_NO_CC
|
||||||
"\
|
"\
|
||||||
--cc-url=URL url of the CC Server\n\
|
--cc-url=URL url of the CC Server\n\
|
||||||
|
@ -99,8 +101,7 @@ Options:\n"
|
||||||
--cc-worker-id=ID custom worker-id for CC Server\n\
|
--cc-worker-id=ID custom worker-id for CC Server\n\
|
||||||
--cc-update-interval-s=N status update interval in seconds (default: 10 min: 1)\n\
|
--cc-update-interval-s=N status update interval in seconds (default: 10 min: 1)\n\
|
||||||
--cc-use-remote-logging enable remote logging on CC Server\n\
|
--cc-use-remote-logging enable remote logging on CC Server\n\
|
||||||
--cc-upload-config-on-startup upload current miner config to CC Server on startup\n\
|
--cc-upload-config-on-startup upload current miner config to CC Server on startup\n"
|
||||||
--cc-reboot-cmd command/bat to execute to Reboot miner\n"
|
|
||||||
# endif
|
# endif
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
@ -175,8 +176,9 @@ static struct option const options[] = {
|
||||||
{ "userpass", 1, nullptr, 'O' },
|
{ "userpass", 1, nullptr, 'O' },
|
||||||
{ "version", 0, nullptr, 'V' },
|
{ "version", 0, nullptr, 'V' },
|
||||||
{ "use-tls", 0, nullptr, 1015 },
|
{ "use-tls", 0, nullptr, 1015 },
|
||||||
{ "force-pow-version", 1, nullptr, 1016 },
|
{ "force-pow-variant", 0, nullptr, 1016 },
|
||||||
{ "pow-variant" ,1, nullptr, 1017 },
|
{ "pow-variant", 1, nullptr, 1017 },
|
||||||
|
{ "variant", 1, nullptr, 1017 },
|
||||||
{ "api-port", 1, nullptr, 4000 },
|
{ "api-port", 1, nullptr, 4000 },
|
||||||
{ "api-access-token", 1, nullptr, 4001 },
|
{ "api-access-token", 1, nullptr, 4001 },
|
||||||
{ "api-worker-id", 1, nullptr, 4002 },
|
{ "api-worker-id", 1, nullptr, 4002 },
|
||||||
|
@ -232,8 +234,9 @@ static struct option const config_options[] = {
|
||||||
{ "syslog", 0, nullptr, 'S' },
|
{ "syslog", 0, nullptr, 'S' },
|
||||||
{ "threads", 1, nullptr, 't' },
|
{ "threads", 1, nullptr, 't' },
|
||||||
{ "user-agent", 1, nullptr, 1008 },
|
{ "user-agent", 1, nullptr, 1008 },
|
||||||
{ "force-pow-version", 1, nullptr, 1016 },
|
{ "force-pow-variant", 0, nullptr, 1016 },
|
||||||
{ "pow-variant", 1, nullptr, 1017 },
|
{ "pow-variant", 1, nullptr, 1017 },
|
||||||
|
{ "variant", 1, nullptr, 1017 },
|
||||||
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
{ "doublehash-thread-mask", 1, nullptr, 4013 },
|
||||||
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
{ "multihash-thread-mask", 1, nullptr, 4013 },
|
||||||
{ "asm-optimization", 1, nullptr, 4020 },
|
{ "asm-optimization", 1, nullptr, 4020 },
|
||||||
|
@ -250,6 +253,8 @@ static struct option const pool_options[] = {
|
||||||
{ "keepalive", 0, nullptr ,'k' },
|
{ "keepalive", 0, nullptr ,'k' },
|
||||||
{ "nicehash", 0, nullptr, 1006 },
|
{ "nicehash", 0, nullptr, 1006 },
|
||||||
{ "use-tls", 0, nullptr, 1015 },
|
{ "use-tls", 0, nullptr, 1015 },
|
||||||
|
{ "pow-variant", 1, nullptr, 1017 },
|
||||||
|
{ "variant", 1, nullptr, 1017 },
|
||||||
{ nullptr, 0, nullptr, 0 }
|
{ nullptr, 0, nullptr, 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -318,7 +323,9 @@ constexpr static const char *pow_variant_names[] = {
|
||||||
"msr",
|
"msr",
|
||||||
"xhv",
|
"xhv",
|
||||||
"rto",
|
"rto",
|
||||||
"xfh"
|
"xfh",
|
||||||
|
"xtlv9",
|
||||||
|
"upx"
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr static const char *asm_optimization_names[] = {
|
constexpr static const char *asm_optimization_names[] = {
|
||||||
|
@ -366,6 +373,7 @@ Options::Options(int argc, char **argv) :
|
||||||
m_ccPushOfflineMiners(false),
|
m_ccPushOfflineMiners(false),
|
||||||
m_ccPushPeriodicStatus(false),
|
m_ccPushPeriodicStatus(false),
|
||||||
m_ccPushZeroHashrateMiners(false),
|
m_ccPushZeroHashrateMiners(false),
|
||||||
|
m_forcePowVariant(false),
|
||||||
m_fileName(Platform::defaultConfigName()),
|
m_fileName(Platform::defaultConfigName()),
|
||||||
m_apiToken(nullptr),
|
m_apiToken(nullptr),
|
||||||
m_apiWorkerId(nullptr),
|
m_apiWorkerId(nullptr),
|
||||||
|
@ -606,7 +614,6 @@ bool Options::parseArg(int key, const char *arg)
|
||||||
case 1003: /* --donate-level */
|
case 1003: /* --donate-level */
|
||||||
case 1004: /* --max-cpu-usage */
|
case 1004: /* --max-cpu-usage */
|
||||||
case 1007: /* --print-time */
|
case 1007: /* --print-time */
|
||||||
case 1016: /* --force-pow-version */
|
|
||||||
case 1021: /* --cpu-priority */
|
case 1021: /* --cpu-priority */
|
||||||
case 4000: /* --api-port */
|
case 4000: /* --api-port */
|
||||||
case 4006: /* --cc-port */
|
case 4006: /* --cc-port */
|
||||||
|
@ -628,7 +635,10 @@ bool Options::parseArg(int key, const char *arg)
|
||||||
case 1015: /* --use-tls */
|
case 1015: /* --use-tls */
|
||||||
return parseBoolean(key, true);
|
return parseBoolean(key, true);
|
||||||
|
|
||||||
case 1017: /* --pow-variant */
|
case 1016: /* --force-pow-variant */
|
||||||
|
return parseBoolean(key, false);
|
||||||
|
|
||||||
|
case 1017: /* --pow-variant/--variant */
|
||||||
return parsePowVariant(arg);
|
return parsePowVariant(arg);
|
||||||
|
|
||||||
case 4016: /* --cc-use-tls */
|
case 4016: /* --cc-use-tls */
|
||||||
|
@ -803,16 +813,6 @@ bool Options::parseArg(int key, uint64_t arg)
|
||||||
m_printTime = (int) arg;
|
m_printTime = (int) arg;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1016: /* --force-pow-version */
|
|
||||||
showDeprecateWarning("force-pow-version", "pow-variant");
|
|
||||||
if (arg != POW_AUTODETECT && arg != POW_V0 && arg != POW_V1) {
|
|
||||||
showUsage(1);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_powVariant = static_cast<PowVariant>(arg);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 1020: /* --cpu-affinity */
|
case 1020: /* --cpu-affinity */
|
||||||
if (arg) {
|
if (arg) {
|
||||||
m_affinity = arg;
|
m_affinity = arg;
|
||||||
|
@ -901,6 +901,10 @@ bool Options::parseBoolean(int key, bool enable)
|
||||||
m_pools.back()->setUseTls(enable);
|
m_pools.back()->setUseTls(enable);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 1016: /* --force-pow-variant */
|
||||||
|
m_forcePowVariant = enable;
|
||||||
|
break;
|
||||||
|
|
||||||
case 2000: /* --colors */
|
case 2000: /* --colors */
|
||||||
m_colors = enable;
|
m_colors = enable;
|
||||||
break;
|
break;
|
||||||
|
@ -1151,11 +1155,21 @@ bool Options::parsePowVariant(const char *powVariant)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "freehaven") || !strcmp(powVariant, "faven"))) {
|
if (i == ARRAY_SIZE(pow_variant_names) - 1 && (!strcmp(powVariant, "freehaven") || !strcmp(powVariant, "faven") || !strcmp(powVariant, "swap"))) {
|
||||||
m_powVariant = POW_XFH;
|
m_powVariant = POW_XFH;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "stellitev9")) {
|
||||||
|
m_powVariant = POW_XTL_V9;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == ARRAY_SIZE(pow_variant_names) - 1 && !strcmp(powVariant, "uplexa")) {
|
||||||
|
m_powVariant = POW_UPX;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (i == ARRAY_SIZE(pow_variant_names) - 1) {
|
if (i == ARRAY_SIZE(pow_variant_names) - 1) {
|
||||||
showUsage(1);
|
showUsage(1);
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -80,6 +80,7 @@ public:
|
||||||
inline bool ccPushZeroHashrateMiners() const { return m_ccPushZeroHashrateMiners; }
|
inline bool ccPushZeroHashrateMiners() const { return m_ccPushZeroHashrateMiners; }
|
||||||
inline bool ccUsePushover() const { return ccPushoverUser() && ccPushoverToken(); }
|
inline bool ccUsePushover() const { return ccPushoverUser() && ccPushoverToken(); }
|
||||||
inline bool ccUseTelegram() const { return ccTelegramBotToken() && ccTelegramChatId(); }
|
inline bool ccUseTelegram() const { return ccTelegramBotToken() && ccTelegramChatId(); }
|
||||||
|
inline bool forcePowVariant() const { return m_forcePowVariant; };
|
||||||
inline const char *fileName() const { return m_fileName; }
|
inline const char *fileName() const { return m_fileName; }
|
||||||
inline const char *apiToken() const { return m_apiToken; }
|
inline const char *apiToken() const { return m_apiToken; }
|
||||||
inline const char *apiWorkerId() const { return m_apiWorkerId; }
|
inline const char *apiWorkerId() const { return m_apiWorkerId; }
|
||||||
|
@ -165,6 +166,7 @@ private:
|
||||||
bool m_ccPushOfflineMiners;
|
bool m_ccPushOfflineMiners;
|
||||||
bool m_ccPushPeriodicStatus;
|
bool m_ccPushPeriodicStatus;
|
||||||
bool m_ccPushZeroHashrateMiners;
|
bool m_ccPushZeroHashrateMiners;
|
||||||
|
bool m_forcePowVariant;
|
||||||
const char* m_fileName;
|
const char* m_fileName;
|
||||||
char *m_apiToken;
|
char *m_apiToken;
|
||||||
char *m_apiWorkerId;
|
char *m_apiWorkerId;
|
||||||
|
|
|
@ -35,6 +35,8 @@ enum PowVariant
|
||||||
POW_XHV,
|
POW_XHV,
|
||||||
POW_RTO,
|
POW_RTO,
|
||||||
POW_XFH,
|
POW_XFH,
|
||||||
|
POW_XTL_V9,
|
||||||
|
POW_UPX,
|
||||||
LAST_ITEM
|
LAST_ITEM
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -62,6 +64,10 @@ inline std::string getPowVariantName(PowVariant powVariant)
|
||||||
return "rto";
|
return "rto";
|
||||||
case POW_XFH:
|
case POW_XFH:
|
||||||
return "xfh";
|
return "xfh";
|
||||||
|
case POW_XTL_V9:
|
||||||
|
return "xtlv9";
|
||||||
|
case POW_UPX:
|
||||||
|
return "upx";
|
||||||
case POW_AUTODETECT:
|
case POW_AUTODETECT:
|
||||||
default:
|
default:
|
||||||
return "-1";
|
return "-1";
|
||||||
|
@ -129,6 +135,10 @@ inline PowVariant parseVariant(const std::string variant)
|
||||||
powVariant = PowVariant::POW_RTO;
|
powVariant = PowVariant::POW_RTO;
|
||||||
} else if (variant == "xfh" || variant == "freehaven" || variant == "faven") {
|
} else if (variant == "xfh" || variant == "freehaven" || variant == "faven") {
|
||||||
powVariant = PowVariant::POW_XFH;
|
powVariant = PowVariant::POW_XFH;
|
||||||
|
} else if (variant == "xtlv9" || variant == "stellite_v9") {
|
||||||
|
powVariant = PowVariant::POW_XTL_V9;
|
||||||
|
} else if (variant == "upx" || variant == "uplexa" || variant == "cn-upx") {
|
||||||
|
powVariant = PowVariant::POW_XTL_V9;
|
||||||
}
|
}
|
||||||
|
|
||||||
return powVariant;
|
return powVariant;
|
||||||
|
|
|
@ -63,8 +63,8 @@ static void print_cpu()
|
||||||
Cpu::brand(),
|
Cpu::brand(),
|
||||||
Cpu::sockets(),
|
Cpu::sockets(),
|
||||||
Cpu::isX64() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
Cpu::isX64() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
||||||
Cpu::hasAES() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
Cpu::hasAES() && Options::i()->aesni() ? "\x1B[01;32m" : "\x1B[01;31m-",
|
||||||
Options::i()->asmOptimization() != AsmOptimization::ASM_OFF ? "\x1B[01;32m" : "\x1B[01;31m",
|
Options::i()->asmOptimization() != AsmOptimization::ASM_OFF ? "\x1B[01;32m" : "\x1B[01;31m-",
|
||||||
getAsmOptimizationName(Options::i()->asmOptimization()).c_str());
|
getAsmOptimizationName(Options::i()->asmOptimization()).c_str());
|
||||||
# ifndef XMRIG_NO_LIBCPUID
|
# ifndef XMRIG_NO_LIBCPUID
|
||||||
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
|
Log::i()->text("\x1B[01;32m * \x1B[01;37mCPU L2/L3: %.1f MB/%.1f MB", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
|
||||||
|
@ -121,12 +121,10 @@ static void print_threads()
|
||||||
}
|
}
|
||||||
|
|
||||||
Log::i()->text(Options::i()->colors() ?
|
Log::i()->text(Options::i()->colors() ?
|
||||||
"\x1B[01;32m * \x1B[01;37mTHREADS: \x1B[01;36m%d\x1B[01;37m, %s, %saes=%d\x1B[01;37m, hf=%zu, %sdonate=%d%%\x1B[01;37m%s%s" :
|
"\x1B[01;32m * \x1B[01;37mTHREADS: \x1B[01;36m%d\x1B[01;37m, %s, hf=%zu, %sdonate=%d%%\x1B[01;37m%s%s" :
|
||||||
" * THREADS: %d, %s, %saes=%d, hf=%zu, %sdonate=%d%%%s%s",
|
" * THREADS: %d, %s, %saes=%d, hf=%zu, %sdonate=%d%%%s%s",
|
||||||
Options::i()->threads(),
|
Options::i()->threads(),
|
||||||
Options::i()->algoName(),
|
Options::i()->algoName(),
|
||||||
Options::i()->colors() && Options::i()->aesni() == 0 ? "\x1B[01;31m" : "",
|
|
||||||
Options::i()->aesni(),
|
|
||||||
Options::i()->hashFactor(),
|
Options::i()->hashFactor(),
|
||||||
Options::i()->colors() && Options::i()->donateLevel() == 0 ? "\x1B[01;31m" : "",
|
Options::i()->colors() && Options::i()->donateLevel() == 0 ? "\x1B[01;31m" : "",
|
||||||
Options::i()->donateLevel(),
|
Options::i()->donateLevel(),
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
||||||
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
||||||
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
||||||
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy (xao), xtl (including autodetect for v5), msr, xhv, rto, xfh
|
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy (xao), xtl (including autodetect for > v5), msr, xhv, rto, xfh, upx
|
||||||
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
||||||
"asm-optimization" : "auto", // specificy the ASM optimization to use: -> auto (default), intel, ryzen, bulldozer, off
|
"asm-optimization" : "auto", // specificy the ASM optimization to use: -> auto (default), intel, ryzen, bulldozer, off
|
||||||
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
|
"background": false, // true to run the miner in the background (Windows only, for *nix plase use screen/tmux or systemd service instead)
|
||||||
|
@ -20,6 +20,7 @@
|
||||||
"safe": false, // true to safe adjust threads and av settings for current CPU
|
"safe": false, // true to safe adjust threads and av settings for current CPU
|
||||||
"syslog": false, // use system log for output messages
|
"syslog": false, // use system log for output messages
|
||||||
"reboot-cmd" : "", // command to execute to reboot the OS
|
"reboot-cmd" : "", // command to execute to reboot the OS
|
||||||
|
"force-pow-variant" : false, // force pow variant, dont parse pow/variant from pool job
|
||||||
"pools": [
|
"pools": [
|
||||||
{
|
{
|
||||||
"url": "donate2.graef.in:80", // URL of mining server
|
"url": "donate2.graef.in:80", // URL of mining server
|
||||||
|
|
|
@ -70,6 +70,18 @@ static void cryptonight_aesni(AsmOptimization asmOptimization, PowVariant powVer
|
||||||
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x80000, POW_XLT_V4_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
} else if (powVersion == PowVariant::POW_XTL_V9) {
|
||||||
|
#if defined(XMRIG_ARM)
|
||||||
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
|
#else
|
||||||
|
if ((asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS <= 2) ||
|
||||||
|
(asmOptimization == AsmOptimization::ASM_RYZEN && NUM_HASH_BLOCKS == 1) ||
|
||||||
|
(asmOptimization == AsmOptimization::ASM_BULLDOZER && NUM_HASH_BLOCKS == 1)) {
|
||||||
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowFastV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||||
|
} else {
|
||||||
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
} else if (powVersion == PowVariant::POW_MSR) {
|
} else if (powVersion == PowVariant::POW_MSR) {
|
||||||
#if defined(XMRIG_ARM)
|
#if defined(XMRIG_ARM)
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
@ -111,6 +123,16 @@ static void cryptonight_softaes(AsmOptimization asmOptimization, PowVariant powV
|
||||||
} else {
|
} else {
|
||||||
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x80000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
} else if (powVersion == PowVariant::POW_XTL_V9) {
|
||||||
|
#if defined(XMRIG_ARM)
|
||||||
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, false, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
|
#else
|
||||||
|
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||||
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowFastV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||||
|
} else {
|
||||||
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hashPowV3(input, size, output, scratchPad);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
} else if (powVersion == PowVariant::POW_ALLOY) {
|
} else if (powVersion == PowVariant::POW_ALLOY) {
|
||||||
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x100000, POW_DEFAULT_INDEX_SHIFT, MEMORY, 0x1FFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||||
|
@ -158,6 +180,16 @@ static void cryptonight_lite_aesni(AsmOptimization asmOptimization, PowVariant p
|
||||||
#endif
|
#endif
|
||||||
} else if (powVersion == PowVariant::POW_TUBE) {
|
} else if (powVersion == PowVariant::POW_TUBE) {
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||||
|
} else if (powVersion == PowVariant::POW_UPX) {
|
||||||
|
#if defined(XMRIG_ARM)
|
||||||
|
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
#else
|
||||||
|
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||||
|
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||||
|
} else {
|
||||||
|
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, false, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||||
}
|
}
|
||||||
|
@ -178,6 +210,16 @@ static void cryptonight_lite_softaes(AsmOptimization asmOptimization, PowVariant
|
||||||
#endif
|
#endif
|
||||||
} else if (powVersion == PowVariant::POW_TUBE) {
|
} else if (powVersion == PowVariant::POW_TUBE) {
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(input, size, output, scratchPad);
|
||||||
|
} else if (powVersion == PowVariant::POW_UPX) {
|
||||||
|
#if defined(XMRIG_ARM)
|
||||||
|
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
#else
|
||||||
|
if (asmOptimization == AsmOptimization::ASM_INTEL && NUM_HASH_BLOCKS == 1) {
|
||||||
|
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2_asm(input, size, output, scratchPad, asmOptimization);
|
||||||
|
} else {
|
||||||
|
CryptoNightMultiHash<0x20000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashPowV2(input, size, output, scratchPad);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hash(input, size, output, scratchPad);
|
||||||
}
|
}
|
||||||
|
@ -430,6 +472,10 @@ bool CryptoNight::selfTest(int algo)
|
||||||
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 160) == 0;
|
resultLite = resultLite && memcmp(output, test_output_ipbc_lite, 160) == 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// cn-lite upx
|
||||||
|
|
||||||
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_UPX, test_input, 76, output, scratchPads);
|
||||||
|
resultLite = resultLite && memcmp(output, test_output_upx, 32) == 0;
|
||||||
} else {
|
} else {
|
||||||
// cn v0 aka orignal
|
// cn v0 aka orignal
|
||||||
|
|
||||||
|
@ -525,6 +571,11 @@ bool CryptoNight::selfTest(int algo)
|
||||||
|
|
||||||
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XFH, test_input, 76, output, scratchPads);
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XFH, test_input, 76, output, scratchPads);
|
||||||
result = result && memcmp(output, test_output_xfh, 32) == 0;
|
result = result && memcmp(output, test_output_xfh, 32) == 0;
|
||||||
|
|
||||||
|
// cnv8 + xtl aka cn-fast2
|
||||||
|
|
||||||
|
cryptonight_hash_ctx[0](asmOptimization, PowVariant::POW_XTL_V9, test_input, 76, output, scratchPads);
|
||||||
|
result = result && memcmp(output, test_output_xtl_v9, 32) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < MAX_NUM_HASH_BLOCKS; ++i) {
|
for (size_t i = 0; i < MAX_NUM_HASH_BLOCKS; ++i) {
|
||||||
|
|
|
@ -122,6 +122,12 @@ const static uint8_t test_output_xfh[32] = {
|
||||||
0x54, 0x71, 0x58, 0xDB, 0x94, 0x69, 0x8E, 0x3C, 0xA0, 0x3D, 0xE4, 0x81, 0x9A, 0x65, 0x9F, 0xEF
|
0x54, 0x71, 0x58, 0xDB, 0x94, 0x69, 0x8E, 0x3C, 0xA0, 0x3D, 0xE4, 0x81, 0x9A, 0x65, 0x9F, 0xEF
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// CN XTL V9
|
||||||
|
const static uint8_t test_output_xtl_v9[32] = {
|
||||||
|
0x5D, 0x4F, 0xBC, 0x35, 0x60, 0x97, 0xEA, 0x64, 0x40, 0xB0, 0x88, 0x8E, 0xDE, 0xB6, 0x35, 0xDD,
|
||||||
|
0xC8, 0x4A, 0x0E, 0x39, 0x7C, 0x86, 0x84, 0x56, 0x89, 0x5C, 0x3F, 0x29, 0xBE, 0x73, 0x12, 0xA7
|
||||||
|
};
|
||||||
|
|
||||||
// CN-LITE
|
// CN-LITE
|
||||||
const static uint8_t test_output_v0_lite[160] = {
|
const static uint8_t test_output_v0_lite[160] = {
|
||||||
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
||||||
|
@ -151,7 +157,6 @@ const static uint8_t test_output_v1_lite[160] = {
|
||||||
0x87, 0xF7, 0x37, 0xDA, 0xFD, 0xBA, 0xBA, 0xD2, 0xF2, 0x68, 0xDC, 0x26, 0x8D, 0x1B, 0x08, 0xC6
|
0x87, 0xF7, 0x37, 0xDA, 0xFD, 0xBA, 0xBA, 0xD2, 0xF2, 0x68, 0xDC, 0x26, 0x8D, 0x1B, 0x08, 0xC6
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// CN-Lite IPBC
|
// CN-Lite IPBC
|
||||||
const static uint8_t test_output_ipbc_lite[160] = {
|
const static uint8_t test_output_ipbc_lite[160] = {
|
||||||
0xE4, 0x93, 0x8C, 0xAA, 0x59, 0x8D, 0x02, 0x8A, 0xB8, 0x6F, 0x25, 0xD2, 0xB1, 0x23, 0xD0, 0xD5,
|
0xE4, 0x93, 0x8C, 0xAA, 0x59, 0x8D, 0x02, 0x8A, 0xB8, 0x6F, 0x25, 0xD2, 0xB1, 0x23, 0xD0, 0xD5,
|
||||||
|
@ -167,6 +172,12 @@ const static uint8_t test_output_ipbc_lite[160] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// CN-Lite UPX
|
||||||
|
const static uint8_t test_output_upx[32] = {
|
||||||
|
0xD1, 0x13, 0xE1, 0x1B, 0xBE, 0xD3, 0x2A, 0xC1, 0x7C, 0x2C, 0xAA, 0x55, 0xCC, 0x84, 0x2F, 0xA4,
|
||||||
|
0x88, 0x91, 0xEE, 0x45, 0x63, 0x22, 0xA3, 0x0A, 0xB2, 0x80, 0xDF, 0x35, 0x16, 0x5C, 0xAF, 0x9A
|
||||||
|
};
|
||||||
|
|
||||||
// CN-Heavy
|
// CN-Heavy
|
||||||
const static uint8_t test_output_heavy[160] = {
|
const static uint8_t test_output_heavy[160] = {
|
||||||
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
|
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
|
||||||
|
|
|
@ -57,10 +57,17 @@ extern "C"
|
||||||
void cnv2_mainloop_ryzen_asm(ScratchPad* ctx0);
|
void cnv2_mainloop_ryzen_asm(ScratchPad* ctx0);
|
||||||
void cnv2_mainloop_bulldozer_asm(ScratchPad* ctx0);
|
void cnv2_mainloop_bulldozer_asm(ScratchPad* ctx0);
|
||||||
void cnv2_double_mainloop_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
|
void cnv2_double_mainloop_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
|
||||||
|
void cn_fastv2_mainloop_ivybridge_asm(ScratchPad* ctx0);
|
||||||
|
void cn_fastv2_mainloop_ryzen_asm(ScratchPad* ctx0);
|
||||||
|
void cn_fastv2_mainloop_bulldozer_asm(ScratchPad* ctx0);
|
||||||
|
void cn_fastv2_double_mainloop_sandybridge_asm(ScratchPad* ctx0, ScratchPad* ctx1);
|
||||||
|
void cn_liteupx_mainloop_sandybridge_asm(ScratchPad* ctx0);
|
||||||
void cnv1_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
void cnv1_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
||||||
void cn_fast_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
void cn_fast_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
||||||
void cn_litev1_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
void cn_litev1_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
||||||
void cnv2_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
void cnv2_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
||||||
|
void cn_fastv2_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
||||||
|
void cn_liteupx_mainloop_soft_aes_sandybridge_asm(ScratchPad* ctx0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -768,7 +775,8 @@ public:
|
||||||
uint64_t* h[NUM_HASH_BLOCKS];
|
uint64_t* h[NUM_HASH_BLOCKS];
|
||||||
uint64_t al[NUM_HASH_BLOCKS];
|
uint64_t al[NUM_HASH_BLOCKS];
|
||||||
uint64_t ah[NUM_HASH_BLOCKS];
|
uint64_t ah[NUM_HASH_BLOCKS];
|
||||||
uint64_t idx[NUM_HASH_BLOCKS];
|
uint64_t idx[NUM_HASH_BLOCKS];CryptoNightMultiHash<0x40000, POW_DEFAULT_INDEX_SHIFT, MEMORY_LITE, 0xFFFF0, true, NUM_HASH_BLOCKS>::hashLiteTube(
|
||||||
|
input, size, output, scratchPad);
|
||||||
uint64_t sqrt_result[NUM_HASH_BLOCKS];
|
uint64_t sqrt_result[NUM_HASH_BLOCKS];
|
||||||
__m128i bx0[NUM_HASH_BLOCKS];
|
__m128i bx0[NUM_HASH_BLOCKS];
|
||||||
__m128i bx1[NUM_HASH_BLOCKS];
|
__m128i bx1[NUM_HASH_BLOCKS];
|
||||||
|
@ -875,6 +883,15 @@ public:
|
||||||
// not supported
|
// not supported
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline static void hashPowFastV2_asm(const uint8_t* __restrict__ input,
|
||||||
|
size_t size,
|
||||||
|
uint8_t* __restrict__ output,
|
||||||
|
ScratchPad** __restrict__ scratchPad,
|
||||||
|
AsmOptimization asmOptimization)
|
||||||
|
{
|
||||||
|
// not supported
|
||||||
|
}
|
||||||
|
|
||||||
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
||||||
size_t size,
|
size_t size,
|
||||||
uint8_t* __restrict__ output,
|
uint8_t* __restrict__ output,
|
||||||
|
@ -1433,6 +1450,8 @@ public:
|
||||||
} else {
|
} else {
|
||||||
cn_litev1_mainloop_soft_aes_sandybridge_asm(scratchPad[0]);
|
cn_litev1_mainloop_soft_aes_sandybridge_asm(scratchPad[0]);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
cn_liteupx_mainloop_soft_aes_sandybridge_asm(scratchPad[0]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (ITERATIONS == 0x80000) {
|
if (ITERATIONS == 0x80000) {
|
||||||
|
@ -1443,6 +1462,8 @@ public:
|
||||||
} else {
|
} else {
|
||||||
cn_litev1_mainloop_sandybridge_asm(scratchPad[0]);
|
cn_litev1_mainloop_sandybridge_asm(scratchPad[0]);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
cn_liteupx_mainloop_sandybridge_asm(scratchPad[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1559,6 +1580,40 @@ public:
|
||||||
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
|
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// single asm
|
||||||
|
// Computes one "fast v2" hash using a hand-written assembly main loop.
//   input / size    - message bytes; absorbed with keccak into scratchPad[0]->state.
//   output          - receives the result of the selected extra_hashes[] function.
//   scratchPad      - only scratchPad[0] is used (its state and memory).
//   asmOptimization - selects which assembly main loop is executed.
// Without XMRIG_NO_ASM, one of four loops runs: Intel soft-AES, Intel Ivy Bridge,
// Ryzen or Bulldozer. If XMRIG_NO_ASM is defined, or asmOptimization matches none of
// the three tested values, no main loop runs at all and the explode/implode/finalize
// steps are still performed.
inline static void hashPowFastV2_asm(const uint8_t* __restrict__ input,
|
||||||
|
size_t size,
|
||||||
|
uint8_t* __restrict__ output,
|
||||||
|
ScratchPad** __restrict__ scratchPad,
|
||||||
|
AsmOptimization asmOptimization)
|
||||||
|
{
|
||||||
|
// l: scratchpad memory, h: the hash state viewed as 64-bit words.
const uint8_t* l = scratchPad[0]->memory;
|
||||||
|
uint64_t* h = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
|
||||||
|
|
||||||
|
// Initialise the state from the input, then fill the scratchpad from that state
// (per the helper's name).
keccak(static_cast<const uint8_t*>(input), (int) size, scratchPad[0]->state, 200);
|
||||||
|
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h, (__m128i*) l);
|
||||||
|
|
||||||
|
#ifndef XMRIG_NO_ASM
|
||||||
|
if (asmOptimization == AsmOptimization::ASM_INTEL) {
|
||||||
|
if (SOFT_AES) {
|
||||||
|
// The soft-AES loop additionally gets the input pointer and the AES lookup
// table through the ScratchPad fields set here.
scratchPad[0]->input = input;
|
||||||
|
scratchPad[0]->t_fn = (const uint32_t*)saes_table;
|
||||||
|
cn_fastv2_mainloop_soft_aes_sandybridge_asm(scratchPad[0]);
|
||||||
|
} else {
|
||||||
|
cn_fastv2_mainloop_ivybridge_asm(scratchPad[0]);
|
||||||
|
}
|
||||||
|
} else if (asmOptimization == AsmOptimization::ASM_RYZEN) {
|
||||||
|
cn_fastv2_mainloop_ryzen_asm(scratchPad[0]);
|
||||||
|
} else if (asmOptimization == AsmOptimization::ASM_BULLDOZER) {
|
||||||
|
cn_fastv2_mainloop_bulldozer_asm(scratchPad[0]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Fold the scratchpad back into the state, run keccakf with 24 rounds, then let the
// low two bits of the first state byte pick the final hash function.
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l, (__m128i*) h);
|
||||||
|
keccakf(h, 24);
|
||||||
|
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
|
||||||
|
}
|
||||||
|
|
||||||
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
||||||
size_t size,
|
size_t size,
|
||||||
uint8_t* __restrict__ output,
|
uint8_t* __restrict__ output,
|
||||||
|
@ -2278,6 +2333,38 @@ public:
|
||||||
extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
|
extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// double asm
|
||||||
|
// Computes two "fast v2" hashes at once with the double-hash assembly main loop.
//   input / size    - two messages of `size` bytes each, stored back to back
//                     (the second one starts at input + size).
//   output          - first result at output, second result at output + 32.
//   scratchPad      - scratchPad[0] and scratchPad[1] hold the two states/memories.
//   asmOptimization - not consulted here; the Sandy Bridge double loop is the only
//                     loop this function calls.
// If XMRIG_NO_ASM is defined, no main loop runs and only the explode/implode/finalize
// steps are performed.
inline static void hashPowFastV2_asm(const uint8_t* __restrict__ input,
|
||||||
|
size_t size,
|
||||||
|
uint8_t* __restrict__ output,
|
||||||
|
ScratchPad** __restrict__ scratchPad,
|
||||||
|
AsmOptimization asmOptimization)
|
||||||
|
{
|
||||||
|
// Initialise both states from their respective inputs.
keccak((const uint8_t*) input, (int) size, scratchPad[0]->state, 200);
|
||||||
|
keccak((const uint8_t*) input + size, (int) size, scratchPad[1]->state, 200);
|
||||||
|
|
||||||
|
// l0/l1: scratchpad memories, h0/h1: hash states viewed as 64-bit words.
const uint8_t* l0 = scratchPad[0]->memory;
|
||||||
|
const uint8_t* l1 = scratchPad[1]->memory;
|
||||||
|
uint64_t* h0 = reinterpret_cast<uint64_t*>(scratchPad[0]->state);
|
||||||
|
uint64_t* h1 = reinterpret_cast<uint64_t*>(scratchPad[1]->state);
|
||||||
|
|
||||||
|
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h0, (__m128i*) l0);
|
||||||
|
cn_explode_scratchpad<MEM, SOFT_AES>((__m128i*) h1, (__m128i*) l1);
|
||||||
|
|
||||||
|
#ifndef XMRIG_NO_ASM
|
||||||
|
// Both scratchpads are processed by a single interleaved assembly loop.
cn_fastv2_double_mainloop_sandybridge_asm(scratchPad[0], scratchPad[1]);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l0, (__m128i*) h0);
|
||||||
|
cn_implode_scratchpad<MEM, SOFT_AES>((__m128i*) l1, (__m128i*) h1);
|
||||||
|
|
||||||
|
keccakf(h0, 24);
|
||||||
|
keccakf(h1, 24);
|
||||||
|
|
||||||
|
// The low two bits of each state's first byte pick that hash's final function.
extra_hashes[scratchPad[0]->state[0] & 3](scratchPad[0]->state, 200, output);
|
||||||
|
extra_hashes[scratchPad[1]->state[0] & 3](scratchPad[1]->state, 200, output + 32);
|
||||||
|
}
|
||||||
|
|
||||||
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
||||||
size_t size,
|
size_t size,
|
||||||
uint8_t* __restrict__ output,
|
uint8_t* __restrict__ output,
|
||||||
|
@ -3225,6 +3312,15 @@ public:
|
||||||
// not supported
|
// not supported
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Assembly-accelerated "fast v2" hash entry point for this CryptoNightMultiHash variant.
// The body is intentionally empty: none of the parameters is read and nothing is
// written to `output`.
// NOTE(review): callers are presumably expected never to dispatch here for this
// variant - confirm at the call sites.
inline static void hashPowFastV2_asm(const uint8_t* __restrict__ input,
|
||||||
|
size_t size,
|
||||||
|
uint8_t* __restrict__ output,
|
||||||
|
ScratchPad** __restrict__ scratchPad,
|
||||||
|
AsmOptimization asmOptimization)
|
||||||
|
{
|
||||||
|
// not supported
|
||||||
|
}
|
||||||
|
|
||||||
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
||||||
size_t size,
|
size_t size,
|
||||||
uint8_t* __restrict__ output,
|
uint8_t* __restrict__ output,
|
||||||
|
@ -4480,6 +4576,15 @@ public:
|
||||||
// not supported
|
// not supported
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Assembly-accelerated "fast v2" hash entry point for this CryptoNightMultiHash variant.
// The body is intentionally empty: none of the parameters is read and nothing is
// written to `output`.
// NOTE(review): callers are presumably expected never to dispatch here for this
// variant - confirm at the call sites.
inline static void hashPowFastV2_asm(const uint8_t* __restrict__ input,
|
||||||
|
size_t size,
|
||||||
|
uint8_t* __restrict__ output,
|
||||||
|
ScratchPad** __restrict__ scratchPad,
|
||||||
|
AsmOptimization asmOptimization)
|
||||||
|
{
|
||||||
|
// not supported
|
||||||
|
}
|
||||||
|
|
||||||
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
||||||
size_t size,
|
size_t size,
|
||||||
uint8_t* __restrict__ output,
|
uint8_t* __restrict__ output,
|
||||||
|
@ -5405,6 +5510,15 @@ public:
|
||||||
// not supported
|
// not supported
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Assembly-accelerated "fast v2" hash entry point for this CryptoNightMultiHash variant.
// The body is intentionally empty: none of the parameters is read and nothing is
// written to `output`.
// NOTE(review): callers are presumably expected never to dispatch here for this
// variant - confirm at the call sites.
inline static void hashPowFastV2_asm(const uint8_t* __restrict__ input,
|
||||||
|
size_t size,
|
||||||
|
uint8_t* __restrict__ output,
|
||||||
|
ScratchPad** __restrict__ scratchPad,
|
||||||
|
AsmOptimization asmOptimization)
|
||||||
|
{
|
||||||
|
// not supported
|
||||||
|
}
|
||||||
|
|
||||||
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
inline static void hashLiteTube(const uint8_t* __restrict__ input,
|
||||||
size_t size,
|
size_t size,
|
||||||
uint8_t* __restrict__ output,
|
uint8_t* __restrict__ output,
|
||||||
|
|
414
src/crypto/asm/cn_fastv2_double_main_loop_sandybridge.inc
Normal file
414
src/crypto/asm/cn_fastv2_double_main_loop_sandybridge.inc
Normal file
|
@ -0,0 +1,414 @@
|
||||||
|
/* Prologue of the double-hash fast-v2 main loop.
 * On entry rcx and rdx each point to one context block (the C++ caller passes two
 * ScratchPad pointers). This section saves registers, switches MXCSR, and loads the
 * initial loop state for both hashes from offsets 0..104 of each block.
 * NOTE(review): offset 224 is read as the base pointer for all scratchpad accesses -
 * presumably ScratchPad::memory; confirm against the struct layout.
 * Stack layout (E = rsp on entry): 8 pushes + 184 bytes put rsp at E-248, so
 * [rsp+256]/[rsp+264]/[rsp+272]/[rsp+276] are E+8/E+16/E+24/E+28, i.e. above the
 * return address. NOTE(review): this relies on the caller/wrapper providing writable
 * space there (as the Win64 home area does) - confirm in the including file. */
mov rax, rsp
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 184
|
||||||
|
|
||||||
|
/* Save the current MXCSR, then load 24448 = 0x5F80: all exception mask bits set and
 * rounding-control bits 14:13 = 10b (round toward +infinity). */
stmxcsr DWORD PTR [rsp+272]
|
||||||
|
mov DWORD PTR [rsp+276], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+276]
|
||||||
|
|
||||||
|
/* r13 = scratchpad base of hash 0, rsi = scratchpad base of hash 1;
 * r8 keeps the first context pointer and r9 the second. */
mov r13, QWORD PTR [rcx+224]
|
||||||
|
mov r9, rdx
|
||||||
|
mov r10, QWORD PTR [rcx+32]
|
||||||
|
mov r8, rcx
|
||||||
|
xor r10, QWORD PTR [rcx]
|
||||||
|
/* r14d = iteration counter: 262144 = 0x40000 passes of the main loop. */
mov r14d, 262144
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rsi, QWORD PTR [rdx+224]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov rdi, QWORD PTR [r9+32]
|
||||||
|
xor rdi, QWORD PTR [r9]
|
||||||
|
mov rbp, QWORD PTR [r9+40]
|
||||||
|
xor rbp, QWORD PTR [r9+8]
|
||||||
|
movq xmm0, rdx
|
||||||
|
/* Spill xmm6-xmm15 so the epilogue can restore them (rax still holds E here). */
movaps XMMWORD PTR [rax-88], xmm6
|
||||||
|
movaps XMMWORD PTR [rax-104], xmm7
|
||||||
|
movaps XMMWORD PTR [rax-120], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm13
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm14
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm15
|
||||||
|
mov rdx, r10
|
||||||
|
movq xmm4, QWORD PTR [r8+96]
|
||||||
|
/* 2097136 = 0x1FFFF0: 16-byte aligned byte offset below 2 MiB. */
and edx, 2097136
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
/* xmm13 stays zero; the loop copies it to clear registers before cvtsi2sd. */
xorps xmm13, xmm13
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r8+72]
|
||||||
|
movq xmm5, QWORD PTR [r8+104]
|
||||||
|
movq xmm7, rax
|
||||||
|
|
||||||
|
/* xmm14 = (1 << 52) in both 64-bit lanes. */
mov eax, 1
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm14, rax
|
||||||
|
punpcklqdq xmm14, xmm14
|
||||||
|
|
||||||
|
/* xmm12 = (1023 << 52) in both 64-bit lanes. */
mov eax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm12, rax
|
||||||
|
punpcklqdq xmm12, xmm12
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [r8+80]
|
||||||
|
xor rax, QWORD PTR [r8+64]
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r9+56]
|
||||||
|
xor rcx, QWORD PTR [r9+24]
|
||||||
|
movq xmm3, rax
|
||||||
|
mov rax, QWORD PTR [r9+48]
|
||||||
|
xor rax, QWORD PTR [r9+16]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
/* Keep the hash-0 scratchpad base at [rsp]; the loop reuses r13 and reloads it. */
mov QWORD PTR [rsp], r13
|
||||||
|
mov rcx, QWORD PTR [r9+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm6, rax
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp+256], r10
|
||||||
|
mov rcx, rdi
|
||||||
|
mov QWORD PTR [rsp+264], r11
|
||||||
|
movq xmm8, rax
|
||||||
|
and ecx, 2097136
|
||||||
|
punpcklqdq xmm8, xmm0
|
||||||
|
movq xmm0, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, QWORD PTR [r9+104]
|
||||||
|
/* Preload the first 16-byte line of each scratchpad: xmm11 (hash 1), xmm15 (hash 0). */
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
movdqu xmm11, XMMWORD PTR [r8]
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
lea r9, QWORD PTR [rdx+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r9]
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
/* Main loop: one pass performs one iteration for each of the two hashes, interleaved.
 * It runs until r14d, decremented at the bottom, reaches zero.
 * Hash 0 uses scratchpad base r13 and the 128-bit running value r10:r11 (kept at
 * [rsp+256]/[rsp+264] across the pass); hash 1 uses base rsi and rdi:rbp.
 * Each half does: one aesenc of the current 16-byte line, a read/add/write-back of
 * the three neighbouring lines (offset ^16, ^32, ^48), a 64x64-bit multiply, and a
 * shared divpd/sqrtpd stage whose results are corrected by the *_fix_* routines.
 * NOTE(review): presumably these are the CryptoNight v2 shuffle/division/sqrt steps -
 * confirm against the C++ reference implementation. */
main_loop_double_fast2_sandybridge:
|
||||||
|
movdqu xmm9, xmm15
|
||||||
|
mov eax, edx
|
||||||
|
mov ebx, edx
|
||||||
|
xor eax, 16
|
||||||
|
xor ebx, 32
|
||||||
|
xor edx, 48
|
||||||
|
|
||||||
|
/* Hash 0: xmm2 = r10 (low) : r11 (high) is the round key for one AES round on xmm9. */
movq xmm0, r11
|
||||||
|
movq xmm2, r10
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
aesenc xmm9, xmm2
|
||||||
|
|
||||||
|
/* Hash 0: read the three neighbouring lines, add xmm7/xmm2/xmm3, write back rotated. */
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||||
|
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||||
|
|
||||||
|
/* Hash 0: the low qword of the AES result selects the next line; the result XOR xmm7
 * is stored to the line just processed ([r9]). */
movq r11, xmm9
|
||||||
|
mov edx, r11d
|
||||||
|
and edx, 2097136
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
pxor xmm0, xmm7
|
||||||
|
movdqu XMMWORD PTR [r9], xmm0
|
||||||
|
|
||||||
|
lea rbx, QWORD PTR [rdx+r13]
|
||||||
|
mov r10, QWORD PTR [rdx+r13]
|
||||||
|
|
||||||
|
/* Hash 1: same AES step with round key rdi (low) : rbp (high). */
movdqu xmm10, xmm11
|
||||||
|
movq xmm0, rbp
|
||||||
|
movq xmm11, rdi
|
||||||
|
punpcklqdq xmm11, xmm0
|
||||||
|
aesenc xmm10, xmm11
|
||||||
|
|
||||||
|
mov eax, ecx
|
||||||
|
mov r12d, ecx
|
||||||
|
xor eax, 16
|
||||||
|
xor r12d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
|
||||||
|
/* Hash 1: neighbouring-line read/add/write-back with xmm6/xmm11/xmm8. */
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||||
|
paddq xmm0, xmm6
|
||||||
|
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||||
|
|
||||||
|
movq rcx, xmm10
|
||||||
|
and ecx, 2097136
|
||||||
|
|
||||||
|
movdqa xmm0, xmm10
|
||||||
|
pxor xmm0, xmm6
|
||||||
|
movdqu XMMWORD PTR [r8], xmm0
|
||||||
|
mov r12, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov r9, QWORD PTR [rbx+8]
|
||||||
|
|
||||||
|
xor edx, 16
|
||||||
|
mov r8d, edx
|
||||||
|
mov r15d, edx
|
||||||
|
|
||||||
|
/* Hash 0: XOR (low lane of xmm5 << 32) ^ low lane of xmm4 into the loaded qword r10,
 * then form the 128-bit product rdx:rax = r10 * r11 (r11 = low qword of xmm9). */
movq rdx, xmm5
|
||||||
|
shl rdx, 32
|
||||||
|
movq rax, xmm4
|
||||||
|
xor rdx, rax
|
||||||
|
xor r10, rdx
|
||||||
|
mov rax, r10
|
||||||
|
mul r11
|
||||||
|
mov r11d, r8d
|
||||||
|
xor r11d, 48
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rdx, [r11+r13]
|
||||||
|
movq xmm1, rax
|
||||||
|
xor rax, [r11+r13+8]
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
/* Hash 0: second neighbouring-line pass; the product is XORed into one line first. */
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
xor r8d, 32
|
||||||
|
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||||
|
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
movdqu XMMWORD PTR [r15+r13], xmm0
|
||||||
|
|
||||||
|
/* Hash 0: add the product halves to the running value, store it to the current line,
 * XOR with the line's previous contents (r10, r9) and spill the new running value. */
mov r11, QWORD PTR [rsp+256]
|
||||||
|
add r11, rdx
|
||||||
|
mov rdx, QWORD PTR [rsp+264]
|
||||||
|
add rdx, rax
|
||||||
|
mov QWORD PTR [rbx], r11
|
||||||
|
xor r11, r10
|
||||||
|
mov QWORD PTR [rbx+8], rdx
|
||||||
|
xor rdx, r9
|
||||||
|
mov QWORD PTR [rsp+256], r11
|
||||||
|
and r11d, 2097136
|
||||||
|
mov QWORD PTR [rsp+264], rdx
|
||||||
|
mov QWORD PTR [rsp+8], r11
|
||||||
|
lea r15, QWORD PTR [r11+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||||
|
/* r13 is reused as the hash-1 line pointer until it is reloaded from [rsp]. */
lea r13, QWORD PTR [rsi+rcx]
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movaps xmm2, xmm13
|
||||||
|
movq r10, xmm0
|
||||||
|
psllq xmm5, 1
|
||||||
|
shl r10, 32
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movdqa xmm1, xmm10
|
||||||
|
movq r11, xmm0
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movq r8, xmm1
|
||||||
|
psrldq xmm4, 8
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
movq rax, xmm4
|
||||||
|
xor r10, rax
|
||||||
|
movaps xmm1, xmm13
|
||||||
|
xor r10, r12
|
||||||
|
lea rax, QWORD PTR [r11+1]
|
||||||
|
shr rax, 1
|
||||||
|
movdqa xmm3, xmm9
|
||||||
|
punpcklqdq xmm3, xmm10
|
||||||
|
paddq xmm5, xmm3
|
||||||
|
movq rdx, xmm5
|
||||||
|
psrldq xmm5, 8
|
||||||
|
cvtsi2sd xmm2, rax
|
||||||
|
/* Divisors: -2147483647 = 0x80000001 forces bits 31 and 0 of the 32-bit value. */
or edx, -2147483647
|
||||||
|
lea rax, QWORD PTR [r8+1]
|
||||||
|
shr rax, 1
|
||||||
|
movq r9, xmm5
|
||||||
|
cvtsi2sd xmm0, rax
|
||||||
|
or r9d, -2147483647
|
||||||
|
cvtsi2sd xmm1, rdx
|
||||||
|
unpcklpd xmm2, xmm0
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
cvtsi2sd xmm0, r9
|
||||||
|
unpcklpd xmm1, xmm0
|
||||||
|
/* Both divisions at once in double precision (lane 0 = hash 0, lane 1 = hash 1);
 * adding xmm14 (1 << 52) bumps each exponent field by one. The truncated quotient is
 * multiplied back and subtracted from the dividend (r11 / r8, the high qwords of the
 * AES results); a negative remainder branches to the div_fix_* routine. */
divpd xmm2, xmm1
|
||||||
|
paddq xmm2, xmm14
|
||||||
|
cvttsd2si rax, xmm2
|
||||||
|
psrldq xmm2, 8
|
||||||
|
mov rbx, rax
|
||||||
|
imul rax, rdx
|
||||||
|
sub r11, rax
|
||||||
|
js div_fix_1_fast2_sandybridge
|
||||||
|
div_fix_1_ret_fast2_sandybridge:
|
||||||
|
|
||||||
|
cvttsd2si rdx, xmm2
|
||||||
|
mov rax, rdx
|
||||||
|
imul rax, r9
|
||||||
|
movd xmm2, r11d
|
||||||
|
movd xmm4, ebx
|
||||||
|
sub r8, rax
|
||||||
|
js div_fix_2_fast2_sandybridge
|
||||||
|
div_fix_2_ret_fast2_sandybridge:
|
||||||
|
|
||||||
|
/* Pack the 32-bit quotients and remainders of both hashes into xmm4, add them to the
 * AES low qwords (xmm3), then take both square roots of ((x >> 12) + xmm12) as doubles. */
movd xmm1, r8d
|
||||||
|
movd xmm0, edx
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
punpckldq xmm4, xmm0
|
||||||
|
punpckldq xmm4, xmm2
|
||||||
|
paddq xmm3, xmm4
|
||||||
|
movdqa xmm0, xmm3
|
||||||
|
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm12
|
||||||
|
sqrtpd xmm1, xmm0
|
||||||
|
movq r9, xmm1
|
||||||
|
movdqa xmm5, xmm1
|
||||||
|
psrlq xmm5, 19
|
||||||
|
/* 524287 = 0x7FFFF: when the low 19 bits of a raw sqrt result are all zero the
 * sqrt_fix_* routine recomputes that lane of xmm5. */
test r9, 524287
|
||||||
|
je sqrt_fix_1_fast2_sandybridge
|
||||||
|
sqrt_fix_1_ret_fast2_sandybridge:
|
||||||
|
|
||||||
|
movq r9, xmm10
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movq r8, xmm1
|
||||||
|
test r8, 524287
|
||||||
|
je sqrt_fix_2_fast2_sandybridge
|
||||||
|
sqrt_fix_2_ret_fast2_sandybridge:
|
||||||
|
|
||||||
|
/* Hash 1: 128-bit product rdx:rax = r10 * r9 (r9 = low qword of xmm10), followed by
 * the second neighbouring-line pass on the rsi scratchpad. */
mov r12d, ecx
|
||||||
|
mov r8d, ecx
|
||||||
|
xor r12d, 16
|
||||||
|
xor r8d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
mov rax, r10
|
||||||
|
mul r9
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm3, rdx
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||||
|
xor rdx, [r8+rsi]
|
||||||
|
xor rax, [r8+rsi+8]
|
||||||
|
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||||
|
paddq xmm0, xmm6
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
paddq xmm3, xmm8
|
||||||
|
movdqu XMMWORD PTR [r8+rsi], xmm0
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm3
|
||||||
|
|
||||||
|
/* Hash 1: update the running value rdi:rbp, store it to the current line ([r13]) and
 * derive the next line offset in ecx. */
add rdi, rdx
|
||||||
|
mov QWORD PTR [r13], rdi
|
||||||
|
xor rdi, r10
|
||||||
|
mov ecx, edi
|
||||||
|
and ecx, 2097136
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov rdx, QWORD PTR [r13+8]
|
||||||
|
add rbp, rax
|
||||||
|
mov QWORD PTR [r13+8], rbp
|
||||||
|
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||||
|
xor rbp, rdx
|
||||||
|
/* Restore the hash-0 registers for the next pass and rotate the saved AES results:
 * xmm3 <- xmm7 <- xmm9 (hash 0) and xmm8 <- xmm6 <- xmm10 (hash 1). */
mov r13, QWORD PTR [rsp]
|
||||||
|
movdqa xmm3, xmm7
|
||||||
|
mov rdx, QWORD PTR [rsp+8]
|
||||||
|
movdqa xmm8, xmm6
|
||||||
|
mov r10, QWORD PTR [rsp+256]
|
||||||
|
movdqa xmm7, xmm9
|
||||||
|
mov r11, QWORD PTR [rsp+264]
|
||||||
|
movdqa xmm6, xmm10
|
||||||
|
mov r9, r15
|
||||||
|
dec r14d
|
||||||
|
jne main_loop_double_fast2_sandybridge
|
||||||
|
|
||||||
|
/* Epilogue of the double-hash loop: restores the MXCSR value saved at [rsp+272],
 * reloads xmm6-xmm15 from their spill slots, releases the 184-byte frame via r11,
 * pops the eight pushed registers in reverse order and jumps to the _endp label at
 * the end of this file. */
ldmxcsr DWORD PTR [rsp+272]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||||
|
/* r11 = rsp + 184 = address of the last pushed register (r15). */
lea r11, QWORD PTR [rsp+184]
|
||||||
|
movaps xmm6, XMMWORD PTR [r11-24]
|
||||||
|
movaps xmm7, XMMWORD PTR [r11-40]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-56]
|
||||||
|
movaps xmm9, XMMWORD PTR [r11-72]
|
||||||
|
movaps xmm10, XMMWORD PTR [r11-88]
|
||||||
|
movaps xmm11, XMMWORD PTR [r11-104]
|
||||||
|
movaps xmm12, XMMWORD PTR [r11-120]
|
||||||
|
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
jmp cnv2_double_mainloop_asm_fast2_sandybridge_endp
|
||||||
|
|
||||||
|
/* Reached when the hash-0 remainder (r11 = dividend - quotient * divisor) came out
 * negative: the quotient in rbx is lowered by one and the divisor (rdx) is added back
 * to the remainder before resuming in the main loop. */
div_fix_1_fast2_sandybridge:
|
||||||
|
dec rbx
|
||||||
|
add r11, rdx
|
||||||
|
jmp div_fix_1_ret_fast2_sandybridge
|
||||||
|
|
||||||
|
/* Reached when the hash-1 remainder (r8 = dividend - quotient * divisor) came out
 * negative: the quotient in rdx is lowered by one and the divisor (r9) is added back
 * to the remainder before resuming in the main loop. */
div_fix_2_fast2_sandybridge:
|
||||||
|
dec rdx
|
||||||
|
add r8, r9
|
||||||
|
jmp div_fix_2_ret_fast2_sandybridge
|
||||||
|
|
||||||
|
/* Reached when the low 19 bits of the raw hash-0 sqrtpd result (r9) are all zero.
 * Recomputes the low lane of xmm5 with integer arithmetic: the raw value is
 * decremented and shifted right by 19, then adjusted by the carry of a comparison
 * against the sqrt input (low lane of xmm3). The high lane of xmm5 is preserved.
 * NOTE(review): presumably an exact-rounding correction for the sqrt step - confirm
 * against the C++ reference implementation. */
sqrt_fix_1_fast2_sandybridge:
|
||||||
|
movq r8, xmm3
|
||||||
|
/* xmm0 keeps the high lane of xmm5 so it can be merged back at the end. */
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
dec r9
|
||||||
|
/* r11 = (32-bit -1022) << 32, used as a bias in the two sums below. */
mov r11d, -1022
|
||||||
|
shl r11, 32
|
||||||
|
mov rax, r9
|
||||||
|
shr r9, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r9
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+r11+1]
|
||||||
|
add rax, r11
|
||||||
|
imul rdx, rax
|
||||||
|
/* The borrow from this subtraction is added to the candidate result by adc. */
sub rdx, r8
|
||||||
|
adc r9, 0
|
||||||
|
movq xmm5, r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp sqrt_fix_1_ret_fast2_sandybridge
|
||||||
|
|
||||||
|
/* Reached when the low 19 bits of the raw hash-1 sqrtpd result (r8) are all zero.
 * Same integer recomputation as sqrt_fix_1, but against the high lane of xmm3 and
 * writing the result into the high lane of xmm5 (the low lane is preserved).
 * Uses rbx for the bias constant; xmm3 is shifted right by 8 bytes in the process.
 * NOTE(review): presumably an exact-rounding correction for the sqrt step - confirm
 * against the C++ reference implementation. */
sqrt_fix_2_fast2_sandybridge:
|
||||||
|
psrldq xmm3, 8
|
||||||
|
movq r11, xmm3
|
||||||
|
dec r8
|
||||||
|
/* rbx = (32-bit -1022) << 32, used as a bias in the two sums below. */
mov ebx, -1022
|
||||||
|
shl rbx, 32
|
||||||
|
mov rax, r8
|
||||||
|
shr r8, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r8
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+rbx+1]
|
||||||
|
add rax, rbx
|
||||||
|
imul rdx, rax
|
||||||
|
/* The borrow from this subtraction is added to the candidate result by adc. */
sub rdx, r11
|
||||||
|
adc r8, 0
|
||||||
|
movq xmm0, r8
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp sqrt_fix_2_ret_fast2_sandybridge
|
||||||
|
|
||||||
|
cnv2_double_mainloop_asm_fast2_sandybridge_endp:
|
180
src/crypto/asm/cn_fastv2_main_loop_bulldozer.inc
Normal file
180
src/crypto/asm/cn_fastv2_main_loop_bulldozer.inc
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
/* Prologue of the single-hash fast-v2 main loop (Bulldozer variant).
 * On entry rcx points to the context block (the C++ caller passes a ScratchPad
 * pointer). rbx/rbp/rsi are stored above the return address at [rsp+16..32], five
 * registers are pushed and a 64-byte frame holds MXCSR and xmm6-xmm8.
 * NOTE(review): writing above the return address relies on the caller/wrapper
 * providing that space (as the Win64 home area does) - confirm in the including file.
 * NOTE(review): offset 224 is read as the scratchpad base - presumably
 * ScratchPad::memory; confirm against the struct layout. */
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 64
|
||||||
|
|
||||||
|
/* Save the current MXCSR, then load 24448 = 0x5F80: all exception mask bits set and
 * rounding-control bits 14:13 = 10b (round toward +infinity). */
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
/* ebp = iteration counter: 262144 = 0x40000 passes of the main loop. */
mov ebp, 262144
|
||||||
|
/* r8:r11 = running 128-bit value, built from state words 0/4 and 1/5. */
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
/* rbx = scratchpad base used by every memory access in the loop. */
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
/* 2097136 = 0x1FFFF0: 16-byte aligned byte offset below 2 MiB. */
and r10d, 2097136
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movq xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
/* xmm8 stays zero; the loop copies it into xmm1 before sqrtsd. */
xorps xmm8, xmm8
|
||||||
|
/* xmm7 = 1023 << 52 (only the low 12 bits of rax survive the shift). */
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
/* Main loop (Bulldozer variant): one iteration per pass, until ebp reaches zero.
 * rbx = scratchpad base, r10d = current line offset, r8:r11 = running 128-bit value,
 * xmm3/xmm4 = AES results of the previous two passes, r15/rdi = division and sqrt
 * results carried over from the previous pass.
 * Each pass does: one aesenc, a read/add/write-back of the three neighbouring lines
 * (offset ^16, ^32, ^48), an integer division, a sqrtsd with optional fix-up, a
 * 64x64-bit multiply and a second neighbouring-line pass.
 * NOTE(review): presumably the CryptoNight v2 shuffle/division/sqrt steps - confirm
 * against the C++ reference implementation. */
cnv2_main_loop_fast2_bulldozer:
|
||||||
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
/* xmm6 = r8 (low) : r11 (high) is the round key for the AES round below. */
movq xmm6, r8
|
||||||
|
pinsrq xmm6, r11, 1
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
/* r9 = 2 * previous sqrt result (divisor seed); rdi << 32 is mixed into rsi below. */
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
aesenc xmm5, xmm6
|
||||||
|
/* Read the three neighbouring lines, add xmm3/xmm6/xmm4, write back rotated. */
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
|
||||||
|
/* rdi = 1023 << 52, added to the sqrt input further down. */
mov edi, 1023
|
||||||
|
shl rdi, 52
|
||||||
|
|
||||||
|
/* r14 = low qword of the AES result, rax = high qword (the dividend). */
movq r14, xmm5
|
||||||
|
pextrq rax, xmm5, 1
|
||||||
|
|
||||||
|
/* Store AES result XOR xmm3 to the line just processed; r10 = next line offset. */
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
/* Divisor r9 = (r9d + r14d) | 0x80000001 (32-bit, zero-extended); rdx:rax / r9 with
 * rdx = 0. r15 = (remainder << 32) + low 32 bits of the quotient. */
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
xor edx, edx
|
||||||
|
div r9
|
||||||
|
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
lea r15, [rax+rdx]
|
||||||
|
/* sqrt input = ((r14 + r15) >> 12) + (1023 << 52), interpreted as a double. */
lea rax, [r14+r15]
|
||||||
|
shr rax, 12
|
||||||
|
add rax, rdi
|
||||||
|
movq xmm0, rax
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdi, xmm1
|
||||||
|
/* 524287 = 0x7FFFF: if the low 19 bits are all zero the fix-up routine computes rdi,
 * otherwise the result is simply the raw value shifted right by 19. */
test rdi, 524287
|
||||||
|
je sqrt_fixup_fast2_bulldozer
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
sqrt_fixup_fast2_bulldozer_ret:
|
||||||
|
/* 128-bit product rdx:rax = rsi * r14. */
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movq xmm1, rax
|
||||||
|
movq xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
/* Second neighbouring-line pass; the product is XORed into one of the lines first. */
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
/* Rotate the saved AES results (xmm4 <- xmm3 <- xmm5), add the product to the running
 * value, store it to the current line and XOR with the line's previous contents. */
movdqa xmm4, xmm3
|
||||||
|
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne cnv2_main_loop_fast2_bulldozer
|
||||||
|
|
||||||
|
/* Epilogue (Bulldozer variant): restores the MXCSR value saved at [rsp], reloads
 * xmm6-xmm8 and rbx/rbp/rsi from the slots written by the prologue, releases the
 * 64-byte frame, pops the five pushed registers in reverse order and jumps to the
 * _endp label at the end of this file. */
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
/* r11 = rsp + 64 = address of the last pushed register (r15); r11+56/+64/+72 are the
 * entry-time [rsp+16]/[rsp+24]/[rsp+32] slots. */
lea r11, QWORD PTR [rsp+64]
|
||||||
|
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
jmp cnv2_main_loop_fast2_bulldozer_endp
|
||||||
|
|
||||||
|
/* Reached when the low 19 bits of the raw sqrtsd result (rdi) are all zero.
 * Recomputes rdi with integer arithmetic: the raw value is decremented and shifted
 * right by 19, then adjusted by the carry of a comparison against the sqrt input
 * r9 = low qword of xmm5 + r15. Clobbers rax, rcx, rdx and r9.
 * NOTE(review): presumably an exact-rounding correction for the sqrt step - confirm
 * against the C++ reference implementation. */
sqrt_fixup_fast2_bulldozer:
|
||||||
|
movq r9, xmm5
|
||||||
|
add r9, r15
|
||||||
|
dec rdi
|
||||||
|
/* rdx = (32-bit -1022) << 32, used as a bias in the two sums below. */
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
/* The borrow from this subtraction is added to the candidate result by adc. */
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp sqrt_fixup_fast2_bulldozer_ret
|
||||||
|
|
||||||
|
cnv2_main_loop_fast2_bulldozer_endp:
|
186
src/crypto/asm/cn_fastv2_main_loop_ivybridge.inc
Normal file
186
src/crypto/asm/cn_fastv2_main_loop_ivybridge.inc
Normal file
|
@ -0,0 +1,186 @@
|
||||||
|
/* Prologue of the single-hash fast-v2 main loop (Ivy Bridge variant).
 * On entry rcx points to the context block (the C++ caller passes a ScratchPad
 * pointer). rbx is stored above the return address at [rsp+24], seven registers are
 * pushed and an 80-byte frame holds MXCSR and xmm6-xmm8.
 * NOTE(review): writing above the return address relies on the caller/wrapper
 * providing that space (as the Win64 home area does) - confirm in the including file.
 * NOTE(review): offset 224 is read as the scratchpad base - presumably
 * ScratchPad::memory; confirm against the struct layout. */
mov QWORD PTR [rsp+24], rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 80
|
||||||
|
|
||||||
|
/* Save the current MXCSR, then load 24448 = 0x5F80: all exception mask bits set and
 * rounding-control bits 14:13 = 10b (round toward +infinity). */
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
/* esi = iteration counter: 262144 = 0x40000 passes of the main loop. */
mov esi, 262144
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
/* r13d = 0x80000001, ORed into the divisor on every pass. */
mov r13d, -2147483647
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm4, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
/* rbx = scratchpad base used by every memory access in the loop. */
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm3, QWORD PTR [r9+104]
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
/* 2097136 = 0x1FFFF0: 16-byte aligned byte offset below 2 MiB. */
and r10d, 2097136
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
/* xmm8 = 1023 << 52 (only the low 12 bits of rax survive the shift). */
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm8, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
/* Preload the first 16-byte scratchpad line into xmm6. */
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
/* Main loop (Ivy Bridge variant): one iteration per pass, until rsi reaches zero.
 * rbx = scratchpad base, r10d = current line offset, xmm6 = current line (preloaded),
 * r8:r11 = running 128-bit value, xmm4/xmm5 = AES results of the previous two passes,
 * r15 / xmm3 = division and sqrt results carried over from the previous pass.
 * Each pass does: one aesenc, a read/add/write-back of the three neighbouring lines
 * (offset ^16, ^32, ^48), an integer division, a sqrtsd with optional fix-up, a
 * 64x64-bit multiply and a second neighbouring-line pass.
 * NOTE(review): presumably the CryptoNight v2 shuffle/division/sqrt steps - confirm
 * against the C++ reference implementation. */
$main_loop_fast2_ivybridge:
|
||||||
|
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
mov rdi, r15
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
/* xmm7 = r8 (low) : r11 (high) is the round key for the AES round on xmm6. */
movq xmm0, r11
|
||||||
|
movq xmm7, r8
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
aesenc xmm6, xmm7
|
||||||
|
/* rbp = low qword of the AES result; r9d = offset of the next line. */
movq rbp, xmm6
|
||||||
|
mov r9, rbp
|
||||||
|
and r9d, 2097136
|
||||||
|
/* Read the three neighbouring lines, add xmm4/xmm7/xmm5, write back rotated. */
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
mov r10, r9
|
||||||
|
xor r10d, 32
|
||||||
|
/* rdi = r15 ^ (previous sqrt result << 32) ^ first qword of the next line. */
movq rcx, xmm3
|
||||||
|
mov rax, rcx
|
||||||
|
shl rax, 32
|
||||||
|
xor rdi, rax
|
||||||
|
/* Store AES result XOR xmm4 to the line just processed. */
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rdi, QWORD PTR [r9+rbx]
|
||||||
|
lea r14, QWORD PTR [r9+rbx]
|
||||||
|
mov r12, QWORD PTR [r14+8]
|
||||||
|
/* Divisor r9 = (2 * ecx + ebp) | 0x80000001 (32-bit, zero-extended); dividend is the
 * high qword of the AES result with rdx = 0. */
xor edx, edx
|
||||||
|
lea r9d, DWORD PTR [ecx+ecx]
|
||||||
|
add r9d, ebp
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
psrldq xmm0, 8
|
||||||
|
or r9d, r13d
|
||||||
|
movq rax, xmm0
|
||||||
|
div r9
|
||||||
|
xorps xmm3, xmm3
|
||||||
|
/* r15 = (remainder << 32) + low 32 bits of the quotient. */
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
add rdx, rax
|
||||||
|
lea r9, QWORD PTR [rdx+rbp]
|
||||||
|
mov r15, rdx
|
||||||
|
/* sqrt input = ((r15 + rbp) >> 12) + (1023 << 52), interpreted as a double. */
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movq xmm0, rax
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm3, xmm0
|
||||||
|
movq rdx, xmm3
|
||||||
|
/* 524287 = 0x7FFFF: if the low 19 bits are all zero the fix-up routine computes
 * xmm3, otherwise the result is simply the raw value shifted right by 19. */
test edx, 524287
|
||||||
|
je $sqrt_fixup_fast2_ivybridge
|
||||||
|
psrlq xmm3, 19
|
||||||
|
$sqrt_fixup_fast2_ivybridge_ret:
|
||||||
|
|
||||||
|
/* 128-bit product rdx:rax = rdi * rbp; each half is XORed with a qword of the line at
 * offset ecx and added to the running value r8:r11, which is stored to the current
 * line ([r14]). edi then becomes the offset of the line for the next pass. */
mov ecx, r10d
|
||||||
|
mov rax, rdi
|
||||||
|
mul rbp
|
||||||
|
movq xmm2, rdx
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rdi
|
||||||
|
mov edi, r8d
|
||||||
|
and edi, 2097136
|
||||||
|
movq xmm0, rax
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r14+8], r11
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
|
||||||
|
/* Second neighbouring-line pass; the product (xmm2) is XORed with one line first.
 * The saved AES results are rotated: xmm5 <- xmm4 <- xmm6. */
mov r9d, r10d
|
||||||
|
xor r9d, 48
|
||||||
|
xor r10d, 16
|
||||||
|
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqa xmm5, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+rbx], xmm0
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
/* Preload the next pass's line and finish the running value's high half. */
movdqu xmm6, [rdi+rbx]
|
||||||
|
mov r10d, edi
|
||||||
|
xor r11, r12
|
||||||
|
dec rsi
|
||||||
|
jne $main_loop_fast2_ivybridge
|
||||||
|
|
||||||
|
/* Epilogue (Ivy Bridge variant): restores the MXCSR value saved at [rsp], reloads rbx
 * and xmm6-xmm8 from the slots written by the prologue, releases the 80-byte frame,
 * pops the seven pushed registers in reverse order and jumps to the _endp label at the
 * end of this file. */
ldmxcsr DWORD PTR [rsp]
|
||||||
|
/* rsp+160 = 80-byte frame + 7 pushes (56) + 24, i.e. the entry-time [rsp+24] slot. */
mov rbx, QWORD PTR [rsp+160]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
add rsp, 80
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
jmp $cnv2_main_loop_fast2_ivybridge_endp
|
||||||
|
|
||||||
|
/* Reached when the low 19 bits of the raw sqrtsd result (rdx) are all zero.
 * Recomputes the result with integer arithmetic: the raw value is decremented and
 * shifted right by 19, then adjusted by the carry of a comparison against the sqrt
 * input (r9); the result is written to xmm3. r13 is borrowed as scratch and reset to
 * its loop constant 0x80000001 before returning. Clobbers rax, rcx and rdx.
 * NOTE(review): presumably an exact-rounding correction for the sqrt step - confirm
 * against the C++ reference implementation. */
$sqrt_fixup_fast2_ivybridge:
|
||||||
|
dec rdx
|
||||||
|
/* r13 = (32-bit -1022) << 32, used as a bias below. */
mov r13d, -1022
|
||||||
|
shl r13, 32
|
||||||
|
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
add rax, r13
|
||||||
|
/* Subtracting NOT(r13) equals adding r13 + 1. */
not r13
|
||||||
|
sub rcx, r13
|
||||||
|
/* Restore the divisor mask constant expected by the main loop. */
mov r13d, -2147483647
|
||||||
|
imul rcx, rax
|
||||||
|
/* The borrow from this subtraction is added to the candidate result by adc. */
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movq xmm3, rdx
|
||||||
|
jmp $sqrt_fixup_fast2_ivybridge_ret
|
||||||
|
|
||||||
|
$cnv2_main_loop_fast2_ivybridge_endp:
|
183
src/crypto/asm/cn_fastv2_main_loop_ryzen.inc
Normal file
183
src/crypto/asm/cn_fastv2_main_loop_ryzen.inc
Normal file
|
@ -0,0 +1,183 @@
|
||||||
|
/*
 * cn_fastv2_main_loop_ryzen.inc -- entry and setup.
 * Reads all of its initial state through rcx, which must point at a
 * context block on entry.  rbx/rbp/rsi are stored in the stack slots at
 * [rsp+16], [rsp+24] and [rsp+32]; rdi and r12-r15 are pushed; 64 bytes
 * of locals are then reserved (MXCSR copy at [rsp], xmm8/xmm7/xmm6 at
 * [rsp+16]/[rsp+32]/[rsp+48]).
 */
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 64
|
||||||
|
|
||||||
|
/* Save the caller's MXCSR at [rsp]; it is reloaded after the loop. */
stmxcsr DWORD PTR [rsp]
|
||||||
|
/* 24448 = 0x5F80: all SSE exceptions masked, rounding control = 10b (round toward +infinity). */
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
/*
 * Initial loop state (ctx = original rcx, kept in r9):
 *   r8:r11 = ctx[32..47] ^ ctx[0..15]
 *   xmm3   = ctx[48..63] ^ ctx[16..31]
 *   xmm4   = ctx[80..95] ^ ctx[64..79]
 *   r15    = qword at ctx+96, rdi = qword at ctx+104
 *   rbx    = pointer at ctx+224, the base of every memory access in the
 *            loop (presumably the scratchpad -- confirm against the caller)
 *   ebp    = 262144 (0x40000) iterations
 *   r10    = r8 & 0x1FFFF0, the first 16-byte-aligned offset
 */
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov ebp, 262144
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
/* rcx is overwritten here; r9 carries the context pointer from now on. */
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
and r10d, 2097136
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movq xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
/* xmm8 = 0, used inside the loop to clear xmm1. */
xorps xmm8, xmm8
|
||||||
|
/*
 * xmm7 = 1023 << 52 = 0x3FF0000000000000, the bit pattern of the double 1.0.
 * Writing only ax is sufficient: the shift by 52 discards every bit of rax
 * above bit 11, and bits 10-11 of 1023 are zero.
 */
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
/* Align the loop head that follows: 16 bytes when __APPLE__ is defined, 64 otherwise. */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
/*
 * Main loop, executed ebp (262144) times.
 * Live state on entry to each iteration:
 *   r8:r11  128-bit value used as the AES round key
 *   xmm3, xmm4  two 128-bit values carried over from earlier iterations
 *   r15, rdi    results of the previous division / square root
 *   r10     current 16-byte-aligned offset, rbx = base pointer
 */
$main_loop_fast2_ryzen:
|
||||||
|
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm6, r8
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
/* rdx keeps the first address for the write-back further down. */
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
/* r9 = 2 * rdi (taken before rdi is shifted); it becomes part of the divisor. */
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
/*
 * First shuffle of the three neighbouring 16-byte chunks at offset^16,
 * offset^32 and offset^48, interleaved with one AES round on xmm5:
 *   [off^16] <- old[off^48] + xmm4
 *   [off^32] <- old[off^16] + xmm3
 *   [off^48] <- old[off^32] + xmm6
 */
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
aesenc xmm5, xmm6
|
||||||
|
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
/*
 * rsi = r15 ^ (rdi << 32) ^ qword at the second offset.
 * r14 = low qword of the AES result; r10 = r14 & 0x1FFFF0 is the second
 * offset.  (AES result ^ xmm3) is written back to the first address.
 * r12 = second address, r13 = its upper qword (used at the loop tail).
 */
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
movq r14, xmm5
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
/*
 * Integer division.  Divisor r9 = (2*rdi_on_entry + r14d) | 0x80000001
 * (32-bit result, zero-extended; -2147483647 is 0x80000001, so bit 31 and
 * bit 0 are forced on and the divisor is never zero).  Dividend is
 * rdx:rax = 0 : high qword of the AES result.
 */
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
xor edx, edx
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movq rax, xmm0
|
||||||
|
|
||||||
|
div r9
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm1, rdx
|
||||||
|
/* Low qword of xmm0 = (low 32 bits of remainder << 32) | low 32 bits of quotient. */
punpckldq xmm0, xmm1
|
||||||
|
movq r15, xmm0
|
||||||
|
/* Square-root input = r15 + low qword of the AES result; xmm2 keeps a copy for the fix-up path. */
paddq xmm0, xmm5
|
||||||
|
movdqa xmm2, xmm0
|
||||||
|
/* (input >> 12) + bit pattern of 1.0, then a scalar double square root. */
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdi, xmm1
|
||||||
|
/* 524287 = 0x7FFFF: if the low 19 bits of the result are all zero, take the fix-up path. */
test rdi, 524287
|
||||||
|
je $sqrt_fixup_fast2_ryzen
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
/* Both the normal path and the fix-up path continue here with the final rdi. */
$sqrt_fixup_fast2_ryzen_ret:
|
||||||
|
/* rdx:rax = rsi * r14 (unsigned 64x64 -> 128); xmm0 = (low: rdx, high: rax). */
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movq xmm1, rax
|
||||||
|
movq xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
/*
 * Second shuffle, around the second offset (r9 = off^16, rcx = off^32,
 * r10 = off^48):
 *   rdx ^= qword [off^32], rax ^= qword [off^32 + 8]
 *   [off^16] <- xmm4 + old[off^48]
 *   [off^32] <- (old[off^16] ^ xmm0) + xmm3
 *   [off^48] <- old[off^32] + xmm6
 */
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
/*
 * Loop tail: xmm4 <- xmm3, xmm3 <- this iteration's AES result;
 * r8:r11 += rdx:rax and is stored at the second address (r12), then
 * r8 ^= rsi and r11 ^= r13 (the values read from that address earlier);
 * r10 = r8 & 0x1FFFF0 is the next iteration's offset.
 */
movdqa xmm4, xmm3
|
||||||
|
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne $main_loop_fast2_ryzen
|
||||||
|
|
||||||
|
/*
 * Epilogue: restores the MXCSR value saved at [rsp], the spilled
 * xmm6/xmm7/xmm8, and rbx/rbp/rsi from the entry-time stack slots, then
 * pops r15..r12 and rdi in reverse push order.
 * r11 = rsp+64 is the stack pointer as it was right after the five pushes,
 * so [r11+56], [r11+64], [r11+72] are the slots written at
 * [rsp+16], [rsp+24], [rsp+32] on entry (5 pushes = 40 bytes).
 */
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+64]
|
||||||
|
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
/* [r11-48] is the same location as [rsp+16], where xmm8 was spilled. */
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
/* Skip over the out-of-line fix-up code to the end-of-body label. */
jmp $cnv2_main_loop_fast2_ryzen_endp
|
||||||
|
|
||||||
|
/*
 * Out-of-line fix-up, entered from the main loop when the low 19 bits of
 * the sqrtsd result (rdi) are all zero.
 * With K = 0xFFFFFC0200000000 (mov edx, -1022 zero-extends, then << 32):
 *   rdi  = (rdi - 1) >> 19
 *   rax  = (rdi_before_shift - 1) >> 20
 *   rcx  = (rdi - rax + K + 1) * (rax + K)
 *   rdi += borrow of (rcx - r9), r9 = square-root input saved in xmm2
 * i.e. rdi is incremented by one exactly when rcx < r9 (unsigned).
 * Jumps back to $sqrt_fixup_fast2_ryzen_ret with the corrected rdi.
 */
$sqrt_fixup_fast2_ryzen:
|
||||||
|
movq r9, xmm2
|
||||||
|
dec rdi
|
||||||
|
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
/* The subtraction is performed only for its carry flag, consumed by adc. */
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp $sqrt_fixup_fast2_ryzen_ret
|
||||||
|
|
||||||
|
/* End-of-body label: target of the epilogue's jmp; execution continues with whatever follows the #include. */
$cnv2_main_loop_fast2_ryzen_endp:
|
271
src/crypto/asm/cn_fastv2_mainloop_soft_aes_sandybridge.inc
Normal file
271
src/crypto/asm/cn_fastv2_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,271 @@
|
||||||
|
/*
 * cn_fastv2_mainloop_soft_aes_sandybridge.inc -- entry and setup.
 * rcx must point at the context block.  It is stored in the stack slot at
 * [rsp+8] so the loop can reload it (as [rsp+224] after the eight pushes
 * and the 152-byte frame: 8 + 64 + 152 = 224).
 * Frame layout after "sub rsp, 152": [rsp] new MXCSR, [rsp+4] saved MXCSR,
 * [rsp+16..143] spilled xmm6-xmm13.
 */
mov QWORD PTR [rsp+8], rcx
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 152
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp+4]
|
||||||
|
/* 24448 = 0x5F80: all SSE exceptions masked, rounding control = 10b (round toward +infinity). */
mov DWORD PTR [rsp], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
|
||||||
|
/*
 * Initial loop state (ctx = original rcx, kept in r10):
 *   r8:r9 = ctx[32..47] ^ ctx[0..15]
 *   xmm4  = ctx[48..63] ^ ctx[16..31]
 *   xmm5  = ctx[80..95] ^ ctx[64..79]
 *   r11   = pointer at ctx+224, base of all memory accesses in the loop
 *           (presumably the scratchpad -- confirm against the caller)
 */
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov r10, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r9, QWORD PTR [rcx+40]
|
||||||
|
xor r9, QWORD PTR [rcx+8]
|
||||||
|
movq xmm4, rax
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r11, QWORD PTR [rcx+224]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r10+72]
|
||||||
|
mov rax, QWORD PTR [r10+80]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rax, QWORD PTR [r10+64]
|
||||||
|
|
||||||
|
/* Spill xmm6-xmm13; they are reloaded from the same slots after the loop. */
movaps XMMWORD PTR [rsp+16], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+128], xmm13
|
||||||
|
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
/* xmm8 = 1023 << 52 = 0x3FF0000000000000 (bit pattern of the double 1.0); the shift discards the stale upper bits of rax. */
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm8, rax
|
||||||
|
|
||||||
|
/*
 * Values parked for the loop:
 *   [rsp+248] = r8 & 0x1FFFF0 (current offset), [rsp+240] = r9
 *   xmm10 = qword at ctx+96, xmm13 = qword at ctx+104
 *   xmm12 = r11 (base pointer), xmm9 = 0
 *   r12d  = 262144 (0x40000) iterations
 * [rsp+240] and [rsp+248] lie above the entry-time rsp (entry+24, entry+32).
 */
mov rax, r8
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
and eax, 2097136
|
||||||
|
movq xmm10, QWORD PTR [r10+96]
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r10+104]
|
||||||
|
xorps xmm9, xmm9
|
||||||
|
mov QWORD PTR [rsp+248], rax
|
||||||
|
movq xmm12, r11
|
||||||
|
mov QWORD PTR [rsp+240], r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
movq xmm13, rcx
|
||||||
|
mov r12d, 262144
|
||||||
|
|
||||||
|
/* Align the loop head that follows: 16 bytes when __APPLE__ is defined, 64 otherwise. */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
/*
 * Main loop, executed 262144 times.
 * On entry to each iteration: r10 = context pointer, rax = current offset
 * (also at [rsp+248]), r11 = base pointer, r8:r9 = 128-bit key value,
 * r12d = remaining iterations.  r12 is needed as a table pointer, so the
 * counter is parked in xmm11 for the duration of the iteration.
 */
cnv2_mainloop_soft_aes_fast2_sandybridge:
|
||||||
|
movd xmm11, r12d
|
||||||
|
/*
 * r12 = pointer at ctx+272.  It is indexed below as four consecutive
 * 256-entry dword tables (byte offsets 0, 1024, 2048, 3072); this looks
 * like a table-driven AES round replacing the aesenc of the hardware-AES
 * variants -- confirm against the code that fills ctx+272.
 */
mov r12, QWORD PTR [r10+272]
|
||||||
|
/* r13 = address of the current 16-byte block; its four dwords go to esi, r10d, r14d, ebp. */
lea r13, QWORD PTR [rax+r11]
|
||||||
|
mov esi, DWORD PTR [r13]
|
||||||
|
movq xmm0, r9
|
||||||
|
mov r10d, DWORD PTR [r13+4]
|
||||||
|
movq xmm7, r8
|
||||||
|
mov ebp, DWORD PTR [r13+12]
|
||||||
|
mov r14d, DWORD PTR [r13+8]
|
||||||
|
mov rdx, QWORD PTR [rsp+248]
|
||||||
|
/* First table (offset 0): indexed by the lowest byte of each dword. */
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
/* xmm7 = (r8, r9), xored into the table-lookup result further down. */
punpcklqdq xmm7, xmm0
|
||||||
|
mov r15d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
mov edi, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov ebx, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
/* Second table (offset 1024): indexed by the second byte of each dword. */
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov eax, r14d
|
||||||
|
shr eax, 8
|
||||||
|
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
/* Adding 256 to a top-byte index selects the fourth table once r12 has been advanced by 2048. */
add eax, 256
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
/* r12 now points at the third table; [r12+(x+256)*4] reaches the fourth. */
add r12, 2048
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
add r10d, 256
|
||||||
|
mov r11d, DWORD PTR [r12+rax*4]
|
||||||
|
xor r11d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r11d, r9d
|
||||||
|
movzx ecx, sil
|
||||||
|
mov r10d, DWORD PTR [r12+r10*4]
|
||||||
|
shr esi, 8
|
||||||
|
add esi, 256
|
||||||
|
xor r10d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
xor r10d, ebx
|
||||||
|
shr ebp, 8
|
||||||
|
movd xmm1, r11d
|
||||||
|
add ebp, 256
|
||||||
|
/* r11 was used as a scratch dword above; restore the base pointer from xmm12. */
movq r11, xmm12
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r9d, DWORD PTR [r12+rsi*4]
|
||||||
|
mov eax, DWORD PTR [r12+rbp*4]
|
||||||
|
xor r9d, edi
|
||||||
|
movzx ecx, r14b
|
||||||
|
movd xmm0, r10d
|
||||||
|
movd xmm2, r9d
|
||||||
|
xor eax, DWORD PTR [r12+rcx*4]
|
||||||
|
/*
 * While the four result dwords (eax, r9d, r10d, r11d) are packed into xmm6,
 * compute the three neighbour offsets rcx = off^16, rax = off^32,
 * rdx = off^48 for the first shuffle.
 */
mov rcx, rdx
|
||||||
|
xor eax, r15d
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
xor rcx, 16
|
||||||
|
movd xmm6, eax
|
||||||
|
mov rax, rdx
|
||||||
|
punpckldq xmm6, xmm0
|
||||||
|
xor rax, 32
|
||||||
|
punpckldq xmm6, xmm2
|
||||||
|
xor rdx, 48
|
||||||
|
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||||
|
/* xmm6 = table-lookup result ^ (r8, r9). */
pxor xmm6, xmm7
|
||||||
|
/*
 * First shuffle:
 *   [off^16] <- old[off^48] + xmm5
 *   [off^32] <- old[off^16] + xmm4
 *   [off^48] <- old[off^32] + xmm7
 */
paddq xmm2, xmm4
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
/* rcx = value carried in xmm13 (the previous iteration's square-root result). */
movq rcx, xmm13
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||||
|
/* rdi = low qword of xmm6; r10 = rdi & 0x1FFFF0 is the second offset. */
movq rdi, xmm6
|
||||||
|
mov r10, rdi
|
||||||
|
and r10d, 2097136
|
||||||
|
xor edx, edx
|
||||||
|
/* rbx = xmm10 ^ (rcx << 32) ^ qword at the second offset (the last xor happens below). */
mov rax, rcx
|
||||||
|
shl rax, 32
|
||||||
|
movq rbx, xmm10
|
||||||
|
xor rbx, rax
|
||||||
|
/* Divisor r9 = (2*rcx + edi) truncated to 32 bits, then OR 0x80000001 (never zero). */
lea r9, QWORD PTR [rcx+rcx]
|
||||||
|
add r9d, edi
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
mov ecx, -2147483647
|
||||||
|
/* Write (xmm6 ^ xmm4) back to the block that was read at the top of the iteration. */
movdqu XMMWORD PTR [r13], xmm0
|
||||||
|
or r9, rcx
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
movaps xmm1, xmm9
|
||||||
|
psrldq xmm0, 8
|
||||||
|
/* Dividend rdx:rax = 0 : high qword of xmm6. */
movq rax, xmm0
|
||||||
|
xor rbx, QWORD PTR [r10+r11]
|
||||||
|
/* r14 = second address, rbp = its upper qword (both used at the loop tail). */
lea r14, QWORD PTR [r10+r11]
|
||||||
|
mov rbp, QWORD PTR [r14+8]
|
||||||
|
div r9
|
||||||
|
/* rdx = (remainder << 32) + low 32 bits of quotient; kept in xmm10 for the next iteration. */
shl rdx, 32
|
||||||
|
mov eax, eax
|
||||||
|
add rdx, rax
|
||||||
|
/* r9 = square-root input = rdx + rdi; it is also what the fix-up path compares against. */
lea r9, QWORD PTR [rdx+rdi]
|
||||||
|
movq xmm10, rdx
|
||||||
|
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movq xmm0, rax
|
||||||
|
/* Add the bit pattern of 1.0, then take a scalar double square root. */
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
movq rdx, xmm1
|
||||||
|
/* 524287 = 0x7FFFF: if the low 19 bits of the result are all zero, take the fix-up path. */
test rdx, 524287
|
||||||
|
je sqrt_fixup_soft_aes_fast2_sandybridge
|
||||||
|
psrlq xmm1, 19
|
||||||
|
/* Both paths continue here with the final square-root value in xmm1. */
sqrt_fixup_soft_aes_fast2_sandybridge_ret:
|
||||||
|
|
||||||
|
/*
 * Second shuffle around the second offset (r9 = off^16, rcx = off^32,
 * r10 = off^48), interleaved with rdx:rax = rbx * rdi:
 *   rax ^= qword [off^32 + 8], rdx ^= qword [off^32]
 *   [off^16] <- old[off^48] + xmm5
 *   [off^32] <- (old[off^16] ^ (rdx, rax) before the xors) + xmm4
 *   [off^48] <- old[off^32] + xmm7
 */
mov r9, r10
|
||||||
|
movdqa xmm13, xmm1
|
||||||
|
xor r9, 16
|
||||||
|
mov rcx, r10
|
||||||
|
xor rcx, 32
|
||||||
|
xor r10, 48
|
||||||
|
mov rax, rbx
|
||||||
|
mul rdi
|
||||||
|
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm3, rdx
|
||||||
|
xor rax, QWORD PTR [r11+rcx+8]
|
||||||
|
xor rdx, QWORD PTR [rcx+r11]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
add r8, rdx
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||||
|
pxor xmm2, xmm3
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||||
|
/* Rotate the carried values: xmm5 <- xmm4, xmm4 <- this iteration's xmm6. */
movdqa xmm5, xmm4
|
||||||
|
mov r9, QWORD PTR [rsp+240]
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
add r9, rax
|
||||||
|
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||||
|
/* Reload the context pointer saved at entry and the loop counter parked in xmm11. */
mov r10, QWORD PTR [rsp+224]
|
||||||
|
movd r12d, xmm11
|
||||||
|
/* Store r8:r9 at the second address, then r8 ^= rbx, r9 ^= rbp; the next offset is r8 & 0x1FFFF0. */
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rbx
|
||||||
|
mov rax, r8
|
||||||
|
mov QWORD PTR [r14+8], r9
|
||||||
|
and eax, 2097136
|
||||||
|
xor r9, rbp
|
||||||
|
mov QWORD PTR [rsp+240], r9
|
||||||
|
mov QWORD PTR [rsp+248], rax
|
||||||
|
sub r12d, 1
|
||||||
|
jne cnv2_mainloop_soft_aes_fast2_sandybridge
|
||||||
|
|
||||||
|
/*
 * Epilogue: reloads the MXCSR value saved at [rsp+4], restores xmm6-xmm13
 * from the slots they were spilled to, releases the 152-byte frame and pops
 * the eight general-purpose registers in reverse push order.
 */
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||||
|
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||||
|
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||||
|
|
||||||
|
add rsp, 152
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
/* Skip over the out-of-line fix-up code to the end-of-body label. */
jmp cnv2_mainloop_soft_aes_fast2_sandybridge_asm_endp
|
||||||
|
|
||||||
|
/*
 * Out-of-line fix-up, entered from the main loop when the low 19 bits of
 * the sqrtsd result (rdx) are all zero.  r9 still holds the square-root
 * input computed in the loop.
 * With K = 0xFFFFFC0200000000 (mov r15d, -1022 zero-extends, then << 32):
 *   rdx  = (rdx - 1) >> 19
 *   rax  = (rdx_before_shift - 1) >> 20
 *   rcx  = (rdx - rax + K + 1) * (rax + K)
 *   rdx += borrow of (rcx - r9)
 * The corrected value is returned to the loop in xmm1.
 */
sqrt_fixup_soft_aes_fast2_sandybridge:
|
||||||
|
dec rdx
|
||||||
|
mov r15d, -1022
|
||||||
|
shl r15, 32
|
||||||
|
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+r15+1]
|
||||||
|
add rax, r15
|
||||||
|
imul rcx, rax
|
||||||
|
/* The subtraction is performed only for its carry flag, consumed by adc. */
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movq xmm1, rdx
|
||||||
|
jmp sqrt_fixup_soft_aes_fast2_sandybridge_ret
|
||||||
|
|
||||||
|
/* End-of-body label: target of the epilogue's jmp; execution continues with whatever follows the #include. */
cnv2_mainloop_soft_aes_fast2_sandybridge_asm_endp:
|
74
src/crypto/asm/cn_liteupx_mainloop_sandybridge.inc
Normal file
74
src/crypto/asm/cn_liteupx_mainloop_sandybridge.inc
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
/*
 * cn_liteupx_mainloop_sandybridge.inc -- entry and setup.
 * rcx must point at the context block.  rbx/rbp/rsi/rdi are stored in the
 * stack slots at [rsp+8]..[rsp+32]; r14 and r15 are pushed.
 * Initial loop state:
 *   r8:rdi = ctx[32..47] ^ ctx[0..15]
 *   xmm3   = ctx[48..63] ^ ctx[16..31]
 *   r14    = 8 bytes at offset 35 of the buffer pointed to by ctx+256,
 *            xored with the qword at ctx+192
 *   r15    = pointer at ctx+264, indexed as a byte table in the loop
 *   rsi    = pointer at ctx+224, base of all memory accesses in the loop
 *            (presumably the scratchpad -- confirm against the caller)
 *   ebp    = 131072 (0x20000) iterations
 *   rdx    = r8 & 0xFFFF0, xmm2 = 16 bytes at rsi+rdx
 */
mov QWORD PTR [rsp+8], rbx
|
||||||
|
mov QWORD PTR [rsp+16], rbp
|
||||||
|
mov QWORD PTR [rsp+24], rsi
|
||||||
|
mov QWORD PTR [rsp+32], rdi
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
mov ebp, 131072
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
movq xmm3, rax
|
||||||
|
mov rax, QWORD PTR [rcx+256]
|
||||||
|
mov rdi, QWORD PTR [rcx+40]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rdi, QWORD PTR [rcx+8]
|
||||||
|
mov rdx, r8
|
||||||
|
mov r15, QWORD PTR [rcx+264]
|
||||||
|
and edx, 1048560
|
||||||
|
/* Unaligned 8-byte read at byte offset 35 of the buffer. */
mov r14, QWORD PTR [rax+35]
|
||||||
|
xor r14, QWORD PTR [rcx+192]
|
||||||
|
mov rsi, QWORD PTR [rcx+224]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
/* Preload the first block; each iteration loads the next one at its tail. */
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||||
|
|
||||||
|
/* Align the loop head that follows: 16 bytes when __APPLE__ is defined, 64 otherwise. */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
/*
 * Main loop, executed ebp (131072) times.
 * On entry to each iteration: xmm2 = block already loaded from rsi+rdx,
 * r8:rdi = 128-bit key value, xmm3 = previous iteration's AES result.
 */
cn_liteupx_mainloop_sandybridge:
|
||||||
|
/* One AES round on the loaded block with (r8, rdi) as the round key. */
movq xmm0, rdi
|
||||||
|
movq xmm1, r8
|
||||||
|
punpcklqdq xmm1, xmm0
|
||||||
|
aesenc xmm2, xmm1
|
||||||
|
/* r10 = low qword of the result; r9 = address of the second block (rsi + (r10 & 0xFFFF0)). */
movq r10, xmm2
|
||||||
|
mov r9d, r10d
|
||||||
|
and r9d, 1048560
|
||||||
|
add r9, rsi
|
||||||
|
/* Write (result ^ xmm3) back to the first block and remember the result in xmm3. */
movdqa xmm0, xmm2
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
movdqa xmm3, xmm2
|
||||||
|
movdqu XMMWORD PTR [rdx+rsi], xmm0
|
||||||
|
/* Replace byte 11 of the block just written with table[byte 11], table = r15. */
psrldq xmm0, 11
|
||||||
|
movq rax, xmm0
|
||||||
|
movzx eax, al
|
||||||
|
movzx eax, BYTE PTR [rax+r15]
|
||||||
|
mov BYTE PTR [rsi+rdx+11], al
|
||||||
|
/* rbx, r11 = the two qwords of the second block. */
mov rbx, QWORD PTR [r9]
|
||||||
|
mov r11, QWORD PTR [r9+8]
|
||||||
|
/* rdx:rax = rbx * r10 (unsigned 64x64 -> 128); high half is added to r8, low half to rdi. */
mov rax, rbx
|
||||||
|
mul r10
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r9], r8
|
||||||
|
add rdi, rax
|
||||||
|
/* The upper qword is stored xored with r14; rdi itself is not changed by this xor. */
mov rax, r14
|
||||||
|
xor rax, rdi
|
||||||
|
mov QWORD PTR [r9+8], rax
|
||||||
|
/* r8 ^= rbx, rdi ^= r11; the next offset is r8 & 0xFFFF0 and its block is preloaded. */
xor r8, rbx
|
||||||
|
mov rdx, r8
|
||||||
|
and edx, 1048560
|
||||||
|
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||||
|
xor rdi, r11
|
||||||
|
dec ebp
|
||||||
|
jne cn_liteupx_mainloop_sandybridge
|
||||||
|
|
||||||
|
/*
 * Epilogue: restores rbx/rbp/rsi/rdi from the slots written on entry.
 * Two 8-byte pushes followed those stores, so the entry-time offsets
 * +8..+32 are now +24..+48.  r15 and r14 are then popped in reverse push
 * order.  There is no ret or jmp: execution falls through to whatever
 * follows the #include.
 */
mov rbx, QWORD PTR [rsp+24]
|
||||||
|
mov rbp, QWORD PTR [rsp+32]
|
||||||
|
mov rsi, QWORD PTR [rsp+40]
|
||||||
|
mov rdi, QWORD PTR [rsp+48]
|
||||||
|
pop r15
|
||||||
|
pop r14
|
166
src/crypto/asm/cn_liteupx_mainloop_soft_aes_sandybridge.inc
Normal file
166
src/crypto/asm/cn_liteupx_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
/*
 * cn_liteupx_mainloop_soft_aes_sandybridge.inc -- entry and setup.
 * rcx must point at the context block.  Pushes the eight registers it
 * clobbers and reserves a 72-byte frame: [rsp..rsp+63] spilled xmm6-xmm9,
 * [rsp+64] the current offset.
 */
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 72
|
||||||
|
|
||||||
|
movaps XMMWORD PTR [rsp], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm9
|
||||||
|
|
||||||
|
/*
 * Initial loop state:
 *   r8:r13 = ctx[32..47] ^ ctx[0..15]
 *   xmm4   = ctx[48..63] ^ ctx[16..31]
 *   xmm5   = 8 bytes at offset 35 of the buffer pointed to by ctx+256,
 *            xored with the qword at ctx+192
 *   rdi / xmm8 = pointer at ctx+224, base of all memory accesses in the
 *            loop (presumably the scratchpad -- confirm against the caller)
 *   rdx / [rsp+64] = r8 & 0xFFFF0, the first offset
 */
mov rax, QWORD PTR [rcx+48]
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
movq xmm4, rax
|
||||||
|
mov rax, QWORD PTR [rcx+256]
|
||||||
|
mov r13, QWORD PTR [rcx+40]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor r13, QWORD PTR [rcx+8]
|
||||||
|
mov rdx, r8
|
||||||
|
mov rdi, QWORD PTR [rcx+224]
|
||||||
|
and edx, 1048560
|
||||||
|
mov rax, QWORD PTR [rax+35]
|
||||||
|
xor rax, QWORD PTR [rcx+192]
|
||||||
|
movq xmm5, rax
|
||||||
|
movq xmm8, rdi
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
mov QWORD PTR [rsp+64], rdx
|
||||||
|
|
||||||
|
/* Park the context pointer in xmm6 and the byte-table pointer (ctx+264) in xmm7; rcx and rax are reused in the loop. */
movq xmm6, rcx
|
||||||
|
mov rax, QWORD PTR [rcx+264]
|
||||||
|
movq xmm7, rax
|
||||||
|
|
||||||
|
/* eax = 131072 (0x20000) iterations; the loop parks it in xmm9. */
mov eax, 131072
|
||||||
|
|
||||||
|
/* Align the loop head that follows: 16 bytes when __APPLE__ is defined, 64 otherwise. */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
/*
 * Main loop, executed 131072 times.
 * On entry to each iteration: eax = remaining iterations, rcx = context
 * pointer, rdi = base pointer, rdx = current offset (also at [rsp+64]),
 * r8:r13 = 128-bit key value, xmm4 = previous iteration's result.
 */
cn_liteupx_mainloop_soft_aes_sandybridge:
|
||||||
|
/* eax is reused below, so the loop counter is parked in xmm9. */
movq xmm9, rax
|
||||||
|
/*
 * r12 = pointer at ctx+272.  It is indexed below as four consecutive
 * 256-entry dword tables (byte offsets 0, 1024, 2048, 3072); this looks
 * like a table-driven AES round replacing the aesenc of the hardware-AES
 * variant -- confirm against the code that fills ctx+272.
 */
mov r12, QWORD PTR [rcx+272]
|
||||||
|
/* The four dwords of the current block go to esi, r10d, r14d, ebp. */
mov esi, DWORD PTR [rdx+rdi]
|
||||||
|
mov r10d, DWORD PTR [rdx+rdi+4]
|
||||||
|
mov ebp, DWORD PTR [rdx+rdi+12]
|
||||||
|
mov r14d, DWORD PTR [rdx+rdi+8]
|
||||||
|
mov rdx, QWORD PTR [rsp+64]
|
||||||
|
/* First table (offset 0): indexed by the lowest byte of each dword. */
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
mov r15d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
/* edi is used as a scratch dword from here; the base pointer is restored from xmm8 later. */
mov edi, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov ebx, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
/* Second table (offset 1024): indexed by the second byte of each dword. */
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov eax, r14d
|
||||||
|
shr eax, 8
|
||||||
|
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
/* Adding 256 to a top-byte index selects the fourth table once r12 has been advanced by 2048. */
add eax, 256
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
/* r12 now points at the third table; [r12+(x+256)*4] reaches the fourth. */
add r12, 2048
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
add r10d, 256
|
||||||
|
mov r11d, DWORD PTR [r12+rax*4]
|
||||||
|
xor r11d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r11d, r9d
|
||||||
|
movzx ecx, sil
|
||||||
|
mov r10d, DWORD PTR [r12+r10*4]
|
||||||
|
shr esi, 8
|
||||||
|
add esi, 256
|
||||||
|
xor r10d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
xor r10d, ebx
|
||||||
|
shr ebp, 8
|
||||||
|
add ebp, 256
|
||||||
|
movd xmm1, r11d
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r9d, DWORD PTR [r12+rsi*4]
|
||||||
|
mov eax, DWORD PTR [r12+rbp*4]
|
||||||
|
xor r9d, edi
|
||||||
|
/* Restore the base pointer that edi was borrowed from. */
movq rdi, xmm8
|
||||||
|
movzx ecx, r14b
|
||||||
|
movd xmm0, r10d
|
||||||
|
movd xmm2, r9d
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
movq xmm1, r8
|
||||||
|
xor eax, DWORD PTR [r12+rcx*4]
|
||||||
|
xor eax, r15d
|
||||||
|
movd xmm3, eax
|
||||||
|
/* rax = byte-table pointer parked in xmm7. */
movq rax, xmm7
|
||||||
|
punpckldq xmm3, xmm0
|
||||||
|
movq xmm0, r13
|
||||||
|
punpcklqdq xmm1, xmm0
|
||||||
|
punpckldq xmm3, xmm2
|
||||||
|
/* xmm3 = packed table-lookup result ^ (r8, r13). */
pxor xmm3, xmm1
|
||||||
|
/* r9 = low qword of the result; r10 = r9 & 0xFFFF0 is the second offset. */
movq r9, xmm3
|
||||||
|
mov r10d, r9d
|
||||||
|
and r10d, 1048560
|
||||||
|
/* Write (result ^ xmm4) back to the first block. */
movdqa xmm0, xmm3
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
movdqu XMMWORD PTR [rdx+rdi], xmm0
|
||||||
|
/* Replace byte 11 of the block just written with table[byte 11], table = rax. */
psrldq xmm0, 11
|
||||||
|
movq rcx, xmm0
|
||||||
|
movzx ecx, cl
|
||||||
|
mov cl, BYTE PTR [rcx+rax]
|
||||||
|
mov BYTE PTR [rdi+rdx+11], cl
|
||||||
|
/* rbx, r11 = the two qwords of the second block; r9 becomes its address. */
mov rbx, QWORD PTR [r10+rdi]
|
||||||
|
mov rcx, r9
|
||||||
|
lea r9, QWORD PTR [r10+rdi]
|
||||||
|
mov r11, QWORD PTR [r9+8]
|
||||||
|
mov rax, rbx
|
||||||
|
movdqa xmm4, xmm3
|
||||||
|
/* rdx:rax = rbx * rcx (unsigned 64x64 -> 128); high half is added to r8, low half to r13. */
mul rcx
|
||||||
|
/* Restore the context pointer for the next iteration's table-pointer load. */
movq rcx, xmm6
|
||||||
|
add r8, rdx
|
||||||
|
add r13, rax
|
||||||
|
/* The upper qword is stored xored with the value in xmm5; r13 itself is not changed by this xor. */
movq rax, xmm5
|
||||||
|
xor rax, r13
|
||||||
|
mov QWORD PTR [r9], r8
|
||||||
|
xor r8, rbx
|
||||||
|
mov QWORD PTR [r9+8], rax
|
||||||
|
/* Bring the loop counter back into eax. */
movq rax, xmm9
|
||||||
|
mov rdx, r8
|
||||||
|
xor r13, r11
|
||||||
|
and edx, 1048560
|
||||||
|
mov QWORD PTR [rsp+64], rdx
|
||||||
|
sub eax, 1
|
||||||
|
jne cn_liteupx_mainloop_soft_aes_sandybridge
|
||||||
|
|
||||||
|
/*
 * Epilogue: restores xmm6-xmm9 from the slots they were spilled to,
 * releases the 72-byte frame and pops the eight general-purpose registers
 * in reverse push order.  There is no ret or jmp: execution falls through
 * to whatever follows the #include.
 */
movaps xmm6, XMMWORD PTR [rsp]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+16]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm9, XMMWORD PTR [rsp+48]
|
||||||
|
|
||||||
|
add rsp, 72
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
|
@ -14,11 +14,18 @@
|
||||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_fastv2_mainloop_ivybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_fastv2_mainloop_ryzen_asm)
|
||||||
|
.global FN_PREFIX(cn_fastv2_mainloop_bulldozer_asm)
|
||||||
|
.global FN_PREFIX(cn_fastv2_double_mainloop_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_liteupx_mainloop_sandybridge_asm)
|
||||||
|
|
||||||
.global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm)
|
.global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm)
|
||||||
.global FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm)
|
.global FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm)
|
||||||
.global FN_PREFIX(cn_fast_mainloop_soft_aes_sandybridge_asm)
|
.global FN_PREFIX(cn_fast_mainloop_soft_aes_sandybridge_asm)
|
||||||
.global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm)
|
.global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_fastv2_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_liteupx_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
|
@ -105,6 +112,67 @@ FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
|
||||||
|
/*
 * cn_fastv2_mainloop_ivybridge_asm: entry point wrapping the body in
 * cn_fastv2_main_loop_ivybridge.inc.  Reserves 48 bytes of stack, copies
 * the argument from rdi into rcx (presumably the register the included
 * body reads its context pointer from -- the start of that include is not
 * shown here), then releases the 48 bytes and returns.
 * The entry label is aligned to 16 bytes when __APPLE__ is defined and to
 * 64 bytes otherwise.
 */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cn_fastv2_mainloop_ivybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cn_fastv2_main_loop_ivybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
/*
 * cn_fastv2_mainloop_ryzen_asm: entry point wrapping the body in
 * cn_fastv2_main_loop_ryzen.inc.  Copies the argument from rdi into rcx,
 * the register the included body reads its context pointer from.
 * The 48 bytes reserved here give the body room for the register saves it
 * performs at [rsp+16], [rsp+24] and [rsp+32] on entry.  The body ends by
 * jumping to its trailing end label, so execution continues with the
 * add/ret below.
 * The entry label is aligned to 16 bytes when __APPLE__ is defined and to
 * 64 bytes otherwise.
 */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cn_fastv2_mainloop_ryzen_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cn_fastv2_main_loop_ryzen.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
/*
 * cn_fastv2_mainloop_bulldozer_asm: entry point wrapping the body in
 * cn_fastv2_main_loop_bulldozer.inc.  Reserves 48 bytes of stack, copies
 * the argument from rdi into rcx (presumably the register the included
 * body reads its context pointer from -- that include is not shown here),
 * then releases the 48 bytes and returns.
 * The entry label is aligned to 16 bytes when __APPLE__ is defined and to
 * 64 bytes otherwise.
 */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cn_fastv2_mainloop_bulldozer_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cn_fastv2_main_loop_bulldozer.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
/*
 * cn_fastv2_double_mainloop_sandybridge_asm: entry point wrapping the body
 * in cn_fastv2_double_main_loop_sandybridge.inc.  Takes two arguments:
 * rdi is copied into rcx and rsi into rdx (presumably the registers the
 * included body reads its two context pointers from -- that include is not
 * shown here).  Reserves 48 bytes of stack around the body and releases
 * them before returning.
 * The entry label is aligned to 16 bytes when __APPLE__ is defined and to
 * 64 bytes otherwise.
 */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cn_fastv2_double_mainloop_sandybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
mov rdx, rsi
|
||||||
|
#include "cn_fastv2_double_main_loop_sandybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
/*
 * cn_liteupx_mainloop_sandybridge_asm: entry point wrapping the body in
 * cn_liteupx_mainloop_sandybridge.inc.  Copies the argument from rdi into
 * rcx, the register the included body reads its context pointer from.
 * The 48 bytes reserved here give the body room for the register saves it
 * performs at [rsp+8]..[rsp+32] on entry.  The body has no ret of its own
 * and falls through into the add/ret below.
 * The entry label is aligned to 16 bytes when __APPLE__ is defined and to
 * 64 bytes otherwise.
 */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cn_liteupx_mainloop_sandybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cn_liteupx_mainloop_sandybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
#else
|
#else
|
||||||
|
@ -152,3 +220,27 @@ FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm):
|
||||||
#include "cnv2_mainloop_soft_aes_sandybridge.inc"
|
#include "cnv2_mainloop_soft_aes_sandybridge.inc"
|
||||||
add rsp, 48
|
add rsp, 48
|
||||||
ret 0
|
ret 0
|
||||||
|
|
||||||
|
/*
 * cn_fastv2_mainloop_soft_aes_sandybridge_asm: entry point wrapping the
 * body in cn_fastv2_mainloop_soft_aes_sandybridge.inc.  Copies the argument
 * from rdi into rcx, the register the included body reads its context
 * pointer from.
 * The 48 bytes reserved here give the body room for the slots it uses above
 * its entry-time rsp (entry+8 for the saved rcx, entry+24 and entry+32 as
 * loop scratch).  The body ends by jumping to its trailing end label, so
 * execution continues with the add/ret below.
 * The entry label is aligned to 16 bytes when __APPLE__ is defined and to
 * 64 bytes otherwise.
 */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cn_fastv2_mainloop_soft_aes_sandybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cn_fastv2_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
||||||
|
/*
 * cn_liteupx_mainloop_soft_aes_sandybridge_asm: entry point wrapping the
 * body in cn_liteupx_mainloop_soft_aes_sandybridge.inc.  Reserves 48 bytes
 * of stack and copies the argument from rdi into rcx, the register the
 * included body reads its context pointer from.  The body has no ret of its
 * own and falls through into the add/ret below.
 * The entry label is aligned to 16 bytes when __APPLE__ is defined and to
 * 64 bytes otherwise.
 */
#ifdef __APPLE__
|
||||||
|
ALIGN 16
|
||||||
|
#else
|
||||||
|
ALIGN 64
|
||||||
|
#endif
|
||||||
|
FN_PREFIX(cn_liteupx_mainloop_soft_aes_sandybridge_asm):
|
||||||
|
sub rsp, 48
|
||||||
|
mov rcx, rdi
|
||||||
|
#include "cn_liteupx_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
add rsp, 48
|
||||||
|
ret 0
|
||||||
|
|
410
src/crypto/asm/win/cn_fastv2_double_main_loop_sandybridge.inc
Normal file
410
src/crypto/asm/win/cn_fastv2_double_main_loop_sandybridge.inc
Normal file
|
@ -0,0 +1,410 @@
|
||||||
|
mov rax, rsp
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 184
|
||||||
|
|
||||||
|
stmxcsr DWORD PTR [rsp+272]
|
||||||
|
mov DWORD PTR [rsp+276], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+276]
|
||||||
|
|
||||||
|
mov r13, QWORD PTR [rcx+224]
|
||||||
|
mov r9, rdx
|
||||||
|
mov r10, QWORD PTR [rcx+32]
|
||||||
|
mov r8, rcx
|
||||||
|
xor r10, QWORD PTR [rcx]
|
||||||
|
mov r14d, 262144
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rsi, QWORD PTR [rdx+224]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov rdi, QWORD PTR [r9+32]
|
||||||
|
xor rdi, QWORD PTR [r9]
|
||||||
|
mov rbp, QWORD PTR [r9+40]
|
||||||
|
xor rbp, QWORD PTR [r9+8]
|
||||||
|
movq xmm0, rdx
|
||||||
|
movaps XMMWORD PTR [rax-88], xmm6
|
||||||
|
movaps XMMWORD PTR [rax-104], xmm7
|
||||||
|
movaps XMMWORD PTR [rax-120], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm13
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm14
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm15
|
||||||
|
mov rdx, r10
|
||||||
|
movq xmm4, QWORD PTR [r8+96]
|
||||||
|
and edx, 2097136
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
xorps xmm13, xmm13
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r8+72]
|
||||||
|
movq xmm5, QWORD PTR [r8+104]
|
||||||
|
movq xmm7, rax
|
||||||
|
|
||||||
|
mov eax, 1
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm14, rax
|
||||||
|
punpcklqdq xmm14, xmm14
|
||||||
|
|
||||||
|
mov eax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm12, rax
|
||||||
|
punpcklqdq xmm12, xmm12
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [r8+80]
|
||||||
|
xor rax, QWORD PTR [r8+64]
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r9+56]
|
||||||
|
xor rcx, QWORD PTR [r9+24]
|
||||||
|
movq xmm3, rax
|
||||||
|
mov rax, QWORD PTR [r9+48]
|
||||||
|
xor rax, QWORD PTR [r9+16]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp], r13
|
||||||
|
mov rcx, QWORD PTR [r9+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm6, rax
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov QWORD PTR [rsp+256], r10
|
||||||
|
mov rcx, rdi
|
||||||
|
mov QWORD PTR [rsp+264], r11
|
||||||
|
movq xmm8, rax
|
||||||
|
and ecx, 2097136
|
||||||
|
punpcklqdq xmm8, xmm0
|
||||||
|
movq xmm0, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, QWORD PTR [r9+104]
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
movdqu xmm11, XMMWORD PTR [r8]
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
lea r9, QWORD PTR [rdx+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r9]
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
main_loop_double_fast2_sandybridge:
|
||||||
|
movdqu xmm9, xmm15
|
||||||
|
mov eax, edx
|
||||||
|
mov ebx, edx
|
||||||
|
xor eax, 16
|
||||||
|
xor ebx, 32
|
||||||
|
xor edx, 48
|
||||||
|
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm2, r10
|
||||||
|
punpcklqdq xmm2, xmm0
|
||||||
|
aesenc xmm9, xmm2
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [rax+r13]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rbx+r13]
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [rbx+r13], xmm0
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r13]
|
||||||
|
movdqu XMMWORD PTR [rdx+r13], xmm1
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
movdqu XMMWORD PTR [rax+r13], xmm0
|
||||||
|
|
||||||
|
movq r11, xmm9
|
||||||
|
mov edx, r11d
|
||||||
|
and edx, 2097136
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
pxor xmm0, xmm7
|
||||||
|
movdqu XMMWORD PTR [r9], xmm0
|
||||||
|
|
||||||
|
lea rbx, QWORD PTR [rdx+r13]
|
||||||
|
mov r10, QWORD PTR [rdx+r13]
|
||||||
|
|
||||||
|
movdqu xmm10, xmm11
|
||||||
|
movq xmm0, rbp
|
||||||
|
movq xmm11, rdi
|
||||||
|
punpcklqdq xmm11, xmm0
|
||||||
|
aesenc xmm10, xmm11
|
||||||
|
|
||||||
|
mov eax, ecx
|
||||||
|
mov r12d, ecx
|
||||||
|
xor eax, 16
|
||||||
|
xor r12d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [rax+rsi]
|
||||||
|
paddq xmm0, xmm6
|
||||||
|
movdqu xmm1, XMMWORD PTR [r12+rsi]
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm0
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
movdqu xmm0, XMMWORD PTR [rcx+rsi]
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
movdqu XMMWORD PTR [rax+rsi], xmm0
|
||||||
|
|
||||||
|
movq rcx, xmm10
|
||||||
|
and ecx, 2097136
|
||||||
|
|
||||||
|
movdqa xmm0, xmm10
|
||||||
|
pxor xmm0, xmm6
|
||||||
|
movdqu XMMWORD PTR [r8], xmm0
|
||||||
|
mov r12, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov r9, QWORD PTR [rbx+8]
|
||||||
|
|
||||||
|
xor edx, 16
|
||||||
|
mov r8d, edx
|
||||||
|
mov r15d, edx
|
||||||
|
|
||||||
|
movq rdx, xmm5
|
||||||
|
shl rdx, 32
|
||||||
|
movq rax, xmm4
|
||||||
|
xor rdx, rax
|
||||||
|
xor r10, rdx
|
||||||
|
mov rax, r10
|
||||||
|
mul r11
|
||||||
|
mov r11d, r8d
|
||||||
|
xor r11d, 48
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rdx, [r11+r13]
|
||||||
|
movq xmm1, rax
|
||||||
|
xor rax, [r11+r13+8]
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
pxor xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
xor r8d, 32
|
||||||
|
movdqu xmm1, XMMWORD PTR [r11+r13]
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
paddq xmm1, xmm2
|
||||||
|
movdqu XMMWORD PTR [r11+r13], xmm0
|
||||||
|
movdqu xmm0, XMMWORD PTR [r8+r13]
|
||||||
|
movdqu XMMWORD PTR [r8+r13], xmm1
|
||||||
|
paddq xmm0, xmm3
|
||||||
|
movdqu XMMWORD PTR [r15+r13], xmm0
|
||||||
|
|
||||||
|
mov r11, QWORD PTR [rsp+256]
|
||||||
|
add r11, rdx
|
||||||
|
mov rdx, QWORD PTR [rsp+264]
|
||||||
|
add rdx, rax
|
||||||
|
mov QWORD PTR [rbx], r11
|
||||||
|
xor r11, r10
|
||||||
|
mov QWORD PTR [rbx+8], rdx
|
||||||
|
xor rdx, r9
|
||||||
|
mov QWORD PTR [rsp+256], r11
|
||||||
|
and r11d, 2097136
|
||||||
|
mov QWORD PTR [rsp+264], rdx
|
||||||
|
mov QWORD PTR [rsp+8], r11
|
||||||
|
lea r15, QWORD PTR [r11+r13]
|
||||||
|
movdqu xmm15, XMMWORD PTR [r11+r13]
|
||||||
|
lea r13, QWORD PTR [rsi+rcx]
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movaps xmm2, xmm13
|
||||||
|
movq r10, xmm0
|
||||||
|
psllq xmm5, 1
|
||||||
|
shl r10, 32
|
||||||
|
movdqa xmm0, xmm9
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movdqa xmm1, xmm10
|
||||||
|
movq r11, xmm0
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movq r8, xmm1
|
||||||
|
psrldq xmm4, 8
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
movq rax, xmm4
|
||||||
|
xor r10, rax
|
||||||
|
movaps xmm1, xmm13
|
||||||
|
xor r10, r12
|
||||||
|
lea rax, QWORD PTR [r11+1]
|
||||||
|
shr rax, 1
|
||||||
|
movdqa xmm3, xmm9
|
||||||
|
punpcklqdq xmm3, xmm10
|
||||||
|
paddq xmm5, xmm3
|
||||||
|
movq rdx, xmm5
|
||||||
|
psrldq xmm5, 8
|
||||||
|
cvtsi2sd xmm2, rax
|
||||||
|
or edx, -2147483647
|
||||||
|
lea rax, QWORD PTR [r8+1]
|
||||||
|
shr rax, 1
|
||||||
|
movq r9, xmm5
|
||||||
|
cvtsi2sd xmm0, rax
|
||||||
|
or r9d, -2147483647
|
||||||
|
cvtsi2sd xmm1, rdx
|
||||||
|
unpcklpd xmm2, xmm0
|
||||||
|
movaps xmm0, xmm13
|
||||||
|
cvtsi2sd xmm0, r9
|
||||||
|
unpcklpd xmm1, xmm0
|
||||||
|
divpd xmm2, xmm1
|
||||||
|
paddq xmm2, xmm14
|
||||||
|
cvttsd2si rax, xmm2
|
||||||
|
psrldq xmm2, 8
|
||||||
|
mov rbx, rax
|
||||||
|
imul rax, rdx
|
||||||
|
sub r11, rax
|
||||||
|
js div_fix_1_fast2_sandybridge
|
||||||
|
div_fix_1_ret_fast2_sandybridge:
|
||||||
|
|
||||||
|
cvttsd2si rdx, xmm2
|
||||||
|
mov rax, rdx
|
||||||
|
imul rax, r9
|
||||||
|
movd xmm2, r11d
|
||||||
|
movd xmm4, ebx
|
||||||
|
sub r8, rax
|
||||||
|
js div_fix_2_fast2_sandybridge
|
||||||
|
div_fix_2_ret_fast2_sandybridge:
|
||||||
|
|
||||||
|
movd xmm1, r8d
|
||||||
|
movd xmm0, edx
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
punpckldq xmm4, xmm0
|
||||||
|
punpckldq xmm4, xmm2
|
||||||
|
paddq xmm3, xmm4
|
||||||
|
movdqa xmm0, xmm3
|
||||||
|
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm12
|
||||||
|
sqrtpd xmm1, xmm0
|
||||||
|
movq r9, xmm1
|
||||||
|
movdqa xmm5, xmm1
|
||||||
|
psrlq xmm5, 19
|
||||||
|
test r9, 524287
|
||||||
|
je sqrt_fix_1_fast2_sandybridge
|
||||||
|
sqrt_fix_1_ret_fast2_sandybridge:
|
||||||
|
|
||||||
|
movq r9, xmm10
|
||||||
|
psrldq xmm1, 8
|
||||||
|
movq r8, xmm1
|
||||||
|
test r8, 524287
|
||||||
|
je sqrt_fix_2_fast2_sandybridge
|
||||||
|
sqrt_fix_2_ret_fast2_sandybridge:
|
||||||
|
|
||||||
|
mov r12d, ecx
|
||||||
|
mov r8d, ecx
|
||||||
|
xor r12d, 16
|
||||||
|
xor r8d, 32
|
||||||
|
xor ecx, 48
|
||||||
|
mov rax, r10
|
||||||
|
mul r9
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm3, rdx
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
|
||||||
|
movdqu xmm0, XMMWORD PTR [r12+rsi]
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
movdqu xmm1, XMMWORD PTR [r8+rsi]
|
||||||
|
xor rdx, [r8+rsi]
|
||||||
|
xor rax, [r8+rsi+8]
|
||||||
|
movdqu xmm3, XMMWORD PTR [rcx+rsi]
|
||||||
|
paddq xmm0, xmm6
|
||||||
|
paddq xmm1, xmm11
|
||||||
|
paddq xmm3, xmm8
|
||||||
|
movdqu XMMWORD PTR [r8+rsi], xmm0
|
||||||
|
movdqu XMMWORD PTR [rcx+rsi], xmm1
|
||||||
|
movdqu XMMWORD PTR [r12+rsi], xmm3
|
||||||
|
|
||||||
|
add rdi, rdx
|
||||||
|
mov QWORD PTR [r13], rdi
|
||||||
|
xor rdi, r10
|
||||||
|
mov ecx, edi
|
||||||
|
and ecx, 2097136
|
||||||
|
lea r8, QWORD PTR [rcx+rsi]
|
||||||
|
|
||||||
|
mov rdx, QWORD PTR [r13+8]
|
||||||
|
add rbp, rax
|
||||||
|
mov QWORD PTR [r13+8], rbp
|
||||||
|
movdqu xmm11, XMMWORD PTR [rcx+rsi]
|
||||||
|
xor rbp, rdx
|
||||||
|
mov r13, QWORD PTR [rsp]
|
||||||
|
movdqa xmm3, xmm7
|
||||||
|
mov rdx, QWORD PTR [rsp+8]
|
||||||
|
movdqa xmm8, xmm6
|
||||||
|
mov r10, QWORD PTR [rsp+256]
|
||||||
|
movdqa xmm7, xmm9
|
||||||
|
mov r11, QWORD PTR [rsp+264]
|
||||||
|
movdqa xmm6, xmm10
|
||||||
|
mov r9, r15
|
||||||
|
dec r14d
|
||||||
|
jne main_loop_double_fast2_sandybridge
|
||||||
|
|
||||||
|
ldmxcsr DWORD PTR [rsp+272]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+48]
|
||||||
|
lea r11, QWORD PTR [rsp+184]
|
||||||
|
movaps xmm6, XMMWORD PTR [r11-24]
|
||||||
|
movaps xmm7, XMMWORD PTR [r11-40]
|
||||||
|
movaps xmm8, XMMWORD PTR [r11-56]
|
||||||
|
movaps xmm9, XMMWORD PTR [r11-72]
|
||||||
|
movaps xmm10, XMMWORD PTR [r11-88]
|
||||||
|
movaps xmm11, XMMWORD PTR [r11-104]
|
||||||
|
movaps xmm12, XMMWORD PTR [r11-120]
|
||||||
|
movaps xmm14, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm15, XMMWORD PTR [rsp+16]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
jmp cnv2_double_mainloop_asm_fast2_sandybridge_endp
|
||||||
|
|
||||||
|
div_fix_1_fast2_sandybridge:
|
||||||
|
dec rbx
|
||||||
|
add r11, rdx
|
||||||
|
jmp div_fix_1_ret_fast2_sandybridge
|
||||||
|
|
||||||
|
div_fix_2_fast2_sandybridge:
|
||||||
|
dec rdx
|
||||||
|
add r8, r9
|
||||||
|
jmp div_fix_2_ret_fast2_sandybridge
|
||||||
|
|
||||||
|
sqrt_fix_1_fast2_sandybridge:
|
||||||
|
movq r8, xmm3
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
dec r9
|
||||||
|
mov r11d, -1022
|
||||||
|
shl r11, 32
|
||||||
|
mov rax, r9
|
||||||
|
shr r9, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r9
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+r11+1]
|
||||||
|
add rax, r11
|
||||||
|
imul rdx, rax
|
||||||
|
sub rdx, r8
|
||||||
|
adc r9, 0
|
||||||
|
movq xmm5, r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp sqrt_fix_1_ret_fast2_sandybridge
|
||||||
|
|
||||||
|
sqrt_fix_2_fast2_sandybridge:
|
||||||
|
psrldq xmm3, 8
|
||||||
|
movq r11, xmm3
|
||||||
|
dec r8
|
||||||
|
mov ebx, -1022
|
||||||
|
shl rbx, 32
|
||||||
|
mov rax, r8
|
||||||
|
shr r8, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rdx, r8
|
||||||
|
sub rdx, rax
|
||||||
|
lea rdx, [rdx+rbx+1]
|
||||||
|
add rax, rbx
|
||||||
|
imul rdx, rax
|
||||||
|
sub rdx, r11
|
||||||
|
adc r8, 0
|
||||||
|
movq xmm0, r8
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
jmp sqrt_fix_2_ret_fast2_sandybridge
|
||||||
|
|
||||||
|
cnv2_double_mainloop_asm_fast2_sandybridge_endp:
|
180
src/crypto/asm/win/cn_fastv2_main_loop_bulldozer.inc
Normal file
180
src/crypto/asm/win/cn_fastv2_main_loop_bulldozer.inc
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
; Entry/setup code for the fast2 CryptoNight v2 main loop (Bulldozer variant).
; Everything is read through rcx. State left for the loop:
;   r8  = [rcx+32]^[rcx],  r11 = [rcx+40]^[rcx+8]
;   xmm3 = low qword [rcx+48]^[rcx+16], high qword [rcx+56]^[rcx+24]
;   xmm4 = low qword [rcx+80]^[rcx+64], high qword [rcx+88]^[rcx+72]
;   rbx = pointer loaded from [rcx+224] (base of every [reg+rbx] access)
;   rdi = [rcx+104], r15 = [rcx+96], r10 = r8 & 0x1FFFF0, ebp = 262144 iterations
; NOTE(review): rcx is presumably the hash-context pointer (first Win64 argument) - confirm against the caller.
; Save rbx, rbp and rsi in the stack slots above the return address.
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
; 64 bytes of locals: [rsp] saved MXCSR, [rsp+4] new MXCSR, [rsp+16..63] xmm6-xmm8 save area.
sub rsp, 64
|
||||||
|
|
||||||
|
; Save the current MXCSR and load 24448 (0x5F80) for the duration of the loop.
; NOTE(review): presumably selects the rounding mode the sqrt step relies on - confirm against the MXCSR bit layout.
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
; r9 keeps the input pointer because rcx is overwritten further down.
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
; Loop counter.
mov ebp, 262144
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movd xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movd xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
; 2097136 = 0x1FFFF0: 16-byte-aligned offset below 2 MiB.
and r10d, 2097136
|
||||||
|
; xmm6-xmm8 are saved here and restored at loop exit.
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movd xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
; xmm8 = 0; the loop copies it into xmm1 before sqrtsd.
xorps xmm8, xmm8
|
||||||
|
; rax = 1023 << 52 (0x3FF0000000000000): the shift discards every bit of rax above bit 11,
; so only the 16-bit value just written to ax survives.
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movd xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movd xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
; One iteration of the main loop; repeated until ebp counts down to zero.
; r10 is the current 16-byte-aligned offset into the buffer at rbx.
cnv2_main_loop_fast2_bulldozer:
|
||||||
|
; xmm5 = 16 bytes at the current offset; xmm6 = r8 (low qword) : r11 (high qword).
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
movd xmm6, r8
|
||||||
|
pinsrq xmm6, r11, 1
|
||||||
|
; rdx = address of the current line; r9 = 2*rdi; then rdi <<= 32.
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
; Offsets of the three neighbouring 16-byte chunks: ecx = r10^16, eax = r10^32, r10 = r10^48.
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
; One AES round on xmm5 with xmm6 as the round key.
aesenc xmm5, xmm6
|
||||||
|
; Load the three chunks, add xmm3/xmm6/xmm4 per 64-bit lane, and store them back rotated:
;   [^16] <- [^48]+xmm4,  [^32] <- [^16]+xmm3,  [^48] <- [^32]+xmm6.
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
; xmm1 = xmm8; rsi = r15 ^ (previous rdi << 32).
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
|
||||||
|
; rdi = 1023 << 52 (0x3FF0000000000000), added to the sqrt input below.
mov edi, 1023
|
||||||
|
shl rdi, 52
|
||||||
|
|
||||||
|
; r14 = low qword of the AES result, rax = high qword (the dividend).
movd r14, xmm5
|
||||||
|
pextrq rax, xmm5, 1
|
||||||
|
|
||||||
|
; Write (AES result ^ xmm3) back to the line read at the top; r10 = r14 & 0x1FFFF0 is the next offset.
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
; rsi ^= first qword of the new line; r12 = its address; r13 = its second qword.
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
; Divisor r9 = (2*previous rdi + r14) truncated to 32 bits, with bits 31 and 0 forced on (0x80000001).
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
; Unsigned divide 0:rax by r9 -> quotient in rax, remainder in rdx.
xor edx, edx
|
||||||
|
div r9
|
||||||
|
; r15 = (remainder << 32) + low 32 bits of the quotient.
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
lea r15, [rax+rdx]
|
||||||
|
; sqrt input = ((r14 + r15) >> 12) + (1023 << 52), taken as the bit pattern of a double.
lea rax, [r14+r15]
|
||||||
|
shr rax, 12
|
||||||
|
add rax, rdi
|
||||||
|
movd xmm0, rax
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
; rdi = raw bits of the square root. If its low 19 bits are all zero take the fix-up path,
; otherwise rdi >>= 19.
movd rdi, xmm1
|
||||||
|
test rdi, 524287
|
||||||
|
je sqrt_fixup_fast2_bulldozer
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
; Both paths rejoin here with the final rdi for the next iteration.
sqrt_fixup_fast2_bulldozer_ret:
|
||||||
|
; 64x64 -> 128-bit multiply: rdx:rax = rsi * r14; xmm0 = rdx (low qword) : rax (high qword).
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movd xmm1, rax
|
||||||
|
movd xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
; Second shuffle, around the new offset: r9d = r10^16, ecx = r10^32, r10 = r10^48.
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
; xmm1 = chunk [^32]; its two qwords are also XORed into the product halves (rdx, rax).
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
; [^16] <- [^48]+xmm4,  [^32] <- ([^16] ^ product)+xmm3,  [^48] <- [^32]+xmm6.
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
; Rotate the carried vectors: xmm4 <- xmm3 here, xmm3 <- xmm5 just before the loop branch.
movdqa xmm4, xmm3
|
||||||
|
; r8 += rdx, r11 += rax; store both to the line at r12; then r8 ^= rsi, r11 ^= r13.
; r10 = r8 & 0x1FFFF0 becomes the offset for the next iteration.
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne cnv2_main_loop_fast2_bulldozer
|
||||||
|
|
||||||
|
; Loop exit: restore the MXCSR value saved at entry, xmm6-xmm8, rbx/rbp/rsi and the pushed registers.
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
; r11 = rsp+64, i.e. the stack pointer as it was right after the five pushes.
lea r11, QWORD PTR [rsp+64]
|
||||||
|
; r11+56/+64/+72 are the slots rbx/rbp/rsi were stored in at entry (entry rsp+16/+24/+32).
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
; r11-48 is the same address as rsp+16, where xmm8 was saved.
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
; Jump over the out-of-line fix-up code to the end label.
jmp cnv2_main_loop_fast2_bulldozer_endp
|
||||||
|
|
||||||
|
; Out-of-line path taken when the low 19 bits of the sqrtsd result bits (rdi) are all zero.
; Produces the rdi value the main loop otherwise gets from 'shr rdi, 19', then jumps back.
; Clobbers rax, rcx, rdx and r9.
sqrt_fixup_fast2_bulldozer:
|
||||||
|
; r9 = low qword of xmm5 + r15: the value the main loop shifted and fed to sqrtsd.
movd r9, xmm5
|
||||||
|
add r9, r15
|
||||||
|
dec rdi
|
||||||
|
; rdx = 0xFFFFFC02 << 32 (mov into edx zero-extends -1022).
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
; From the decremented value: rdi = value >> 19, rax = value >> 20.
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
; rcx = (rdi - rax + rdx + 1) * (rax + rdx) - r9; the borrow of the final sub is added to rdi.
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp sqrt_fixup_fast2_bulldozer_ret
|
||||||
|
|
||||||
|
cnv2_main_loop_fast2_bulldozer_endp:
|
182
src/crypto/asm/win/cn_fastv2_main_loop_ivybridge.inc
Normal file
182
src/crypto/asm/win/cn_fastv2_main_loop_ivybridge.inc
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
; Entry/setup code for the fast2 CryptoNight v2 main loop (Ivy Bridge variant).
; Everything is read through rcx. State left for the loop:
;   r8  = [rcx+32]^[rcx],  r11 = [rcx+40]^[rcx+8]
;   xmm4 = low qword [rcx+48]^[rcx+16], high qword [rcx+56]^[rcx+24]
;   xmm5 = low qword [rcx+80]^[rcx+64], high qword [rcx+88]^[rcx+72]
;   xmm3 = qword [rcx+104], r15 = [rcx+96], xmm8 = 1023 << 52
;   rbx = pointer loaded from [rcx+224], r10 = r8 & 0x1FFFF0, xmm6 = 16 bytes at [r10+rbx]
;   r13d = 0x80000001, esi = 262144 iterations
; NOTE(review): rcx is presumably the hash-context pointer (first Win64 argument) - confirm against the caller.
; rbx goes into a stack slot above the return address; the other registers are pushed.
mov QWORD PTR [rsp+24], rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
; 80 bytes of locals: [rsp] saved MXCSR, [rsp+4] new MXCSR, [rsp+32..79] xmm6-xmm8 save area.
sub rsp, 80
|
||||||
|
|
||||||
|
; Save the current MXCSR and load 24448 (0x5F80) for the duration of the loop.
; NOTE(review): presumably selects the rounding mode the sqrt step relies on - confirm against the MXCSR bit layout.
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
; r9 keeps the input pointer because rcx is overwritten further down.
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
; Loop counter.
mov esi, 262144
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
; r13d = 0x80000001, OR-ed into the divisor on every iteration.
mov r13d, -2147483647
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm4, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
movq xmm3, QWORD PTR [r9+104]
|
||||||
|
; xmm6-xmm8 are saved here and restored at loop exit.
movaps XMMWORD PTR [rsp+64], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
; 2097136 = 0x1FFFF0: 16-byte-aligned offset below 2 MiB.
and r10d, 2097136
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
; rax = 1023 << 52 (0x3FF0000000000000): the shift discards every bit of rax above bit 11,
; so only the 16-bit value just written to ax survives.
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm8, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
; Preload the first 16-byte line; later iterations load the next line at the bottom of the loop.
movdqu xmm6, XMMWORD PTR [r10+rbx]
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
; One iteration of the main loop; repeated until rsi counts down to zero.
; On entry r10 is the current 16-byte-aligned offset into the buffer at rbx and
; xmm6 already holds the 16 bytes at that offset.
$main_loop_fast2_ivybridge:
|
||||||
|
; rdx = address of the current line; ecx = r10^16, eax = r10^32, r10 = r10^48; rdi = r15.
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
mov rdi, r15
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
; xmm7 = r8 (low qword) : r11 (high qword); one AES round on xmm6 with xmm7 as the round key.
movq xmm0, r11
|
||||||
|
movq xmm7, r8
|
||||||
|
punpcklqdq xmm7, xmm0
|
||||||
|
aesenc xmm6, xmm7
|
||||||
|
; rbp = low qword of the AES result; r9 = rbp & 0x1FFFF0 is the next offset.
movq rbp, xmm6
|
||||||
|
mov r9, rbp
|
||||||
|
and r9d, 2097136
|
||||||
|
; Load the three neighbouring chunks, add xmm4/xmm7/xmm5 per 64-bit lane, and store them back rotated:
;   [^16] <- [^48]+xmm5,  [^32] <- [^16]+xmm4,  [^48] <- [^32]+xmm7.
movdqu xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
; r10 = next offset ^ 32 (used after the sqrt step).
mov r10, r9
|
||||||
|
xor r10d, 32
|
||||||
|
; rcx = low qword of xmm3; rdi = r15 ^ (rcx << 32).
movq rcx, xmm3
|
||||||
|
mov rax, rcx
|
||||||
|
shl rax, 32
|
||||||
|
xor rdi, rax
|
||||||
|
; Write (AES result ^ xmm4) back to the line this iteration started from.
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
movdqu XMMWORD PTR [rdx], xmm0
|
||||||
|
; rdi ^= first qword of the new line; r14 = its address; r12 = its second qword.
xor rdi, QWORD PTR [r9+rbx]
|
||||||
|
lea r14, QWORD PTR [r9+rbx]
|
||||||
|
mov r12, QWORD PTR [r14+8]
|
||||||
|
; Divisor r9 = (2*ecx + ebp) as 32 bits, OR-ed with r13d (0x80000001); dividend = 0 : high qword of xmm6.
xor edx, edx
|
||||||
|
lea r9d, DWORD PTR [ecx+ecx]
|
||||||
|
add r9d, ebp
|
||||||
|
movdqa xmm0, xmm6
|
||||||
|
psrldq xmm0, 8
|
||||||
|
or r9d, r13d
|
||||||
|
movq rax, xmm0
|
||||||
|
div r9
|
||||||
|
; xmm3 is cleared before sqrtsd writes its low lane.
xorps xmm3, xmm3
|
||||||
|
; rdx = (remainder << 32) + low 32 bits of the quotient; kept in r15 for the next iteration.
mov eax, eax
|
||||||
|
shl rdx, 32
|
||||||
|
add rdx, rax
|
||||||
|
; r9 = rdx + rbp; sqrt input = (r9 >> 12) + (1023 << 52), taken as the bit pattern of a double.
lea r9, QWORD PTR [rdx+rbp]
|
||||||
|
mov r15, rdx
|
||||||
|
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movq xmm0, rax
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm3, xmm0
|
||||||
|
; rdx = raw bits of the square root. If its low 19 bits are all zero take the fix-up path,
; otherwise xmm3 >>= 19 per 64-bit lane.
movq rdx, xmm3
|
||||||
|
test edx, 524287
|
||||||
|
je $sqrt_fixup_fast2_ivybridge
|
||||||
|
psrlq xmm3, 19
|
||||||
|
; Both paths rejoin here with the final xmm3 for the next iteration.
$sqrt_fixup_fast2_ivybridge_ret:
|
||||||
|
|
||||||
|
; ecx = next offset ^ 32; 64x64 -> 128-bit multiply: rdx:rax = rdi * rbp.
mov ecx, r10d
|
||||||
|
mov rax, rdi
|
||||||
|
mul rbp
|
||||||
|
; xmm2 gets the unmodified high half; rdx ^= [^32]; r8 += rdx; store r8; then r8 ^= rdi.
movq xmm2, rdx
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
add r8, rdx
|
||||||
|
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rdi
|
||||||
|
; edi = r8 & 0x1FFFF0: offset for the following iteration.
mov edi, r8d
|
||||||
|
and edi, 2097136
|
||||||
|
; Same for the low half: xmm0 = rax; rax ^= [^32 + 8]; r11 += rax; store r11.
movq xmm0, rax
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r14+8], r11
|
||||||
|
; xmm2 = product high half (low qword) : product low half (high qword).
punpcklqdq xmm2, xmm0
|
||||||
|
|
||||||
|
; r10 holds offset^32 here, so r9d = r10^48 is offset^16 and r10 ^= 16 gives offset^48.
mov r9d, r10d
|
||||||
|
xor r9d, 48
|
||||||
|
xor r10d, 16
|
||||||
|
; Second shuffle: [^16] <- [^48]+xmm5,  [^32] <- ([^16] ^ product)+xmm4,  [^48] <- [^32]+xmm7.
pxor xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
; Rotate the carried vectors: xmm5 <- xmm4, xmm4 <- AES result.
movdqa xmm5, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+rbx], xmm0
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
movdqu XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
; Load the line for the next iteration, set r10 to its offset, and finish r11 ^= r12.
movdqu xmm6, [rdi+rbx]
|
||||||
|
mov r10d, edi
|
||||||
|
xor r11, r12
|
||||||
|
dec rsi
|
||||||
|
jne $main_loop_fast2_ivybridge
|
||||||
|
|
||||||
|
; Loop exit: restore the MXCSR value saved at entry, rbx, xmm6-xmm8 and the pushed registers.
ldmxcsr DWORD PTR [rsp]
|
||||||
|
; rsp+160 = 80 bytes of locals + 7 pushes (56) + 24: the slot rbx was stored in at entry.
mov rbx, QWORD PTR [rsp+160]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
add rsp, 80
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
; Jump over the out-of-line fix-up code to the end label.
jmp $cnv2_main_loop_fast2_ivybridge_endp
|
||||||
|
|
||||||
|
; Out-of-line path taken when the low 19 bits of the sqrtsd result bits (rdx) are all zero.
; Produces the xmm3 value the main loop otherwise gets from 'psrlq xmm3, 19', then jumps back.
; Expects r9 = the value the main loop shifted and fed to sqrtsd. Clobbers rax, rcx, rdx;
; r13 is used as scratch and reloaded with its loop constant.
$sqrt_fixup_fast2_ivybridge:
|
||||||
|
dec rdx
|
||||||
|
; r13 = 0xFFFFFC02 << 32 (mov into r13d zero-extends -1022).
mov r13d, -1022
|
||||||
|
shl r13, 32
|
||||||
|
; From the decremented value: rdx = value >> 19, rax = value >> 20.
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
add rax, r13
|
||||||
|
; Subtracting (not r13) adds r13 + 1, so rcx = rdx - (value >> 20) + r13 + 1.
not r13
|
||||||
|
sub rcx, r13
|
||||||
|
; Put back the 0x80000001 constant the main loop expects in r13d.
mov r13d, -2147483647
|
||||||
|
; rcx = rcx * rax - r9; the borrow of the final sub is added to rdx.
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movq xmm3, rdx
|
||||||
|
jmp $sqrt_fixup_fast2_ivybridge_ret
|
||||||
|
|
||||||
|
$cnv2_main_loop_fast2_ivybridge_endp:
|
179
src/crypto/asm/win/cn_fastv2_main_loop_ryzen.inc
Normal file
179
src/crypto/asm/win/cn_fastv2_main_loop_ryzen.inc
Normal file
|
@ -0,0 +1,179 @@
|
||||||
|
; Entry/setup code for the fast2 CryptoNight v2 main loop (Ryzen variant).
; Everything is read through rcx. State left for the loop:
;   r8  = [rcx+32]^[rcx],  r11 = [rcx+40]^[rcx+8]
;   xmm3 = low qword [rcx+48]^[rcx+16], high qword [rcx+56]^[rcx+24]
;   xmm4 = low qword [rcx+80]^[rcx+64], high qword [rcx+88]^[rcx+72]
;   xmm7 = 1023 << 52 in the low qword, xmm8 = 0
;   rbx = pointer loaded from [rcx+224] (base of every [reg+rbx] access)
;   rdi = [rcx+104], r15 = [rcx+96], r10 = r8 & 0x1FFFF0, ebp = 262144 iterations
; NOTE(review): rcx is presumably the hash-context pointer (first Win64 argument) - confirm against the caller.
; Save rbx, rbp and rsi in the stack slots above the return address.
mov QWORD PTR [rsp+16], rbx
|
||||||
|
mov QWORD PTR [rsp+24], rbp
|
||||||
|
mov QWORD PTR [rsp+32], rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
; 64 bytes of locals: [rsp] saved MXCSR, [rsp+4] new MXCSR, [rsp+16..63] xmm6-xmm8 save area.
sub rsp, 64
|
||||||
|
|
||||||
|
; Save the current MXCSR and load 24448 (0x5F80) for the duration of the loop.
; NOTE(review): presumably selects the rounding mode the sqrt step relies on - confirm against the MXCSR bit layout.
stmxcsr DWORD PTR [rsp]
|
||||||
|
mov DWORD PTR [rsp+4], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
; r9 keeps the input pointer because rcx is overwritten further down.
mov r9, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
; Loop counter.
mov ebp, 262144
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r11, QWORD PTR [rcx+40]
|
||||||
|
mov r10, r8
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
movq xmm3, rax
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
xor r11, QWORD PTR [rcx+8]
|
||||||
|
mov rbx, QWORD PTR [rcx+224]
|
||||||
|
mov rax, QWORD PTR [r9+80]
|
||||||
|
xor rax, QWORD PTR [r9+64]
|
||||||
|
movq xmm0, rdx
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r9+72]
|
||||||
|
mov rdi, QWORD PTR [r9+104]
|
||||||
|
; 2097136 = 0x1FFFF0: 16-byte-aligned offset below 2 MiB.
and r10d, 2097136
|
||||||
|
; xmm6-xmm8 are saved here and restored at loop exit.
movaps XMMWORD PTR [rsp+48], xmm6
|
||||||
|
movq xmm4, rax
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm8
|
||||||
|
; xmm8 = 0; the loop copies it into xmm1.
xorps xmm8, xmm8
|
||||||
|
; rax = 1023 << 52 (0x3FF0000000000000): the shift discards every bit of rax above bit 11,
; so only the 16-bit value just written to ax survives.
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm7, rax
|
||||||
|
mov r15, QWORD PTR [r9+96]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
movq xmm0, rcx
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
; One iteration of the main loop; repeated until ebp counts down to zero.
; r10 is the current 16-byte-aligned offset into the buffer at rbx.
$main_loop_fast2_ryzen:
|
||||||
|
; xmm5 = 16 bytes at the current offset; xmm6 = r8 (low qword) : r11 (high qword).
movdqa xmm5, XMMWORD PTR [r10+rbx]
|
||||||
|
movq xmm0, r11
|
||||||
|
movq xmm6, r8
|
||||||
|
punpcklqdq xmm6, xmm0
|
||||||
|
; rdx = address of the current line; r9 = 2*rdi; then rdi <<= 32.
lea rdx, QWORD PTR [r10+rbx]
|
||||||
|
lea r9, QWORD PTR [rdi+rdi]
|
||||||
|
shl rdi, 32
|
||||||
|
|
||||||
|
; Offsets of the three neighbouring 16-byte chunks: ecx = r10^16, eax = r10^32, r10 = r10^48.
mov ecx, r10d
|
||||||
|
mov eax, r10d
|
||||||
|
xor ecx, 16
|
||||||
|
xor eax, 32
|
||||||
|
xor r10d, 48
|
||||||
|
; One AES round on xmm5 with xmm6 as the round key.
aesenc xmm5, xmm6
|
||||||
|
; Load the three chunks, add xmm3/xmm6/xmm4 per 64-bit lane, and store them back rotated:
;   [^16] <- [^48]+xmm4,  [^32] <- [^16]+xmm3,  [^48] <- [^32]+xmm6.
movdqa xmm2, XMMWORD PTR [rcx+rbx]
|
||||||
|
movdqa xmm1, XMMWORD PTR [rax+rbx]
|
||||||
|
movdqa xmm0, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
paddq xmm0, xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm0
|
||||||
|
movdqa XMMWORD PTR [rax+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
; xmm1 = xmm8; rsi = r15 ^ (previous rdi << 32); r14 = low qword of the AES result.
movaps xmm1, xmm8
|
||||||
|
mov rsi, r15
|
||||||
|
xor rsi, rdi
|
||||||
|
movq r14, xmm5
|
||||||
|
; Write (AES result ^ xmm3) back to the line read at the top; r10 = r14 & 0x1FFFF0 is the next offset.
movdqa xmm0, xmm5
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
mov r10, r14
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa XMMWORD PTR [rdx], xmm0
|
||||||
|
; rsi ^= first qword of the new line; r12 = its address; r13 = its second qword.
xor rsi, QWORD PTR [r10+rbx]
|
||||||
|
lea r12, QWORD PTR [r10+rbx]
|
||||||
|
mov r13, QWORD PTR [r10+rbx+8]
|
||||||
|
|
||||||
|
; Divisor r9 = (2*previous rdi + r14) truncated to 32 bits, with bits 31 and 0 forced on (0x80000001).
add r9d, r14d
|
||||||
|
or r9d, -2147483647
|
||||||
|
; Dividend = 0 : high qword of the AES result.
xor edx, edx
|
||||||
|
movdqa xmm0, xmm5
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movq rax, xmm0
|
||||||
|
|
||||||
|
; Unsigned divide -> quotient in rax, remainder in rdx.
div r9
|
||||||
|
; Interleave dwords so the low qword of xmm0 = (low 32 bits of remainder << 32) | low 32 bits of quotient;
; that qword is kept in r15 for the next iteration.
movq xmm0, rax
|
||||||
|
movq xmm1, rdx
|
||||||
|
punpckldq xmm0, xmm1
|
||||||
|
movq r15, xmm0
|
||||||
|
; Low qword: xmm0 = r15 + low qword of xmm5 (copy kept in xmm2 for the fix-up path);
; sqrt input = (that >> 12) + (1023 << 52), taken as the bit pattern of a double.
paddq xmm0, xmm5
|
||||||
|
movdqa xmm2, xmm0
|
||||||
|
psrlq xmm0, 12
|
||||||
|
paddq xmm0, xmm7
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
; rdi = raw bits of the square root. If its low 19 bits are all zero take the fix-up path,
; otherwise rdi >>= 19.
movq rdi, xmm1
|
||||||
|
test rdi, 524287
|
||||||
|
je $sqrt_fixup_fast2_ryzen
|
||||||
|
shr rdi, 19
|
||||||
|
|
||||||
|
; Both paths rejoin here with the final rdi for the next iteration.
$sqrt_fixup_fast2_ryzen_ret:
|
||||||
|
; 64x64 -> 128-bit multiply: rdx:rax = rsi * r14; xmm0 = rdx (low qword) : rax (high qword).
mov rax, rsi
|
||||||
|
mul r14
|
||||||
|
movq xmm1, rax
|
||||||
|
movq xmm0, rdx
|
||||||
|
punpcklqdq xmm0, xmm1
|
||||||
|
|
||||||
|
; Second shuffle, around the new offset: r9d = r10^16, ecx = r10^32, r10 = r10^48.
mov r9d, r10d
|
||||||
|
mov ecx, r10d
|
||||||
|
xor r9d, 16
|
||||||
|
xor ecx, 32
|
||||||
|
xor r10d, 48
|
||||||
|
; xmm1 = chunk [^32]; its two qwords are also XORed into the product halves (rdx, rax).
movdqa xmm1, XMMWORD PTR [rcx+rbx]
|
||||||
|
xor rdx, [rcx+rbx]
|
||||||
|
xor rax, [rcx+rbx+8]
|
||||||
|
; [^16] <- [^48]+xmm4,  [^32] <- ([^16] ^ product)+xmm3,  [^48] <- [^32]+xmm6.
movdqa xmm2, XMMWORD PTR [r9+rbx]
|
||||||
|
pxor xmm2, xmm0
|
||||||
|
paddq xmm4, XMMWORD PTR [r10+rbx]
|
||||||
|
paddq xmm2, xmm3
|
||||||
|
paddq xmm1, xmm6
|
||||||
|
movdqa XMMWORD PTR [r9+rbx], xmm4
|
||||||
|
movdqa XMMWORD PTR [rcx+rbx], xmm2
|
||||||
|
movdqa XMMWORD PTR [r10+rbx], xmm1
|
||||||
|
|
||||||
|
; Rotate the carried vectors: xmm4 <- xmm3 here, xmm3 <- xmm5 just before the loop branch.
movdqa xmm4, xmm3
|
||||||
|
; r8 += rdx, r11 += rax; store both to the line at r12; then r8 ^= rsi, r11 ^= r13.
; r10 = r8 & 0x1FFFF0 becomes the offset for the next iteration.
add r8, rdx
|
||||||
|
add r11, rax
|
||||||
|
mov QWORD PTR [r12], r8
|
||||||
|
xor r8, rsi
|
||||||
|
mov QWORD PTR [r12+8], r11
|
||||||
|
mov r10, r8
|
||||||
|
xor r11, r13
|
||||||
|
and r10d, 2097136
|
||||||
|
movdqa xmm3, xmm5
|
||||||
|
dec ebp
|
||||||
|
jne $main_loop_fast2_ryzen
|
||||||
|
|
||||||
|
; Loop exit: restore the MXCSR value saved at entry, xmm6-xmm8, rbx/rbp/rsi and the pushed registers.
ldmxcsr DWORD PTR [rsp]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+48]
|
||||||
|
; r11 = rsp+64, i.e. the stack pointer as it was right after the five pushes.
lea r11, QWORD PTR [rsp+64]
|
||||||
|
; r11+56/+64/+72 are the slots rbx/rbp/rsi were stored in at entry (entry rsp+16/+24/+32).
mov rbx, QWORD PTR [r11+56]
|
||||||
|
mov rbp, QWORD PTR [r11+64]
|
||||||
|
mov rsi, QWORD PTR [r11+72]
|
||||||
|
; r11-48 is the same address as rsp+16, where xmm8 was saved.
movaps xmm8, XMMWORD PTR [r11-48]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
mov rsp, r11
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
; Jump over the out-of-line fix-up code to the end label.
jmp $cnv2_main_loop_fast2_ryzen_endp
|
||||||
|
|
||||||
|
; Out-of-line path taken when the low 19 bits of the sqrtsd result bits (rdi) are all zero.
; Produces the rdi value the main loop otherwise gets from 'shr rdi, 19', then jumps back.
; Clobbers rax, rcx, rdx and r9.
$sqrt_fixup_fast2_ryzen:
|
||||||
|
; r9 = low qword of xmm2: the value the main loop shifted and fed to sqrtsd.
movq r9, xmm2
|
||||||
|
dec rdi
|
||||||
|
; rdx = 0xFFFFFC02 << 32 (mov into edx zero-extends -1022).
mov edx, -1022
|
||||||
|
shl rdx, 32
|
||||||
|
; From the decremented value: rdi = value >> 19, rax = value >> 20.
mov rax, rdi
|
||||||
|
shr rdi, 19
|
||||||
|
shr rax, 20
|
||||||
|
; rcx = (rdi - rax + rdx + 1) * (rax + rdx) - r9; the borrow of the final sub is added to rdi.
mov rcx, rdi
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+rdx+1]
|
||||||
|
add rax, rdx
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdi, 0
|
||||||
|
jmp $sqrt_fixup_fast2_ryzen_ret
|
||||||
|
|
||||||
|
$cnv2_main_loop_fast2_ryzen_endp:
|
267
src/crypto/asm/win/cn_fastv2_mainloop_soft_aes_sandybridge.inc
Normal file
267
src/crypto/asm/win/cn_fastv2_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,267 @@
|
||||||
|
; Entry/setup code for the fast2 CryptoNight v2 main loop without the aesenc instruction
; (table-lookup variant, Sandy Bridge). Everything is read through rcx. State left for the loop:
;   r8  = [rcx+32]^[rcx],  r9 = [rcx+40]^[rcx+8] (also kept at [rsp+240])
;   xmm4 = low qword [rcx+48]^[rcx+16], high qword [rcx+56]^[rcx+24]
;   xmm5 = low qword [rcx+80]^[rcx+64], high qword [rcx+88]^[rcx+72]
;   xmm10 = qword [rcx+96], xmm13 = qword [rcx+104], xmm8 = 1023 << 52, xmm9 = 0
;   r11 = xmm12 = pointer loaded from [rcx+224], rax = r8 & 0x1FFFF0 (also kept at [rsp+248])
;   r10 = original rcx, r12d = 262144 iterations
; NOTE(review): rcx is presumably the hash-context pointer (first Win64 argument) - confirm against the caller.
; Keep the incoming rcx in the stack slot just above the return address; the loop reloads it from there.
mov QWORD PTR [rsp+8], rcx
|
||||||
|
push rbx
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
; 152 bytes of locals: [rsp] new MXCSR, [rsp+4] saved MXCSR, [rsp+16..143] xmm6-xmm13 save area.
; With the 8 pushes the entry-time slot [rsp+8] is now at [rsp+224].
sub rsp, 152
|
||||||
|
|
||||||
|
; Save the current MXCSR and load 24448 (0x5F80) for the duration of the loop.
; NOTE(review): presumably selects the rounding mode the sqrt step relies on - confirm against the MXCSR bit layout.
stmxcsr DWORD PTR [rsp+4]
|
||||||
|
mov DWORD PTR [rsp], 24448
|
||||||
|
ldmxcsr DWORD PTR [rsp]
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
; r10 keeps the input pointer because rcx is overwritten further down.
mov r10, rcx
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
mov r9, QWORD PTR [rcx+40]
|
||||||
|
xor r9, QWORD PTR [rcx+8]
|
||||||
|
movq xmm4, rax
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r11, QWORD PTR [rcx+224]
|
||||||
|
mov rcx, QWORD PTR [rcx+88]
|
||||||
|
xor rcx, QWORD PTR [r10+72]
|
||||||
|
mov rax, QWORD PTR [r10+80]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rax, QWORD PTR [r10+64]
|
||||||
|
|
||||||
|
; xmm6-xmm13 are saved here and restored at loop exit.
movaps XMMWORD PTR [rsp+16], xmm6
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm7
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+64], xmm9
|
||||||
|
movaps XMMWORD PTR [rsp+80], xmm10
|
||||||
|
movaps XMMWORD PTR [rsp+96], xmm11
|
||||||
|
movaps XMMWORD PTR [rsp+112], xmm12
|
||||||
|
movaps XMMWORD PTR [rsp+128], xmm13
|
||||||
|
|
||||||
|
movq xmm5, rax
|
||||||
|
|
||||||
|
; rax = 1023 << 52 (0x3FF0000000000000): the shift discards every bit of rax above bit 11,
; so only the 16-bit value just written to ax survives.
mov ax, 1023
|
||||||
|
shl rax, 52
|
||||||
|
movq xmm8, rax
|
||||||
|
|
||||||
|
; rax = r8 & 0x1FFFF0: first 16-byte-aligned offset into the buffer at r11.
mov rax, r8
|
||||||
|
punpcklqdq xmm4, xmm0
|
||||||
|
and eax, 2097136
|
||||||
|
movq xmm10, QWORD PTR [r10+96]
|
||||||
|
movq xmm0, rcx
|
||||||
|
mov rcx, QWORD PTR [r10+104]
|
||||||
|
; xmm9 = 0; the loop copies it into xmm1 before sqrtsd.
xorps xmm9, xmm9
|
||||||
|
; [rsp+248] and [rsp+240] lie above the return address (entry rsp+32 / rsp+24) and are used as
; spill slots for the current offset and for r9.
mov QWORD PTR [rsp+248], rax
|
||||||
|
; The buffer base is parked in xmm12 because r11 is reused as scratch inside the loop.
movq xmm12, r11
|
||||||
|
mov QWORD PTR [rsp+240], r9
|
||||||
|
punpcklqdq xmm5, xmm0
|
||||||
|
movq xmm13, rcx
|
||||||
|
; Loop counter.
mov r12d, 262144
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
; One iteration of the main loop; repeated until the counter in r12d reaches zero.
; On entry: rax = current 16-byte-aligned offset (also at [rsp+248]), r11 = buffer base,
; r10 = the rcx value saved at function entry, r8/r9 = the two carried qwords, r12d = remaining count.
cnv2_mainloop_soft_aes_fast2_sandybridge:
|
||||||
|
; Park the loop counter in xmm11 so r12 can hold the lookup-table pointer loaded from [r10+272].
movd xmm11, r12d
|
||||||
|
mov r12, QWORD PTR [r10+272]
|
||||||
|
; r13 = address of the current line; esi / r10d / r14d / ebp = its dwords at offsets 0 / 4 / 8 / 12.
lea r13, QWORD PTR [rax+r11]
|
||||||
|
mov esi, DWORD PTR [r13]
|
||||||
|
movq xmm0, r9
|
||||||
|
mov r10d, DWORD PTR [r13+4]
|
||||||
|
movq xmm7, r8
|
||||||
|
mov ebp, DWORD PTR [r13+12]
|
||||||
|
mov r14d, DWORD PTR [r13+8]
|
||||||
|
; rdx = current offset, reloaded from its spill slot.
mov rdx, QWORD PTR [rsp+248]
|
||||||
|
; Table-driven transform of the four dwords. Each output dword is the XOR of one entry from each
; of four 256-entry dword tables (r12+0, +1024, +2048, +3072), indexed by successive bytes of the
; input dwords. NOTE(review): this has the shape of a T-table AES round standing in for aesenc -
; confirm against how the table at [ctx+272] is filled.
; First table (r12+0), indexed by byte 0 of each dword.
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
; xmm7 = r8 (low qword) : r9 (high qword); XORed into the transform result below.
punpcklqdq xmm7, xmm0
|
||||||
|
mov r15d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
mov edi, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov ebx, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
; Second table (r12+1024), indexed by byte 1 of the following dword (wrapping around).
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
xor r15d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
; eax = top byte of the third dword + 256: index into the fourth table once r12 is advanced by 2048.
mov eax, r14d
|
||||||
|
shr eax, 8
|
||||||
|
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add eax, 256
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
; Advance r12 to the third table; indices with +256 added select the fourth table.
add r12, 2048
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
add r10d, 256
|
||||||
|
; r11d = output dword 3 (r11 is scratch here; the buffer base is restored from xmm12 below).
mov r11d, DWORD PTR [r12+rax*4]
|
||||||
|
xor r11d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r11d, r9d
|
||||||
|
movzx ecx, sil
|
||||||
|
; r10d = output dword 2.
mov r10d, DWORD PTR [r12+r10*4]
|
||||||
|
shr esi, 8
|
||||||
|
add esi, 256
|
||||||
|
xor r10d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
xor r10d, ebx
|
||||||
|
shr ebp, 8
|
||||||
|
movd xmm1, r11d
|
||||||
|
add ebp, 256
|
||||||
|
; r11 = buffer base again.
movq r11, xmm12
|
||||||
|
; r9d = output dword 1, eax = output dword 0.
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r9d, DWORD PTR [r12+rsi*4]
|
||||||
|
mov eax, DWORD PTR [r12+rbp*4]
|
||||||
|
xor r9d, edi
|
||||||
|
movzx ecx, r14b
|
||||||
|
movd xmm0, r10d
|
||||||
|
movd xmm2, r9d
|
||||||
|
xor eax, DWORD PTR [r12+rcx*4]
|
||||||
|
; rcx / rax / rdx become offset^16 / offset^32 / offset^48 while the result is packed into xmm6.
mov rcx, rdx
|
||||||
|
xor eax, r15d
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
xor rcx, 16
|
||||||
|
movd xmm6, eax
|
||||||
|
mov rax, rdx
|
||||||
|
punpckldq xmm6, xmm0
|
||||||
|
xor rax, 32
|
||||||
|
; xmm6 = output dwords 0..3 in order.
punpckldq xmm6, xmm2
|
||||||
|
xor rdx, 48
|
||||||
|
; Load the three neighbouring chunks, add xmm4/xmm7/xmm5 per 64-bit lane, and store them back rotated:
;   [^16] <- [^48]+xmm5,  [^32] <- [^16]+xmm4,  [^48] <- [^32]+xmm7.
movdqu xmm2, XMMWORD PTR [rcx+r11]
|
||||||
|
; Final step of the transform: XOR with xmm7.
pxor xmm6, xmm7
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu xmm1, XMMWORD PTR [rax+r11]
|
||||||
|
movdqu xmm0, XMMWORD PTR [rdx+r11]
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
movdqu XMMWORD PTR [rcx+r11], xmm0
|
||||||
|
movdqu XMMWORD PTR [rax+r11], xmm2
|
||||||
|
; rcx = low qword of xmm13 (value carried over from the previous sqrt step).
movq rcx, xmm13
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movdqu XMMWORD PTR [rdx+r11], xmm1
|
||||||
|
; rdi = low qword of the transform result; r10 = rdi & 0x1FFFF0 is the next offset.
movq rdi, xmm6
|
||||||
|
mov r10, rdi
|
||||||
|
and r10d, 2097136
|
||||||
|
xor edx, edx
|
||||||
|
; rbx = low qword of xmm10 ^ (rcx << 32).
mov rax, rcx
|
||||||
|
shl rax, 32
|
||||||
|
movq rbx, xmm10
|
||||||
|
xor rbx, rax
|
||||||
|
; Divisor r9 = (2*rcx + rdi) truncated to 32 bits, with bits 31 and 0 forced on (0x80000001).
lea r9, QWORD PTR [rcx+rcx]
|
||||||
|
add r9d, edi
|
||||||
|
; Write (transform result ^ xmm4) back to the line this iteration started from.
movdqa xmm0, xmm6
|
||||||
|
pxor xmm0, xmm4
|
||||||
|
mov ecx, -2147483647
|
||||||
|
movdqu XMMWORD PTR [r13], xmm0
|
||||||
|
or r9, rcx
|
||||||
|
; Dividend = 0 : high qword of xmm6; xmm1 = xmm9 ahead of sqrtsd.
movdqa xmm0, xmm6
|
||||||
|
movaps xmm1, xmm9
|
||||||
|
psrldq xmm0, 8
|
||||||
|
movq rax, xmm0
|
||||||
|
; rbx ^= first qword of the new line; r14 = its address; rbp = its second qword.
xor rbx, QWORD PTR [r10+r11]
|
||||||
|
lea r14, QWORD PTR [r10+r11]
|
||||||
|
mov rbp, QWORD PTR [r14+8]
|
||||||
|
div r9
|
||||||
|
; rdx = (remainder << 32) + low 32 bits of the quotient; kept in xmm10 for the next iteration.
shl rdx, 32
|
||||||
|
mov eax, eax
|
||||||
|
add rdx, rax
|
||||||
|
; r9 = rdx + rdi; sqrt input = (r9 >> 12) + (1023 << 52), taken as the bit pattern of a double.
lea r9, QWORD PTR [rdx+rdi]
|
||||||
|
movq xmm10, rdx
|
||||||
|
mov rax, r9
|
||||||
|
shr rax, 12
|
||||||
|
movq xmm0, rax
|
||||||
|
paddq xmm0, xmm8
|
||||||
|
sqrtsd xmm1, xmm0
|
||||||
|
; rdx = raw bits of the square root. If its low 19 bits are all zero take the fix-up path,
; otherwise xmm1 >>= 19 per 64-bit lane.
movq rdx, xmm1
|
||||||
|
test rdx, 524287
|
||||||
|
je sqrt_fixup_soft_aes_fast2_sandybridge
|
||||||
|
psrlq xmm1, 19
|
||||||
|
; Both paths rejoin here with the final xmm1.
sqrt_fixup_soft_aes_fast2_sandybridge_ret:
|
||||||
|
|
||||||
|
; xmm13 <- sqrt-step result for the next iteration; r9 = r10^16, rcx = r10^32, r10 = r10^48.
mov r9, r10
|
||||||
|
movdqa xmm13, xmm1
|
||||||
|
xor r9, 16
|
||||||
|
mov rcx, r10
|
||||||
|
xor rcx, 32
|
||||||
|
xor r10, 48
|
||||||
|
; 64x64 -> 128-bit multiply: rdx:rax = rbx * rdi.
mov rax, rbx
|
||||||
|
mul rdi
|
||||||
|
; Second shuffle: [^16] <- [^48]+xmm5,  [^32] <- ([^16] ^ product)+xmm4,  [^48] <- [^32]+xmm7,
; where product = rdx (low qword) : rax (high qword) before the XORs with [^32] below.
movdqu xmm2, XMMWORD PTR [r9+r11]
|
||||||
|
movdqu xmm1, XMMWORD PTR [rcx+r11]
|
||||||
|
paddq xmm1, xmm7
|
||||||
|
movq xmm0, rax
|
||||||
|
movq xmm3, rdx
|
||||||
|
xor rax, QWORD PTR [r11+rcx+8]
|
||||||
|
xor rdx, QWORD PTR [rcx+r11]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
add r8, rdx
|
||||||
|
movdqu xmm0, XMMWORD PTR [r10+r11]
|
||||||
|
pxor xmm2, xmm3
|
||||||
|
paddq xmm0, xmm5
|
||||||
|
paddq xmm2, xmm4
|
||||||
|
movdqu XMMWORD PTR [r9+r11], xmm0
|
||||||
|
; Rotate the carried vectors: xmm5 <- xmm4, xmm4 <- transform result.
movdqa xmm5, xmm4
|
||||||
|
; r9 = carried qword from its spill slot, plus the low product half.
mov r9, QWORD PTR [rsp+240]
|
||||||
|
movdqa xmm4, xmm6
|
||||||
|
add r9, rax
|
||||||
|
movdqu XMMWORD PTR [rcx+r11], xmm2
|
||||||
|
movdqu XMMWORD PTR [r10+r11], xmm1
|
||||||
|
; r10 = the rcx value saved at function entry (the slot is now at rsp+224); r12d = loop counter.
mov r10, QWORD PTR [rsp+224]
|
||||||
|
movd r12d, xmm11
|
||||||
|
; Store r8:r9 to the line at r14; then r8 ^= rbx, r9 ^= rbp; rax = r8 & 0x1FFFF0 is the next offset.
mov QWORD PTR [r14], r8
|
||||||
|
xor r8, rbx
|
||||||
|
mov rax, r8
|
||||||
|
mov QWORD PTR [r14+8], r9
|
||||||
|
and eax, 2097136
|
||||||
|
xor r9, rbp
|
||||||
|
; Spill r9 and the next offset for the following iteration.
mov QWORD PTR [rsp+240], r9
|
||||||
|
mov QWORD PTR [rsp+248], rax
|
||||||
|
sub r12d, 1
|
||||||
|
jne cnv2_mainloop_soft_aes_fast2_sandybridge
|
||||||
|
|
||||||
|
; Loop exit: restore the MXCSR value saved at entry, xmm6-xmm13 and the pushed registers.
ldmxcsr DWORD PTR [rsp+4]
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+48]
|
||||||
|
movaps xmm9, XMMWORD PTR [rsp+64]
|
||||||
|
movaps xmm10, XMMWORD PTR [rsp+80]
|
||||||
|
movaps xmm11, XMMWORD PTR [rsp+96]
|
||||||
|
movaps xmm12, XMMWORD PTR [rsp+112]
|
||||||
|
movaps xmm13, XMMWORD PTR [rsp+128]
|
||||||
|
|
||||||
|
add rsp, 152
|
||||||
|
pop r15
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
; Jump over the out-of-line fix-up code to the end label.
jmp cnv2_mainloop_soft_aes_fast2_sandybridge_asm_endp
|
||||||
|
|
||||||
|
; Out-of-line path taken when the low 19 bits of the sqrtsd result bits (rdx) are all zero.
; Produces the xmm1 value the main loop otherwise gets from 'psrlq xmm1, 19', then jumps back.
; Expects r9 = the value the main loop shifted and fed to sqrtsd. Clobbers rax, rcx, rdx and r15.
sqrt_fixup_soft_aes_fast2_sandybridge:
|
||||||
|
dec rdx
|
||||||
|
; r15 = 0xFFFFFC02 << 32 (mov into r15d zero-extends -1022).
mov r15d, -1022
|
||||||
|
shl r15, 32
|
||||||
|
; From the decremented value: rdx = value >> 19, rax = value >> 20.
mov rax, rdx
|
||||||
|
shr rdx, 19
|
||||||
|
shr rax, 20
|
||||||
|
; rcx = (rdx - rax + r15 + 1) * (rax + r15) - r9; the borrow of the final sub is added to rdx.
mov rcx, rdx
|
||||||
|
sub rcx, rax
|
||||||
|
lea rcx, [rcx+r15+1]
|
||||||
|
add rax, r15
|
||||||
|
imul rcx, rax
|
||||||
|
sub rcx, r9
|
||||||
|
adc rdx, 0
|
||||||
|
movq xmm1, rdx
|
||||||
|
jmp sqrt_fixup_soft_aes_fast2_sandybridge_ret
|
||||||
|
|
||||||
|
cnv2_mainloop_soft_aes_fast2_sandybridge_asm_endp:
|
70
src/crypto/asm/win/cn_liteupx_mainloop_sandybridge.inc
Normal file
70
src/crypto/asm/win/cn_liteupx_mainloop_sandybridge.inc
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
; Entry/setup code for the cn-lite UPX main loop (Sandy Bridge variant).
; Everything is read through rcx. State left for the loop:
;   r8  = [rcx+32]^[rcx],  rdi = [rcx+40]^[rcx+8]
;   xmm3 = low qword [rcx+48]^[rcx+16], high qword [rcx+56]^[rcx+24]
;   r14 = (qword at byte offset 35 of the pointer in [rcx+256]) ^ [rcx+192]
;   r15 = pointer loaded from [rcx+264] (byte lookup table used inside the loop)
;   rsi = pointer loaded from [rcx+224] (buffer base), rdx = r8 & 0xFFFF0,
;   xmm2 = 16 bytes at [rdx+rsi], ebp = 131072 iterations
; NOTE(review): rcx is presumably the hash-context pointer (first Win64 argument) - confirm against the caller.
; Save rbx, rbp, rsi and rdi in the four stack slots above the return address.
mov QWORD PTR [rsp+8], rbx
|
||||||
|
mov QWORD PTR [rsp+16], rbp
|
||||||
|
mov QWORD PTR [rsp+24], rsi
|
||||||
|
mov QWORD PTR [rsp+32], rdi
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
mov rax, QWORD PTR [rcx+48]
|
||||||
|
; Loop counter.
mov ebp, 131072
|
||||||
|
xor rax, QWORD PTR [rcx+16]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx]
|
||||||
|
movq xmm3, rax
|
||||||
|
mov rax, QWORD PTR [rcx+256]
|
||||||
|
mov rdi, QWORD PTR [rcx+40]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor rdi, QWORD PTR [rcx+8]
|
||||||
|
mov rdx, r8
|
||||||
|
mov r15, QWORD PTR [rcx+264]
|
||||||
|
; 1048560 = 0xFFFF0: 16-byte-aligned offset below 1 MiB.
and edx, 1048560
|
||||||
|
; Unaligned 8-byte read at byte offset 35 of the buffer pointed to by [rcx+256].
mov r14, QWORD PTR [rax+35]
|
||||||
|
xor r14, QWORD PTR [rcx+192]
|
||||||
|
mov rsi, QWORD PTR [rcx+224]
|
||||||
|
punpcklqdq xmm3, xmm0
|
||||||
|
; Preload the first 16-byte line; later iterations load the next line at the bottom of the loop.
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
; One iteration of the main loop; repeated until ebp counts down to zero.
; On entry rdx is the current 16-byte-aligned offset into the buffer at rsi and
; xmm2 already holds the 16 bytes at that offset.
; NOTE(review): the label says "litev1" although this file is the UPX variant - check for a
; name clash if both variants are ever included in the same scope.
cn_litev1_mainloop_sandybridge:
|
||||||
|
; xmm1 = r8 (low qword) : rdi (high qword); one AES round on xmm2 with xmm1 as the round key.
movq xmm0, rdi
|
||||||
|
movq xmm1, r8
|
||||||
|
punpcklqdq xmm1, xmm0
|
||||||
|
aesenc xmm2, xmm1
|
||||||
|
; r10 = low qword of the AES result; r9 = address of the line at offset (r10 & 0xFFFF0).
movq r10, xmm2
|
||||||
|
mov r9d, r10d
|
||||||
|
and r9d, 1048560
|
||||||
|
add r9, rsi
|
||||||
|
; Write (AES result ^ xmm3) back to the current line; xmm3 <- AES result for the next iteration.
movdqa xmm0, xmm2
|
||||||
|
pxor xmm0, xmm3
|
||||||
|
movdqa xmm3, xmm2
|
||||||
|
movdqu XMMWORD PTR [rdx+rsi], xmm0
|
||||||
|
; Take byte 11 of the value just stored, translate it through the byte table at r15,
; and overwrite byte 11 of the stored line with the result.
psrldq xmm0, 11
|
||||||
|
movq rax, xmm0
|
||||||
|
movzx eax, al
|
||||||
|
movzx eax, BYTE PTR [rax+r15]
|
||||||
|
mov BYTE PTR [rsi+rdx+11], al
|
||||||
|
; rbx / r11 = the two qwords of the line at r9.
mov rbx, QWORD PTR [r9]
|
||||||
|
mov r11, QWORD PTR [r9+8]
|
||||||
|
; 64x64 -> 128-bit multiply: rdx:rax = rbx * r10.
mov rax, rbx
|
||||||
|
mul r10
|
||||||
|
; r8 += high half, stored as the first qword; rdi += low half, stored XORed with r14 as the second.
add r8, rdx
|
||||||
|
mov QWORD PTR [r9], r8
|
||||||
|
add rdi, rax
|
||||||
|
mov rax, r14
|
||||||
|
xor rax, rdi
|
||||||
|
mov QWORD PTR [r9+8], rax
|
||||||
|
; r8 ^= rbx; rdx = r8 & 0xFFFF0 is the next offset; preload its line; rdi ^= r11.
xor r8, rbx
|
||||||
|
mov rdx, r8
|
||||||
|
and edx, 1048560
|
||||||
|
movdqu xmm2, XMMWORD PTR [rdx+rsi]
|
||||||
|
xor rdi, r11
|
||||||
|
dec ebp
|
||||||
|
jne cn_litev1_mainloop_sandybridge
|
||||||
|
|
||||||
|
; Loop exit: reload rbx/rbp/rsi/rdi from the slots they were stored in at entry
; (offsets are 16 larger than at entry because of the two pushes), then pop r15 and r14.
; No ret appears in this include file.
mov rbx, QWORD PTR [rsp+24]
|
||||||
|
mov rbp, QWORD PTR [rsp+32]
|
||||||
|
mov rsi, QWORD PTR [rsp+40]
|
||||||
|
mov rdi, QWORD PTR [rsp+48]
|
||||||
|
pop r15
|
||||||
|
pop r14
|
162
src/crypto/asm/win/cn_liteupx_mainloop_soft_aes_sandybridge.inc
Normal file
162
src/crypto/asm/win/cn_liteupx_mainloop_soft_aes_sandybridge.inc
Normal file
|
@ -0,0 +1,162 @@
|
||||||
|
push rbx ; prologue: save the eight general-purpose registers that the code below overwrites
|
||||||
|
push rbp
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
push r12
|
||||||
|
push r13
|
||||||
|
push r14
|
||||||
|
push r15
|
||||||
|
sub rsp, 72 ; 64 bytes to save xmm6-xmm9 plus an 8-byte slot at [rsp+64] used to spill the buffer offset
|
||||||
|
|
||||||
|
movaps XMMWORD PTR [rsp], xmm6 ; xmm6-xmm9 are used as scratch storage below, so their incoming values are saved first
|
||||||
|
movaps XMMWORD PTR [rsp+16], xmm7 ; NOTE(review): movaps needs rsp 16-byte aligned here - presumably guaranteed by the caller's stack alignment, confirm
|
||||||
|
movaps XMMWORD PTR [rsp+32], xmm8
|
||||||
|
movaps XMMWORD PTR [rsp+48], xmm9
|
||||||
|
|
||||||
|
mov rax, QWORD PTR [rcx+48] ; setup: rcx is the caller-supplied pointer; every [rcx+N] below reads a field behind it
|
||||||
|
xor rax, QWORD PTR [rcx+16] ; rax = [rcx+16] xor [rcx+48]
|
||||||
|
mov rdx, QWORD PTR [rcx+56]
|
||||||
|
xor rdx, QWORD PTR [rcx+24] ; rdx = [rcx+24] xor [rcx+56]
|
||||||
|
mov r8, QWORD PTR [rcx+32]
|
||||||
|
xor r8, QWORD PTR [rcx] ; r8 = [rcx] xor [rcx+32]
|
||||||
|
movq xmm4, rax
|
||||||
|
mov rax, QWORD PTR [rcx+256] ; pointer field; the qword at byte offset 35 behind it is read below
|
||||||
|
mov r13, QWORD PTR [rcx+40]
|
||||||
|
movq xmm0, rdx
|
||||||
|
xor r13, QWORD PTR [rcx+8] ; r13 = [rcx+8] xor [rcx+40]; r8 (low) and r13 (high) are XORed into every round result in the loop
|
||||||
|
mov rdx, r8
|
||||||
|
mov rdi, QWORD PTR [rcx+224] ; rdi = base address of the work buffer used by all [rdx+rdi] / [r10+rdi] accesses in the loop
|
||||||
|
and edx, 1048560 ; mask 0xFFFF0: keeps the offset 16-byte aligned and below 1 MiB
|
||||||
|
mov rax, QWORD PTR [rax+35]
|
||||||
|
xor rax, QWORD PTR [rcx+192]
|
||||||
|
movq xmm5, rax ; xmm5 = ([[rcx+256]+35] xor [rcx+192]); XORed into the high qword written by each loop iteration
|
||||||
|
movq xmm8, rdi ; keep the buffer base in xmm8 because the loop reuses rdi as scratch
|
||||||
|
punpcklqdq xmm4, xmm0 ; xmm4 = { low: [rcx+16] xor [rcx+48], high: [rcx+24] xor [rcx+56] }
|
||||||
|
mov QWORD PTR [rsp+64], rdx ; spill the first buffer offset (rdx is overwritten by mul inside the loop)
|
||||||
|
|
||||||
|
movq xmm6, rcx ; keep the context pointer in xmm6 because the loop reuses rcx as scratch
|
||||||
|
mov rax, QWORD PTR [rcx+264]
|
||||||
|
movq xmm7, rax ; xmm7 = pointer to the byte table used for the byte-11 substitution in the loop
|
||||||
|
|
||||||
|
mov eax, 131072 ; loop counter: 0x20000 iterations
|
||||||
|
|
||||||
|
ALIGN 64 ; place the loop entry on a 64-byte boundary
|
||||||
|
cn_litev1_mainloop_soft_aes_sandybridge: ; loop head; on entry eax = remaining iterations, rdx = buffer offset, rcx = context pointer
|
||||||
|
movq xmm9, rax ; park the loop counter in xmm9, rax is reused as scratch below
|
||||||
|
mov r12, QWORD PTR [rcx+272] ; r12 = base of the lookup tables (four consecutive 256-entry dword tables are indexed below)
|
||||||
|
mov esi, DWORD PTR [rdx+rdi] ; read the 16-byte buffer entry at base (rdi) + offset (rdx) as four dwords
|
||||||
|
mov r10d, DWORD PTR [rdx+rdi+4]
|
||||||
|
mov ebp, DWORD PTR [rdx+rdi+12]
|
||||||
|
mov r14d, DWORD PTR [rdx+rdi+8]
|
||||||
|
mov rdx, QWORD PTR [rsp+64] ; reload the spilled buffer offset
|
||||||
|
movzx ecx, sil ; rcx (context pointer) is overwritten from here on; it is restored from xmm6 after the multiply
|
||||||
|
shr esi, 8
|
||||||
|
mov r15d, DWORD PTR [r12+rcx*4] ; table 0 lookups: indexed by the lowest byte of each dword
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
mov edi, DWORD PTR [r12+rcx*4] ; overwrites rdi (buffer base); restored from xmm8 further down
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov ebx, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
xor r15d, DWORD PTR [r12+rcx*4+1024] ; table 1 lookups (+1024 bytes): indexed by the second byte of each dword
|
||||||
|
movzx ecx, r14b
|
||||||
|
shr r14d, 8
|
||||||
|
mov eax, r14d
|
||||||
|
shr eax, 8 ; eax = top byte of the dword loaded into r14d
|
||||||
|
xor edi, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add eax, 256 ; +256 entries = +1024 bytes, so the [r12+rax*4] read after r12 advances hits table 3
|
||||||
|
movzx ecx, bpl
|
||||||
|
shr ebp, 8
|
||||||
|
xor ebx, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
movzx ecx, sil
|
||||||
|
shr esi, 8
|
||||||
|
xor r9d, DWORD PTR [r12+rcx*4+1024]
|
||||||
|
add r12, 2048 ; advance to table 2; indices that had 256 added reach table 3
|
||||||
|
movzx ecx, r10b
|
||||||
|
shr r10d, 8
|
||||||
|
add r10d, 256
|
||||||
|
mov r11d, DWORD PTR [r12+rax*4]
|
||||||
|
xor r11d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r11d, r9d ; r11d = fourth result dword
|
||||||
|
movzx ecx, sil
|
||||||
|
mov r10d, DWORD PTR [r12+r10*4]
|
||||||
|
shr esi, 8
|
||||||
|
add esi, 256
|
||||||
|
xor r10d, DWORD PTR [r12+rcx*4]
|
||||||
|
movzx ecx, bpl
|
||||||
|
xor r10d, ebx ; r10d = third result dword
|
||||||
|
shr ebp, 8
|
||||||
|
add ebp, 256
|
||||||
|
movd xmm1, r11d
|
||||||
|
mov r9d, DWORD PTR [r12+rcx*4]
|
||||||
|
xor r9d, DWORD PTR [r12+rsi*4]
|
||||||
|
mov eax, DWORD PTR [r12+rbp*4]
|
||||||
|
xor r9d, edi ; r9d = second result dword
|
||||||
|
movq rdi, xmm8 ; restore the buffer base into rdi
|
||||||
|
movzx ecx, r14b
|
||||||
|
movd xmm0, r10d
|
||||||
|
movd xmm2, r9d
|
||||||
|
punpckldq xmm2, xmm1
|
||||||
|
movq xmm1, r8
|
||||||
|
xor eax, DWORD PTR [r12+rcx*4]
|
||||||
|
xor eax, r15d ; eax = first result dword
|
||||||
|
movd xmm3, eax
|
||||||
|
movq rax, xmm7 ; rax = byte-table pointer for the byte-11 substitution below
|
||||||
|
punpckldq xmm3, xmm0
|
||||||
|
movq xmm0, r13
|
||||||
|
punpcklqdq xmm1, xmm0 ; xmm1 = { r8, r13 }
|
||||||
|
punpckldq xmm3, xmm2 ; xmm3 = the four result dwords packed in order eax, r9d, r10d, r11d
|
||||||
|
pxor xmm3, xmm1 ; table-driven round result; presumably the software stand-in for one aesenc round (per the file name) - confirm
|
||||||
|
movq r9, xmm3 ; r9 = low 64 bits of the round result
|
||||||
|
mov r10d, r9d
|
||||||
|
and r10d, 1048560 ; offset of the second buffer entry: low bits of the result masked with 0xFFFF0
|
||||||
|
movdqa xmm0, xmm3
|
||||||
|
pxor xmm0, xmm4 ; XOR with xmm4, which holds the previous iteration's result (set up before the loop for the first pass)
|
||||||
|
movdqu XMMWORD PTR [rdx+rdi], xmm0 ; write back over the entry that was read at the top of the iteration
|
||||||
|
psrldq xmm0, 11 ; shift byte 11 of the stored value down to the lowest byte
|
||||||
|
movq rcx, xmm0
|
||||||
|
movzx ecx, cl
|
||||||
|
mov cl, BYTE PTR [rcx+rax] ; substitute that byte through the byte table
|
||||||
|
mov BYTE PTR [rdi+rdx+11], cl ; patch byte 11 of the entry just stored
|
||||||
|
mov rbx, QWORD PTR [r10+rdi] ; rbx = low qword of the second entry
|
||||||
|
mov rcx, r9
|
||||||
|
lea r9, QWORD PTR [r10+rdi] ; r9 = address of the second entry
|
||||||
|
mov r11, QWORD PTR [r9+8] ; r11 = high qword of the second entry
|
||||||
|
mov rax, rbx
|
||||||
|
movdqa xmm4, xmm3 ; remember this round result for the next iteration
|
||||||
|
mul rcx ; rdx:rax = rbx * low qword of the round result (overwrites rdx, hence the [rsp+64] spill)
|
||||||
|
movq rcx, xmm6 ; restore the context pointer into rcx
|
||||||
|
add r8, rdx ; r8 += high half of the product
|
||||||
|
add r13, rax ; r13 += low half of the product
|
||||||
|
movq rax, xmm5
|
||||||
|
xor rax, r13 ; high qword to store = r13 xor the value kept in xmm5
|
||||||
|
mov QWORD PTR [r9], r8 ; store the low qword of the second entry
|
||||||
|
xor r8, rbx ; r8 ^= the low qword that was read from the second entry
|
||||||
|
mov QWORD PTR [r9+8], rax ; store the high qword of the second entry
|
||||||
|
movq rax, xmm9 ; restore the loop counter
|
||||||
|
mov rdx, r8
|
||||||
|
xor r13, r11 ; r13 ^= the high qword that was read from the second entry
|
||||||
|
and edx, 1048560 ; next iteration's buffer offset: low bits of r8 masked with 0xFFFF0
|
||||||
|
mov QWORD PTR [rsp+64], rdx ; spill it for the reload at the top of the loop
|
||||||
|
sub eax, 1
|
||||||
|
jne cn_litev1_mainloop_soft_aes_sandybridge ; repeat until the counter reaches zero
|
||||||
|
|
||||||
|
movaps xmm6, XMMWORD PTR [rsp] ; epilogue: restore xmm6-xmm9 from the slots filled in the prologue
|
||||||
|
movaps xmm7, XMMWORD PTR [rsp+16]
|
||||||
|
movaps xmm8, XMMWORD PTR [rsp+32]
|
||||||
|
movaps xmm9, XMMWORD PTR [rsp+48]
|
||||||
|
|
||||||
|
add rsp, 72 ; release the 72-byte save/spill area
|
||||||
|
pop r15 ; pop the general-purpose registers in the reverse of the push order
|
||||||
|
pop r14
|
||||||
|
pop r13
|
||||||
|
pop r12
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
pop rbp
|
||||||
|
pop rbx ; no ret in this include: the PROC that includes it supplies the return
|
|
@ -6,11 +6,18 @@ PUBLIC cnv2_mainloop_ivybridge_asm
|
||||||
PUBLIC cnv2_mainloop_ryzen_asm
|
PUBLIC cnv2_mainloop_ryzen_asm
|
||||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||||
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||||
|
PUBLIC cn_fast2_mainloop_ivybridge_asm
|
||||||
|
PUBLIC cn_fast2_mainloop_ryzen_asm
|
||||||
|
PUBLIC cn_fast2_mainloop_bulldozer_asm
|
||||||
|
PUBLIC cn_fast2_double_mainloop_sandybridge_asm
|
||||||
|
PUBLIC cn_liteupx_mainloop_sandybridge_asm
|
||||||
|
|
||||||
PUBLIC cnv1_mainloop_soft_aes_sandybridge_asm
|
PUBLIC cnv1_mainloop_soft_aes_sandybridge_asm
|
||||||
PUBLIC cn_litev1_mainloop_soft_aes_sandybridge_asm
|
PUBLIC cn_litev1_mainloop_soft_aes_sandybridge_asm
|
||||||
PUBLIC cn_fast_mainloop_soft_aes_sandybridge_asm
|
PUBLIC cn_fast_mainloop_soft_aes_sandybridge_asm
|
||||||
PUBLIC cnv2_mainloop_soft_aes_sandybridge_asm
|
PUBLIC cnv2_mainloop_soft_aes_sandybridge_asm
|
||||||
|
PUBLIC cn_fast2_mainloop_soft_aes_sandybridge_asm
|
||||||
|
PUBLIC cn_liteupx_mainloop_soft_aes_sandybridge_asm
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
cnv1_mainloop_sandybridge_asm PROC
|
cnv1_mainloop_sandybridge_asm PROC
|
||||||
|
@ -54,6 +61,36 @@ cnv2_double_mainloop_sandybridge_asm PROC
|
||||||
ret 0
|
ret 0
|
||||||
cnv2_double_mainloop_sandybridge_asm ENDP
|
cnv2_double_mainloop_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
ALIGN 64 ; start the procedure on a 64-byte boundary
|
||||||
|
cn_fast2_mainloop_ivybridge_asm PROC ; entry point; its body is the included file, assembled in place
|
||||||
|
INCLUDE cn_fast2_main_loop_ivybridge.inc
|
||||||
|
ret 0 ; return to the caller once the included code falls through
|
||||||
|
cn_fast2_mainloop_ivybridge_asm ENDP
|
||||||
|
|
||||||
|
ALIGN 64 ; start the procedure on a 64-byte boundary
|
||||||
|
cn_fast2_mainloop_ryzen_asm PROC ; entry point; its body is the included file, assembled in place
|
||||||
|
INCLUDE cn_fast2_main_loop_ryzen.inc
|
||||||
|
ret 0 ; return to the caller once the included code falls through
|
||||||
|
cn_fast2_mainloop_ryzen_asm ENDP
|
||||||
|
|
||||||
|
ALIGN 64 ; start the procedure on a 64-byte boundary
|
||||||
|
cn_fast2_mainloop_bulldozer_asm PROC ; entry point; its body is the included file, assembled in place
|
||||||
|
INCLUDE cn_fast2_main_loop_bulldozer.inc
|
||||||
|
ret 0 ; return to the caller once the included code falls through
|
||||||
|
cn_fast2_mainloop_bulldozer_asm ENDP
|
||||||
|
|
||||||
|
ALIGN 64 ; start the procedure on a 64-byte boundary
|
||||||
|
cn_fast2_double_mainloop_sandybridge_asm PROC ; entry point; its body is the included file, assembled in place
|
||||||
|
INCLUDE cn_fast2_double_main_loop_sandybridge.inc
|
||||||
|
ret 0 ; return to the caller once the included code falls through
|
||||||
|
cn_fast2_double_mainloop_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
ALIGN 64 ; start the procedure on a 64-byte boundary
|
||||||
|
cn_liteupx_mainloop_sandybridge_asm PROC ; entry point; its body is the included file, assembled in place
|
||||||
|
INCLUDE cn_liteupx_mainloop_sandybridge.inc
|
||||||
|
ret 0 ; return to the caller once the included code falls through
|
||||||
|
cn_liteupx_mainloop_sandybridge_asm ENDP
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
cnv1_mainloop_soft_aes_sandybridge_asm PROC
|
cnv1_mainloop_soft_aes_sandybridge_asm PROC
|
||||||
INCLUDE cnv1_mainloop_soft_aes_sandybridge.inc
|
INCLUDE cnv1_mainloop_soft_aes_sandybridge.inc
|
||||||
|
@ -78,5 +115,17 @@ cnv2_mainloop_soft_aes_sandybridge_asm PROC
|
||||||
ret 0
|
ret 0
|
||||||
cnv2_mainloop_soft_aes_sandybridge_asm ENDP
|
cnv2_mainloop_soft_aes_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
ALIGN 64 ; start the procedure on a 64-byte boundary
|
||||||
|
cn_fast2_mainloop_soft_aes_sandybridge_asm PROC ; entry point; its body is the included file, assembled in place
|
||||||
|
INCLUDE cn_fast2_mainloop_soft_aes_sandybridge.inc
|
||||||
|
ret 0 ; return to the caller once the included code falls through
|
||||||
|
cn_fast2_mainloop_soft_aes_sandybridge_asm ENDP
|
||||||
|
|
||||||
|
ALIGN 64 ; start the procedure on a 64-byte boundary
|
||||||
|
cn_liteupx_mainloop_soft_aes_sandybridge_asm PROC ; declared PUBLIC above, so it must be defined inside the segment and before END, which stops assembly
|
||||||
|
INCLUDE cn_liteupx_mainloop_soft_aes_sandybridge.inc
|
||||||
|
ret 0 ; the include ends after restoring registers without returning; this ret returns to the caller
|
||||||
|
cn_liteupx_mainloop_soft_aes_sandybridge_asm ENDP
|
||||||
|
|
||||||
_TEXT_CN_MAINLOOP ENDS
|
_TEXT_CN_MAINLOOP ENDS
|
||||||
END
|
END
|
|
@ -10,11 +10,18 @@
|
||||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||||
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_fast2_mainloop_ivybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_fast2_mainloop_ryzen_asm)
|
||||||
|
.global FN_PREFIX(cn_fast2_mainloop_bulldozer_asm)
|
||||||
|
.global FN_PREFIX(cn_fast2_double_mainloop_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_liteupx_mainloop_sandybridge_asm)
|
||||||
|
|
||||||
.global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm)
|
.global FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm)
|
||||||
.global FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm)
|
.global FN_PREFIX(cn_litev1_mainloop_soft_aes_sandybridge_asm)
|
||||||
.global FN_PREFIX(cn_fast_mainloop_soft_aes_sandybridge_asm)
|
.global FN_PREFIX(cn_fast_mainloop_soft_aes_sandybridge_asm)
|
||||||
.global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm)
|
.global FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_fast2_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
.global FN_PREFIX(cn_liteupx_mainloop_soft_aes_sandybridge_asm)
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
FN_PREFIX(cnv1_mainloop_sandybridge_asm):
|
FN_PREFIX(cnv1_mainloop_sandybridge_asm):
|
||||||
|
@ -51,6 +58,31 @@ FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
|
||||||
#include "../cnv2_double_main_loop_sandybridge.inc"
|
#include "../cnv2_double_main_loop_sandybridge.inc"
|
||||||
ret 0
|
ret 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cn_fast2_mainloop_ivybridge_asm): /* exported entry label; the body is the file included on the next line */
|
||||||
|
#include "../cn_fast2_main_loop_ivybridge.inc"
|
||||||
|
ret 0 /* return to the caller after the included code */
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cn_fast2_mainloop_ryzen_asm): /* exported entry label; the body is the file included on the next line */
|
||||||
|
#include "../cn_fast2_main_loop_ryzen.inc"
|
||||||
|
ret 0 /* return to the caller after the included code */
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cn_fast2_mainloop_bulldozer_asm): /* exported entry label; the body is the file included on the next line */
|
||||||
|
#include "../cn_fast2_main_loop_bulldozer.inc"
|
||||||
|
ret 0 /* return to the caller after the included code */
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cn_fast2_double_mainloop_sandybridge_asm): /* exported entry label; the body is the file included on the next line */
|
||||||
|
#include "../cn_fast2_double_main_loop_sandybridge.inc"
|
||||||
|
ret 0 /* return to the caller after the included code */
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cn_liteupx_mainloop_sandybridge_asm): /* exported entry label; the body is the file included on the next line */
|
||||||
|
#include "../cn_liteupx_mainloop_sandybridge.inc"
|
||||||
|
ret 0 /* return to the caller after the included code */
|
||||||
|
|
||||||
ALIGN 64
|
ALIGN 64
|
||||||
FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm):
|
FN_PREFIX(cnv1_mainloop_soft_aes_sandybridge_asm):
|
||||||
#include "../cnv1_mainloop_soft_aes_sandybridge.inc"
|
#include "../cnv1_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
@ -70,3 +102,14 @@ ALIGN 64
|
||||||
FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm):
|
FN_PREFIX(cnv2_mainloop_soft_aes_sandybridge_asm):
|
||||||
#include "../cnv2_mainloop_soft_aes_sandybridge.inc"
|
#include "../cnv2_mainloop_soft_aes_sandybridge.inc"
|
||||||
ret 0
|
ret 0
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cn_fast2_mainloop_soft_aes_sandybridge_asm): /* exported entry label; the body is the file included on the next line */
|
||||||
|
#include "../cn_fast2_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
ret 0 /* return to the caller after the included code */
|
||||||
|
|
||||||
|
|
||||||
|
ALIGN 64
|
||||||
|
FN_PREFIX(cn_liteupx_mainloop_soft_aes_sandybridge_asm): /* exported entry label; the body is the file included on the next line */
|
||||||
|
#include "../cn_liteupx_mainloop_soft_aes_sandybridge.inc"
|
||||||
|
ret 0 /* return to the caller after the included code */
|
|
@ -4,7 +4,7 @@
|
||||||
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
"threads": 0, // number of miner threads (not set or 0 enables automatic selection of optimal thread count)
|
||||||
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
"multihash-factor": 0, // number of hash blocks to process at a time (not set or 0 enables automatic selection of optimal number of hash blocks)
|
||||||
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
"multihash-thread-mask" : null, // for multihash-factors>0 only, limits multihash to given threads (mask), mask "0x3" means run multihash on thread 0 and 1 only (default: all threads)
|
||||||
"pow-variant" : "auto", // specificy the PoW variat to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy (xao), xtl (including autodetect for v5), msr, xhv, rto, xfh
|
"pow-variant" : "auto", // specify the PoW variant to use: -> auto (default), 0 (v0), 1 (v1, aka monerov7, aeonv7), 2 (v2, aka monerov8), tube (ipbc), alloy (xao), xtl (including autodetect for > v5), msr, xhv, rto, xfh, upx
|
||||||
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
// for further help see: https://github.com/Bendr0id/xmrigCC/wiki/Coin-configurations
|
||||||
"asm-optimization" : "auto", // specify the ASM optimization to use: -> auto (default), intel, ryzen, bulldozer, off
|
"asm-optimization" : "auto", // specify the ASM optimization to use: -> auto (default), intel, ryzen, bulldozer, off
|
||||||
"background": false, // true to run the miner in the background (Windows only, for *nix please use screen/tmux or systemd service instead)
|
"background": false, // true to run the miner in the background (Windows only, for *nix please use screen/tmux or systemd service instead)
|
||||||
|
@ -20,6 +20,7 @@
|
||||||
"safe": false, // true to safe adjust threads and av settings for current CPU
|
"safe": false, // true to safe adjust threads and av settings for current CPU
|
||||||
"syslog": false, // use system log for output messages
|
"syslog": false, // use system log for output messages
|
||||||
"reboot-cmd" : "", // command to execute to reboot the OS
|
"reboot-cmd" : "", // command to execute to reboot the OS
|
||||||
|
"force-pow-variant" : false, // force the configured pow variant; don't parse pow/variant from the pool job
|
||||||
"pools": [
|
"pools": [
|
||||||
{
|
{
|
||||||
"url": "donate2.graef.in:80", // URL of mining server
|
"url": "donate2.graef.in:80", // URL of mining server
|
||||||
|
|
|
@ -231,16 +231,17 @@ bool Client::parseJob(const rapidjson::Value ¶ms, int *code)
|
||||||
|
|
||||||
PowVariant powVariant = Options::i()->powVariant();
|
PowVariant powVariant = Options::i()->powVariant();
|
||||||
|
|
||||||
|
if (!Options::i()->forcePowVariant()) {
|
||||||
if (params.HasMember("algo")) {
|
if (params.HasMember("algo")) {
|
||||||
std::string algo = params["algo"].GetString();
|
std::string algo = params["algo"].GetString();
|
||||||
|
|
||||||
if (algo.find("/") != std::string::npos) {
|
if (algo.find("/") != std::string::npos) {
|
||||||
powVariant = parseVariant(algo.substr(algo.find("/")+1));
|
powVariant = parseVariant(algo.substr(algo.find("/") + 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.HasMember("variant")) {
|
if (params.HasMember("variant")) {
|
||||||
const rapidjson::Value &variant = params["variant"];
|
const rapidjson::Value& variant = params["variant"];
|
||||||
|
|
||||||
PowVariant parsedVariant = powVariant;
|
PowVariant parsedVariant = powVariant;
|
||||||
|
|
||||||
|
@ -254,6 +255,7 @@ bool Client::parseJob(const rapidjson::Value ¶ms, int *code)
|
||||||
powVariant = parsedVariant;
|
powVariant = parsedVariant;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
job.setPowVariant(powVariant);
|
job.setPowVariant(powVariant);
|
||||||
|
|
||||||
|
|
|
@ -146,6 +146,10 @@ PowVariant Job::powVariant() const
|
||||||
} else {
|
} else {
|
||||||
return PowVariant::POW_V0;
|
return PowVariant::POW_V0;
|
||||||
}
|
}
|
||||||
|
} else if (m_powVariant == PowVariant::POW_XTL) {
|
||||||
|
if (m_blob[0] > 5) {
|
||||||
|
return PowVariant::POW_XTL_V9;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
return m_powVariant;
|
return m_powVariant;
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,14 +36,14 @@
|
||||||
#define APP_DESC "XMRigCC CPU miner"
|
#define APP_DESC "XMRigCC CPU miner"
|
||||||
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
|
#define APP_COPYRIGHT "Copyright (C) 2017- BenDr0id"
|
||||||
#endif
|
#endif
|
||||||
#define APP_VERSION "1.8.7 (based on XMRig)"
|
#define APP_VERSION "1.8.8 (based on XMRig)"
|
||||||
#define APP_DOMAIN ""
|
#define APP_DOMAIN ""
|
||||||
#define APP_SITE "https://github.com/Bendr0id/xmrigCC"
|
#define APP_SITE "https://github.com/Bendr0id/xmrigCC"
|
||||||
#define APP_KIND "cpu"
|
#define APP_KIND "cpu"
|
||||||
|
|
||||||
#define APP_VER_MAJOR 1
|
#define APP_VER_MAJOR 1
|
||||||
#define APP_VER_MINOR 8
|
#define APP_VER_MINOR 8
|
||||||
#define APP_VER_BUILD 7
|
#define APP_VER_BUILD 8
|
||||||
#define APP_VER_REV 0
|
#define APP_VER_REV 0
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue