Conversion to NinjaRig.

This commit is contained in:
Haifa Bogdan Adnan 2019-08-26 12:38:34 +03:00
parent 84f56f0a4e
commit 2845347881
280 changed files with 18971 additions and 32469 deletions

View file

@ -0,0 +1,16 @@
//
// Created by Haifa Bogdan Adnan on 04.11.2018.
//
// Export-side definition of the DLLEXPORT annotation macro.
#ifndef ARGON2_DLLEXPORT_H
#define ARGON2_DLLEXPORT_H
// Drop any earlier definition so the one chosen below always takes effect.
#undef DLLEXPORT
#ifndef _WIN64
// _WIN64 is not defined: DLLEXPORT expands to nothing.
#define DLLEXPORT
#else
// _WIN64 is defined: annotated symbols are exported from the DLL being built.
#define DLLEXPORT __declspec(dllexport)
#endif
#endif //ARGON2_DLLEXPORT_H

View file

@ -0,0 +1,16 @@
//
// Created by Haifa Bogdan Adnan on 04.11.2018.
//
// Import-side definition of the DLLEXPORT annotation macro.
#ifndef ARGON2_DLLIMPORT_H
#define ARGON2_DLLIMPORT_H
// Only define DLLEXPORT when no earlier header has defined it already.
#ifndef DLLEXPORT
#ifndef _WIN64
// _WIN64 is not defined: DLLEXPORT expands to nothing.
#define DLLEXPORT
#else
// _WIN64 is defined: annotated symbols are imported from a DLL.
#define DLLEXPORT __declspec(dllimport)
#endif
#endif
#endif //ARGON2_DLLIMPORT_H

View file

@ -0,0 +1,21 @@
//
// Created by Haifa Bogdan Adnan on 05/08/2018.
//
#include "DLLExport.h"
#include "common.h"
#include <dirent.h>
// Collects the names of the directory entries of `folder` whose d_type is DT_REG.
// Only names (not full paths) are returned; an empty vector is returned when the
// folder cannot be opened.
vector<string> getFiles(const string &folder) {
    vector<string> names;
    DIR *handle = opendir(folder.c_str());
    if (handle == NULL) {
        return names;
    }
    for (struct dirent *entry = readdir(handle); entry != NULL; entry = readdir(handle)) {
        if (entry->d_type != DT_REG) {
            continue;
        }
        names.push_back(entry->d_name);
    }
    closedir(handle);
    return names;
}

View file

@ -0,0 +1,56 @@
//
// Created by Haifa Bogdan Adnan on 04/08/2018.
//
#ifndef ARGON2_COMMON_H
#define ARGON2_COMMON_H
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <queue>
#include <list>
#include <map>
#include <iostream>
#include <sstream>
#include <fstream>
#include <iomanip>
#include <regex>
#include <random>
#include <algorithm>
#include <thread>
#include <mutex>
#include <chrono>
#include <cmath>
#include <signal.h>
#include <dlfcn.h>
#include "DLLImport.h"
#ifndef _WIN64
#include <unistd.h>
#include <sys/time.h>
#include<sys/socket.h>
#include<netdb.h>
#include<arpa/inet.h>
#include <fcntl.h>
#else
#include <win64.h>
#endif
#ifdef __APPLE__
#include "../macosx/cpu_affinity.h"
#endif
// Every file including this header gets the std namespace imported at global scope.
using namespace std;
// Streams msg to cout, followed by endl and an additional flush.
#define LOG(msg) cout<<msg<<endl<<flush
// Declaration of getFiles; takes a folder path and returns a vector of strings.
DLLEXPORT vector<string> getFiles(const string &folder);
#endif //ARGON2_COMMON_H

View file

@ -0,0 +1,103 @@
//
// Created by Haifa Bogdan Adnan on 17/08/2018.
//
#include "crypto/argon2_hasher/common/DLLExport.h"
#include "../common/common.h"
#include "base64.h"
// Base64 alphabet; a character's position in this string is its 6-bit value.
// Used for index lookup by encode() and for reverse lookup (find) by decode().
static const string base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
// Tells whether c is accepted as a base64 symbol: '+', '/' or anything isalnum() accepts.
static inline bool is_base64(unsigned char c) {
    if (c == '+' || c == '/') {
        return true;
    }
    return isalnum(c) != 0;
}
// Splits three input octets into four 6-bit alphabet indices.
static inline void base64_split_triplet(const unsigned char *octets, unsigned char *sextets) {
    sextets[0] = (octets[0] & 0xfc) >> 2;
    sextets[1] = ((octets[0] & 0x03) << 4) + ((octets[1] & 0xf0) >> 4);
    sextets[2] = ((octets[1] & 0x0f) << 2) + ((octets[2] & 0xc0) >> 6);
    sextets[3] = octets[2] & 0x3f;
}

// Base64-encodes input_size bytes from input into output.
// Every complete 3-byte group yields 4 characters; a trailing 1- or 2-byte group is
// zero-padded, encoded and completed with '=' so that it also yields 4 characters.
// No terminating NUL is written; the caller must provide a large enough output buffer.
void base64::encode(const char *input, int input_size, char *output) {
    char *cursor = output;
    unsigned char octets[3];
    unsigned char sextets[4];
    int pending = 0; // number of bytes currently buffered in octets

    while (input_size--) {
        octets[pending++] = *(input++);
        if (pending != 3) {
            continue;
        }
        base64_split_triplet(octets, sextets);
        for (int k = 0; k < 4; ++k) {
            *(cursor++) = base64_chars[sextets[k]];
        }
        pending = 0;
    }

    if (pending == 0) {
        return;
    }

    // Incomplete final group: pad the missing input bytes with zeros.
    for (int k = pending; k < 3; ++k) {
        octets[k] = '\0';
    }
    base64_split_triplet(octets, sextets);

    // pending input bytes produce pending + 1 significant characters...
    for (int k = 0; k <= pending; ++k) {
        *(cursor++) = base64_chars[sextets[k]];
    }
    // ...and the remainder of the 4-character group is '=' padding.
    for (int pad = pending; pad < 3; ++pad) {
        *(cursor++) = '=';
    }
}
// Decodes the NUL-terminated base64 string `input` into `output`.
// Decoding stops at the first '=' or at the first character that is not a base64 symbol.
// Returns the number of bytes written, or -1 as soon as the decoded data would exceed
// output_size (bytes decoded before that point have already been written).
int base64::decode(const char *input, char *output, int output_size) {
    size_t in_len = strlen(input);
    int i = 0;   // number of symbols currently buffered in char_array_4
    int j = 0;
    int in_ = 0; // read position in input
    // Zero-initialised so that no indeterminate value is ever read from the buffer.
    unsigned char char_array_4[4] = {0, 0, 0, 0}, char_array_3[3] = {0, 0, 0};
    char *ret = output;
    int out_size = 0;

    while (in_len-- && (input[in_] != '=') && is_base64(input[in_])) {
        char_array_4[i++] = input[in_]; in_++;
        if (i == 4) {
            // Map the four symbols to their 6-bit values.
            for (i = 0; i < 4; i++)
                char_array_4[i] = base64_chars.find(char_array_4[i]);

            char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
            char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
            char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

            for (i = 0; (i < 3); i++) {
                out_size++;
                if (output_size < out_size)
                    return -1;
                *(ret++) = char_array_3[i];
            }
            i = 0;
        }
    }

    if (i) {
        // Incomplete final group of i symbols (1..3).
        for (j = 0; j < i; j++)
            char_array_4[j] = base64_chars.find(char_array_4[j]);
        // Clear the unused slots: they would otherwise hold stale 6-bit values from the
        // previous group (or nothing at all) and feed into the computations below.
        for (j = i; j < 4; j++)
            char_array_4[j] = 0;

        char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
        char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);

        // i symbols carry i - 1 complete bytes.
        for (j = 0; (j < i - 1); j++) {
            out_size++;
            if (output_size < out_size)
                return -1;
            *(ret++) = char_array_3[j];
        }
    }
    return out_size;
}

View file

@ -0,0 +1,14 @@
//
// Created by Haifa Bogdan Adnan on 17/08/2018.
//
#ifndef ARGON2_BASE64_H
#define ARGON2_BASE64_H
// Static-only helper for base64 conversion.
class DLLEXPORT base64 {
public:
// Encodes input_size bytes of input into output; the caller supplies the output buffer.
static void encode(const char *input, int input_size, char *output);
// Decodes the C string input into output (capacity output_size bytes); returns an int result.
static int decode(const char *input, char *output, int output_size);
};
#endif //ARGON2_BASE64_H

View file

@ -0,0 +1,30 @@
//
// Created by Haifa Bogdan Adnan on 30/05/2019.
//
#include "crypto/argon2_hasher/common/DLLExport.h"
#include "../common/common.h"
#include "hex.h"
// Writes the upper-case hexadecimal representation of input_size bytes into output,
// two characters per byte (high nibble first), followed by a terminating NUL.
// The output buffer therefore needs room for 2 * input_size + 1 characters.
void hex::encode(const unsigned char *input, int input_size, char *output) {
    static const char digits[] = "0123456789ABCDEF";
    for (int remaining = input_size; remaining > 0; --remaining, ++input) {
        *(output++) = digits[*input >> 4];   // high nibble
        *(output++) = digits[*input & 0x0f]; // low nibble
    }
    *output = 0;
}
// Converts one hexadecimal digit to its value. Lower-case 'a'..'f' are folded to upper
// case first; any other character goes through the same arithmetic as upper-case digits
// (characters outside 0-9/A-F are not validated).
static inline unsigned char hex_digit_value(char c) {
    if (c >= 'a' && c <= 'f') {
        c = static_cast<char>(c - 'a' + 'A');
    }
    unsigned char value = c - '0';
    if (value > 9) {
        value -= 7; // gap between '9' and 'A'
    }
    return value;
}

// Decodes the NUL-terminated hexadecimal string `input` into `output`.
// Only complete digit pairs are decoded; a trailing unpaired digit is ignored.
// Returns the number of bytes written (strlen(input) / 2), or -1 without writing
// anything when that many bytes do not fit in output_size.
int hex::decode(const char *input, unsigned char *output, int output_size) {
    const size_t in_len = strlen(input);
    const size_t pairs = in_len / 2;
    if (output_size < 0 || pairs > static_cast<size_t>(output_size)) {
        return -1;
    }
    for (size_t p = 0; p < pairs; ++p) {
        const unsigned char hi = hex_digit_value(input[2 * p]);
        const unsigned char lo = hex_digit_value(input[2 * p + 1]);
        *(output++) = (hi << 4) + lo; // <<4 multiplies by 16
    }
    return static_cast<int>(pairs);
}

View file

@ -0,0 +1,14 @@
//
// Created by Haifa Bogdan Adnan on 30/05/2019.
//
#ifndef ARGON2_HEX_H
#define ARGON2_HEX_H
// Static-only helper for hexadecimal conversion.
class DLLEXPORT hex {
public:
// Encodes input_size bytes of input as hexadecimal text into output.
static void encode(const unsigned char *input, int input_size, char *output);
// Decodes the C string input into output; output_size is the capacity of output in bytes.
static int decode(const char *input, unsigned char *output, int output_size);
};
#endif //ARGON2_HEX_H

View file

@ -0,0 +1,27 @@
//
// Created by Haifa Bogdan Adnan on 17/08/2018.
//
#include "crypto/argon2_hasher/common/DLLExport.h"
#include "../common/common.h"
#include "random_generator.h"
// Seeds the Mersenne Twister engine with one value drawn from the random device and
// sets up the distribution to produce integers in [0, 255].
random_generator::random_generator()
    : __mt19937Gen(__randomDevice()),
      __mt19937Distr(0, 255)
{
}
// Returns a reference to the single static random_generator object (__instance).
random_generator &random_generator::instance() {
return __instance;
}
// Fills `length` bytes of buffer with values drawn from the [0, 255] distribution.
// Note: the __thread_lock mutex is intentionally not taken here (locking is disabled),
// so calls are not serialized by this function.
void random_generator::get_random_data(unsigned char *buffer, int length) {
    // __thread_lock.lock();
    unsigned char *cursor = buffer;
    for (int remaining = length; remaining > 0; --remaining) {
        *cursor++ = (unsigned char)__mt19937Distr(__mt19937Gen);
    }
    // __thread_lock.unlock();
}
// Storage for the static instance handed out by random_generator::instance().
random_generator random_generator::__instance;

View file

@ -0,0 +1,24 @@
//
// Created by Haifa Bogdan Adnan on 17/08/2018.
//
#ifndef ARGON2_RANDOM_GENERATOR_H
#define ARGON2_RANDOM_GENERATOR_H
// Source of random bytes backed by a Mersenne Twister engine; one static instance is
// reachable through instance().
// NOTE(review): member names containing a double underscore are reserved identifiers in C++.
class DLLEXPORT random_generator {
public:
random_generator();
// Accessor for the static instance.
static random_generator &instance();
// Fills buffer with length random bytes.
void get_random_data(unsigned char *buffer, int length);
private:
random_device __randomDevice; // provides the engine seed
mt19937 __mt19937Gen; // pseudo-random engine
uniform_int_distribution<> __mt19937Distr; // maps engine output to the configured range
mutex __thread_lock; // NOTE(review): its use in get_random_data is commented out
static random_generator __instance;
};
#endif //ARGON2_RANDOM_GENERATOR_H

View file

@ -0,0 +1,152 @@
#include "crypto/argon2_hasher/common/DLLExport.h"
#include <cstring>
#include <fstream>
#include "sha512.h"
// The 80 64-bit round constants added in each round of SHA512::transform().
const unsigned long long SHA512::sha512_k[80] = //ULL = uint64
{0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL};
// Runs the compression function over block_nb consecutive 128-byte blocks starting at
// message, folding each block into the chaining state m_h.
void SHA512::transform(const unsigned char *message, unsigned int block_nb)
{
uint64 w[80]; // message schedule
uint64 wv[8]; // working variables for the current block
uint64 t1, t2;
const unsigned char *sub_block;
int i, j;
for (i = 0; i < (int) block_nb; i++) {
// i << 7 == i * 128: byte offset of block i.
sub_block = message + (i << 7);
// Load the 16 big-endian 64-bit words of the block.
for (j = 0; j < 16; j++) {
SHA2_PACK64(&sub_block[j << 3], &w[j]);
}
// Expand them to the 80-entry schedule.
for (j = 16; j < 80; j++) {
w[j] = SHA512_F4(w[j - 2]) + w[j - 7] + SHA512_F3(w[j - 15]) + w[j - 16];
}
// Start from the current chaining state.
for (j = 0; j < 8; j++) {
wv[j] = m_h[j];
}
// 80 rounds; each round rotates the working variables by one position.
for (j = 0; j < 80; j++) {
t1 = wv[7] + SHA512_F2(wv[4]) + SHA2_CH(wv[4], wv[5], wv[6])
+ sha512_k[j] + w[j];
t2 = SHA512_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]);
wv[7] = wv[6];
wv[6] = wv[5];
wv[5] = wv[4];
wv[4] = wv[3] + t1;
wv[3] = wv[2];
wv[2] = wv[1];
wv[1] = wv[0];
wv[0] = t1 + t2;
}
// Add the round result back into the chaining state.
for (j = 0; j < 8; j++) {
m_h[j] += wv[j];
}
}
}
void SHA512::init()
{
m_h[0] = 0x6a09e667f3bcc908ULL;
m_h[1] = 0xbb67ae8584caa73bULL;
m_h[2] = 0x3c6ef372fe94f82bULL;
m_h[3] = 0xa54ff53a5f1d36f1ULL;
m_h[4] = 0x510e527fade682d1ULL;
m_h[5] = 0x9b05688c2b3e6c1fULL;
m_h[6] = 0x1f83d9abfb41bd6bULL;
m_h[7] = 0x5be0cd19137e2179ULL;
m_len = 0;
m_tot_len = 0;
}
// Feeds len bytes of message into the hash.
// Bytes are first appended to the partially filled m_block; once a full block is
// available it is compressed, then every further complete block is compressed straight
// from message, and the leftover bytes are kept in m_block for the next call.
void SHA512::update(const unsigned char *message, unsigned int len)
{
unsigned int block_nb;
unsigned int new_len, rem_len, tmp_len;
const unsigned char *shifted_message;
// tmp_len: free space left in the current block; rem_len: bytes copied into it now.
tmp_len = SHA384_512_BLOCK_SIZE - m_len;
rem_len = len < tmp_len ? len : tmp_len;
memcpy(&m_block[m_len], message, rem_len);
// Still no complete block: just remember how many bytes are buffered.
if (m_len + len < SHA384_512_BLOCK_SIZE) {
m_len += len;
return;
}
// new_len: bytes of message remaining after topping up the buffered block.
new_len = len - rem_len;
block_nb = new_len / SHA384_512_BLOCK_SIZE;
shifted_message = message + rem_len;
transform(m_block, 1);
transform(shifted_message, block_nb);
// Keep the trailing partial block (block_nb << 7 == block_nb * 128 bytes consumed).
rem_len = new_len % SHA384_512_BLOCK_SIZE;
memcpy(m_block, &shifted_message[block_nb << 7], rem_len);
m_len = rem_len;
// Account for the buffered block plus the block_nb blocks processed directly.
m_tot_len += (block_nb + 1) << 7;
}
// Pads the buffered data, processes the final block(s) and writes the 64-byte digest
// (the eight m_h words, big-endian) to digest.
void SHA512::final(unsigned char *digest)
{
unsigned int block_nb;
unsigned int pm_len;
unsigned int len_b;
int i;
// Two padding blocks are used when more than BLOCK_SIZE - 17 bytes are buffered,
// otherwise one.
block_nb = 1 + ((SHA384_512_BLOCK_SIZE - 17)
< (m_len % SHA384_512_BLOCK_SIZE));
// Total message length in bits, held in a 32-bit unsigned value.
len_b = (m_tot_len + m_len) << 3;
pm_len = block_nb << 7;
// Zero the padding area, then set the leading 0x80 marker byte.
memset(m_block + m_len, 0, pm_len - m_len);
m_block[m_len] = 0x80;
// Only the last 4 bytes of the length field are written; the rest stays zero.
SHA2_UNPACK32(len_b, m_block + pm_len - 4);
transform(m_block, block_nb);
for (i = 0 ; i < 8; i++) {
SHA2_UNPACK64(m_h[i], &digest[i << 3]);
}
}
// One-shot helper: hashes `length` bytes of `input` and returns the digest in a newly
// malloc()-ed buffer of SHA512::DIGEST_SIZE bytes. The caller owns the buffer and must
// release it with free(). Returns NULL when the buffer cannot be allocated.
// Note: update() takes an unsigned int length, so `length` is narrowed to that type.
unsigned char *SHA512::hash(unsigned char *input, size_t length)
{
    unsigned char *digest = (unsigned char*)malloc(SHA512::DIGEST_SIZE);
    if (digest == NULL) {
        // Allocation failed: do not touch the null pointer.
        return NULL;
    }
    memset(digest, 0, SHA512::DIGEST_SIZE);
    SHA512 ctx = SHA512();
    ctx.init();
    ctx.update(input, length);
    ctx.final(digest);
    return digest;
}

View file

@ -0,0 +1,70 @@
#ifndef SHA512_H
#define SHA512_H
#include <string>
// Incremental SHA-512 context: call init(), then update() any number of times, then
// final(); hash() wraps that sequence for a single buffer.
class DLLEXPORT SHA512
{
protected:
typedef unsigned char uint8;
typedef unsigned int uint32;
typedef unsigned long long uint64;
// Round constants, defined in the implementation file.
const static uint64 sha512_k[];
// Block size in bytes (1024 bits).
static const unsigned int SHA384_512_BLOCK_SIZE = (1024/8);
public:
void init();
void update(const unsigned char *message, unsigned int len);
void final(unsigned char *digest);
// Digest size in bytes (512 bits).
static const unsigned int DIGEST_SIZE = ( 512 / 8);
static unsigned char *hash(unsigned char *input, size_t length);
protected:
void transform(const unsigned char *message, unsigned int block_nb);
unsigned int m_tot_len; // bytes already compressed
unsigned int m_len; // bytes currently buffered in m_block
unsigned char m_block[2 * SHA384_512_BLOCK_SIZE]; // room for two blocks (used by final padding)
uint64 m_h[8]; // chaining state
};
// Shift/rotate and boolean helper macros used by SHA512::transform().
// NOTE(review): macro arguments are not parenthesized in the expansions, so only simple
// operands should be passed.
// Logical right shift of x by n bits.
#define SHA2_SHFR(x, n) (x >> n)
// Rotate x right / left by n bits; the width is taken from sizeof(x).
#define SHA2_ROTR(x, n) ((x >> n) | (x << ((sizeof(x) << 3) - n)))
#define SHA2_ROTL(x, n) ((x << n) | (x >> ((sizeof(x) << 3) - n)))
// Bitwise choose: bits of y where x is set, bits of z where x is clear.
#define SHA2_CH(x, y, z) ((x & y) ^ (~x & z))
// Bitwise majority of x, y and z.
#define SHA2_MAJ(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
// F1/F2 are applied to working variables in the round loop; F3/F4 are applied in the
// message-schedule expansion.
#define SHA512_F1(x) (SHA2_ROTR(x, 28) ^ SHA2_ROTR(x, 34) ^ SHA2_ROTR(x, 39))
#define SHA512_F2(x) (SHA2_ROTR(x, 14) ^ SHA2_ROTR(x, 18) ^ SHA2_ROTR(x, 41))
#define SHA512_F3(x) (SHA2_ROTR(x, 1) ^ SHA2_ROTR(x, 8) ^ SHA2_SHFR(x, 7))
#define SHA512_F4(x) (SHA2_ROTR(x, 19) ^ SHA2_ROTR(x, 61) ^ SHA2_SHFR(x, 6))
// Stores the low 32 bits of x at str[0..3], most significant byte first.
// Relies on a type named uint8 being visible at the point of use.
#define SHA2_UNPACK32(x, str) \
{ \
*((str) + 3) = (uint8) ((x) ); \
*((str) + 2) = (uint8) ((x) >> 8); \
*((str) + 1) = (uint8) ((x) >> 16); \
*((str) + 0) = (uint8) ((x) >> 24); \
}
// Stores the 64-bit value x at str[0..7], most significant byte first.
#define SHA2_UNPACK64(x, str) \
{ \
*((str) + 7) = (uint8) ((x) ); \
*((str) + 6) = (uint8) ((x) >> 8); \
*((str) + 5) = (uint8) ((x) >> 16); \
*((str) + 4) = (uint8) ((x) >> 24); \
*((str) + 3) = (uint8) ((x) >> 32); \
*((str) + 2) = (uint8) ((x) >> 40); \
*((str) + 1) = (uint8) ((x) >> 48); \
*((str) + 0) = (uint8) ((x) >> 56); \
}
// Reads eight bytes at str[0..7] as a big-endian value and stores it through the pointer x.
// Relies on a type named uint64 being visible at the point of use.
#define SHA2_PACK64(str, x) \
{ \
*(x) = ((uint64) *((str) + 7) ) \
| ((uint64) *((str) + 6) << 8) \
| ((uint64) *((str) + 5) << 16) \
| ((uint64) *((str) + 4) << 24) \
| ((uint64) *((str) + 3) << 32) \
| ((uint64) *((str) + 2) << 40) \
| ((uint64) *((str) + 1) << 48) \
| ((uint64) *((str) + 0) << 56); \
}
#endif

View file

@ -0,0 +1,132 @@
//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#include "../common/common.h"
#include "../crypt/base64.h"
#include "../crypt/hex.h"
#include "../crypt/random_generator.h"
#include "crypto/argon2_hasher/common/DLLExport.h"
#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
#include "Hasher.h"
// Registry of Hasher instances. Starts as NULL; allocated by the Hasher constructor on
// first use and (re)allocated by loadHashers().
vector<Hasher *> *Hasher::m_registeredHashers = NULL;
// Folder part of the application path; assigned by loadHashers().
string Hasher::m_appFolder = "";
// Signature of the "hasherLoader" symbol that loadHashers() looks up in each module.
typedef void (*hasherLoader)();
// Initializes the descriptive fields to neutral values and adds this instance to the
// static registry, creating the registry first if it does not exist yet.
Hasher::Hasher()
    : m_intensity(0),
      m_type(""),
      m_subType(""),
      m_shortSubType(""),
      m_description(""),
      m_computingThreads(1)
{
    if (m_registeredHashers == NULL) {
        m_registeredHashers = new vector<Hasher*>();
    }
    m_registeredHashers->push_back(this);
}
// Nothing to release here; the instance is not removed from the static registry.
Hasher::~Hasher()
{
}
// Returns a copy of the hasher type string (m_type).
string Hasher::type() {
return m_type;
}
// Returns the hasher sub-type.
// With shortName == true and a non-empty short sub-type, returns at most the first three
// characters of m_shortSubType; otherwise returns the full m_subType.
string Hasher::subType(bool shortName) {
    if (!shortName || m_shortSubType.empty()) {
        return m_subType;
    }
    // substr clamps the length, so short names of 1 or 2 characters are returned as-is
    // (string::erase(3) would throw std::out_of_range for them).
    return m_shortSubType.substr(0, 3);
}
// Returns a copy of the hasher description string (m_description).
string Hasher::info() {
return m_description;
}
// Returns m_computingThreads (set to 1 by the constructor).
int Hasher::computingThreads() {
return m_computingThreads;
}
// Loads every hasher module found in "<app folder>/modules/".
// The app folder is the directory part of appPath ("." when appPath has no directory
// part); it is also stored in m_appFolder. Each file whose name contains ".hsh" is opened
// with dlopen and its "hasherLoader" entry point is invoked. Modules that cannot be
// opened, or that do not export that symbol, are skipped.
// NOTE(review): the registry is replaced with a fresh vector on every call, so hashers
// registered earlier are no longer listed — confirm this reset is intended.
void Hasher::loadHashers(const string &appPath) {
    m_registeredHashers = new vector<Hasher*>();

    string modulePath = ".";
    size_t lastSlash = appPath.find_last_of("/\\");
    if (lastSlash != string::npos) {
        modulePath = appPath.substr(0, lastSlash);
        if (modulePath.empty()) {
            modulePath = ".";
        }
    }
    m_appFolder = modulePath;
    modulePath += "/modules/";

    vector<string> files = getFiles(modulePath);
    for (const string &file : files) {
        if (file.find(".hsh") == string::npos) {
            continue;
        }
        void *dllHandle = dlopen((modulePath + file).c_str(), RTLD_LAZY);
        if (dllHandle == NULL) {
            continue;
        }
        hasherLoader hasherLoaderPtr = (hasherLoader) dlsym(dllHandle, "hasherLoader");
        if (hasherLoaderPtr == NULL) {
            // Not a hasher module: calling a null entry point would crash, so unload it.
            dlclose(dllHandle);
            continue;
        }
        (*hasherLoaderPtr)();
    }
}
// Returns a copy of the list of all registered hashers.
// Returns an empty list when the registry has not been created yet.
vector<Hasher *> Hasher::getHashers() {
    if (m_registeredHashers == NULL) {
        return vector<Hasher *>();
    }
    return *m_registeredHashers;
}
// Returns the registered hashers whose m_intensity is non-zero.
// Returns an empty list when the registry has not been created yet.
vector<Hasher *> Hasher::getActiveHashers() {
    vector<Hasher *> filtered;
    if (m_registeredHashers == NULL) {
        return filtered;
    }
    for (Hasher *hasher : *m_registeredHashers) {
        if (hasher->m_intensity != 0) {
            filtered.push_back(hasher);
        }
    }
    return filtered;
}
// Returns the registered hashers whose m_type equals `type`.
// Returns an empty list when the registry has not been created yet.
vector<Hasher *> Hasher::getHashers(const string &type) {
    vector<Hasher *> filtered;
    if (m_registeredHashers == NULL) {
        return filtered;
    }
    for (Hasher *hasher : *m_registeredHashers) {
        if (hasher->m_type == type) {
            filtered.push_back(hasher);
        }
    }
    return filtered;
}
// Returns a reference to the device-info map keyed by device id.
// Note: the reference is handed out without taking m_deviceInfosMutex.
map<int, DeviceInfo> &Hasher::devices() {
return m_deviceInfos;
}
// Stores (or replaces) the info record for deviceId while holding m_deviceInfosMutex.
void Hasher::storeDeviceInfo(int deviceId, DeviceInfo device) {
    // Scoped lock: the mutex is released even if the map insertion throws.
    std::lock_guard<std::mutex> guard(m_deviceInfosMutex);
    m_deviceInfos[deviceId] = device;
}
// Maps an (algorithm, variant) pair to its precomputed Argon2 profile:
// ARGON2 + VARIANT_CHUKWA -> argon2profile_3_1_512,
// ARGON2 + VARIANT_CHUKWA_LITE -> argon2profile_4_1_256,
// anything else -> nullptr.
Argon2Profile *Hasher::getArgon2Profile(xmrig::Algo algorithm, xmrig::Variant variant) {
    if (algorithm != xmrig::ARGON2) {
        return nullptr;
    }
    if (variant == xmrig::VARIANT_CHUKWA) {
        return &argon2profile_3_1_512;
    }
    if (variant == xmrig::VARIANT_CHUKWA_LITE) {
        return &argon2profile_4_1_256;
    }
    return nullptr;
}

View file

@ -0,0 +1,63 @@
//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#ifndef ARGON2_HASHER_H
#define ARGON2_HASHER_H
#include "crypto/argon2_hasher/hash/argon2/Defs.h"
#include "../../../core/HasherConfig.h"
#include "../../../common/xmrig.h"
// Per-device record kept by Hasher in a map keyed by device id.
struct DeviceInfo {
string name;
string bus_id;
double intensity; // not initialized by this struct; callers must set it
};
// Defines the extern "C" entry point hasherLoader(), which heap-allocates one instance of x.
// The pointer is not stored by the macro — presumably x derives from Hasher, whose
// constructor records the instance; confirm for each x.
#define REGISTER_HASHER(x) extern "C" { DLLEXPORT void hasherLoader() { x *instance = new x(); } }
// Abstract base class of all hashers. Concrete hashers implement the pure virtual
// operations; the static members give access to the list of registered hashers.
class DLLEXPORT Hasher {
public:
Hasher();
virtual ~Hasher();
// Operations every concrete hasher must implement.
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant) = 0;
virtual bool configure(xmrig::HasherConfig &config) = 0;
virtual void cleanup() = 0;
virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) = 0;
virtual size_t parallelism(int workerIdx) = 0;
virtual size_t deviceCount() = 0;
// Descriptive accessors.
string type();
string subType(bool shortName = false);
string info();
int computingThreads();
map<int, DeviceInfo> &devices();
// Registry queries: by type, all, and those with non-zero intensity.
static vector<Hasher*> getHashers(const string &type);
static vector<Hasher*> getHashers();
static vector<Hasher*> getActiveHashers();
// Loads hasher modules from the "modules" folder next to the application.
static void loadHashers(const string &appPath);
protected:
double m_intensity;
string m_type;
string m_subType;
string m_shortSubType; //max 3 characters
string m_description;
int m_computingThreads;
static string m_appFolder;
void storeDeviceInfo(int deviceId, DeviceInfo device);
Argon2Profile *getArgon2Profile(xmrig::Algo algorithm, xmrig::Variant variant);
private:
static vector<Hasher*> *m_registeredHashers;
map<int, DeviceInfo> m_deviceInfos;
mutex m_deviceInfosMutex; // taken by storeDeviceInfo
};
#endif //ARGON2_HASHER_H

View file

@ -0,0 +1,143 @@
//
// Created by Haifa Bogdan Adnan on 05/08/2018.
//
#include "../../common/common.h"
#include "../../crypt/base64.h"
#include "../../crypt/hex.h"
#include "../../crypt/random_generator.h"
#include "blake2/blake2.h"
#include "../../common/DLLExport.h"
#include "../../../Argon2_constants.h"
#include "Argon2.h"
#include "Defs.h"
// Stores the three stage callbacks and the user data pointer. `memory` is used both as
// the seed memory and as the initial output memory; the thread count starts at 1.
Argon2::Argon2(argon2BlocksPrehash prehash, argon2BlocksFillerPtr filler, argon2BlocksPosthash posthash, void *memory, void *userData)
    : m_prehash(prehash),
      m_filler(filler),
      m_posthash(posthash),
      m_threads(1),
      m_seedMemory((uint8_t*)memory),
      m_outputMemory((uint8_t*)memory),
      m_userData(userData)
{
}
// Runs the full pipeline: seed initialization, block filling, hash encoding.
// Returns the result of encodeHashes(), or 0 as soon as one of the first two stages fails.
int Argon2::generateHashes(const Argon2Profile &profile, HashData &hashData) {
    if (!initializeSeeds(profile, hashData)) {
        return 0;
    }
    if (!fillBlocks(profile)) {
        return 0;
    }
    return encodeHashes(profile, hashData);
}
bool Argon2::initializeSeeds(const Argon2Profile &profile, HashData &hashData) {
if(m_prehash != NULL) {
return (*m_prehash)(hashData.input, m_threads, (Argon2Profile*)&profile, m_userData);
}
else {
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
for (int i = 0; i < m_threads; i++, (*(nonce(hashData)))++) {
initialHash(profile, blockhash, (char *) hashData.input, hashData.inSize, xmrig::ARGON2_HASHLEN);
memset(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0,
ARGON2_PREHASH_SEED_LENGTH -
ARGON2_PREHASH_DIGEST_LENGTH);
fillFirstBlocks(profile, blockhash, i);
}
return true;
}
}
bool Argon2::fillBlocks(const Argon2Profile &profile) {
m_outputMemory = (uint8_t *)(*m_filler) (m_threads, (Argon2Profile*)&profile, m_userData);
return m_outputMemory != NULL;
}
// Produces the final hashes and returns how many were written (m_threads), or 0 on failure.
// When a posthash callback is set it does the work. Otherwise each thread's output block
// is hashed with blake2b_long into its output slot, and the 4-byte nonce used for that
// thread is stored right after the hash.
int Argon2::encodeHashes(const Argon2Profile &profile, HashData &hashData) {
    if (m_posthash != NULL) {
        const bool encoded = (*m_posthash)(hashData.output, m_threads, (Argon2Profile*)&profile, m_userData);
        return encoded ? m_threads : 0;
    }

    if (m_outputMemory == NULL) {
        return 0;
    }

    // The nonce in the input is already past the batch; rewind to the first one used.
    uint32_t nonceInfo = *(nonce(hashData)) - m_threads;
    for (int i = 0; i < m_threads; i++, nonceInfo++) {
        uint8_t *slot = hashData.output + i * hashData.outSize;
        blake2b_long((void *) slot, xmrig::ARGON2_HASHLEN,
                     (void *) (m_outputMemory + i * profile.memSize), ARGON2_BLOCK_SIZE);
        memcpy(slot + xmrig::ARGON2_HASHLEN, &nonceInfo, 4);
    }
    return m_threads;
}
void Argon2::initialHash(const Argon2Profile &profile, uint8_t *blockhash, const char *data, size_t dataSz,size_t outSz) {
blake2b_state BlakeHash;
uint32_t value;
blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
value = profile.thrCost;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
value = outSz;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
value = profile.memCost;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
value = profile.tmCost;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
value = ARGON2_VERSION;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
value = ARGON2_TYPE_VALUE;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
value = (uint32_t)dataSz;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
blake2b_update(&BlakeHash, (const uint8_t *)data, dataSz);
value = xmrig::ARGON2_SALTLEN;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
blake2b_update(&BlakeHash, (const uint8_t *)data, xmrig::ARGON2_SALTLEN);
value = 0;
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
}
// Generates the first two blocks of every lane in the memory area of `thread`.
// For each lane l the 8 bytes after the digest in blockhash are set to
// (block index, lane index) — block index 0, then 1 — and blake2b_long expands the
// resulting seed into the corresponding block.
void Argon2::fillFirstBlocks(const Argon2Profile &profile, uint8_t *blockhash, int thread) {
    block *blocks = (block *)(m_seedMemory + thread * profile.memSize);
    size_t lane_length = profile.memCost / profile.thrCost;

    uint8_t *const blockIdxField = blockhash + ARGON2_PREHASH_DIGEST_LENGTH;
    uint8_t *const laneIdxField = blockIdxField + 4;

    for (uint32_t l = 0; l < profile.thrCost; ++l) {
        // memcpy instead of storing through a casted uint32_t pointer: same bytes, but no
        // alignment or aliasing assumptions about the byte buffer.
        uint32_t blockIdx = 0;
        memcpy(blockIdxField, &blockIdx, sizeof(blockIdx));
        memcpy(laneIdxField, &l, sizeof(l));
        blake2b_long((void *)(blocks + l * lane_length), ARGON2_BLOCK_SIZE, blockhash,
                     ARGON2_PREHASH_SEED_LENGTH);

        blockIdx = 1;
        memcpy(blockIdxField, &blockIdx, sizeof(blockIdx));
        blake2b_long((void *)(blocks + l * lane_length + 1), ARGON2_BLOCK_SIZE, blockhash,
                     ARGON2_PREHASH_SEED_LENGTH);
    }
}
// Sets the number of threads (hashes per batch) used by the seed and encode stages.
void Argon2::setThreads(int threads) {
m_threads = threads;
}

View file

@ -0,0 +1,56 @@
//
// Created by Haifa Bogdan Adnan on 05/08/2018.
//
#ifndef ARIOMINER_ARGON2_H
#define ARIOMINER_ARGON2_H
#include "Defs.h"
#include "crypto/argon2_hasher/hash/Hasher.h"
// Callback types for the three pipeline stages. In Argon2.cpp they are invoked with the
// thread count, the profile and the user data pointer; prehash additionally receives the
// input buffer and posthash the output buffer as first argument.
typedef bool (*argon2BlocksPrehash)(void *, int, Argon2Profile *, void *); // data_memory
typedef void *(*argon2BlocksFillerPtr)(int, Argon2Profile *, void *);
typedef bool (*argon2BlocksPosthash)(void *, int, Argon2Profile *, void *); // raw_hash_mem
// Input and output buffers handed to Argon2::generateHashes().
struct HashData {
uint8_t *input; // data to hash; Argon2::nonce() addresses a 32-bit value at offset 39
uint8_t *output; // receives one slot of outSize bytes per thread
size_t inSize; // number of input bytes
size_t outSize; // stride in bytes between consecutive output slots
};
// Drives an Argon2 computation in three stages (seed initialization, block filling, hash
// encoding), each of which may be supplied as a callback.
class DLLEXPORT Argon2 {
public:
Argon2(argon2BlocksPrehash prehash, argon2BlocksFillerPtr filler, argon2BlocksPosthash posthash, void *memory, void *userData);
// Runs all three stages in order.
int generateHashes(const Argon2Profile &profile, HashData &hashData);
// The individual stages.
bool initializeSeeds(const Argon2Profile &profile, HashData &hashData);
bool fillBlocks(const Argon2Profile &profile);
int encodeHashes(const Argon2Profile &profile, HashData &hashData);
void setThreads(int threads);
private:
void initialHash(const Argon2Profile &profile, uint8_t *blockhash, const char *data, size_t dataSz, size_t outSz);
void fillFirstBlocks(const Argon2Profile &profile, uint8_t *blockhash, int thread);
// Returns a pointer to the 32-bit nonce located 39 bytes into the input buffer.
// NOTE(review): offset 39 is not 4-byte aligned relative to the buffer start.
inline uint32_t *nonce(HashData &hashData)
{
return reinterpret_cast<uint32_t*>(hashData.input + 39);
}
argon2BlocksPrehash m_prehash;
argon2BlocksFillerPtr m_filler;
argon2BlocksPosthash m_posthash;
int m_threads;
uint8_t *m_seedMemory;
uint8_t *m_outputMemory;
void *m_userData;
};
#endif //ARIOMINER_ARGON2_H

View file

@ -0,0 +1,50 @@
//
// Created by Haifa Bogdan Adnan on 06/08/2018.
//
#ifndef ARIOMINER_DEFS_H
#define ARIOMINER_DEFS_H
/* Basic Argon2 constants. */
#define ARGON2_RAW_LENGTH 32
#define ARGON2_TYPE_VALUE 2
#define ARGON2_VERSION 0x13
/* Size of one memory block in bytes. */
#define ARGON2_BLOCK_SIZE 1024
/* Number of N-byte words per block. The expansions are parenthesized so that they behave
   as single values inside larger expressions (e.g. x % ARGON2_QWORDS_IN_BLOCK). */
#define ARGON2_DWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 4)
#define ARGON2_QWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 8)
#define ARGON2_OWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 16)
#define ARGON2_HWORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 32)
#define ARGON2_512BIT_WORDS_IN_BLOCK (ARGON2_BLOCK_SIZE / 64)
/* Length of the pre-hash digest, and of the seed buffer (digest plus 8 extra bytes). */
#define ARGON2_PREHASH_DIGEST_LENGTH 64
#define ARGON2_PREHASH_SEED_LENGTH 72
#ifdef __cplusplus
extern "C" {
#endif
/* One Argon2 memory block, viewed as an array of 64-bit words. */
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
/* Parameter set for one Argon2 configuration together with its precomputed index tables. */
typedef struct Argon2Profile_ {
uint32_t memCost; /* memory cost; memCost / thrCost is used as the lane length in blocks */
uint32_t thrCost; /* number of lanes */
uint32_t tmCost; /* time cost */
size_t memSize; /* bytes of memory per hash; used as the per-thread stride */
int32_t *blockRefs; /* table of int32 triples */
size_t blockRefsSize; /* number of triples in blockRefs */
char profileName[15];
int32_t *segments; // { start segment / current block, stop segment (excluding) / previous block, addressing type = 0 -> i, 1 -> d }
uint32_t segSize;
uint32_t segCount;
uint32_t succesiveIdxs; // 0 - idx are precalculated, 1 - idx are successive
int pwdLen; // in dwords
int saltLen; // in dwords
} Argon2Profile;
/* The two profile instances declared for users of this header. */
extern DLLEXPORT Argon2Profile argon2profile_3_1_512;
extern DLLEXPORT Argon2Profile argon2profile_4_1_256;
#ifdef __cplusplus
}
#endif
#endif //ARIOMINER_DEFS_H

View file

@ -0,0 +1,292 @@
#include <stdint.h>
#include <stddef.h>
#include "../../common/DLLExport.h"
#include "Defs.h"
/* Precomputed table of int32 triples for profile 3_1_512, one row per triple.
   The first value runs from 2 to 255 and the third is always 1; presumably the columns
   are (block index, referenced block index, flag) — confirm against the code consuming
   Argon2Profile::blockRefs. */
int32_t blocks_refs_3_1_512[] = {
2, 0, 1,
3, 1, 1,
4, 2, 1,
5, 3, 1,
6, 3, 1,
7, 3, 1,
8, 2, 1,
9, 5, 1,
10, 0, 1,
11, 9, 1,
12, 10, 1,
13, 9, 1,
14, 12, 1,
15, 8, 1,
16, 5, 1,
17, 15, 1,
18, 10, 1,
19, 14, 1,
20, 7, 1,
21, 19, 1,
22, 14, 1,
23, 7, 1,
24, 14, 1,
25, 23, 1,
26, 24, 1,
27, 0, 1,
28, 9, 1,
29, 11, 1,
30, 12, 1,
31, 29, 1,
32, 12, 1,
33, 23, 1,
34, 30, 1,
35, 1, 1,
36, 32, 1,
37, 8, 1,
38, 30, 1,
39, 31, 1,
40, 15, 1,
41, 38, 1,
42, 29, 1,
43, 18, 1,
44, 33, 1,
45, 18, 1,
46, 39, 1,
47, 43, 1,
48, 40, 1,
49, 38, 1,
50, 5, 1,
51, 47, 1,
52, 14, 1,
53, 45, 1,
54, 30, 1,
55, 13, 1,
56, 47, 1,
57, 30, 1,
58, 21, 1,
59, 18, 1,
60, 36, 1,
61, 58, 1,
62, 58, 1,
63, 19, 1,
64, 59, 1,
65, 29, 1,
66, 10, 1,
67, 48, 1,
68, 39, 1,
69, 25, 1,
70, 63, 1,
71, 57, 1,
72, 70, 1,
73, 16, 1,
74, 20, 1,
75, 72, 1,
76, 67, 1,
77, 61, 1,
78, 49, 1,
79, 63, 1,
80, 9, 1,
81, 19, 1,
82, 80, 1,
83, 36, 1,
84, 20, 1,
85, 23, 1,
86, 52, 1,
87, 85, 1,
88, 75, 1,
89, 18, 1,
90, 85, 1,
91, 2, 1,
92, 81, 1,
93, 91, 1,
94, 91, 1,
95, 3, 1,
96, 45, 1,
97, 16, 1,
98, 11, 1,
99, 60, 1,
100, 89, 1,
101, 65, 1,
102, 39, 1,
103, 63, 1,
104, 66, 1,
105, 74, 1,
106, 54, 1,
107, 88, 1,
108, 106, 1,
109, 107, 1,
110, 47, 1,
111, 8, 1,
112, 95, 1,
113, 66, 1,
114, 1, 1,
115, 2, 1,
116, 20, 1,
117, 110, 1,
118, 47, 1,
119, 117, 1,
120, 114, 1,
121, 37, 1,
122, 71, 1,
123, 51, 1,
124, 122, 1,
125, 44, 1,
126, 92, 1,
127, 120, 1,
128, 123, 1,
129, 127, 1,
130, 11, 1,
131, 110, 1,
132, 93, 1,
133, 20, 1,
134, 58, 1,
135, 13, 1,
136, 73, 1,
137, 27, 1,
138, 94, 1,
139, 110, 1,
140, 96, 1,
141, 57, 1,
142, 137, 1,
143, 116, 1,
144, 119, 1,
145, 141, 1,
146, 73, 1,
147, 26, 1,
148, 103, 1,
149, 125, 1,
150, 146, 1,
151, 149, 1,
152, 28, 1,
153, 149, 1,
154, 125, 1,
155, 104, 1,
156, 61, 1,
157, 128, 1,
158, 156, 1,
159, 122, 1,
160, 96, 1,
161, 92, 1,
162, 160, 1,
163, 154, 1,
164, 88, 1,
165, 160, 1,
166, 134, 1,
167, 116, 1,
168, 23, 1,
169, 167, 1,
170, 100, 1,
171, 169, 1,
172, 169, 1,
173, 127, 1,
174, 0, 1,
175, 78, 1,
176, 155, 1,
177, 124, 1,
178, 138, 1,
179, 41, 1,
180, 156, 1,
181, 173, 1,
182, 122, 1,
183, 173, 1,
184, 112, 1,
185, 15, 1,
186, 183, 1,
187, 171, 1,
188, 163, 1,
189, 85, 1,
190, 45, 1,
191, 171, 1,
192, 139, 1,
193, 188, 1,
194, 192, 1,
195, 78, 1,
196, 5, 1,
197, 187, 1,
198, 180, 1,
199, 195, 1,
200, 102, 1,
201, 89, 1,
202, 165, 1,
203, 144, 1,
204, 171, 1,
205, 152, 1,
206, 53, 1,
207, 19, 1,
208, 206, 1,
209, 165, 1,
210, 208, 1,
211, 76, 1,
212, 177, 1,
213, 189, 1,
214, 43, 1,
215, 120, 1,
216, 122, 1,
217, 189, 1,
218, 45, 1,
219, 217, 1,
220, 207, 1,
221, 202, 1,
222, 169, 1,
223, 194, 1,
224, 213, 1,
225, 178, 1,
226, 175, 1,
227, 221, 1,
228, 212, 1,
229, 220, 1,
230, 227, 1,
231, 30, 1,
232, 34, 1,
233, 91, 1,
234, 231, 1,
235, 154, 1,
236, 100, 1,
237, 166, 1,
238, 216, 1,
239, 229, 1,
240, 177, 1,
241, 123, 1,
242, 172, 1,
243, 71, 1,
244, 241, 1,
245, 236, 1,
246, 109, 1,
247, 4, 1,
248, 246, 1,
249, 166, 1,
250, 248, 1,
251, 243, 1,
252, 248, 1,
253, 39, 1,
254, 98, 1,
255, 253, 1
};
/* Segment table for profile 3_1_512: 12 rows of three values each. */
int32_t segments_3_1_512[] = { // current_idx, previous_idx, seg_type 0=i 1=d
2, 1, 0,
128, 127, 0,
256, 255, 1,
384, 383, 1,
0, 511, 1,
128, 127, 1,
256, 255, 1,
384, 383, 1,
0, 511, 1,
128, 127, 1,
256, 255, 1,
384, 383, 1
};
/* Profile "3_1_512"; values are listed in the field order of Argon2Profile. */
DLLEXPORT Argon2Profile argon2profile_3_1_512 = {
512, /* memCost */
1, /* thrCost */
3, /* tmCost */
524288, /* memSize: 512 blocks of 1024 bytes */
blocks_refs_3_1_512, /* blockRefs */
sizeof(blocks_refs_3_1_512) / (3 * sizeof(int32_t)), /* blockRefsSize: number of triples */
"3_1_512", /* profileName */
segments_3_1_512, /* segments */
128, /* segSize */
12, /* segCount */
1, /* succesiveIdxs */
32, /* pwdLen, in dwords */
4 /* saltLen, in dwords */
};

View file

@ -0,0 +1,168 @@
#include <stdint.h>
#include <stddef.h>
#include "../../common/DLLExport.h"
#include "Defs.h"
/* Precomputed table of int32 triples for profile 4_1_256, one row per triple.
   The first value runs from 2 to 127 and the third is always 1; presumably the columns
   are (block index, referenced block index, flag) — confirm against the code consuming
   Argon2Profile::blockRefs. */
int32_t blocks_refs_4_1_256[] = {
2, 0, 1,
3, 1, 1,
4, 2, 1,
5, 3, 1,
6, 0, 1,
7, 4, 1,
8, 5, 1,
9, 7, 1,
10, 7, 1,
11, 9, 1,
12, 5, 1,
13, 11, 1,
14, 3, 1,
15, 2, 1,
16, 12, 1,
17, 15, 1,
18, 15, 1,
19, 10, 1,
20, 4, 1,
21, 18, 1,
22, 17, 1,
23, 19, 1,
24, 2, 1,
25, 23, 1,
26, 22, 1,
27, 12, 1,
28, 23, 1,
29, 27, 1,
30, 26, 1,
31, 19, 1,
32, 27, 1,
33, 29, 1,
34, 32, 1,
35, 18, 1,
36, 32, 1,
37, 16, 1,
38, 35, 1,
39, 22, 1,
40, 30, 1,
41, 31, 1,
42, 39, 1,
43, 36, 1,
44, 18, 1,
45, 0, 1,
46, 36, 1,
47, 12, 1,
48, 28, 1,
49, 39, 1,
50, 4, 1,
51, 48, 1,
52, 48, 1,
53, 51, 1,
54, 50, 1,
55, 3, 1,
56, 54, 1,
57, 53, 1,
58, 48, 1,
59, 47, 1,
60, 25, 1,
61, 53, 1,
62, 31, 1,
63, 59, 1,
64, 45, 1,
65, 63, 1,
66, 48, 1,
67, 58, 1,
68, 40, 1,
69, 17, 1,
70, 62, 1,
71, 24, 1,
72, 60, 1,
73, 71, 1,
74, 72, 1,
75, 57, 1,
76, 69, 1,
77, 58, 1,
78, 74, 1,
79, 69, 1,
80, 75, 1,
81, 74, 1,
82, 56, 1,
83, 67, 1,
84, 15, 1,
85, 83, 1,
86, 69, 1,
87, 83, 1,
88, 85, 1,
89, 24, 1,
90, 52, 1,
91, 70, 1,
92, 88, 1,
93, 42, 1,
94, 61, 1,
95, 93, 1,
96, 22, 1,
97, 37, 1,
98, 15, 1,
99, 91, 1,
100, 14, 1,
101, 98, 1,
102, 24, 1,
103, 84, 1,
104, 44, 1,
105, 103, 1,
106, 12, 1,
107, 15, 1,
108, 79, 1,
109, 35, 1,
110, 4, 1,
111, 109, 1,
112, 90, 1,
113, 109, 1,
114, 43, 1,
115, 73, 1,
116, 113, 1,
117, 107, 1,
118, 51, 1,
119, 117, 1,
120, 118, 1,
121, 115, 1,
122, 74, 1,
123, 67, 1,
124, 102, 1,
125, 17, 1,
126, 113, 1,
127, 110, 1
};
/* Segment table for profile 4_1_256: 16 rows of three values each. */
int32_t segments_4_1_256[] = { // current_idx, previous_idx, seg_type 0=i 1=d
2, 1, 0,
64, 63, 0,
128, 127, 1,
192, 191, 1,
0, 255, 1,
64, 63, 1,
128, 127, 1,
192, 191, 1,
0, 255, 1,
64, 63, 1,
128, 127, 1,
192, 191, 1,
0, 255, 1,
64, 63, 1,
128, 127, 1,
192, 191, 1
};
/* Profile "4_1_256"; values are listed in the field order of Argon2Profile. */
DLLEXPORT Argon2Profile argon2profile_4_1_256 = {
256, /* memCost */
1, /* thrCost */
4, /* tmCost */
262144, /* memSize: 256 blocks of 1024 bytes */
blocks_refs_4_1_256, /* blockRefs */
sizeof(blocks_refs_4_1_256) / (3 * sizeof(int32_t)), /* blockRefsSize: number of triples */
"4_1_256", /* profileName */
segments_4_1_256, /* segments */
64, /* segSize */
16, /* segCount */
1, /* succesiveIdxs */
32, /* pwdLen, in dwords */
4 /* saltLen, in dwords */
};

View file

@ -0,0 +1,76 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
/* Derives the HAVE_* instruction-set feature macros from the compiler's predefined
   macros and rejects builds without SSE2. */
#ifndef BLAKE2_CONFIG_H
#define BLAKE2_CONFIG_H
/* These don't work everywhere */
/* Direct detection from compiler-provided macros. */
#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) || defined(_M_X64)
#define HAVE_SSE2
#endif
#if defined(__SSSE3__)
#define HAVE_SSSE3
#endif
#if defined(__SSE4_1__)
#define HAVE_SSE41
#endif
#if defined(__AVX__)
#define HAVE_AVX
#endif
#if defined(__AVX2__)
#define HAVE_AVX2
#endif
#if defined(__XOP__)
#define HAVE_XOP
#endif
/* Implication chain: AVX2 or XOP => AVX => SSE4.1 => SSSE3 => SSE2. */
#ifdef HAVE_AVX2
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif
#ifdef HAVE_XOP
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif
#ifdef HAVE_AVX
#ifndef HAVE_SSE41
#define HAVE_SSE41
#endif
#endif
#ifdef HAVE_SSE41
#ifndef HAVE_SSSE3
#define HAVE_SSSE3
#endif
#endif
#ifdef HAVE_SSSE3
#define HAVE_SSE2
#endif
/* SSE2 is the minimum supported feature level. */
#if !defined(HAVE_SSE2)
#error "This code requires at least SSE2."
#endif
#endif

View file

@ -0,0 +1,154 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef PORTABLE_BLAKE2_IMPL_H
#define PORTABLE_BLAKE2_IMPL_H
#include <stdint.h>
#include <string.h>
/* BLAKE2_INLINE: the compiler-specific spelling of "inline" applied to the
   static helpers in this header; expands to nothing on unrecognised compilers. */
#if defined(_MSC_VER)
#define BLAKE2_INLINE __inline
#elif defined(__GNUC__) || defined(__clang__)
#define BLAKE2_INLINE __inline__
#else
#define BLAKE2_INLINE
#endif
/* Argon2 Team - Begin Code */
/*
Defines NATIVE_LITTLE_ENDIAN when the target is recognised as little-endian,
which lets load32/load64/store32/store64 below use a plain memcpy.
Not an exhaustive list, but should cover the majority of modern platforms.
Additionally, the code will always be correct---this is only a performance
tweak: when the macro is not defined, the byte-by-byte path is compiled instead.
*/
#if (defined(__BYTE_ORDER__) && \
(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
defined(_M_ARM)
#define NATIVE_LITTLE_ENDIAN
#endif
/* Argon2 Team - End Code */
/*
 * Reads a 32-bit little-endian value from src.
 * Access is done through memcpy or single-byte reads, so src needs no
 * particular alignment.
 */
static BLAKE2_INLINE uint32_t load32(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
    /* Host byte order already matches: copy the four bytes verbatim. */
    uint32_t value;
    memcpy(&value, src, sizeof value);
    return value;
#else
    /* Assemble the value from its bytes, least significant byte first. */
    const uint8_t *bytes = (const uint8_t *)src;
    uint32_t value = 0;
    unsigned int k;
    for (k = 0; k < 4; ++k) {
        value |= (uint32_t)bytes[k] << (8 * k);
    }
    return value;
#endif
}
/*
 * Reads a 64-bit little-endian value from src.
 * Access is done through memcpy or single-byte reads, so src needs no
 * particular alignment.
 */
static BLAKE2_INLINE uint64_t load64(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
    /* Host byte order already matches: copy the eight bytes verbatim. */
    uint64_t value;
    memcpy(&value, src, sizeof value);
    return value;
#else
    /* Assemble the value from its bytes, least significant byte first. */
    const uint8_t *bytes = (const uint8_t *)src;
    uint64_t value = 0;
    unsigned int k;
    for (k = 0; k < 8; ++k) {
        value |= (uint64_t)bytes[k] << (8 * k);
    }
    return value;
#endif
}
/*
 * Writes the 32-bit value w to dst in little-endian byte order.
 * dst needs no particular alignment.
 */
static BLAKE2_INLINE void store32(void *dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    /* Host byte order already matches: copy the four bytes verbatim. */
    memcpy(dst, &w, sizeof w);
#else
    /* Emit one byte at a time, least significant byte first. */
    uint8_t *out = (uint8_t *)dst;
    unsigned int k;
    for (k = 0; k < 4; ++k) {
        out[k] = (uint8_t)(w >> (8 * k));
    }
#endif
}
/*
 * Writes the 64-bit value w to dst in little-endian byte order.
 * dst needs no particular alignment.
 */
static BLAKE2_INLINE void store64(void *dst, uint64_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    /* Host byte order already matches: copy the eight bytes verbatim. */
    memcpy(dst, &w, sizeof w);
#else
    /* Emit one byte at a time, least significant byte first. */
    uint8_t *out = (uint8_t *)dst;
    unsigned int k;
    for (k = 0; k < 8; ++k) {
        out[k] = (uint8_t)(w >> (8 * k));
    }
#endif
}
/*
 * Reads six bytes from src as a little-endian value and returns them in the
 * low 48 bits of a uint64_t; the top 16 bits of the result are zero.
 * Always uses the byte-by-byte path, regardless of host endianness.
 */
static BLAKE2_INLINE uint64_t load48(const void *src) {
    const uint8_t *bytes = (const uint8_t *)src;
    uint64_t value = 0;
    unsigned int k;
    for (k = 0; k < 6; ++k) {
        value |= (uint64_t)bytes[k] << (8 * k);
    }
    return value;
}
/*
 * Writes the low 48 bits of w to dst as six little-endian bytes.
 * The top 16 bits of w are ignored. Always uses the byte-by-byte path.
 */
static BLAKE2_INLINE void store48(void *dst, uint64_t w) {
    uint8_t *out = (uint8_t *)dst;
    unsigned int k;
    for (k = 0; k < 6; ++k) {
        out[k] = (uint8_t)(w >> (8 * k));
    }
}
/*
 * Rotates the 32-bit word w right by c bits.
 * Both shift counts are reduced modulo 32, so c == 0 (or any multiple of 32)
 * returns w unchanged instead of shifting by the full word width, which is
 * undefined behaviour in C. For 0 < c < 32 the result is identical to the
 * plain (w >> c) | (w << (32 - c)) form.
 */
static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
    return (w >> (c & 31u)) | (w << ((32u - c) & 31u));
}
/*
 * Rotates the 64-bit word w right by c bits.
 * Both shift counts are reduced modulo 64, so c == 0 (or any multiple of 64)
 * returns w unchanged instead of shifting by the full word width, which is
 * undefined behaviour in C. For 0 < c < 64 the result is identical to the
 * plain (w >> c) | (w << (64 - c)) form.
 */
static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
    return (w >> (c & 63u)) | (w << ((64u - c) & 63u));
}
#endif

View file

@ -0,0 +1,90 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef PORTABLE_BLAKE2_H
#define PORTABLE_BLAKE2_H
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#if defined(__cplusplus)
extern "C" {
#endif
/* Size constants, in bytes, used by the BLAKE2b declarations below. */
enum blake2b_constant {
BLAKE2B_BLOCKBYTES = 128, /* size of blake2b_state.buf, the unit passed to the compression function */
BLAKE2B_OUTBYTES = 64, /* largest digest length accepted by blake2b_init / blake2b_init_key */
BLAKE2B_KEYBYTES = 64, /* largest key length accepted by blake2b_init_key */
BLAKE2B_SALTBYTES = 16, /* size of blake2b_param.salt */
BLAKE2B_PERSONALBYTES = 16 /* size of blake2b_param.personal */
};
/*
 * BLAKE2b parameter block. Packed to one byte so that it occupies exactly
 * 64 bytes with no padding; blake2b_init_param reads it as eight 64-bit
 * little-endian words and XORs them into the initial state.
 * The number in each trailing comment is the cumulative size in bytes up to
 * and including that field.
 */
#pragma pack(push, 1)
typedef struct __blake2b_param {
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint64_t node_offset; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;
#pragma pack(pop)
/* Streaming hash state shared by blake2b_init*, blake2b_update* and blake2b_final. */
typedef struct __blake2b_state {
uint64_t h[8]; /* chaining value: IV XOR parameter block at init, updated by every compression */
uint64_t t[2]; /* byte counter: t[0] is the low word, t[1] receives the carry */
uint64_t f[2]; /* finalisation flags: f[0] is set to all-ones for the last block, f[1] when last_node is set */
uint8_t buf[BLAKE2B_BLOCKBYTES]; /* input bytes that have not been compressed yet */
unsigned buflen; /* number of valid bytes currently held in buf */
unsigned outlen; /* digest length taken from the parameter block at init */
uint8_t last_node; /* when non-zero, marking the last block also sets f[1] */
} blake2b_state;
/* Ensure param structs have not been wrongly padded */
/* Poor man's static_assert */
/* Each enumerator divides by !!(condition): a false condition becomes a
   division by zero inside a constant expression, which fails to compile.
   sizeof(uint64_t) * CHAR_BIT is 8 * 8 = 64, the required byte size of
   blake2b_param. */
enum {
blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
blake2_size_check_2 =
1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
};
/* Streaming API */
/* Prepares S for an unkeyed hash of outlen bytes. Returns -1 when S is NULL or
   outlen is 0 or larger than BLAKE2B_OUTBYTES, otherwise 0. */
int blake2b_init(blake2b_state *S, size_t outlen);
/* Prepares S for a keyed hash: same checks as blake2b_init, plus key must be
   non-NULL and keylen in 1..BLAKE2B_KEYBYTES. The key is zero-padded to one
   full block and absorbed as the first input block. Returns 0 or -1. */
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
size_t keylen);
/* Prepares S from an explicit parameter block. Returns -1 when either pointer
   is NULL, otherwise 0. */
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
/* Absorbs inlen bytes from in. Returns 0 on success (inlen == 0 is a no-op),
   -1 when S or in is NULL or S has already been finalised or invalidated. */
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
/* Absorbs inlen copies of the single byte in, without needing a source buffer.
   Same return convention as blake2b_update. */
int blake2b_update_static(blake2b_state *S, const char in, size_t inlen);
/* Finishes the hash and writes the digest length chosen at init to out.
   Returns -1 when S or out is NULL, outlen is smaller than that digest length,
   or S has already been finalised; otherwise 0. */
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
/* Simple API */
/* One-shot hash: validates the arguments, initialises a state (keyed when
   keylen > 0), absorbs in and finalises into out. */
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
const void *key, size_t keylen);
/* Argon2 Team - Begin Code */
/* NOTE(review): the implementation is not next to the other functions;
   presumably the variable-length hash built on BLAKE2b that Argon2 uses for
   outputs longer than BLAKE2B_OUTBYTES -- confirm against its definition. */
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
/* Argon2 Team - End Code */
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -0,0 +1,68 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_LOAD_SSE2_H
#define BLAKE2B_LOAD_SSE2_H
/*
 * Message-word loaders for the SSE2 code path.
 *
 * LOAD_MSG_<round>_<step>(b0, b1) fills the two 128-bit registers b0 and b1
 * with the four 64-bit message words needed by step <step> (1..4) of round
 * <round> (0..11). The words are taken from the scalar variables m0..m15 that
 * must be in scope where the macro is expanded.
 *
 * Every loader expands to a single do { ... } while (0) statement, so it is
 * safe under an unbraced if/else (two bare statements would leave the second
 * assignment outside the condition). This matches the SSE4.1 loaders.
 */

/* Helper: b0 = (hi0:lo0), b1 = (hi1:lo1). _mm_set_epi64x takes the high
   64-bit element first, so the argument order mirrors the intrinsic. */
#define BLAKE2B_SET_PAIRS(b0, b1, hi0, lo0, hi1, lo1) \
    do { \
        (b0) = _mm_set_epi64x((hi0), (lo0)); \
        (b1) = _mm_set_epi64x((hi1), (lo1)); \
    } while (0)

#define LOAD_MSG_0_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m2, m0, m6, m4)
#define LOAD_MSG_0_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m3, m1, m7, m5)
#define LOAD_MSG_0_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m10, m8, m14, m12)
#define LOAD_MSG_0_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m11, m9, m15, m13)
#define LOAD_MSG_1_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m4, m14, m13, m9)
#define LOAD_MSG_1_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m8, m10, m6, m15)
#define LOAD_MSG_1_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m0, m1, m5, m11)
#define LOAD_MSG_1_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m2, m12, m3, m7)
#define LOAD_MSG_2_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m12, m11, m15, m5)
#define LOAD_MSG_2_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m0, m8, m13, m2)
#define LOAD_MSG_2_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m3, m10, m9, m7)
#define LOAD_MSG_2_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m6, m14, m4, m1)
#define LOAD_MSG_3_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m3, m7, m11, m13)
#define LOAD_MSG_3_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m1, m9, m14, m12)
#define LOAD_MSG_3_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m5, m2, m15, m4)
#define LOAD_MSG_3_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m10, m6, m8, m0)
#define LOAD_MSG_4_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m5, m9, m10, m2)
#define LOAD_MSG_4_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m7, m0, m15, m4)
#define LOAD_MSG_4_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m11, m14, m3, m6)
#define LOAD_MSG_4_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m12, m1, m13, m8)
#define LOAD_MSG_5_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m6, m2, m8, m0)
#define LOAD_MSG_5_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m10, m12, m3, m11)
#define LOAD_MSG_5_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m7, m4, m1, m15)
#define LOAD_MSG_5_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m5, m13, m9, m14)
#define LOAD_MSG_6_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m1, m12, m4, m14)
#define LOAD_MSG_6_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m15, m5, m10, m13)
#define LOAD_MSG_6_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m6, m0, m8, m9)
#define LOAD_MSG_6_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m3, m7, m11, m2)
#define LOAD_MSG_7_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m7, m13, m3, m12)
#define LOAD_MSG_7_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m14, m11, m9, m1)
#define LOAD_MSG_7_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m15, m5, m2, m8)
#define LOAD_MSG_7_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m4, m0, m10, m6)
#define LOAD_MSG_8_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m14, m6, m0, m11)
#define LOAD_MSG_8_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m9, m15, m8, m3)
#define LOAD_MSG_8_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m13, m12, m10, m1)
#define LOAD_MSG_8_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m7, m2, m5, m4)
#define LOAD_MSG_9_1(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m8, m10, m1, m7)
#define LOAD_MSG_9_2(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m4, m2, m5, m6)
#define LOAD_MSG_9_3(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m9, m15, m13, m3)
#define LOAD_MSG_9_4(b0, b1) BLAKE2B_SET_PAIRS(b0, b1, m14, m11, m0, m12)
/* Rounds 10 and 11 consume the message words in the same order as rounds 0 and 1. */
#define LOAD_MSG_10_1(b0, b1) LOAD_MSG_0_1(b0, b1)
#define LOAD_MSG_10_2(b0, b1) LOAD_MSG_0_2(b0, b1)
#define LOAD_MSG_10_3(b0, b1) LOAD_MSG_0_3(b0, b1)
#define LOAD_MSG_10_4(b0, b1) LOAD_MSG_0_4(b0, b1)
#define LOAD_MSG_11_1(b0, b1) LOAD_MSG_1_1(b0, b1)
#define LOAD_MSG_11_2(b0, b1) LOAD_MSG_1_2(b0, b1)
#define LOAD_MSG_11_3(b0, b1) LOAD_MSG_1_3(b0, b1)
#define LOAD_MSG_11_4(b0, b1) LOAD_MSG_1_4(b0, b1)
#endif

View file

@ -0,0 +1,402 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_LOAD_SSE41_H
#define BLAKE2B_LOAD_SSE41_H
/*
 * Message-word loaders for the SSE4.1 code path.
 *
 * LOAD_MSG_<round>_<step>(b0, b1) fills b0 and b1 with the four 64-bit message
 * words needed by step <step> (1..4) of round <round> (0..11). The message is
 * expected in eight 128-bit registers m0..m7 that must be in scope where the
 * macro is expanded; the words are picked out with unpack / alignr / blend /
 * shuffle instructions. Each loader is one do { ... } while (0) statement.
 */
#define LOAD_MSG_0_1(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m0, m1); b1 = _mm_unpacklo_epi64(m2, m3); } while(0)
#define LOAD_MSG_0_2(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m0, m1); b1 = _mm_unpackhi_epi64(m2, m3); } while(0)
#define LOAD_MSG_0_3(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m4, m5); b1 = _mm_unpacklo_epi64(m6, m7); } while(0)
#define LOAD_MSG_0_4(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m4, m5); b1 = _mm_unpackhi_epi64(m6, m7); } while(0)
#define LOAD_MSG_1_1(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m7, m2); b1 = _mm_unpackhi_epi64(m4, m6); } while(0)
#define LOAD_MSG_1_2(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m5, m4); b1 = _mm_alignr_epi8(m3, m7, 8); } while(0)
#define LOAD_MSG_1_3(b0, b1) \
    do { b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); b1 = _mm_unpackhi_epi64(m5, m2); } while(0)
#define LOAD_MSG_1_4(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m6, m1); b1 = _mm_unpackhi_epi64(m3, m1); } while(0)
#define LOAD_MSG_2_1(b0, b1) \
    do { b0 = _mm_alignr_epi8(m6, m5, 8); b1 = _mm_unpackhi_epi64(m2, m7); } while(0)
#define LOAD_MSG_2_2(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m4, m0); b1 = _mm_blend_epi16(m1, m6, 0xF0); } while(0)
#define LOAD_MSG_2_3(b0, b1) \
    do { b0 = _mm_blend_epi16(m5, m1, 0xF0); b1 = _mm_unpackhi_epi64(m3, m4); } while(0)
#define LOAD_MSG_2_4(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m7, m3); b1 = _mm_alignr_epi8(m2, m0, 8); } while(0)
#define LOAD_MSG_3_1(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m3, m1); b1 = _mm_unpackhi_epi64(m6, m5); } while(0)
#define LOAD_MSG_3_2(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m4, m0); b1 = _mm_unpacklo_epi64(m6, m7); } while(0)
#define LOAD_MSG_3_3(b0, b1) \
    do { b0 = _mm_blend_epi16(m1, m2, 0xF0); b1 = _mm_blend_epi16(m2, m7, 0xF0); } while(0)
#define LOAD_MSG_3_4(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m3, m5); b1 = _mm_unpacklo_epi64(m0, m4); } while(0)
#define LOAD_MSG_4_1(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m4, m2); b1 = _mm_unpacklo_epi64(m1, m5); } while(0)
#define LOAD_MSG_4_2(b0, b1) \
    do { b0 = _mm_blend_epi16(m0, m3, 0xF0); b1 = _mm_blend_epi16(m2, m7, 0xF0); } while(0)
#define LOAD_MSG_4_3(b0, b1) \
    do { b0 = _mm_blend_epi16(m7, m5, 0xF0); b1 = _mm_blend_epi16(m3, m1, 0xF0); } while(0)
#define LOAD_MSG_4_4(b0, b1) \
    do { b0 = _mm_alignr_epi8(m6, m0, 8); b1 = _mm_blend_epi16(m4, m6, 0xF0); } while(0)
#define LOAD_MSG_5_1(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m1, m3); b1 = _mm_unpacklo_epi64(m0, m4); } while(0)
#define LOAD_MSG_5_2(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m6, m5); b1 = _mm_unpackhi_epi64(m5, m1); } while(0)
#define LOAD_MSG_5_3(b0, b1) \
    do { b0 = _mm_blend_epi16(m2, m3, 0xF0); b1 = _mm_unpackhi_epi64(m7, m0); } while(0)
#define LOAD_MSG_5_4(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m6, m2); b1 = _mm_blend_epi16(m7, m4, 0xF0); } while(0)
#define LOAD_MSG_6_1(b0, b1) \
    do { b0 = _mm_blend_epi16(m6, m0, 0xF0); b1 = _mm_unpacklo_epi64(m7, m2); } while(0)
#define LOAD_MSG_6_2(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m2, m7); b1 = _mm_alignr_epi8(m5, m6, 8); } while(0)
#define LOAD_MSG_6_3(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m0, m3); b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); } while(0)
#define LOAD_MSG_6_4(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m3, m1); b1 = _mm_blend_epi16(m1, m5, 0xF0); } while(0)
#define LOAD_MSG_7_1(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m6, m3); b1 = _mm_blend_epi16(m6, m1, 0xF0); } while(0)
#define LOAD_MSG_7_2(b0, b1) \
    do { b0 = _mm_alignr_epi8(m7, m5, 8); b1 = _mm_unpackhi_epi64(m0, m4); } while(0)
#define LOAD_MSG_7_3(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m2, m7); b1 = _mm_unpacklo_epi64(m4, m1); } while(0)
#define LOAD_MSG_7_4(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m0, m2); b1 = _mm_unpacklo_epi64(m3, m5); } while(0)
#define LOAD_MSG_8_1(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m3, m7); b1 = _mm_alignr_epi8(m0, m5, 8); } while(0)
#define LOAD_MSG_8_2(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m7, m4); b1 = _mm_alignr_epi8(m4, m1, 8); } while(0)
#define LOAD_MSG_8_3(b0, b1) \
    do { b0 = m6; b1 = _mm_alignr_epi8(m5, m0, 8); } while(0)
#define LOAD_MSG_8_4(b0, b1) \
    do { b0 = _mm_blend_epi16(m1, m3, 0xF0); b1 = m2; } while(0)
#define LOAD_MSG_9_1(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m5, m4); b1 = _mm_unpackhi_epi64(m3, m0); } while(0)
#define LOAD_MSG_9_2(b0, b1) \
    do { b0 = _mm_unpacklo_epi64(m1, m2); b1 = _mm_blend_epi16(m3, m2, 0xF0); } while(0)
#define LOAD_MSG_9_3(b0, b1) \
    do { b0 = _mm_unpackhi_epi64(m7, m4); b1 = _mm_unpackhi_epi64(m1, m6); } while(0)
#define LOAD_MSG_9_4(b0, b1) \
    do { b0 = _mm_alignr_epi8(m7, m5, 8); b1 = _mm_unpacklo_epi64(m6, m0); } while(0)
/* Rounds 10 and 11 load exactly the same words as rounds 0 and 1. */
#define LOAD_MSG_10_1(b0, b1) LOAD_MSG_0_1(b0, b1)
#define LOAD_MSG_10_2(b0, b1) LOAD_MSG_0_2(b0, b1)
#define LOAD_MSG_10_3(b0, b1) LOAD_MSG_0_3(b0, b1)
#define LOAD_MSG_10_4(b0, b1) LOAD_MSG_0_4(b0, b1)
#define LOAD_MSG_11_1(b0, b1) LOAD_MSG_1_1(b0, b1)
#define LOAD_MSG_11_2(b0, b1) LOAD_MSG_1_2(b0, b1)
#define LOAD_MSG_11_3(b0, b1) LOAD_MSG_1_3(b0, b1)
#define LOAD_MSG_11_4(b0, b1) LOAD_MSG_1_4(b0, b1)
#endif

View file

@ -0,0 +1,154 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_ROUND_H
#define BLAKE2B_ROUND_H
/* LOADU(p): 128-bit load from p using the unaligned-load intrinsic. */
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
/* STOREU(p,r): 128-bit store of r to p using the unaligned-store intrinsic. */
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
/* TOF / TOI: reinterpret a 128-bit register as packed floats / as integers
   (a cast only; no value conversion). */
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
/* LIKELY(x): branch hint that x is expected to be true. Relies on
   __builtin_expect, a GCC/Clang builtin. */
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
/*
 * _mm_roti_epi64(x, c): rotates each 64-bit lane of x. Callers pass c as the
 * NEGATED right-rotation count (-32, -24, -16, -63), so -(c) is the number of
 * bits rotated to the right.
 *
 * SSSE3 variant: 32 is a 32-bit lane swap, 24 and 16 are byte shuffles using
 * the masks r24 / r16 (which must be in scope where the macro is expanded),
 * and 63 uses x + x in place of x << 1. Any other count falls back to the
 * generic shift-right XOR shift-left form, which is also the plain SSE2
 * variant.
 *
 * The replacement lists are fully parenthesised so the macro can be used as an
 * operand inside a larger expression without precedence surprises.
 *
 * When HAVE_XOP is defined nothing is defined here.
 * NOTE(review): presumably the XOP intrinsic of the same name from
 * <x86intrin.h> is used in that case -- confirm.
 */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi64(x, c) \
    ((-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
    : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
    : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
    : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
    : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))))
#else
#define _mm_roti_epi64(r, c) \
    (_mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )))
#endif
#else
/* ... */
#endif
/*
 * G1: first half of the BLAKE2b mixing step, applied to the low (l) and high
 * (h) register of each state row at once.
 *   row1 += b + row2;  row4 = rotr(row4 ^ row1, 32);
 *   row3 += row4;      row2 = rotr(row2 ^ row3, 24);
 * b0 / b1 are the message words for the low / high registers.
 *
 * The last body line deliberately has NO trailing backslash: a continuation
 * there would splice the next source line (the following #define) into this
 * macro's replacement list.
 */
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
    row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
    row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
    \
    row4l = _mm_xor_si128(row4l, row1l); \
    row4h = _mm_xor_si128(row4h, row1h); \
    \
    row4l = _mm_roti_epi64(row4l, -32); \
    row4h = _mm_roti_epi64(row4h, -32); \
    \
    row3l = _mm_add_epi64(row3l, row4l); \
    row3h = _mm_add_epi64(row3h, row4h); \
    \
    row2l = _mm_xor_si128(row2l, row3l); \
    row2h = _mm_xor_si128(row2h, row3h); \
    \
    row2l = _mm_roti_epi64(row2l, -24); \
    row2h = _mm_roti_epi64(row2h, -24);
/*
 * G2: second half of the BLAKE2b mixing step, applied to the low (l) and high
 * (h) register of each state row at once.
 *   row1 += b + row2;  row4 = rotr(row4 ^ row1, 16);
 *   row3 += row4;      row2 = rotr(row2 ^ row3, 63);
 * b0 / b1 are the message words for the low / high registers.
 *
 * The last body line deliberately has NO trailing backslash: a continuation
 * there would splice the next source line (a preprocessor conditional) into
 * this macro's replacement list.
 */
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
    row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
    row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
    \
    row4l = _mm_xor_si128(row4l, row1l); \
    row4h = _mm_xor_si128(row4h, row1h); \
    \
    row4l = _mm_roti_epi64(row4l, -16); \
    row4h = _mm_roti_epi64(row4h, -16); \
    \
    row3l = _mm_add_epi64(row3l, row4l); \
    row3h = _mm_add_epi64(row3h, row4h); \
    \
    row2l = _mm_xor_si128(row2l, row3l); \
    row2h = _mm_xor_si128(row2h, row3h); \
    \
    row2l = _mm_roti_epi64(row2l, -63); \
    row2h = _mm_roti_epi64(row2h, -63);
#if defined(HAVE_SSSE3)
/*
 * DIAGONALIZE / UNDIAGONALIZE permute the 64-bit lanes of rows 2, 3 and 4
 * (each row is held in an l/h register pair); row 1 is passed in but never
 * touched. ROUND applies DIAGONALIZE between its two G1/G2 pairs and
 * UNDIAGONALIZE afterwards, the latter being intended to restore the original
 * lane order. Both use the temporaries t0 and t1, which must be in scope where
 * the macro is expanded.
 *
 * SSSE3 variant: rows 2 and 4 are re-paired with 8-byte _mm_alignr_epi8
 * shifts across the l/h pair, and row 3 simply has its two halves swapped.
 */
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
row4l = t1; \
row4h = t0;
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
row2l = t0; \
row2h = t1; \
\
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
\
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
row4l = t1; \
row4h = t0;
#else
/*
 * Plain SSE2 variant of the same two permutations, built from
 * _mm_unpackhi_epi64 / _mm_unpacklo_epi64 because _mm_alignr_epi8 is not
 * available. In DIAGONALIZE, row4l is borrowed as scratch for the row 3 swap;
 * its original value was saved in t0 first.
 * Unlike the SSSE3 variant, these bodies do not end in a semicolon.
 */
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = row4l;\
t1 = row2l;\
row4l = row3l;\
row3l = row3h;\
row3h = row4l;\
row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1))
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
t0 = row3l;\
row3l = row3h;\
row3h = t0;\
t0 = row2l;\
t1 = row4l;\
row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1))
#endif
#if defined(HAVE_SSE41)
#include "blake2b-load-sse41.h"
#else
#include "blake2b-load-sse2.h"
#endif
/*
 * ROUND(r): one BLAKE2b round. r must be a literal round number because it is
 * token-pasted into the LOAD_MSG_<r>_<step> loader names.
 * Sequence: load words / G1, load words / G2, DIAGONALIZE, load words / G1,
 * load words / G2, UNDIAGONALIZE.
 * Expects row1l..row4h, b0, b1, t0, t1 and the message variables used by the
 * loaders to be in scope where the macro is expanded.
 */
#define ROUND(r) \
LOAD_MSG_ ##r ##_1(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_2(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
LOAD_MSG_ ##r ##_3(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_4(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
#endif

View file

@ -0,0 +1,514 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64))
#include "blake2-config.h"
#ifdef _MSC_VER
#include <intrin.h> /* for _mm_set_epi64x */
#endif
#include <emmintrin.h>
#if defined(HAVE_SSSE3)
#include <tmmintrin.h>
#endif
#if defined(HAVE_SSE41)
#include <smmintrin.h>
#endif
#if defined(HAVE_AVX)
#include <immintrin.h>
#endif
#if defined(HAVE_XOP)
#include <x86intrin.h>
#endif
#include "blake2b-round.h"
#endif
/* BLAKE2b initialisation vector: copied into the chaining value by
   blake2b_init0 and used as the second half of the working state by
   blake2b_compress. */
static const uint64_t blake2b_IV[8] = {
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)};
/* Message schedule: row r lists the indices of the 16 message words in the
   order the portable blake2b_compress feeds them to G during round r.
   Rows 10 and 11 repeat rows 0 and 1. */
static const unsigned int blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
};
/* Raises the "last node" flag by setting every bit of f[1]. */
static BLAKE2_INLINE void blake2b_set_lastnode(blake2b_state *S) {
    S->f[1] = ~(uint64_t)0;
}
/*
 * Raises the "last block" flag by setting every bit of f[0]. When the state
 * is marked as a last node, the last-node flag is raised first.
 */
static BLAKE2_INLINE void blake2b_set_lastblock(blake2b_state *S) {
    if (S->last_node != 0) {
        blake2b_set_lastnode(S);
    }
    S->f[0] = ~(uint64_t)0;
}
/*
 * Adds inc to the byte counter held in t[0] (low word) and t[1] (high word),
 * carrying into t[1] when the low word wraps around.
 */
static BLAKE2_INLINE void blake2b_increment_counter(blake2b_state *S,
                                                    uint64_t inc) {
    const uint64_t low = S->t[0] + inc;
    S->t[0] = low;
    /* An unsigned sum smaller than one of its operands means it wrapped. */
    if (low < inc) {
        S->t[1] += 1;
    }
}
/* Marks S as unusable after a failed initialisation. It does so by raising the
   last-block flag, which blake2b_update, blake2b_update_static and
   blake2b_final all treat as "state already used" and reject with -1. */
static BLAKE2_INLINE void blake2b_invalidate_state(blake2b_state *S) {
blake2b_set_lastblock(S); /* invalidate for further use */
}
/* Resets the whole state to zero and loads the IV into the chaining value. */
static BLAKE2_INLINE void blake2b_init0(blake2b_state *S) {
    unsigned int k;
    memset(S, 0, sizeof(blake2b_state));
    for (k = 0; k < 8; ++k) {
        S->h[k] = blake2b_IV[k];
    }
}
/*
 * Initialises S from the parameter block P.
 * The chaining value becomes IV XOR P, with P read as eight 64-bit
 * little-endian words, and the digest length is remembered for blake2b_final.
 * Returns 0 on success, -1 when S or P is NULL.
 */
int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
    const unsigned char *raw;
    unsigned int word;
    if (S == NULL || P == NULL) {
        return -1;
    }
    blake2b_init0(S);
    /* IV XOR Parameter Block */
    raw = (const unsigned char *)P;
    for (word = 0; word < 8; ++word) {
        S->h[word] ^= load64(raw + word * sizeof(uint64_t));
    }
    S->outlen = P->digest_length;
    return 0;
}
/*
 * Sequential (unkeyed) BLAKE2b initialisation.
 * Builds a parameter block with digest_length = outlen, fanout = depth = 1 and
 * every other field zero, then hands it to blake2b_init_param.
 * Returns -1 when S is NULL; when outlen is 0 or exceeds BLAKE2B_OUTBYTES the
 * state is invalidated and -1 is returned.
 */
int blake2b_init(blake2b_state *S, size_t outlen) {
    blake2b_param params;
    if (S == NULL) {
        return -1;
    }
    if (outlen < 1 || outlen > BLAKE2B_OUTBYTES) {
        blake2b_invalidate_state(S);
        return -1;
    }
    /* Setup Parameter Block for unkeyed BLAKE2: start from all zeroes and
       fill in only the fields that are non-zero. */
    memset(&params, 0, sizeof(params));
    params.digest_length = (uint8_t)outlen;
    params.fanout = 1;
    params.depth = 1;
    return blake2b_init_param(S, &params);
}
/*
 * Keyed BLAKE2b initialisation.
 * Builds a parameter block with digest_length = outlen, key_length = keylen,
 * fanout = depth = 1 and every other field zero, initialises S from it and
 * then absorbs the key, zero-padded to one full block, as the first input.
 * Returns -1 when S is NULL. When outlen is 0 or exceeds BLAKE2B_OUTBYTES, or
 * key is NULL, or keylen is 0 or exceeds BLAKE2B_KEYBYTES, the state is
 * invalidated and -1 is returned. Returns 0 on success.
 */
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
                     size_t keylen) {
    blake2b_param params;
    if (S == NULL) {
        return -1;
    }
    if (outlen < 1 || outlen > BLAKE2B_OUTBYTES ||
        key == NULL || keylen < 1 || keylen > BLAKE2B_KEYBYTES) {
        blake2b_invalidate_state(S);
        return -1;
    }
    /* Setup Parameter Block for keyed BLAKE2: start from all zeroes and fill
       in only the fields that are non-zero. */
    memset(&params, 0, sizeof(params));
    params.digest_length = (uint8_t)outlen;
    params.key_length = (uint8_t)keylen;
    params.fanout = 1;
    params.depth = 1;
    if (blake2b_init_param(S, &params) < 0) {
        blake2b_invalidate_state(S);
        return -1;
    }
    {
        /* The key occupies the start of an otherwise all-zero block. */
        uint8_t padded_key[BLAKE2B_BLOCKBYTES] = {0};
        memcpy(padded_key, key, keylen);
        blake2b_update(S, padded_key, BLAKE2B_BLOCKBYTES);
    }
    return 0;
}
#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64))
/*
 * SSE implementation of the BLAKE2b compression function: mixes one
 * BLAKE2B_BLOCKBYTES-byte block into the chaining value S->h.
 * The 4x4 working state of 64-bit words is held as four rows, each split over
 * a low (l) and a high (h) 128-bit register. block is only read.
 */
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
__m128i row1l, row1h;
__m128i row2l, row2h;
__m128i row3l, row3h;
__m128i row4l, row4h;
__m128i b0, b1;
__m128i t0, t1;
/* Byte-shuffle masks referenced by the SSSE3 variant of _mm_roti_epi64 for
   the 16-bit and 24-bit rotations. */
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
/* Message words: the SSE4.1 loaders expect eight 128-bit registers m0..m7,
   the SSE2 loaders sixteen 64-bit scalars m0..m15. */
#if defined(HAVE_SSE41)
const __m128i m0 = LOADU( block + 00 );
const __m128i m1 = LOADU( block + 16 );
const __m128i m2 = LOADU( block + 32 );
const __m128i m3 = LOADU( block + 48 );
const __m128i m4 = LOADU( block + 64 );
const __m128i m5 = LOADU( block + 80 );
const __m128i m6 = LOADU( block + 96 );
const __m128i m7 = LOADU( block + 112 );
#else
const uint64_t m0 = load64(block + 0 * sizeof(uint64_t));
const uint64_t m1 = load64(block + 1 * sizeof(uint64_t));
const uint64_t m2 = load64(block + 2 * sizeof(uint64_t));
const uint64_t m3 = load64(block + 3 * sizeof(uint64_t));
const uint64_t m4 = load64(block + 4 * sizeof(uint64_t));
const uint64_t m5 = load64(block + 5 * sizeof(uint64_t));
const uint64_t m6 = load64(block + 6 * sizeof(uint64_t));
const uint64_t m7 = load64(block + 7 * sizeof(uint64_t));
const uint64_t m8 = load64(block + 8 * sizeof(uint64_t));
const uint64_t m9 = load64(block + 9 * sizeof(uint64_t));
const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
#endif
/* Rows 1-2 start as the chaining value; rows 3-4 as the IV, with the byte
   counter t and the finalisation flags f XORed into row 4. */
row1l = LOADU( &S->h[0] );
row1h = LOADU( &S->h[2] );
row2l = LOADU( &S->h[4] );
row2h = LOADU( &S->h[6] );
row3l = LOADU( &blake2b_IV[0] );
row3h = LOADU( &blake2b_IV[2] );
row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
/* Twelve rounds; ROUND needs a literal round number, hence no loop. */
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
ROUND( 10 );
ROUND( 11 );
/* Feed-forward: h[0..3] ^= row1 ^ row3 and h[4..7] ^= row2 ^ row4. */
row1l = _mm_xor_si128( row3l, row1l );
row1h = _mm_xor_si128( row3h, row1h );
STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
row2l = _mm_xor_si128( row4l, row2l );
row2h = _mm_xor_si128( row4h, row2h );
STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
}
#else
/*
 * Portable implementation of the BLAKE2b compression function: mixes one
 * BLAKE2B_BLOCKBYTES-byte block into the chaining value S->h.
 * block is only read.
 */
static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
uint64_t m[16];
uint64_t v[16];
unsigned int i, r;
/* Decode the block into sixteen little-endian 64-bit message words. */
for (i = 0; i < 16; ++i) {
m[i] = load64(block + i * sizeof(m[i]));
}
/* Working state: v[0..7] is the chaining value, v[8..15] the IV with the
   byte counter t and the finalisation flags f XORed into the last four. */
for (i = 0; i < 8; ++i) {
v[i] = S->h[i];
}
v[8] = blake2b_IV[0];
v[9] = blake2b_IV[1];
v[10] = blake2b_IV[2];
v[11] = blake2b_IV[3];
v[12] = blake2b_IV[4] ^ S->t[0];
v[13] = blake2b_IV[5] ^ S->t[1];
v[14] = blake2b_IV[6] ^ S->f[0];
v[15] = blake2b_IV[7] ^ S->f[1];
/* G mixes four state words with the two message words that blake2b_sigma
   selects for position i of round r. */
#define G(r, i, a, b, c, d) \
do { \
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while ((void)0, 0)
/* One round: G on the four columns (0..3), then on the four diagonals (4..7). */
#define ROUND(r) \
do { \
G(r, 0, v[0], v[4], v[8], v[12]); \
G(r, 1, v[1], v[5], v[9], v[13]); \
G(r, 2, v[2], v[6], v[10], v[14]); \
G(r, 3, v[3], v[7], v[11], v[15]); \
G(r, 4, v[0], v[5], v[10], v[15]); \
G(r, 5, v[1], v[6], v[11], v[12]); \
G(r, 6, v[2], v[7], v[8], v[13]); \
G(r, 7, v[3], v[4], v[9], v[14]); \
} while ((void)0, 0)
for (r = 0; r < 12; ++r) {
ROUND(r);
}
/* Feed-forward: fold both halves of the working state into the chaining value. */
for (i = 0; i < 8; ++i) {
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
}
/* G and ROUND are local to this function. */
#undef G
#undef ROUND
}
#endif
/*
 * Absorbs inlen bytes from in into the hash state.
 *
 * Input is collected in S->buf and compressed one block at a time. A block is
 * compressed only once more input is known to follow it, so the final block
 * (even a completely full one) stays buffered for blake2b_final, which must
 * compress it with the last-block flag set.
 *
 * Returns 0 on success (inlen == 0 is a no-op that succeeds even for NULL
 * pointers), -1 when S or in is NULL, or when S has already been finalised or
 * invalidated.
 */
int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
    const uint8_t *pin = (const uint8_t *)in;
    if (inlen == 0) {
        return 0;
    }
    /* Sanity check */
    if (S == NULL || in == NULL) {
        return -1;
    }
    /* Is this a reused state? */
    if (S->f[0] != 0) {
        return -1;
    }
    /* Compared as a subtraction so that a very large inlen cannot wrap the
       sum S->buflen + inlen; buflen never exceeds BLAKE2B_BLOCKBYTES. */
    if (inlen > (size_t)BLAKE2B_BLOCKBYTES - S->buflen) {
        /* Complete current block */
        size_t left = S->buflen;
        size_t fill = BLAKE2B_BLOCKBYTES - left;
        memcpy(&S->buf[left], pin, fill);
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen = 0;
        inlen -= fill;
        pin += fill;
        /* Avoid buffer copies when possible: whole blocks are compressed
           straight from the caller's memory. Strictly greater, so the last
           block is left for the buffer below. */
        while (inlen > BLAKE2B_BLOCKBYTES) {
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, pin);
            inlen -= BLAKE2B_BLOCKBYTES;
            pin += BLAKE2B_BLOCKBYTES;
        }
    }
    /* Buffer whatever is left (at most one full block). */
    memcpy(&S->buf[S->buflen], pin, inlen);
    S->buflen += (unsigned int)inlen;
    return 0;
}
/*
 * Absorb `inlen` copies of the single byte `in` into the BLAKE2b state.
 *
 * Behaves like blake2b_update() on a buffer filled with `in`, except that
 * the staging buffer S->buf is filled with memset instead of reading input.
 *
 * Returns 0 on success (including inlen == 0) and -1 when S is NULL or the
 * state has already been finalized.
 */
int blake2b_update_static(blake2b_state *S, const char in, size_t inlen) {
    size_t remaining = inlen;

    if (remaining == 0) {
        return 0;
    }

    if (S == NULL) {
        return -1;
    }

    /* A non-zero finalization flag means the state was already finalized. */
    if (S->f[0] != 0) {
        return -1;
    }

    if (S->buflen + remaining > BLAKE2B_BLOCKBYTES) {
        /* Pad the partially filled buffer up to a full block and compress. */
        const size_t buffered = S->buflen;
        const size_t missing = BLAKE2B_BLOCKBYTES - buffered;

        memset(&S->buf[buffered], in, missing);
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen = 0;
        remaining -= missing;

        /* Every further whole block consists solely of the fill byte. */
        while (remaining > BLAKE2B_BLOCKBYTES) {
            memset(S->buf, in, BLAKE2B_BLOCKBYTES);
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, S->buf);
            remaining -= BLAKE2B_BLOCKBYTES;
        }
    }

    /* Keep the tail buffered for the next update or for finalization. */
    memset(&S->buf[S->buflen], in, remaining);
    S->buflen += (unsigned int)remaining;
    return 0;
}
/*
 * Finalize the hash and write S->outlen digest bytes to `out`.
 *
 * `outlen` is the capacity of `out` and must be at least S->outlen.
 * Returns 0 on success, -1 on a NULL pointer, a too-small output buffer or
 * a state that was already finalized.
 */
int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
    uint8_t digest[BLAKE2B_OUTBYTES] = {0};
    unsigned int word;

    if (S == NULL || out == NULL) {
        return -1;
    }
    if (outlen < S->outlen) {
        return -1;
    }

    /* A non-zero finalization flag means the state was already finalized. */
    if (S->f[0] != 0) {
        return -1;
    }

    /* Account for the buffered tail, flag the last block, zero-pad it. */
    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen);
    blake2b_compress(S, S->buf);

    /* Serialize all eight chaining words, then hand out only S->outlen bytes. */
    for (word = 0; word < 8; ++word) {
        store64(digest + word * sizeof(S->h[word]), S->h[word]);
    }
    memcpy(out, digest, S->outlen);
    return 0;
}
/*
 * One-shot BLAKE2b: hash `inlen` bytes of `in` (optionally keyed) into
 * `outlen` bytes at `out`.
 *
 * Returns the result of blake2b_final() on success and -1 when an argument
 * is invalid or any initialization/update step fails.
 */
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
            const void *key, size_t keylen) {
    blake2b_state S;
    int init_status;

    /* Argument validation. */
    if (in == NULL && inlen > 0) {
        return -1;
    }
    if (out == NULL || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        return -1;
    }
    if ((key == NULL && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
        return -1;
    }

    /* Keyed and unkeyed hashing differ only in how the state is set up. */
    init_status = (keylen > 0) ? blake2b_init_key(&S, outlen, key, keylen)
                               : blake2b_init(&S, outlen);
    if (init_status < 0) {
        return -1;
    }

    if (blake2b_update(&S, in, inlen) < 0) {
        return -1;
    }

    return blake2b_final(&S, out, outlen);
}
/* Argon2 Team - Begin Code */
/*
 * Variable-length hash built on BLAKE2b.
 *
 * The requested length is prepended to the input as a little-endian 32-bit
 * value. Outputs up to BLAKE2B_OUTBYTES come from a single BLAKE2b call.
 * Longer outputs are produced by repeatedly hashing the previous full digest
 * and emitting half of each digest, with the final call producing exactly
 * the bytes that are still missing.
 *
 * Returns a non-negative value on success and a negative value on failure
 * (including outlen > UINT32_MAX).
 */
int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
    uint8_t *dst = (uint8_t *)pout;
    blake2b_state state;
    uint8_t length_le[sizeof(uint32_t)] = {0};
    int ret = -1;

    if (outlen > UINT32_MAX) {
        return ret;
    }

    /* The length prefix is always serialized little-endian. */
    store32(length_le, (uint32_t)outlen);

/* Run a step, remember its status and bail out on the first failure. */
#define BLAKE2B_LONG_STEP(call)                                                \
    do {                                                                       \
        ret = (call);                                                          \
        if (ret < 0) {                                                         \
            return ret;                                                        \
        }                                                                      \
    } while ((void)0, 0)

    if (outlen <= BLAKE2B_OUTBYTES) {
        /* Short output: one hash over (length || input). */
        BLAKE2B_LONG_STEP(blake2b_init(&state, outlen));
        BLAKE2B_LONG_STEP(blake2b_update(&state, length_le, sizeof(length_le)));
        BLAKE2B_LONG_STEP(blake2b_update(&state, in, inlen));
        BLAKE2B_LONG_STEP(blake2b_final(&state, dst, outlen));
    } else {
        const uint32_t half = BLAKE2B_OUTBYTES / 2;
        uint8_t digest[BLAKE2B_OUTBYTES];
        uint8_t chain_input[BLAKE2B_OUTBYTES];
        uint32_t remaining;

        /* First link of the chain: full-size hash over (length || input). */
        BLAKE2B_LONG_STEP(blake2b_init(&state, BLAKE2B_OUTBYTES));
        BLAKE2B_LONG_STEP(blake2b_update(&state, length_le, sizeof(length_le)));
        BLAKE2B_LONG_STEP(blake2b_update(&state, in, inlen));
        BLAKE2B_LONG_STEP(blake2b_final(&state, digest, BLAKE2B_OUTBYTES));

        memcpy(dst, digest, half);
        dst += half;
        remaining = (uint32_t)outlen - half;

        /* Middle links: hash the previous digest, emit its first half. */
        while (remaining > BLAKE2B_OUTBYTES) {
            memcpy(chain_input, digest, BLAKE2B_OUTBYTES);
            BLAKE2B_LONG_STEP(blake2b(digest, BLAKE2B_OUTBYTES, chain_input,
                                      BLAKE2B_OUTBYTES, NULL, 0));
            memcpy(dst, digest, half);
            dst += half;
            remaining -= half;
        }

        /* Last link: produce exactly the bytes that are still missing. */
        memcpy(chain_input, digest, BLAKE2B_OUTBYTES);
        BLAKE2B_LONG_STEP(blake2b(digest, remaining, chain_input,
                                  BLAKE2B_OUTBYTES, NULL, 0));
        memcpy(dst, digest, remaining);
    }

    return ret;
#undef BLAKE2B_LONG_STEP
}
/* Argon2 Team - End Code */

View file

@ -0,0 +1,227 @@
//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN64)
#include <cpuinfo_x86.h>
#endif
#if defined(__arm__)
#include <cpuinfo_arm.h>
#endif
#include <crypto/Argon2_constants.h>
#include "../../common/common.h"
#include "crypto/argon2_hasher/hash/Hasher.h"
#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
#include "CpuHasher.h"
#include "crypto/argon2_hasher/common/DLLExport.h"
/**
 * Builds an unconfigured CPU hasher: reference ("REF") optimization, one
 * available processing/memory thread, no module loaded and no per-thread
 * data allocated. initialize() and configure() fill in the real values.
 */
CpuHasher::CpuHasher()
    : Hasher(),
      m_optimization("REF"),
      m_availableProcessingThr(1),
      m_availableMemoryThr(1),
      m_dllHandle(nullptr),
      m_profile(nullptr),
      m_argon2BlocksFillerPtr(nullptr),
      m_threadData(nullptr) {
    // These fields are not declared by CpuHasher itself, so they are
    // assigned here rather than in the initializer list.
    m_type = "CPU";
    m_subType = "CPU";
    m_shortSubType = "CPU";
    m_computingThreads = 0;
}
/** Releases per-thread hashing state and the loaded module via cleanup(). */
CpuHasher::~CpuHasher() {
    cleanup();
}
/**
 * Selects the Argon2 profile for the given algorithm/variant and builds the
 * human-readable device description.
 *
 * @return always true.
 */
bool CpuHasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
    // The profile has to be stored first: the feature detection below reads
    // m_profile->memSize when probing how much memory can be allocated.
    Argon2Profile *selectedProfile = getArgon2Profile(algorithm, variant);
    m_profile = selectedProfile;

    m_description = detectFeaturesAndMakeDescription();

    return true;
}
/**
 * Applies the user configuration, loads the block-filler module and
 * allocates the per-thread hashing state.
 *
 * Steps: optional override of the detected optimization, module loading,
 * thread-count selection (minimum of processing and memory capacity, further
 * capped by config.cpuThreads() when it is >= 0), then one memory buffer and
 * one Argon2 instance per computing thread.
 *
 * @param config hasher configuration (CPU optimization name, thread count).
 * @return true when the hasher is enabled; false when it is disabled, in
 *         which case m_description carries the reason.
 */
bool CpuHasher::configure(xmrig::HasherConfig &config) {
    m_intensity = 100;

    if(config.cpuOptimization() != "") {
        m_description += "Overiding detected optimization feature with " + config.cpuOptimization() + ".\n";
        m_optimization = config.cpuOptimization();
    }

    loadArgon2BlockFiller();

    if(m_argon2BlocksFillerPtr == NULL) {
        m_intensity = 0;
        m_description += "Status: DISABLED - argon2 hashing module not found.";
        return false;
    }

    m_computingThreads = min(m_availableProcessingThr, m_availableMemoryThr);

    if (m_computingThreads == 0) {
        m_intensity = 0;
        m_description += "Status: DISABLED - not enough resources.";
        return false;
    }

    if(config.cpuThreads() > -1) {
        // Intensity is the requested share of the usable threads, capped at 100%.
        m_intensity = min(100.0 * config.cpuThreads() / m_computingThreads, 100.0);
        m_computingThreads = min(config.cpuThreads(), m_computingThreads);
    }

    if (m_intensity == 0) {
        m_description += "Status: DISABLED - by user.";
        return false;
    }

    m_deviceInfo.intensity = m_intensity;
    storeDeviceInfo(0, m_deviceInfo);

    // Value-initialize the array so every argon2/mem pointer starts out null.
    // If an allocation below fails part-way, cleanup() walks all
    // m_computingThreads entries and must only see null or owned pointers.
    m_threadData = new CpuHasherThread[m_computingThreads]();
    for(int i=0; i < m_computingThreads; i++) {
        void *buffer = NULL;
        void *mem = allocateMemory(buffer);
        if(mem == NULL) {
            // Do not leak the raw allocation when no aligned block was obtained.
            free(buffer);
            m_intensity = 0;
            m_description += "Status: DISABLED - error allocating memory.";
            return false;
        }
        // `buffer` is the pointer to free later; `mem` is the aligned view
        // handed to Argon2.
        m_threadData[i].mem = buffer;
        m_threadData[i].argon2 = new Argon2(NULL, m_argon2BlocksFillerPtr, NULL, mem, mem);
        m_threadData[i].hashData.outSize = xmrig::ARGON2_HASHLEN + sizeof(uint32_t);
    }

    m_description += "Status: ENABLED - with " + to_string(m_computingThreads) + " threads.";

    return true;
}
/**
 * Detects CPU capabilities and returns a multi-line description of them.
 *
 * Side effects: sets m_deviceInfo.name (x86 and 32-bit ARM builds only),
 * m_optimization (the best detected instruction-set name),
 * m_availableProcessingThr (hardware thread count, at least 1) and
 * m_availableMemoryThr (how many per-thread buffers of m_profile->memSize
 * + 64 bytes could be allocated at once).
 *
 * Requires m_profile to be set before the call.
 */
string CpuHasher::detectFeaturesAndMakeDescription() {
    stringstream ss;
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN64)
    char brand_string[49];
    cpu_features::FillX86BrandString(brand_string);
    m_deviceInfo.name = brand_string;
    ss << brand_string << endl;
    cpu_features::X86Features features = cpu_features::GetX86Info().features;
    ss << "Optimization features: ";
#if defined(__x86_64__) || defined(_WIN64)
    ss << "SSE2 ";
    m_optimization = "SSE2";
#else
    ss << "none";
    m_optimization = "REF";
#endif
    // Checked from weakest to strongest so the last match wins. Each flag is
    // tested on its own; the previous outer guard left out features.avx.
    if (features.ssse3) {
        ss << "SSSE3 ";
        m_optimization = "SSSE3";
    }
    if (features.avx) {
        ss << "AVX ";
        m_optimization = "AVX";
    }
    if (features.avx2) {
        ss << "AVX2 ";
        m_optimization = "AVX2";
    }
    if (features.avx512f) {
        ss << "AVX512F ";
        m_optimization = "AVX512F";
    }
    ss << endl;
#endif
#if defined(__arm__)
    m_deviceInfo.name = "ARM processor";
    cpu_features::ArmFeatures features = cpu_features::GetArmInfo().features;
    ss << "ARM processor" << endl;
    ss << "Optimization features: ";
    m_optimization = "REF";
    if(features.neon) {
        ss << "NEON";
        m_optimization = "NEON";
    }
    else {
        ss << "none";
    }
    ss << endl;
#endif
    ss << "Selecting " << m_optimization << " as candidate for hashing algorithm." << endl;

    // hardware_concurrency() returns 0 when the count cannot be determined;
    // treat that as a single thread instead of disabling the hasher.
    const unsigned int hardwareThreads = thread::hardware_concurrency();
    m_availableProcessingThr = hardwareThreads > 0 ? static_cast<int>(hardwareThreads) : 1;
    ss << "Parallelism: " << m_availableProcessingThr << " concurent threads supported." << endl;

    // Check available memory: allocate one buffer per hardware thread until
    // malloc fails, then release them all again.
    vector<void *> memoryTest;
    for(m_availableMemoryThr = 0;m_availableMemoryThr < m_availableProcessingThr;m_availableMemoryThr++) {
        void *memory = malloc(m_profile->memSize + 64); // 64 extra bytes for alignment - needed by the AVX512F optimisations
        if(memory == NULL)
            break;
        memoryTest.push_back(memory);
    }
    for(void *probe : memoryTest) {
        free(probe);
    }
    ss << "Memory: there is enough memory for " << m_availableMemoryThr << " concurent threads." << endl;
    return ss.str();
}
void CpuHasher::cleanup() {
for(int i=0; i < m_computingThreads; i++) {
delete m_threadData[i].argon2;
free(m_threadData[i].mem);
}
delete[] m_threadData;
if(m_dllHandle != NULL)
dlclose(m_dllHandle);
}
void CpuHasher::loadArgon2BlockFiller() {
string module_path = m_appFolder;
module_path += "/modules/argon2_fill_blocks_" + m_optimization + ".opt";
m_dllHandle = dlopen(module_path.c_str(), RTLD_LAZY);
if(m_dllHandle != NULL)
m_argon2BlocksFillerPtr = (argon2BlocksFillerPtr)dlsym(m_dllHandle, "fill_memory_blocks");
}
int CpuHasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
CpuHasherThread &threadData = m_threadData[threadIdx];
threadData.hashData.input = input;
threadData.hashData.inSize = size;
threadData.hashData.output = output;
return threadData.argon2->generateHashes(*m_profile, threadData.hashData);
}
/**
 * Allocates one hashing buffer of m_profile->memSize bytes plus 64 spare
 * bytes for alignment.
 *
 * @param buffer receives the raw malloc() pointer, i.e. the one to free().
 * @return the 64-byte aligned address inside that allocation, as computed by
 *         align().
 */
void *CpuHasher::allocateMemory(void *&buffer) {
    size_t availableSpace = m_profile->memSize + 64;
    void *cursor = malloc(availableSpace);

    // Record the raw pointer before align() advances `cursor`.
    buffer = cursor;

    return align(64, m_profile->memSize, cursor, availableSpace);
}
/**
 * Number of hashes a worker produces per compute() call.
 *
 * @param workerIdx zero-based worker index.
 * @return 1 for a valid index in [0, computingThreads()), 0 otherwise.
 */
size_t CpuHasher::parallelism(int workerIdx) {
    // Valid indices stop at computingThreads() - 1; the previous `>` test
    // also accepted workerIdx == computingThreads().
    if(workerIdx < 0 || workerIdx >= computingThreads())
        return 0;
    return 1;
}
/** Reports the number of computing threads as the device count. */
size_t CpuHasher::deviceCount() {
    return static_cast<size_t>(computingThreads());
}
// NOTE(review): presumably makes CpuHasher discoverable through the hasher
// registry; REGISTER_HASHER is defined outside this file - confirm there.
REGISTER_HASHER(CpuHasher);

View file

@ -0,0 +1,41 @@
//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#ifndef ARGON2_CPU_HASHER_H
#define ARGON2_CPU_HASHER_H
/**
 * Per-thread hashing state owned by CpuHasher.
 *
 * The pointers default to null so that an array element that was never
 * populated can still be passed to delete/free safely.
 */
struct CpuHasherThread {
    Argon2 *argon2 = nullptr; // Argon2 instance created for this thread
    HashData hashData;        // input/output description of the current job
    void *mem = nullptr;      // raw allocation backing the instance; released with free()
};
// Hasher implementation that runs Argon2 on the CPU. The block-filling
// routine is loaded at run time from an optimization-specific module.
class CpuHasher : public Hasher {
public:
CpuHasher();
// Calls cleanup().
~CpuHasher();
// Selects the Argon2 profile and builds the device description.
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
// Loads the block filler and allocates per-thread state; false when disabled.
virtual bool configure(xmrig::HasherConfig &config);
// Frees per-thread state and unloads the module.
virtual void cleanup();
// Hashes `input` using the state of computing thread `threadIdx`.
virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
// Returns 1 for an accepted worker index, 0 otherwise.
virtual size_t parallelism(int workerIdx);
// Returns the number of computing threads.
virtual size_t deviceCount();
private:
// Detects CPU features, thread count and memory capacity.
string detectFeaturesAndMakeDescription();
// dlopen()s the module matching m_optimization and resolves its entry point.
void loadArgon2BlockFiller();
// Allocates one buffer; `buffer` gets the raw pointer, the return value is aligned.
void *allocateMemory(void *&buffer);
DeviceInfo m_deviceInfo;
// Name of the selected optimization ("REF", "SSE2", "AVX2", ...).
string m_optimization;
// Hardware thread count reported by the system.
int m_availableProcessingThr;
// Number of per-thread buffers that could be allocated during detection.
int m_availableMemoryThr;
// Handle returned by dlopen() for the block-filler module.
void *m_dllHandle;
Argon2Profile *m_profile;
// "fill_memory_blocks" entry point resolved from the module.
argon2BlocksFillerPtr m_argon2BlocksFillerPtr;
// Array with one entry per computing thread.
CpuHasherThread *m_threadData;
};
#endif //ARGON2_CPU_HASHER_H

View file

@ -0,0 +1,567 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef BLAKE_ROUND_MKA_OPT_H
#define BLAKE_ROUND_MKA_OPT_H
#include "../../argon2/blake2/blake2-impl.h"
#if !defined(__NEON__)
#include <emmintrin.h>
#if defined(__SSSE3__)
#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
#endif
#if (defined(__XOP__) || defined(__AVX__)) && (defined(__GNUC__) || defined(__clang__))
#include <x86intrin.h>
#endif
#else
#include <arm_neon.h>
#endif
#if !defined(__NEON__)
#if !defined(__AVX512F__)
#if !defined(__AVX2__)
#if !defined(__XOP__)
#if defined(__SSSE3__)
/*
 * Byte-shuffle masks for _mm_shuffle_epi8: within each 64-bit lane the bytes
 * are moved so that the lane is rotated right by 16 bits (r16) or 24 bits (r24).
 */
#define r16 \
(_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define r24 \
(_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
/*
 * SSSE3 variant: rotate each 64-bit lane of x right by -(c) bits; callers pass
 * the count negated (e.g. -32). Counts 32, 24 and 16 are done with shuffles,
 * 63 with a shift plus x + x, anything else with a shift pair.
 * The expansion is an unparenthesized conditional expression.
 */
#define _mm_roti_epi64(x, c) \
(-(c) == 32) \
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
: (-(c) == 24) \
? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) \
? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) \
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
_mm_slli_epi64((x), 64 - (-(c))))
#else /* defined(__SSE2__) */
/*
 * Plain SSE2 variant: rotate each 64-bit lane of r right by -(c) bits using a
 * right shift combined with the complementary left shift.
 */
#define _mm_roti_epi64(r, c) \
_mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c))))
#endif
#else
#endif
/*
 * BlaMka mixing primitive on two 64-bit lanes:
 * x + y + 2 * (low 32 bits of x) * (low 32 bits of y), modulo 2^64 per lane.
 */
static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
    const __m128i sum = _mm_add_epi64(x, y);
    /* _mm_mul_epu32 multiplies the low 32 bits of every 64-bit lane. */
    const __m128i product = _mm_mul_epu32(x, y);
    return _mm_add_epi64(sum, _mm_add_epi64(product, product));
}
/*
 * First half of the BlaMka G function, applied to two independent register
 * sets (suffix 0 and 1): A = fBlaMka(A, B); D = (D ^ A) >>> 32;
 * C = fBlaMka(C, D); B = (B ^ C) >>> 24. All arguments are modified in place.
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = fBlaMka(A0, B0); \
A1 = fBlaMka(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = _mm_roti_epi64(D0, -32); \
D1 = _mm_roti_epi64(D1, -32); \
\
C0 = fBlaMka(C0, D0); \
C1 = fBlaMka(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = _mm_roti_epi64(B0, -24); \
B1 = _mm_roti_epi64(B1, -24); \
} while ((void)0, 0)
/*
 * Second half of the BlaMka G function: same structure as G1 but with
 * rotation amounts 16 (for D) and 63 (for B).
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = fBlaMka(A0, B0); \
A1 = fBlaMka(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = _mm_roti_epi64(D0, -16); \
D1 = _mm_roti_epi64(D1, -16); \
\
C0 = fBlaMka(C0, D0); \
C1 = fBlaMka(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = _mm_roti_epi64(B0, -63); \
B1 = _mm_roti_epi64(B1, -63); \
} while ((void)0, 0)
#if defined(__SSSE3__)
/*
 * SSSE3 variant. Treating each (X0, X1) pair as four 64-bit words, rotates
 * the B words by one position, the C words by two (a plain swap of C0/C1)
 * and the D words by three, so that the following G1/G2 operate on diagonals.
 * A0/A1 are not touched.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
__m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
B0 = t0; \
B1 = t1; \
\
t0 = C0; \
C0 = C1; \
C1 = t0; \
\
t0 = _mm_alignr_epi8(D1, D0, 8); \
t1 = _mm_alignr_epi8(D0, D1, 8); \
D0 = t1; \
D1 = t0; \
} while ((void)0, 0)
/*
 * SSSE3 variant. Inverse of DIAGONALIZE: moves the B, C and D words back to
 * their column positions. A0/A1 are not touched.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
__m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
B0 = t0; \
B1 = t1; \
\
t0 = C0; \
C0 = C1; \
C1 = t0; \
\
t0 = _mm_alignr_epi8(D0, D1, 8); \
t1 = _mm_alignr_epi8(D1, D0, 8); \
D0 = t1; \
D1 = t0; \
} while ((void)0, 0)
#else /* SSE2 */
/*
 * SSE2 variant of DIAGONALIZE, built from unpack instructions because
 * _mm_alignr_epi8 is not available. D0 doubles as the scratch register for
 * the C0/C1 swap before it receives its final value. A0/A1 are not touched.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = D0; \
__m128i t1 = B0; \
D0 = C0; \
C0 = C1; \
C1 = D0; \
D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
} while ((void)0, 0)
/*
 * SSE2 variant of UNDIAGONALIZE: swaps C0/C1 and re-pairs the 64-bit halves
 * of the B and D registers with unpack instructions. A0/A1 are not touched.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0, t1; \
t0 = C0; \
C0 = C1; \
C1 = t0; \
t0 = B0; \
t1 = D0; \
B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
} while ((void)0, 0)
#endif
/*
 * One BlaMka round on eight 128-bit registers: G1+G2 on the columns,
 * DIAGONALIZE, G1+G2 on the diagonals, UNDIAGONALIZE.
 * Note the parameter order here is (A0, A1, B0, B1, ...) while the helper
 * macros take (A0, B0, C0, D0, A1, B1, C1, D1).
 */
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#else /* __AVX2__ */
#include <immintrin.h>
/*
 * AVX2 helpers rotating every 64-bit lane of a 256-bit register right by
 * 32, 24, 16 or 63 bits. 32/24/16 use shuffles; 63 uses a right shift
 * combined with x + x.
 */
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
/*
 * First half of the BlaMka G function for AVX2, processing register set 0
 * and then register set 1: A = A + B + 2*lo32(A)*lo32(B); D = (D ^ A) >>> 32;
 * C = C + D + 2*lo32(C)*lo32(D); B = (B ^ C) >>> 24.
 * The expansion already ends with a semicolon, so call sites in this header
 * invoke it without one.
 */
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i ml = _mm256_mul_epu32(A0, B0); \
ml = _mm256_add_epi64(ml, ml); \
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
D0 = _mm256_xor_si256(D0, A0); \
D0 = rotr32(D0); \
\
ml = _mm256_mul_epu32(C0, D0); \
ml = _mm256_add_epi64(ml, ml); \
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
\
B0 = _mm256_xor_si256(B0, C0); \
B0 = rotr24(B0); \
\
ml = _mm256_mul_epu32(A1, B1); \
ml = _mm256_add_epi64(ml, ml); \
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
D1 = _mm256_xor_si256(D1, A1); \
D1 = rotr32(D1); \
\
ml = _mm256_mul_epu32(C1, D1); \
ml = _mm256_add_epi64(ml, ml); \
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
\
B1 = _mm256_xor_si256(B1, C1); \
B1 = rotr24(B1); \
} while((void)0, 0);
/*
 * Second half of the BlaMka G function for AVX2: same structure as G1_AVX2
 * with rotation amounts 16 (for D) and 63 (for B).
 * The expansion already ends with a semicolon.
 */
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i ml = _mm256_mul_epu32(A0, B0); \
ml = _mm256_add_epi64(ml, ml); \
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
D0 = _mm256_xor_si256(D0, A0); \
D0 = rotr16(D0); \
\
ml = _mm256_mul_epu32(C0, D0); \
ml = _mm256_add_epi64(ml, ml); \
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
B0 = _mm256_xor_si256(B0, C0); \
B0 = rotr63(B0); \
\
ml = _mm256_mul_epu32(A1, B1); \
ml = _mm256_add_epi64(ml, ml); \
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
D1 = _mm256_xor_si256(D1, A1); \
D1 = rotr16(D1); \
\
ml = _mm256_mul_epu32(C1, D1); \
ml = _mm256_add_epi64(ml, ml); \
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
B1 = _mm256_xor_si256(B1, C1); \
B1 = rotr63(B1); \
} while((void)0, 0);
/*
 * Diagonalization for the case where each 256-bit register holds one full
 * row of four 64-bit words: rotates the words of B by one position, C by two
 * and D by three, independently for register set 0 and set 1.
 * A0/A1 are not touched. The expansion already ends with a semicolon.
 */
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
\
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
} while((void)0, 0);
/*
 * Diagonalization for the second register layout (used by BLAKE2_ROUND_2):
 * the B and D register pairs are recombined with 32-bit blends (masks 0xCC
 * and 0x33) followed by a swap of adjacent 64-bit words, and C0/C1 are
 * exchanged. A0/A1 are not touched.
 * The expansion already ends with a semicolon.
 */
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
\
tmp1 = C0; \
C0 = C1; \
C1 = tmp1; \
\
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
} while(0);
/*
 * Inverse of DIAGONALIZE_1: rotates the words of B by three positions, C by
 * two and D by one, restoring the column layout. A0/A1 are not touched.
 * The expansion already ends with a semicolon.
 */
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
\
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
} while((void)0, 0);
/*
 * Inverse of DIAGONALIZE_2: the same blend + word-swap building blocks with
 * the destination registers (for B) and the blend masks (for D) exchanged,
 * plus the C0/C1 swap. A0/A1 are not touched.
 * The expansion already ends with a semicolon.
 */
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
\
tmp1 = C0; \
C0 = C1; \
C1 = tmp1; \
\
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
} while((void)0, 0);
/*
 * AVX2 BlaMka round for the first register layout: G1/G2 on columns,
 * DIAGONALIZE_1, G1/G2 on diagonals, UNDIAGONALIZE_1.
 * The inner macros carry their own trailing semicolons, hence none here.
 */
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
do{ \
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
\
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
} while((void)0, 0);
/*
 * AVX2 BlaMka round for the second register layout: identical to
 * BLAKE2_ROUND_1 except that DIAGONALIZE_2 / UNDIAGONALIZE_2 are used.
 * The inner macros carry their own trailing semicolons, hence none here.
 */
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do{ \
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
\
UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
} while((void)0, 0);
#endif /* __AVX2__ */
#else /* __AVX512F__ */
#include <immintrin.h>
/* Rotate every 64-bit lane of a 512-bit register right by n bits. */
#define ror64(x, n) _mm512_ror_epi64((x), (n))
/*
 * BlaMka mixing primitive on eight 64-bit lanes:
 * x + y + 2 * (low 32 bits of x) * (low 32 bits of y), modulo 2^64 per lane.
 */
static BLAKE2_INLINE __m512i muladd(__m512i x, __m512i y)
{
    const __m512i sum = _mm512_add_epi64(x, y);
    /* _mm512_mul_epu32 multiplies the low 32 bits of every 64-bit lane. */
    const __m512i product = _mm512_mul_epu32(x, y);
    return _mm512_add_epi64(sum, _mm512_add_epi64(product, product));
}
/*
 * AVX-512 first half of the BlaMka G function on two register sets:
 * A = muladd(A, B); D = (D ^ A) >>> 32; C = muladd(C, D); B = (B ^ C) >>> 24.
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = muladd(A0, B0); \
A1 = muladd(A1, B1); \
\
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 32); \
D1 = ror64(D1, 32); \
\
C0 = muladd(C0, D0); \
C1 = muladd(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 24); \
B1 = ror64(B1, 24); \
} while ((void)0, 0)
/*
 * AVX-512 second half of the BlaMka G function: same structure as G1 with
 * rotation amounts 16 (for D) and 63 (for B).
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = muladd(A0, B0); \
A1 = muladd(A1, B1); \
\
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 16); \
D1 = ror64(D1, 16); \
\
C0 = muladd(C0, D0); \
C1 = muladd(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 63); \
B1 = ror64(B1, 63); \
} while ((void)0, 0)
/*
 * AVX-512 diagonalization: within each group of four 64-bit words, rotates
 * the B words by one position, the C words by two and the D words by three.
 * A0/A1 are not touched.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
\
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
} while ((void)0, 0)
/*
 * AVX-512 inverse of DIAGONALIZE: rotates the B words by three positions,
 * the C words by two and the D words by one within each group of four.
 * A0/A1 are not touched.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
\
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
} while ((void)0, 0)
/*
 * AVX-512 core BlaMka round: G1+G2 on columns, DIAGONALIZE, G1+G2 on
 * diagonals, UNDIAGONALIZE. Used by BLAKE2_ROUND_1 and BLAKE2_ROUND_2 after
 * they have rearranged the registers.
 */
#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
/*
 * Regroups two 512-bit registers by 256-bit halves: afterwards A0 holds the
 * low halves of the original A0 and A1, and A1 holds their high halves.
 */
#define SWAP_HALVES(A0, A1) \
do { \
__m512i t0, t1; \
t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
A0 = t0; \
A1 = t1; \
} while((void)0, 0)
/*
 * SWAP_HALVES followed by exchanging the two middle 128-bit quarters of each
 * register (word order 0,1,4,5,2,3,6,7).
 */
#define SWAP_QUARTERS(A0, A1) \
do { \
SWAP_HALVES(A0, A1); \
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
} while((void)0, 0)
/*
 * Applies the two steps of SWAP_QUARTERS in reverse order: first the
 * middle-quarter exchange, then SWAP_HALVES.
 */
#define UNSWAP_QUARTERS(A0, A1) \
do { \
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
SWAP_HALVES(A0, A1); \
} while((void)0, 0)
/*
 * AVX-512 BlaMka round for the first register layout: regroups the register
 * pairs by halves, runs BLAKE2_ROUND, then applies SWAP_HALVES again to
 * restore the layout.
 * Note the parameter order (A0, C0, B0, D0, A1, C1, B1, D1): the second and
 * third arguments of each group are deliberately named C and B.
 */
#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
do { \
SWAP_HALVES(A0, B0); \
SWAP_HALVES(C0, D0); \
SWAP_HALVES(A1, B1); \
SWAP_HALVES(C1, D1); \
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
SWAP_HALVES(A0, B0); \
SWAP_HALVES(C0, D0); \
SWAP_HALVES(A1, B1); \
SWAP_HALVES(C1, D1); \
} while ((void)0, 0)
/*
 * AVX-512 BlaMka round for the second register layout: regroups every
 * register pair with SWAP_QUARTERS, runs BLAKE2_ROUND, then undoes the
 * regrouping with UNSWAP_QUARTERS.
 */
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
SWAP_QUARTERS(A0, A1); \
SWAP_QUARTERS(B0, B1); \
SWAP_QUARTERS(C0, C1); \
SWAP_QUARTERS(D0, D1); \
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
UNSWAP_QUARTERS(A0, A1); \
UNSWAP_QUARTERS(B0, B1); \
UNSWAP_QUARTERS(C0, C1); \
UNSWAP_QUARTERS(D0, D1); \
} while ((void)0, 0)
#endif /* __AVX512F__ */
#else /* __NEON__ */
/*
 * BlaMka mixing primitive on two 64-bit lanes (NEON):
 * x + y + 2 * (low 32 bits of x) * (low 32 bits of y), modulo 2^64 per lane.
 */
static BLAKE2_INLINE uint64x2_t fBlaMka(uint64x2_t x, uint64x2_t y) {
    const uint64x2_t sum = vaddq_u64(x, y);
    /* vmovn_u64 keeps the low 32 bits of each lane; vmull_u32 widens the product. */
    const uint64x2_t product = vmull_u32(vmovn_u64(x), vmovn_u64(y));
    return vaddq_u64(sum, vaddq_u64(product, product));
}
/*
 * NEON helpers rotating each 64-bit lane right by 32, 24, 16 or 63 bits.
 * 32: swap the 32-bit halves of every lane. 24 / 16: byte-wise extract on the
 * low and high lane separately. 63: x + x combined with a right shift by 63.
 */
#define vrorq_n_u64_32(x) vreinterpretq_u64_u32(vrev64q_u32(vreinterpretq_u32_u64((x))))
#define vrorq_n_u64_24(x) vcombine_u64( \
vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_low_u64(x)), vreinterpret_u8_u64(vget_low_u64(x)), 3)), \
vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_high_u64(x)), vreinterpret_u8_u64(vget_high_u64(x)), 3)))
#define vrorq_n_u64_16(x) vcombine_u64( \
vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_low_u64(x)), vreinterpret_u8_u64(vget_low_u64(x)), 2)), \
vreinterpret_u64_u8(vext_u8(vreinterpret_u8_u64(vget_high_u64(x)), vreinterpret_u8_u64(vget_high_u64(x)), 2)))
#define vrorq_n_u64_63(x) veorq_u64(vaddq_u64(x, x), vshrq_n_u64(x, 63))
/*
 * NEON first half of the BlaMka G function on two register sets:
 * A = fBlaMka(A, B); D = (D ^ A) >>> 32; C = fBlaMka(C, D); B = (B ^ C) >>> 24.
 */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = fBlaMka(A0, B0); \
A1 = fBlaMka(A1, B1); \
\
D0 = veorq_u64(D0, A0); \
D1 = veorq_u64(D1, A1); \
\
D0 = vrorq_n_u64_32(D0); \
D1 = vrorq_n_u64_32(D1); \
\
C0 = fBlaMka(C0, D0); \
C1 = fBlaMka(C1, D1); \
\
B0 = veorq_u64(B0, C0); \
B1 = veorq_u64(B1, C1); \
\
B0 = vrorq_n_u64_24(B0); \
B1 = vrorq_n_u64_24(B1); \
} while ((void)0, 0)
/*
 * NEON second half of the BlaMka G function: same structure as G1 with
 * rotation amounts 16 (for D) and 63 (for B).
 */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = fBlaMka(A0, B0); \
A1 = fBlaMka(A1, B1); \
\
D0 = veorq_u64(D0, A0); \
D1 = veorq_u64(D1, A1); \
\
D0 = vrorq_n_u64_16(D0); \
D1 = vrorq_n_u64_16(D1); \
\
C0 = fBlaMka(C0, D0); \
C1 = fBlaMka(C1, D1); \
\
B0 = veorq_u64(B0, C0); \
B1 = veorq_u64(B1, C1); \
\
B0 = vrorq_n_u64_63(B0); \
B1 = vrorq_n_u64_63(B1); \
} while ((void)0, 0)
/*
 * NEON diagonalization: rotates the four B words by one position, swaps
 * C0/C1 and rotates the four D words by three positions. A0/A1 are not
 * touched.
 * Caution: this macro expands to bare statements (no do/while wrapper) and
 * uses the scratch variables t0 and t1, which the calling scope must declare
 * as uint64x2_t.
 */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
t0 = vextq_u64(B0, B1, 1); \
t1 = vextq_u64(B1, B0, 1); \
B0 = t0; B1 = t1; t0 = C0; C0 = C1; C1 = t0; \
t0 = vextq_u64(D1, D0, 1); t1 = vextq_u64(D0, D1, 1); \
D0 = t0; D1 = t1;
/*
 * NEON inverse of DIAGONALIZE: moves the B, C and D words back to their
 * column positions. A0/A1 are not touched.
 * Caution: expands to bare statements (no do/while wrapper) and relies on
 * uint64x2_t scratch variables t0 and t1 declared by the calling scope.
 */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
t0 = vextq_u64(B1, B0, 1); \
t1 = vextq_u64(B0, B1, 1); \
B0 = t0; B1 = t1; t0 = C0; C0 = C1; C1 = t0; \
t0 = vextq_u64(D0, D1, 1); t1 = vextq_u64(D1, D0, 1); \
D0 = t0; D1 = t1;
/*
 * One NEON BlaMka round on eight 128-bit registers: G1+G2 on the columns,
 * DIAGONALIZE, G1+G2 on the diagonals, UNDIAGONALIZE.
 * Because of the (UN)DIAGONALIZE macros, the calling scope must provide
 * uint64x2_t variables named t0 and t1.
 */
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#endif /* __NEON__ */
#endif /* BLAKE_ROUND_MKA_OPT_H */

View file

@ -0,0 +1,55 @@
/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/
#ifndef BLAKE_ROUND_MKA_H
#define BLAKE_ROUND_MKA_H
#include "../../argon2/blake2/blake2-impl.h"
/* designed by the Lyra PHC team */
/*
 * Scalar BlaMka mixing primitive:
 * x + y + 2 * (low 32 bits of x) * (low 32 bits of y), modulo 2^64.
 */
static BLAKE2_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t low32_mask = UINT64_C(0xFFFFFFFF);
    const uint64_t x_low = x & low32_mask;
    const uint64_t y_low = y & low32_mask;
    const uint64_t doubled_product = 2 * (x_low * y_low);
    return x + y + doubled_product;
}
/*
 * Scalar BlaMka G function on four 64-bit words, modified in place. It has
 * the shape of the BLAKE2b G function with each addition replaced by
 * fBlaMka and without message words; rotation amounts are 32, 24, 16, 63.
 * rotr64 is not defined in this header - presumably it comes from
 * blake2-impl.h.
 */
#define G(a, b, c, d) \
do { \
a = fBlaMka(a, b); \
d = rotr64(d ^ a, 32); \
c = fBlaMka(c, d); \
b = rotr64(b ^ c, 24); \
a = fBlaMka(a, b); \
d = rotr64(d ^ a, 16); \
c = fBlaMka(c, d); \
b = rotr64(b ^ c, 63); \
} while ((void)0, 0)
/*
 * One message-less round over sixteen 64-bit words: G on the four columns
 * (v0,v4,v8,v12 ...), then G on the four diagonals (v0,v5,v10,v15 ...).
 * All sixteen arguments are modified in place.
 */
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
v12, v13, v14, v15) \
do { \
G(v0, v4, v8, v12); \
G(v1, v5, v9, v13); \
G(v2, v6, v10, v14); \
G(v3, v7, v11, v15); \
G(v0, v5, v10, v15); \
G(v1, v6, v11, v12); \
G(v2, v7, v8, v13); \
G(v3, v4, v9, v14); \
} while ((void)0, 0)
#endif

View file

@ -0,0 +1,448 @@
//
// Created by Haifa Bogdan Adnan on 06/08/2018.
//
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "../../../common/DLLImport.h"
#include "../../argon2/Defs.h"
#include "../../../common/DLLExport.h"
#if !defined(BUILD_REF) && (defined(__x86_64__) || defined(_WIN64) || defined(__NEON__))
#include "blamka-round-opt.h"
#else
#include "blamka-round-ref.h"
#endif
/* Copies all ARGON2_QWORDS_IN_BLOCK 64-bit words of `src` into `dst`. */
void copy_block(block *dst, const block *src) {
    const size_t block_bytes = sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK;
    memcpy(dst->v, src->v, block_bytes);
}
/* XORs every 64-bit word of `src` into the matching word of `dst`. */
void xor_block(block *dst, const block *src) {
    int word = 0;
    while (word < ARGON2_QWORDS_IN_BLOCK) {
        dst->v[word] ^= src->v[word];
        ++word;
    }
}
#ifndef BUILD_REF
#if defined(__AVX512F__)
/*
 * AVX-512 block compression.
 *
 * state      - working state of ARGON2_512BIT_WORDS_IN_BLOCK registers;
 *              updated in place and left holding the new block.
 * ref_block  - block XORed into the state before the permutation.
 * next_block - destination block; read when with_xor is set, written when
 *              keep is set.
 * with_xor   - non-zero: the current content of next_block is additionally
 *              XORed into the feed-forward value.
 * keep       - non-zero: store the result to next_block.
 */
static void fill_block(__m512i *state, const block *ref_block,
block *next_block, int with_xor, int keep) {
/* Feed-forward copy that is XORed back in after the permutation. */
__m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
unsigned int i;
if (with_xor) {
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
state[i] = _mm512_xor_si512(
state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
block_XY[i] = _mm512_xor_si512(
state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
}
} else {
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
block_XY[i] = state[i] = _mm512_xor_si512(
state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
}
}
/* First pass: two groups of eight consecutive registers. */
for (i = 0; i < 2; ++i) {
BLAKE2_ROUND_1(
state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
}
/* Second pass: two groups of eight registers taken with stride 2. */
for (i = 0; i < 2; ++i) {
BLAKE2_ROUND_2(
state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
}
/* Feed-forward XOR; the result is written out only when requested. */
if(keep) {
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
state[i] = _mm512_xor_si512(state[i], block_XY[i]);
_mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
}
}
else {
for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
state[i] = _mm512_xor_si512(state[i], block_XY[i]);
}
}
}
#elif defined(__AVX2__)
/*
 * AVX2 block compression.
 *
 * state      - working state of ARGON2_HWORDS_IN_BLOCK registers; updated in
 *              place and left holding the new block.
 * ref_block  - block XORed into the state before the permutation.
 * next_block - destination block; read when with_xor is set, written when
 *              keep is set.
 * with_xor   - non-zero: the current content of next_block is additionally
 *              XORed into the feed-forward value.
 * keep       - non-zero: store the result to next_block.
 */
static void fill_block(__m256i *state, const block *ref_block,
                       block *next_block, int with_xor, int keep) {
    /* Feed-forward copy that is XORed back in after the permutation. */
    __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
    unsigned int i;

    if (with_xor) {
        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
            state[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
            block_XY[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
        }
    } else {
        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
            block_XY[i] = state[i] = _mm256_xor_si256(
                state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
        }
    }

    /* First pass: four groups of eight consecutive registers. */
    for (i = 0; i < 4; ++i) {
        BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
                       state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
    }

    /* Second pass: four groups of eight registers taken with stride 4. */
    for (i = 0; i < 4; ++i) {
        BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
                       state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
    }

    /* Feed-forward XOR; the result is written out only when requested. */
    if(keep) {
        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
            state[i] = _mm256_xor_si256(state[i], block_XY[i]);
            /* Unaligned store, matching the unaligned loads above: nothing in
               this function guarantees next_block is 32-byte aligned. */
            _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
        }
    }
    else {
        for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
            state[i] = _mm256_xor_si256(state[i], block_XY[i]);
        }
    }
}
#elif defined(__AVX__)
/*
 * Bit-casts between 256-bit integer and double vectors. They let the AVX
 * fill_block use _mm256_xor_pd on integer data.
 */
#define I2D(x) _mm256_castsi256_pd(x)
#define D2I(x) _mm256_castpd_si256(x)
/*
 * AVX block compression: the XOR passes work on 256-bit vectors while the
 * permutation itself runs on 128-bit registers.
 *
 * state      - working state of ARGON2_OWORDS_IN_BLOCK registers; updated in
 *              place and left holding the new block.
 * ref_block  - block XORed into the state before the permutation.
 * next_block - destination block; read when with_xor is set, written when
 *              keep is set.
 * with_xor   - non-zero: the current content of next_block is additionally
 *              XORed into the feed-forward value.
 * keep       - non-zero: store the result to next_block.
 */
static void fill_block(__m128i *state, const block *ref_block,
block *next_block, int with_xor, int keep) {
/* Feed-forward copy that is XORed back in after the permutation. */
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
unsigned int i;
__m256i t;
/* 256-bit views of the 128-bit arrays; only accessed with unaligned loads/stores. */
__m256i *s256 = (__m256i *) state, *block256 = (__m256i *) block_XY;
if (with_xor) {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
I2D(_mm256_loadu_si256((const __m256i *)ref_block->v + i))));
_mm256_storeu_si256(s256 + i, t);
t = D2I(_mm256_xor_pd(I2D(t), \
I2D(_mm256_loadu_si256((const __m256i *)next_block->v + i))));
_mm256_storeu_si256(block256 + i, t);
}
} else {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
I2D(_mm256_loadu_si256((const __m256i *)ref_block->v + i))));
_mm256_storeu_si256(s256 + i, t);
_mm256_storeu_si256(block256 + i, t);
}
}
/* First pass: eight groups of eight consecutive registers. */
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
state[8 * i + 6], state[8 * i + 7]);
}
/* Second pass: eight groups of eight registers taken with stride 8. */
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
state[8 * 6 + i], state[8 * 7 + i]);
}
/* Feed-forward XOR; the result is written out only when requested. */
if(keep) {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
I2D(_mm256_loadu_si256(block256 + i))));
_mm256_storeu_si256(s256 + i, t);
_mm256_storeu_si256((__m256i *)next_block->v + i, t);
}
}
else {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK / 2; i++) {
t = D2I(_mm256_xor_pd(I2D(_mm256_loadu_si256(s256 + i)), \
I2D(_mm256_loadu_si256(block256 + i))));
_mm256_storeu_si256(s256 + i, t);
}
}
}
#elif defined(__NEON__)
/*
 * NEON variant of the block compression step.
 *
 * state      - running state, ARGON2_OWORDS_IN_BLOCK 128-bit words; updated
 *              in place.
 * ref_block  - block XORed into the state before the rounds.
 * next_block - destination block; read when with_xor is non-zero, written
 *              when keep is non-zero.
 * with_xor   - non-zero: fold the previous contents of next_block into the
 *              feed-forward copy.
 * keep       - non-zero: store the result into next_block.
 */
static void fill_block(uint64x2_t *state, const block *ref_block,
block *next_block, int with_xor, int keep) {
/* Feed-forward copy taken before the rounds, XORed back in afterwards. */
uint64x2_t block_XY[ARGON2_OWORDS_IN_BLOCK];
/* NOTE(review): t0/t1 are not referenced in the visible body; presumably the
   NEON BLAKE2_ROUND macro uses them as scratch -- confirm before removing. */
uint64x2_t t0, t1;
unsigned int i;
if (with_xor) {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
/* Each 128-bit word covers two consecutive 64-bit entries of v. */
state[i] = veorq_u64(state[i], vld1q_u64(ref_block->v + i*2));
block_XY[i] = veorq_u64(state[i], vld1q_u64(next_block->v + i*2));
}
} else {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
block_XY[i] = state[i] = veorq_u64(state[i], vld1q_u64(ref_block->v + i*2));
}
}
/* First pass: eight consecutive 128-bit words per round. */
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
state[8 * i + 6], state[8 * i + 7]);
}
/* Second pass: words taken with stride 8. */
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
state[8 * 6 + i], state[8 * 7 + i]);
}
if(keep) {
/* Feed-forward XOR; result is kept in state and written to next_block. */
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
state[i] = veorq_u64(state[i], block_XY[i]);
vst1q_u64(next_block->v + i*2, state[i]);
}
}
else {
/* Feed-forward XOR; result is kept in state only. */
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
state[i] = veorq_u64(state[i], block_XY[i]);
}
}
}
#else
/*
 * 128-bit SSE fallback of the block compression step.
 *
 * state      - running state, ARGON2_OWORDS_IN_BLOCK 128-bit words; updated
 *              in place.
 * ref_block  - block XORed into the state before the rounds.
 * next_block - destination block; read when with_xor is non-zero, written
 *              when keep is non-zero.
 */
static void fill_block(__m128i *state, const block *ref_block,
                       block *next_block, int with_xor, int keep) {
    __m128i feed_forward[ARGON2_OWORDS_IN_BLOCK];
    unsigned int i;

    if (!with_xor) {
        /* state ^= ref; the feed-forward copy is the same value. */
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) {
            const __m128i ref_word =
                _mm_loadu_si128((const __m128i *)ref_block->v + i);
            state[i] = _mm_xor_si128(state[i], ref_word);
            feed_forward[i] = state[i];
        }
    } else {
        /* state ^= ref; feed-forward additionally absorbs the old next_block. */
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) {
            const __m128i ref_word =
                _mm_loadu_si128((const __m128i *)ref_block->v + i);
            state[i] = _mm_xor_si128(state[i], ref_word);
            feed_forward[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
        }
    }

    /* First pass: eight consecutive 128-bit words per round. */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
                     state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
                     state[8 * i + 6], state[8 * i + 7]);
    }

    /* Second pass: words taken with stride 8. */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
                     state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
                     state[8 * 6 + i], state[8 * 7 + i]);
    }

    if (keep) {
        __m128i *const out = (__m128i *)next_block->v;
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) {
            state[i] = _mm_xor_si128(state[i], feed_forward[i]);
            _mm_storeu_si128(out + i, state[i]);
        }
    } else {
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) {
            state[i] = _mm_xor_si128(state[i], feed_forward[i]);
        }
    }
}
#endif
#else
/*
 * Portable (BUILD_REF) variant of the block compression step.
 *
 * prev_block - running state; XORed with ref_block, permuted and updated in
 *              place.
 * ref_block  - block XORed into prev_block before the rounds.
 * next_block - destination block; folded into the feed-forward copy when
 *              with_xor is non-zero (and the pointer is non-NULL), and
 *              overwritten with the result when keep is non-zero.
 */
static void fill_block(block *prev_block, const block *ref_block,
                       block *next_block, int with_xor, int keep) {
    block feed_forward;
    unsigned i;

    xor_block(prev_block, ref_block);
    copy_block(&feed_forward, prev_block);
    if (with_xor && next_block != NULL) {
        xor_block(&feed_forward, next_block);
    }

    /* Blake2 round over each run of 16 consecutive 64-bit words:
       (0..15), (16..31), ..., (112..127). */
    for (i = 0; i < 8; ++i) {
        uint64_t *col = prev_block->v + 16 * i;
        BLAKE2_ROUND_NOMSG(
            col[0], col[1], col[2], col[3],
            col[4], col[5], col[6], col[7],
            col[8], col[9], col[10], col[11],
            col[12], col[13], col[14], col[15]);
    }

    /* Blake2 round over word pairs taken with stride 16:
       (0,1,16,17,...,112,113), (2,3,18,19,...,114,115), ...,
       (14,15,30,31,...,126,127). */
    for (i = 0; i < 8; ++i) {
        uint64_t *row = prev_block->v + 2 * i;
        BLAKE2_ROUND_NOMSG(
            row[0], row[1], row[16], row[17],
            row[32], row[33], row[48], row[49],
            row[64], row[65], row[80], row[81],
            row[96], row[97], row[112], row[113]);
    }

    xor_block(prev_block, &feed_forward);
    if (!keep) {
        return;
    }
    copy_block(next_block, prev_block);
}
#endif
/*
 * Runs the block-filling schedule described by `profile`, once for each of
 * `threads` memory regions.
 *
 * user_data is treated as `threads` back-to-back regions of profile->memSize
 * bytes, each an array of `block`s. For every region the function walks
 * profile->segments (triples: first block index, index of the block that
 * seeds the state, segment type) and calls fill_block() for each position of
 * the segment:
 *   - segment type 1: the reference index is derived from the first 64-bit
 *     word of the running state (data-dependent addressing);
 *   - otherwise: reference index, optional target index and keep flag are
 *     read from profile->blockRefs (triples: target, reference, keep).
 * After the last segment, any remaining blockRefs entries whose third field
 * is -1 are applied as block XORs, and the final block is copied to block 0
 * of the region.
 *
 * Returns user_data unchanged.
 *
 * NOTE(review): without BUILD_REF, `state` is only declared when one of
 * __AVX512F__, __AVX2__, __x86_64__/_WIN64 or __NEON__ is defined.
 */
DLLEXPORT void *fill_memory_blocks(int threads, Argon2Profile *profile, void *user_data) {
    void *memory = user_data;
#ifndef BUILD_REF
#if defined(__AVX512F__)
    __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
    uint64_t buff_512[8];
#elif defined(__AVX2__)
    __m256i state[ARGON2_HWORDS_IN_BLOCK];
    uint64_t buff_256[4];
#elif defined(__x86_64__) || defined(_WIN64)
    __m128i state[ARGON2_OWORDS_IN_BLOCK];
#elif defined(__NEON__)
    uint64x2_t state[ARGON2_OWORDS_IN_BLOCK];
#endif
#else
    block state_;
    block *state = &state_;
#endif
    int lane_length = profile->segSize * 4;   /* four segments per lane */
    int seg_length = profile->segSize;
    int suc_idx = profile->succesiveIdxs;

    for(int thr = 0; thr < threads;thr++) {
        block *ref_block = NULL, *curr_block = NULL;
        int32_t ref_idx = 0;
        int32_t cur_idx = 0;
        int32_t prev_idx = 0;
        int32_t seg_type = 0;
        int32_t idx = 0;
        int32_t keep = 1;
        int32_t with_xor = 0;
        /* Widen before multiplying so the byte offset cannot wrap if memSize
           is a 32-bit field (presumably it is a byte count -- confirm). */
        block *blocks = (block *)((uint8_t*)memory + (size_t)thr * profile->memSize);
        int32_t *address = profile->blockRefs;

        for(uint32_t s = 0; s < profile->segCount; s++) {
            cur_idx = profile->segments[s * 3];
            prev_idx = profile->segments[s * 3 + 1];
            seg_type = profile->segments[s * 3 + 2];
            keep = 1;
            /* Segments past the first 4 * thrCost XOR into existing blocks. */
            with_xor = (s >= profile->thrCost * 4) ? 1 : 0;
            /* The first thrCost segments start at position 2. */
            idx = (s < profile->thrCost) ? 2 : 0;
            int32_t lane = s % profile->thrCost;
            int32_t slice = (s / profile->thrCost) % 4;
            int32_t pass = (s / profile->thrCost) / 4;

            /* Seed the running state from the segment's predecessor block. */
            memcpy(state, (void *) (blocks + prev_idx), ARGON2_BLOCK_SIZE);

            if(seg_type == 0) {
                /* Locate this segment's entries in blockRefs; the first
                   thrCost segments contribute segSize - 2 entries each. */
                if(s < profile->thrCost)
                    address = &profile->blockRefs[(s * (profile->segSize - 2)) * 3];
                else
                    address = &profile->blockRefs[(profile->thrCost * (profile->segSize - 2) + (s - profile->thrCost) * profile->segSize) * 3];
            }

            for (int i = idx; i < seg_length; ++i, cur_idx ++) {
                if (seg_type == 1) { // data dependent addressing
                    /* Extract the first 64-bit word of the running state. */
#ifndef BUILD_REF
#if defined(__AVX512F__)
                    _mm512_storeu_si512(buff_512, state[0]);
                    uint64_t pseudo_rand = buff_512[0];
#elif defined(__AVX2__)
                    /* The intrinsic takes a __m256i pointer; the explicit cast
                       avoids an incompatible-pointer-type diagnostic. */
                    _mm256_storeu_si256((__m256i *)buff_256, state[0]);
                    uint64_t pseudo_rand = buff_256[0];
#elif defined(__x86_64__) || defined(_WIN64)
                    uint64_t pseudo_rand = _mm_cvtsi128_si64(state[0]);
#elif defined(__NEON__)
                    uint64_t pseudo_rand = 0;
                    vst1q_lane_u64(&pseudo_rand, state[0], 0);
#endif
#else
                    uint64_t pseudo_rand = state->v[0];
#endif
                    /* High 32 bits select the lane, low 32 bits the position. */
                    uint64_t ref_lane = ((pseudo_rand >> 32)) % profile->thrCost;
                    uint32_t reference_area_size = 0;
                    if(pass > 0) {
                        if (lane == ref_lane) {
                            reference_area_size = lane_length - seg_length + i - 1;
                        } else {
                            reference_area_size = lane_length - seg_length + ((i == 0) ? (-1) : 0);
                        }
                    }
                    else {
                        if (lane == ref_lane) {
                            reference_area_size = slice * seg_length + i - 1;
                        } else {
                            reference_area_size = slice * seg_length + ((i == 0) ? (-1) : 0);
                        }
                    }
                    /* Map the low word non-uniformly onto the reference area. */
                    uint64_t relative_position = pseudo_rand & 0xFFFFFFFF;
                    relative_position = relative_position * relative_position >> 32;
                    relative_position = reference_area_size - 1 -
                                        (reference_area_size * relative_position >> 32);
                    ref_idx = ref_lane * lane_length + (((pass > 0 && slice < 3) ? ((slice + 1) * seg_length) : 0) + relative_position) % lane_length;
                }
                else {
                    /* Precomputed entry: (target, reference, keep). */
                    ref_idx = address[1];
                    if(suc_idx == 0)
                        cur_idx = address[0];
                    keep = address[2];
                    address += 3;
                }
                ref_block = blocks + ref_idx;
                curr_block = blocks + cur_idx;
                fill_block(state, ref_block, curr_block, with_xor, keep);
            }
        }

        /* Apply trailing blockRefs entries flagged with -1 as block XORs and
           remember the last target; (uint32_t)-1 means "none seen". */
        uint32_t dst = -1;
        for(; address < (profile->blockRefs + profile->blockRefsSize * 3); address += 3) {
            if (address[2] == -1) {
                curr_block = blocks + address[0];
                ref_block = blocks + address[1];
                dst = address[0];
                xor_block(curr_block, ref_block);
            }
        }

        /* Publish the final block at index 0 of this region. */
        if(dst != -1)
            copy_block(blocks, blocks + dst);
        else
            copy_block(blocks, state);
    }
    return memory;
}

View file

@ -0,0 +1,4 @@
---
Language: Cpp
BasedOnStyle: Google
...

View file

@ -0,0 +1 @@
cmake_build/

View file

@ -0,0 +1,91 @@
language: c
sudo: false
cache:
directories:
- $HOME/cpu_features_archives
matrix:
include:
- os: linux
compiler: gcc
env:
TOOLCHAIN=NATIVE
TARGET=native
- os: linux
compiler: clang
env:
TOOLCHAIN=NATIVE
TARGET=native
- os: osx
compiler: gcc
env:
TOOLCHAIN=NATIVE
TARGET=native
- os: osx
compiler: clang
env:
TOOLCHAIN=NATIVE
TARGET=native
- os: linux-ppc64le
compiler: gcc
env:
TOOLCHAIN=NATIVE
TARGET=native
- os: linux-ppc64le
compiler: clang
env:
TOOLCHAIN=NATIVE
TARGET=native
# Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems
- os: linux
env:
TOOLCHAIN=LINARO
TARGET=aarch64-linux-gnu
QEMU_ARCH=aarch64
# Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
- os: linux
env:
TOOLCHAIN=LINARO
TARGET=arm-linux-gnueabihf
QEMU_ARCH=arm
# Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems
- os: linux
env:
TOOLCHAIN=LINARO
TARGET=armv8l-linux-gnueabihf
QEMU_ARCH=arm
# Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
- os: linux
env:
TOOLCHAIN=LINARO
TARGET=arm-linux-gnueabi
QEMU_ARCH=arm
# Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems
- os: linux
env:
TOOLCHAIN=LINARO
TARGET=aarch64_be-linux-gnu
QEMU_ARCH=DISABLED
# Toolchains for big-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
- os: linux
env:
TOOLCHAIN=LINARO
TARGET=armeb-linux-gnueabihf
QEMU_ARCH=DISABLED
# Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
- os: linux
env:
TOOLCHAIN=LINARO
TARGET=armeb-linux-gnueabi
QEMU_ARCH=DISABLED
- os: linux
env:
TOOLCHAIN=CODESCAPE
TARGET=mips-mti-linux-gnu
QEMU_ARCH=DISABLED
script:
- cmake --version
- bash -e -x ./scripts/run_integration.sh

View file

@ -0,0 +1,165 @@
# Top-level build script for the CpuFeatures project: declares the project,
# picks a default build type and exposes the two user-facing options.
cmake_minimum_required(VERSION 3.0)
project(CpuFeatures VERSION 0.1.0)
# Default to a Release build when the caller did not choose a build type.
# FORCE is needed so the (empty) cache entry is actually overwritten.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
FORCE)
endif(NOT CMAKE_BUILD_TYPE)
# BUILD_TESTING is a standard CMake variable, but we declare it here to make it
# prominent in the GUI. Tests are off by default.
option(BUILD_TESTING "Enable test (depends on googletest)." OFF)
# BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to make
# it prominent in the GUI. A static library is built by default.
option(BUILD_SHARED_LIBS "Build library as shared." OFF)
#
# library : cpu_features
#
# Public headers: installed with the library (see PUBLIC_HEADER below).
set(_HDRS
  include/cpuinfo_aarch64.h
  include/cpuinfo_arm.h
  include/cpuinfo_mips.h
  include/cpuinfo_ppc.h
  include/cpuinfo_x86.h
  include/cpu_features_macros.h
)

# Sources for every supported architecture are listed unconditionally.
# include/cpu_features_macros.h is already part of ${_HDRS}, so it is not
# repeated here.
add_library(cpu_features
  ${_HDRS}
  include/internal/bit_utils.h
  include/internal/linux_features_aggregator.h
  include/internal/cpuid_x86.h
  include/internal/filesystem.h
  include/internal/hwcaps.h
  include/internal/stack_line_reader.h
  include/internal/string_view.h
  src/linux_features_aggregator.c
  src/cpuid_x86_clang_gcc.c
  src/cpuid_x86_msvc.c
  src/cpuinfo_aarch64.c
  src/cpuinfo_arm.c
  src/cpuinfo_mips.c
  src/cpuinfo_ppc.c
  src/cpuinfo_x86.c
  src/filesystem.c
  src/hwcaps.c
  src/stack_line_reader.c
  src/string_view.c
)

# Consumers see include/ in the build tree and include/cpu_features once
# installed; include/internal stays private to the library.
target_include_directories(cpu_features
  PUBLIC
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:include/cpu_features>
  PRIVATE
    include/internal
)
set_target_properties(cpu_features PROPERTIES PUBLIC_HEADER "${_HDRS}")
target_compile_definitions(cpu_features
  PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024)
target_link_libraries(cpu_features PUBLIC ${CMAKE_DL_LIBS})

# The use of shared libraries is discouraged.
# For API / ABI compatibility reasons, it is recommended to build and use
# cpu_features in a subdirectory of your project or as an embedded dependency.
if(BUILD_SHARED_LIBS)
  set_property(TARGET cpu_features PROPERTY POSITION_INDEPENDENT_CODE ON)
endif()

# CpuFeatures:: matches the namespace of the exported/installed targets, so
# the same target name works with add_subdirectory() and find_package().
# The CpuFeature:: spelling is kept for existing users.
add_library(CpuFeature::cpu_features ALIAS cpu_features)
add_library(CpuFeatures::cpu_features ALIAS cpu_features)

#
# program : list_cpu_features
#

add_executable(list_cpu_features src/utils/list_cpu_features.c)
target_link_libraries(list_cpu_features PRIVATE cpu_features)
# Same two namespaces as for the library alias above.
add_executable(CpuFeature::list_cpu_features ALIAS list_cpu_features)
add_executable(CpuFeatures::list_cpu_features ALIAS list_cpu_features)
#
# tests
#
# Test setup: only active when BUILD_TESTING is ON. googletest is fetched and
# built at configure time unless the parent project already provides it.
include(CTest)
if(BUILD_TESTING)
# Automatically incorporate googletest into the CMake Project if target not
# found.
if(NOT TARGET gtest OR NOT TARGET gmock_main)
# Download and unpack googletest at configure time.
configure_file(
cmake/googletest.CMakeLists.txt.in
googletest-download/CMakeLists.txt
)
# Configure the generated download project with the same generator...
execute_process(
COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download)
if(result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()
# ...then build it; a non-zero result aborts configuration.
execute_process(
COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download)
if(result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()
# Prevent overriding the parent project's compiler/linker settings on
# Windows.
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
# Add googletest directly to our build. This defines the gtest and
# gtest_main targets.
add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src
${CMAKE_BINARY_DIR}/googletest-build
EXCLUDE_FROM_ALL)
endif()
add_subdirectory(test)
endif()
#
# Install
#
# Install rules: the library, the list_cpu_features tool, the public headers
# and a CMake package (targets file, config file and version file).
include(GNUInstallDirs)
install(TARGETS cpu_features list_cpu_features
EXPORT CpuFeaturesTargets
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cpu_features
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
# Exported targets are published under the CpuFeatures:: namespace.
install(EXPORT CpuFeaturesTargets
NAMESPACE CpuFeatures::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures
COMPONENT Devel
)
include(CMakePackageConfigHelpers)
# Generate CpuFeaturesConfig.cmake from its template.
configure_package_config_file(cmake/CpuFeaturesConfig.cmake.in
"${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake"
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures"
NO_SET_AND_CHECK_MACRO
NO_CHECK_REQUIRED_COMPONENTS_MACRO
)
# Version file: packages with the same major version are considered compatible.
write_basic_package_version_file(
"${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake"
COMPATIBILITY SameMajorVersion
)
install(
FILES
"${PROJECT_BINARY_DIR}/CpuFeaturesConfig.cmake"
"${PROJECT_BINARY_DIR}/CpuFeaturesConfigVersion.cmake"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/CpuFeatures"
COMPONENT Devel
)

View file

@ -0,0 +1,23 @@
# How to Contribute
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.

View file

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@ -0,0 +1,165 @@
# cpu_features [![Build Status](https://travis-ci.org/google/cpu_features.svg?branch=master)](https://travis-ci.org/google/cpu_features) [![Build status](https://ci.appveyor.com/api/projects/status/46d1owsj7n8dsylq/branch/master?svg=true)](https://ci.appveyor.com/project/gchatelet/cpu-features/branch/master)
A cross-platform C library to retrieve CPU features (such as available
instructions) at runtime.
## Table of Contents
- [Design Rationale](#rationale)
- [Code samples](#codesample)
- [Running sample code](#usagesample)
- [What's supported](#support)
- [License](#license)
- [Build with cmake](#cmake)
<a name="rationale"></a>
## Design Rationale
- **Simple to use.** See the snippets below for examples.
- **Extensible.** Easy to add missing features or architectures.
- **Compatible with old compilers** and available on many architectures so it
can be used widely. To ensure that cpu_features works on as many platforms
as possible, we implemented it in a highly portable version of C: C99.
- **Sandbox-compatible.** The library uses a variety of strategies to cope
with sandboxed environments or when `cpuid` is unavailable. This is useful
when running integration tests in hermetic environments.
- **Thread safe, no memory allocation, and raises no exceptions.**
cpu_features is suitable for implementing fundamental libc functions like
`malloc`, `memcpy`, and `memcmp`.
- **Unit tested.**
<a name="codesample"></a>
### Checking features at runtime
Here's a simple example that executes a codepath if the CPU supports both the
AES and the SSE4.2 instruction sets:
```c
#include "cpuinfo_x86.h"
static const X86Features features = GetX86Info().features;
void Compute(void) {
if (features.aes && features.sse4_2) {
// Run optimized code.
} else {
// Run standard code.
}
}
```
### Caching for faster evaluation of complex checks
If you wish, you can read all the features at once into a global variable, and
then query for the specific features you care about. Below, we store all the ARM
features and then check whether AES and NEON are supported.
```c
#include <stdbool.h>
#include "cpuinfo_arm.h"
static const ArmFeatures features = GetArmInfo().features;
static const bool has_aes_and_neon = features.aes && features.neon;
// use has_aes_and_neon.
```
This is a good approach to take if you're checking for combinations of features
when using a compiler that is slow to extract individual bits from bit-packed
structures.
### Checking compile time flags
The following code determines whether the compiler was told to use the AVX
instruction set (e.g., `g++ -mavx`) and sets `has_avx` accordingly.
```c
#include <stdbool.h>
#include "cpuinfo_x86.h"
static const X86Features features = GetX86Info().features;
static const bool has_avx = CPU_FEATURES_COMPILED_X86_AVX || features.avx;
// use has_avx.
```
`CPU_FEATURES_COMPILED_X86_AVX` is set to 1 if the compiler was instructed to
use AVX and 0 otherwise, combining compile time and runtime knowledge.
### Rejecting poor hardware implementations based on microarchitecture
On x86, the first incarnation of a feature in a microarchitecture might not be
the most efficient (e.g. AVX on Sandy Bridge). We provide a function to retrieve
the underlying microarchitecture so you can decide whether to use it.
Below, `has_fast_avx` is set to 1 if the CPU supports the AVX instruction
set&mdash;but only if it's not Sandy Bridge.
```c
#include <stdbool.h>
#include "cpuinfo_x86.h"
static const X86Info info = GetX86Info();
static const X86Microarchitecture uarch = GetX86Microarchitecture(&info);
static const bool has_fast_avx = info.features.avx && uarch != INTEL_SNB;
// use has_fast_avx.
```
This feature is currently available only for x86 microarchitectures.
<a name="usagesample"></a>
### Running sample code
Building `cpu_features` also produces a small executable, `list_cpu_features`, that you can use to test the library.
```shell
% ./build/list_cpu_features
arch : x86
brand : Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz
family : 6 (0x06)
model : 45 (0x2D)
stepping : 7 (0x07)
uarch : INTEL_SNB
flags : aes,avx,cx16,smx,sse4_1,sse4_2,ssse3
```
```shell
% ./build/list_cpu_features --json
{"arch":"x86","brand":" Intel(R) Xeon(R) CPU E5-1650 0 @ 3.20GHz","family":6,"model":45,"stepping":7,"uarch":"INTEL_SNB","flags":["aes","avx","cx16","smx","sse4_1","sse4_2","ssse3"]}
```
<a name="support"></a>
## What's supported
| | x86³ | ARM | AArch64 | MIPSel | POWER |
|---------|:----:|:-------:|:-------:|:------:|:-------:|
| Android | yes² | yes¹ | yes¹ | yes¹ | N/A |
| iOS | N/A | not yet | not yet | N/A | N/A |
| Linux | yes² | yes¹ | yes¹ | yes¹ | yes¹ |
| macOS | yes² | N/A | not yet | N/A | no |
| Windows | yes² | not yet | not yet | N/A | N/A |
1. **Features revealed from Linux.** We gather data from several sources
depending on availability:
+ from glibc's
[getauxval](https://www.gnu.org/software/libc/manual/html_node/Auxiliary-Vector.html)
+ by parsing `/proc/self/auxv`
+ by parsing `/proc/cpuinfo`
2. **Features revealed from CPU.** Features are retrieved by using the `cpuid`
instruction.
3. **Microarchitecture detection.** On x86 some features are not always
implemented efficiently in hardware (e.g. AVX on Sandy Bridge). Exposing the
microarchitecture allows the client to reject particular microarchitectures.
<a name="license"></a>
## License
The cpu_features library is licensed under the terms of the Apache license.
See [LICENSE](LICENSE) for more information.
<a name="cmake"></a>
## Build with CMake
Please check the [CMake build instructions](cmake/README.md).

View file

@ -0,0 +1,7 @@
# ===== googletest =====
# External googletest repository, fetched over git and pinned to a fixed commit
# so that every build uses the same googletest sources.
git_repository(
name = "com_google_googletest",
remote = "https://github.com/google/googletest.git",
commit = "c3f65335b79f47b05629e79a54685d899bc53b93",
)

View file

@ -0,0 +1,24 @@
# AppVeyor CI configuration: configure, build and test the project with CMake
# in Debug mode on 64-bit Windows.
version: '{build}'
shallow_clone: true
platform: x64
environment:
matrix:
# One job per worker image; CMAKE_GENERATOR is forwarded to `cmake -G` below.
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
CMAKE_GENERATOR: "Visual Studio 15 2017 Win64"
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
CMAKE_GENERATOR: "Visual Studio 14 2015 Win64"
matrix:
fast_finish: true
before_build:
# Print the CMake version, then configure into ./cmake_build with tests enabled.
- cmake --version
- cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON -H. -Bcmake_build -G "%CMAKE_GENERATOR%"
build_script:
- cmake --build cmake_build --config Debug --target ALL_BUILD
test_script:
# RUN_TESTS is the target used here to execute the tests enabled by BUILD_TESTING=ON.
- cmake --build cmake_build --config Debug --target RUN_TESTS

View file

@ -0,0 +1,3 @@
# CpuFeatures CMake configuration file
# Loads CpuFeaturesTargets.cmake from the directory that contains this file.
include("${CMAKE_CURRENT_LIST_DIR}/CpuFeaturesTargets.cmake")

View file

@ -0,0 +1,28 @@
# CMake build instructions
## Recommended usage : Incorporating cpu_features into a CMake project
For API / ABI compatibility reasons, it is recommended to build and use
cpu_features in a subdirectory of your project or as an embedded dependency.
This is similar to the recommended usage of the googletest framework
( https://github.com/google/googletest/blob/master/googletest/README.md )
Build and use step-by-step
1- Download cpu_features and copy it into a sub-directory of your project,
or add cpu_features as a git submodule of your project.
2- You can then use the cmake command `add_subdirectory()` to include
cpu_features directly and use the `cpu_features` target in your project.
3- Add the `cpu_features` target to the `target_link_libraries()` section of
your executable or of your library.
## Enabling tests
By default, cpu_features is configured with the Release build type and with
tests disabled. To enable testing, set the CMake `BUILD_TESTING` variable to `ON`;
[.travis.yml](../.travis.yml) and [appveyor.yml](../appveyor.yml) have up-to-date
examples.

View file

@ -0,0 +1,15 @@
# Helper project that only downloads the googletest sources; configure, build,
# install and test steps are all disabled (empty commands), so the checkout is
# left in googletest-src for the parent build to consume.
cmake_minimum_required(VERSION 2.8.2)
project(googletest-download NONE)
include(ExternalProject)
ExternalProject_Add(googletest
GIT_REPOSITORY https://github.com/google/googletest.git
# Pinned to the same commit as the Bazel WORKSPACE so that CMake builds are
# reproducible and do not break when googletest's master branch changes.
GIT_TAG c3f65335b79f47b05629e79a54685d899bc53b93
SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src"
BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)

View file

@ -0,0 +1,34 @@
# CMake toolchain file for cross-compiling to 32-bit MIPS Linux.
# Inputs read by this file:
#   CROSS        - prefix prepended to gcc / g++ / as (e.g. a target triple).
#   MIPS_CPU     - empty (default, -mips32r2) or "p5600".
#   ENABLE_DSPR2 - build with -mdspr2 (mutually exclusive with ENABLE_MSA).
#   ENABLE_MSA   - build with -mmsa   (mutually exclusive with ENABLE_DSPR2).
set(CMAKE_SYSTEM_NAME "Linux")
set(CMAKE_SYSTEM_PROCESSOR "mips32")
# The two SIMD/DSP extensions cannot be requested together.
if (ENABLE_DSPR2 AND ENABLE_MSA)
message(FATAL_ERROR "ENABLE_DSPR2 and ENABLE_MSA cannot be combined.")
endif ()
# Select the extension flag and record the choice in the cache (HAVE_*).
if (ENABLE_DSPR2)
set(HAVE_DSPR2 1 CACHE BOOL "" FORCE)
set(MIPS_CFLAGS "-mdspr2")
set(MIPS_CXXFLAGS "-mdspr2")
elseif (ENABLE_MSA)
set(HAVE_MSA 1 CACHE BOOL "" FORCE)
set(MIPS_CFLAGS "-mmsa")
set(MIPS_CXXFLAGS "-mmsa")
endif ()
# Append the ISA level flags: -mips32r2 by default, a tuned set for p5600.
# Any other MIPS_CPU value adds no ISA flags.
if ("${MIPS_CPU}" STREQUAL "")
set(MIPS_CFLAGS "${MIPS_CFLAGS} -mips32r2")
set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} -mips32r2")
elseif ("${MIPS_CPU}" STREQUAL "p5600")
set(P56_FLAGS "-mips32r5 -mload-store-pairs -msched-weight -mhard-float -mfp64")
set(MIPS_CFLAGS "${MIPS_CFLAGS} ${P56_FLAGS}")
set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} ${P56_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "-mfp64 ${CMAKE_EXE_LINKER_FLAGS}")
endif ()
# Cross tools are located by prefixing the tool name with ${CROSS}.
set(CMAKE_C_COMPILER ${CROSS}gcc)
set(CMAKE_CXX_COMPILER ${CROSS}g++)
set(AS_EXECUTABLE ${CROSS}as)
# -EL is always passed to the compilers, followed by the flags built above
# (presumably selecting little-endian output, i.e. MIPSel - confirm).
set(CMAKE_C_COMPILER_ARG1 "-EL ${MIPS_CFLAGS}")
set(CMAKE_CXX_COMPILER_ARG1 "-EL ${MIPS_CXXFLAGS}")
# NOTE(review): pre-seeds a cache entry used by thread detection; presumably
# needed because try-run checks cannot execute when cross-compiling - confirm.
set(THREADS_PTHREAD_ARG "2" CACHE STRING "Forcibly set by CMakeLists.txt." FORCE)

View file

@ -0,0 +1,125 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_
#define CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_
////////////////////////////////////////////////////////////////////////////////
// Architectures
////////////////////////////////////////////////////////////////////////////////
// Each CPU_FEATURES_ARCH_* macro below is defined (with no value) when the
// compiler's predefined macros identify the matching target architecture.
// Test them with defined(...) / #ifdef.

// x86, 32- or 64-bit (MSVC or GCC-style predefines), except when building for
// PNaCl (__pnacl__) or with __CLR_VER defined.
#if ((defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
defined(__x86_64__)) && \
!defined(__pnacl__) && !defined(__CLR_VER))
#define CPU_FEATURES_ARCH_X86
#endif
// 32-bit ARM.
#if (defined(__arm__) || defined(_M_ARM))
#define CPU_FEATURES_ARCH_ARM
#endif
// 64-bit ARM.
#if defined(__aarch64__)
#define CPU_FEATURES_ARCH_AARCH64
#endif
// Convenience macro: defined for either ARM flavor above.
#if (defined(CPU_FEATURES_ARCH_AARCH64) || defined(CPU_FEATURES_ARCH_ARM))
#define CPU_FEATURES_ARCH_ANY_ARM
#endif
// MIPS.
#if defined(__mips__)
#define CPU_FEATURES_ARCH_MIPS
#endif
// PowerPC (detected through __powerpc__ only).
#if defined(__powerpc__)
#define CPU_FEATURES_ARCH_PPC
#endif
////////////////////////////////////////////////////////////////////////////////
// Os
////////////////////////////////////////////////////////////////////////////////
// Operating-system detection. More than one macro may be defined at once:
// the Linux macro is keyed on __linux__ alone and, as its name says, is meant
// to cover Android as well.
#if defined(__linux__)
#define CPU_FEATURES_OS_LINUX_OR_ANDROID
#endif
#if defined(__ANDROID__)
#define CPU_FEATURES_OS_ANDROID
#endif
// Windows, 32- or 64-bit.
#if (defined(_WIN64) || defined(_WIN32))
#define CPU_FEATURES_OS_WINDOWS
#endif
////////////////////////////////////////////////////////////////////////////////
// Compilers
////////////////////////////////////////////////////////////////////////////////
// Compiler detection. The GCC macro explicitly excludes clang, which also
// defines __GNUC__. The MSC check is independent of the other two, so it can
// be defined together with one of them if the compiler defines _MSC_VER too.
#if defined(__clang__)
#define CPU_FEATURES_COMPILER_CLANG
#endif
#if defined(__GNUC__) && !defined(__clang__)
#define CPU_FEATURES_COMPILER_GCC
#endif
#if defined(_MSC_VER)
#define CPU_FEATURES_COMPILER_MSC
#endif
////////////////////////////////////////////////////////////////////////////////
// Cpp
////////////////////////////////////////////////////////////////////////////////
// Wrappers placed around every declaration of the library's headers.
// In C++ they open/close `namespace cpu_features { extern "C" { ... } }`, so
// the declarations live in the cpu_features namespace with C linkage.
// In C they expand to nothing.
#if defined(__cplusplus)
#define CPU_FEATURES_START_CPP_NAMESPACE \
namespace cpu_features { \
extern "C" {
// Closes, in reverse order, the two scopes opened by the START macro.
#define CPU_FEATURES_END_CPP_NAMESPACE \
} \
}
#else
#define CPU_FEATURES_START_CPP_NAMESPACE
#define CPU_FEATURES_END_CPP_NAMESPACE
#endif
////////////////////////////////////////////////////////////////////////////////
// Compiler flags
////////////////////////////////////////////////////////////////////////////////
// Use the following to check if a feature is known to be available at compile
// time. See README.md for an example.
// Each CPU_FEATURES_COMPILED_X86_* macro expands to 1 when the corresponding
// instruction set is enabled at compile time and to 0 otherwise. It can
// therefore be used both in `#if` directives and in ordinary expressions, e.g.
// `if (CPU_FEATURES_COMPILED_X86_AES || features.aes)`.
// The macros deliberately do not expand to `defined(...)`: producing `defined`
// through macro expansion is undefined behavior inside `#if`, and it is not
// valid at all inside a C/C++ expression.
#if defined(CPU_FEATURES_ARCH_X86)

#if defined(__AES__)
#define CPU_FEATURES_COMPILED_X86_AES 1
#else
#define CPU_FEATURES_COMPILED_X86_AES 0
#endif

#if defined(__F16C__)
#define CPU_FEATURES_COMPILED_X86_F16C 1
#else
#define CPU_FEATURES_COMPILED_X86_F16C 0
#endif

#if defined(__BMI__)
#define CPU_FEATURES_COMPILED_X86_BMI 1
#else
#define CPU_FEATURES_COMPILED_X86_BMI 0
#endif

#if defined(__BMI2__)
#define CPU_FEATURES_COMPILED_X86_BMI2 1
#else
#define CPU_FEATURES_COMPILED_X86_BMI2 0
#endif

// SSE / SSE2 are also detected through _M_IX86_FP, the second predefine the
// original check consulted; it is tested only when defined.
#if defined(__SSE__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))
#define CPU_FEATURES_COMPILED_X86_SSE 1
#else
#define CPU_FEATURES_COMPILED_X86_SSE 0
#endif

#if defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
#define CPU_FEATURES_COMPILED_X86_SSE2 1
#else
#define CPU_FEATURES_COMPILED_X86_SSE2 0
#endif

#if defined(__SSE3__)
#define CPU_FEATURES_COMPILED_X86_SSE3 1
#else
#define CPU_FEATURES_COMPILED_X86_SSE3 0
#endif

#if defined(__SSSE3__)
#define CPU_FEATURES_COMPILED_X86_SSSE3 1
#else
#define CPU_FEATURES_COMPILED_X86_SSSE3 0
#endif

#if defined(__SSE4_1__)
#define CPU_FEATURES_COMPILED_X86_SSE4_1 1
#else
#define CPU_FEATURES_COMPILED_X86_SSE4_1 0
#endif

#if defined(__SSE4_2__)
#define CPU_FEATURES_COMPILED_X86_SSE4_2 1
#else
#define CPU_FEATURES_COMPILED_X86_SSE4_2 0
#endif

#if defined(__AVX__)
#define CPU_FEATURES_COMPILED_X86_AVX 1
#else
#define CPU_FEATURES_COMPILED_X86_AVX 0
#endif

#if defined(__AVX2__)
#define CPU_FEATURES_COMPILED_X86_AVX2 1
#else
#define CPU_FEATURES_COMPILED_X86_AVX2 0
#endif
// Historical misspelling (lowercase "x86"); kept as an alias so that existing
// users of the old name keep compiling. Prefer CPU_FEATURES_COMPILED_X86_AVX2.
#define CPU_FEATURES_COMPILED_x86_AVX2 CPU_FEATURES_COMPILED_X86_AVX2

#endif  // defined(CPU_FEATURES_ARCH_X86)
// Expands to 1 when NEON (Advanced SIMD) is enabled at compile time and to 0
// otherwise, so it is usable in `#if` and in ordinary expressions alike.
// Both the ACLE spelling (__ARM_NEON) and the legacy spelling (__ARM_NEON__)
// are accepted, because some toolchains define only the former.
#if defined(CPU_FEATURES_ARCH_ANY_ARM)
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#define CPU_FEATURES_COMPILED_ANY_ARM_NEON 1
#else
#define CPU_FEATURES_COMPILED_ANY_ARM_NEON 0
#endif
#endif
// Expands to 1 when the MIPS SIMD Architecture (MSA) is enabled at compile
// time (__mips_msa defined) and to 0 otherwise. A literal 1/0 is used instead
// of `defined(...)` so the macro is valid in `#if` and in expressions.
#if defined(CPU_FEATURES_ARCH_MIPS)
#if defined(__mips_msa)
#define CPU_FEATURES_COMPILED_MIPS_MSA 1
#else
#define CPU_FEATURES_COMPILED_MIPS_MSA 0
#endif
#endif
#endif // CPU_FEATURES_INCLUDE_CPU_FEATURES_MACROS_H_

View file

@ -0,0 +1,65 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_
#define CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Set of AArch64 CPU features, one 1-bit bit-field per feature.
// Test a field for zero / non-zero rather than comparing it with 1: the
// fields are declared as plain `int` bit-fields of width 1.
typedef struct {
int fp : 1; // Floating-point.
int asimd : 1; // Advanced SIMD.
int aes : 1; // Hardware-accelerated Advanced Encryption Standard.
int pmull : 1; // Polynomial multiply long.
int sha1 : 1; // Hardware-accelerated SHA1.
int sha2 : 1; // Hardware-accelerated SHA2-256.
int crc32 : 1; // Hardware-accelerated CRC-32.
// Make sure to update Aarch64FeaturesEnum below if you add a field here.
} Aarch64Features;
// Feature flags plus CPU identification numbers for an AArch64 processor.
// NOTE(review): implementer / variant / part / revision presumably mirror the
// fields of the same names in /proc/cpuinfo - confirm against the
// implementation.
typedef struct {
Aarch64Features features;
int implementer;
int variant;
int part;
int revision;
} Aarch64Info;
// Returns an Aarch64Info by value; takes no arguments.
Aarch64Info GetAarch64Info(void);
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// One enumerator per field of Aarch64Features, listed in the same order as the
// struct fields. Values start at 0 and are consecutive, so AARCH64_LAST_
// equals the number of features and can be used as a loop bound.
typedef enum {
AARCH64_FP,
AARCH64_ASIMD,
AARCH64_AES,
AARCH64_PMULL,
AARCH64_SHA1,
AARCH64_SHA2,
AARCH64_CRC32,
AARCH64_LAST_,
} Aarch64FeaturesEnum;
// Looks up the feature identified by `value` in `features`.
// NOTE(review): presumably returns non-zero when the feature is set - confirm
// against the implementation.
int GetAarch64FeaturesEnumValue(const Aarch64Features* features,
Aarch64FeaturesEnum value);
// Returns a C string naming the given feature (presumably the lowercase field
// name - confirm).
const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_CPUINFO_AARCH64_H_

View file

@ -0,0 +1,80 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_
#define CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Set of 32-bit ARM CPU features, one 1-bit bit-field per feature.
// Test a field for zero / non-zero rather than comparing it with 1: the
// fields are declared as plain `int` bit-fields of width 1.
typedef struct {
int vfp : 1; // Vector Floating Point.
int iwmmxt : 1; // Intel Wireless MMX Technology.
int neon : 1; // Advanced SIMD.
int vfpv3 : 1; // VFP version 3
int vfpv3d16 : 1; // VFP version 3 with 16 D-registers
int vfpv4 : 1; // VFP version 4 with fast context switching
int idiva : 1; // SDIV and UDIV hardware division in ARM mode.
int idivt : 1; // SDIV and UDIV hardware division in Thumb mode.
int aes : 1; // Hardware-accelerated Advanced Encryption Standard.
int pmull : 1; // Polynomial multiply long.
int sha1 : 1; // Hardware-accelerated SHA1.
int sha2 : 1; // Hardware-accelerated SHA2-256.
int crc32 : 1; // Hardware-accelerated CRC-32.
// Make sure to update ArmFeaturesEnum below if you add a field here.
} ArmFeatures;
// Feature flags plus CPU identification numbers for a 32-bit ARM processor.
// NOTE(review): implementer / architecture / variant / part / revision
// presumably mirror the fields of the same names in /proc/cpuinfo - confirm
// against the implementation.
typedef struct {
ArmFeatures features;
int implementer;
int architecture;
int variant;
int part;
int revision;
} ArmInfo;
// TODO(user): Add macros to know which features are present at compile
// time.
// Returns an ArmInfo by value; takes no arguments.
ArmInfo GetArmInfo(void);
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// One enumerator per field of ArmFeatures, listed in the same order as the
// struct fields. Values start at 0 and are consecutive, so ARM_LAST_ equals
// the number of features and can be used as a loop bound.
typedef enum {
ARM_VFP,
ARM_IWMMXT,
ARM_NEON,
ARM_VFPV3,
ARM_VFPV3D16,
ARM_VFPV4,
ARM_IDIVA,
ARM_IDIVT,
ARM_AES,
ARM_PMULL,
ARM_SHA1,
ARM_SHA2,
ARM_CRC32,
ARM_LAST_,
} ArmFeaturesEnum;
// Looks up the feature identified by `value` in `features`.
// NOTE(review): presumably returns non-zero when the feature is set - confirm
// against the implementation.
int GetArmFeaturesEnumValue(const ArmFeatures* features, ArmFeaturesEnum value);
// Returns a C string naming the given feature (presumably the lowercase field
// name - confirm).
const char* GetArmFeaturesEnumName(ArmFeaturesEnum);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_CPUINFO_ARM_H_

View file

@ -0,0 +1,53 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_
#define CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Set of MIPS CPU features, one 1-bit bit-field per feature.
// Test a field for zero / non-zero rather than comparing it with 1: the
// fields are declared as plain `int` bit-fields of width 1.
typedef struct {
int msa : 1; // MIPS SIMD Architecture
// https://www.mips.com/products/architectures/ase/simd/
int eva : 1; // Enhanced Virtual Addressing
// https://www.mips.com/products/architectures/mips64/
// Make sure to update MipsFeaturesEnum below if you add a field here.
} MipsFeatures;
// Information about a MIPS processor; currently only the feature flags.
typedef struct {
MipsFeatures features;
} MipsInfo;
// Returns a MipsInfo by value; takes no arguments.
MipsInfo GetMipsInfo(void);
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// One enumerator per field of MipsFeatures, in the same order as the struct
// fields. Values start at 0, so MIPS_LAST_ equals the number of features.
typedef enum {
MIPS_MSA,
MIPS_EVA,
MIPS_LAST_,
} MipsFeaturesEnum;
// Looks up the feature identified by `value` in `features`.
// NOTE(review): presumably returns non-zero when the feature is set - confirm
// against the implementation.
int GetMipsFeaturesEnumValue(const MipsFeatures* features,
MipsFeaturesEnum value);
// Returns a C string naming the given feature (presumably the lowercase field
// name - confirm).
const char* GetMipsFeaturesEnumName(MipsFeaturesEnum);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_CPUINFO_MIPS_H_

View file

@ -0,0 +1,141 @@
// Copyright 2018 IBM
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_
#define CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_
#include "cpu_features_macros.h"
#include "internal/hwcaps.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Set of PowerPC CPU features, one 1-bit bit-field per feature.
// The fields are listed in the same order as the enumerators of
// PPCFeaturesEnum, whose comments describe what each feature means.
// Test a field for zero / non-zero rather than comparing it with 1: the
// fields are declared as plain `int` bit-fields of width 1.
typedef struct {
int ppc32 : 1;
int ppc64 : 1;
int ppc601 : 1;
int altivec : 1;
int fpu : 1;
int mmu : 1;
int mac_4xx : 1;
int unifiedcache : 1;
int spe : 1;
int efpsingle : 1;
int efpdouble : 1;
int no_tb : 1;
int power4 : 1;
int power5 : 1;
int power5plus : 1;
int cell : 1;
int booke : 1;
int smt : 1;
int icachesnoop : 1;
int arch205 : 1;
int pa6t : 1;
int dfp : 1;
int power6ext : 1;
int arch206 : 1;
int vsx : 1;
int pseries_perfmon_compat : 1;
int truele : 1;
int ppcle : 1;
int arch207 : 1;
int htm : 1;
int dscr : 1;
int ebb : 1;
int isel : 1;
int tar : 1;
int vcrypto : 1;
int htm_nosc : 1;
int arch300 : 1;
int ieee128 : 1;
int darn : 1;
int scv : 1;
int htm_no_suspend : 1;
// Make sure to update PPCFeaturesEnum below if you add a field here.
} PPCFeatures;
// Information about a PowerPC processor; currently only the feature flags.
typedef struct {
PPCFeatures features;
} PPCInfo;
// Returns a PPCInfo by value; takes no arguments.
// This function is guaranteed to be malloc, memset and memcpy free.
PPCInfo GetPPCInfo(void);
// Descriptive strings for a PowerPC system. Each string lives in a fixed
// 64-byte buffer, so it holds at most 63 characters plus the terminator.
// `type` is a PlatformType as declared in internal/hwcaps.h.
// NOTE(review): the strings are presumably parsed from /proc/cpuinfo -
// confirm against the implementation.
typedef struct {
char platform[64]; // 0 terminated string
char model[64]; // 0 terminated string
char machine[64]; // 0 terminated string
char cpu[64]; // 0 terminated string
PlatformType type;
} PPCPlatformStrings;
// Returns a PPCPlatformStrings by value; takes no arguments.
PPCPlatformStrings GetPPCPlatformStrings(void);
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// One enumerator per field of PPCFeatures, listed in the same order as the
// struct fields. Values start at 0 and are consecutive, so PPC_LAST_ equals
// the number of features and can be used as a loop bound.
typedef enum {
PPC_32, /* 32 bit mode execution */
PPC_64, /* 64 bit mode execution */
PPC_601_INSTR, /* Old POWER ISA */
PPC_HAS_ALTIVEC, /* SIMD Unit*/
PPC_HAS_FPU, /* Floating Point Unit */
PPC_HAS_MMU, /* Memory management unit */
PPC_HAS_4xxMAC,
PPC_UNIFIED_CACHE, /* Unified instruction and data cache */
PPC_HAS_SPE, /* Signal processing extension unit */
PPC_HAS_EFP_SINGLE, /* SPE single precision fpu */
PPC_HAS_EFP_DOUBLE, /* SPE double precision fpu */
PPC_NO_TB, /* No timebase */
PPC_POWER4,
PPC_POWER5,
PPC_POWER5_PLUS,
PPC_CELL, /* Cell broadband engine */
PPC_BOOKE, /* Embedded ISA */
PPC_SMT, /* Simultaneous multi-threading */
PPC_ICACHE_SNOOP,
PPC_ARCH_2_05, /* ISA 2.05 - POWER6 */
PPC_PA6T, /* PA Semi 6T core ISA */
PPC_HAS_DFP, /* Decimal floating point unit */
PPC_POWER6_EXT,
PPC_ARCH_2_06, /* ISA 2.06 - POWER7 */
PPC_HAS_VSX, /* Vector-scalar extension */
PPC_PSERIES_PERFMON_COMPAT, /* Set of backwards compatible performance
monitoring events */
PPC_TRUE_LE,
PPC_PPC_LE,
PPC_ARCH_2_07, /* ISA 2.07 - POWER8 */
PPC_HTM, /* Hardware Transactional Memory */
PPC_DSCR, /* Data stream control register */
PPC_EBB, /* Event base branching */
PPC_ISEL, /* Integer select instructions */
PPC_TAR, /* Target address register */
PPC_VEC_CRYPTO, /* Vector cryptography instructions */
PPC_HTM_NOSC, /* Transactions aborted when syscall made*/
PPC_ARCH_3_00, /* ISA 3.00 - POWER9 */
PPC_HAS_IEEE128, /* VSX IEEE Binary Float 128-bit */
PPC_DARN, /* Deliver a random number instruction */
PPC_SCV, /* scv syscall */
PPC_HTM_NO_SUSPEND, /* TM w/out suspended state */
PPC_LAST_,
} PPCFeaturesEnum;
// Looks up the feature identified by `value` in `features`.
// NOTE(review): presumably returns non-zero when the feature is set - confirm
// against the implementation.
int GetPPCFeaturesEnumValue(const PPCFeatures* features, PPCFeaturesEnum value);
// Returns a C string naming the given feature (presumably the lowercase field
// name - confirm).
const char* GetPPCFeaturesEnumName(PPCFeaturesEnum);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_CPUINFO_PPC_H_

View file

@ -0,0 +1,154 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_CPUINFO_X86_H_
#define CPU_FEATURES_INCLUDE_CPUINFO_X86_H_
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Set of x86 CPU features, one 1-bit bit-field per feature.
// The fields are listed in the same order as the enumerators of
// X86FeaturesEnum.
// Test a field for zero / non-zero rather than comparing it with 1: the
// fields are declared as plain `int` bit-fields of width 1.
// See https://en.wikipedia.org/wiki/CPUID for a list of x86 cpu features.
typedef struct {
int aes : 1;
int erms : 1;
int f16c : 1;
int fma3 : 1;
int vpclmulqdq : 1;
int bmi1 : 1;
int bmi2 : 1;
int ssse3 : 1;
int sse4_1 : 1;
int sse4_2 : 1;
int avx : 1;
int avx2 : 1;
int avx512f : 1;
int avx512cd : 1;
int avx512er : 1;
int avx512pf : 1;
int avx512bw : 1;
int avx512dq : 1;
int avx512vl : 1;
int avx512ifma : 1;
int avx512vbmi : 1;
int avx512vbmi2 : 1;
int avx512vnni : 1;
int avx512bitalg : 1;
int avx512vpopcntdq : 1;
int avx512_4vnniw : 1;
int avx512_4vbmi2 : 1;
int smx : 1;
int sgx : 1;
int cx16 : 1; // aka. CMPXCHG16B
// Make sure to update X86FeaturesEnum below if you add a field here.
} X86Features;
// Feature flags plus identification of an x86 processor.
// `vendor` is a 13-byte buffer: at most 12 characters plus the terminator.
typedef struct {
X86Features features;
int family;
int model;
int stepping;
char vendor[13]; // 0 terminated string
} X86Info;
// Calls cpuid and returns an initialized X86Info.
// This function is guaranteed to be malloc, memset and memcpy free.
X86Info GetX86Info(void);
// Known x86 microarchitectures. X86_UNKNOWN is the first enumerator (value 0);
// Intel entries come next, followed by AMD entries. The trailing comment on
// each line gives the microarchitecture's full name.
typedef enum {
X86_UNKNOWN,
INTEL_CORE, // CORE
INTEL_PNR, // PENRYN
INTEL_NHM, // NEHALEM
INTEL_ATOM_BNL, // BONNELL
INTEL_WSM, // WESTMERE
INTEL_SNB, // SANDYBRIDGE
INTEL_IVB, // IVYBRIDGE
INTEL_ATOM_SMT, // SILVERMONT
INTEL_HSW, // HASWELL
INTEL_BDW, // BROADWELL
INTEL_SKL, // SKYLAKE
INTEL_ATOM_GMT, // GOLDMONT
INTEL_KBL, // KABY LAKE
INTEL_CFL, // COFFEE LAKE
INTEL_CNL, // CANNON LAKE
AMD_HAMMER, // K8
AMD_K10, // K10
AMD_BOBCAT, // K14
AMD_BULLDOZER, // K15
AMD_JAGUAR, // K16
AMD_ZEN, // K17
} X86Microarchitecture;
// Returns the underlying microarchitecture by looking at X86Info's vendor,
// family and model.
// NOTE(review): presumably returns X86_UNKNOWN when the combination is not
// recognized - confirm against the implementation.
X86Microarchitecture GetX86Microarchitecture(const X86Info* info);
// Calls cpuid and fills the brand_string.
// - brand_string *must* be of size 49 (beware of array decaying): the `[49]`
//   in the parameter is documentation only, the argument is passed as a plain
//   `char*` and its size is not checked by the compiler.
// - brand_string will be zero terminated.
// - This function calls memcpy.
void FillX86BrandString(char brand_string[49]);
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// One enumerator per field of X86Features, listed in the same order as the
// struct fields. Values start at 0 and are consecutive, so X86_LAST_ equals
// the number of features and can be used as a loop bound.
typedef enum {
X86_AES,
X86_ERMS,
X86_F16C,
X86_FMA3,
X86_VPCLMULQDQ,
X86_BMI1,
X86_BMI2,
X86_SSSE3,
X86_SSE4_1,
X86_SSE4_2,
X86_AVX,
X86_AVX2,
X86_AVX512F,
X86_AVX512CD,
X86_AVX512ER,
X86_AVX512PF,
X86_AVX512BW,
X86_AVX512DQ,
X86_AVX512VL,
X86_AVX512IFMA,
X86_AVX512VBMI,
X86_AVX512VBMI2,
X86_AVX512VNNI,
X86_AVX512BITALG,
X86_AVX512VPOPCNTDQ,
X86_AVX512_4VNNIW,
X86_AVX512_4VBMI2,
X86_SMX,
X86_SGX,
X86_CX16,
X86_LAST_,
} X86FeaturesEnum;
// Looks up the feature identified by `value` in `features`.
// NOTE(review): presumably returns non-zero when the feature is set - confirm
// against the implementation.
int GetX86FeaturesEnumValue(const X86Features* features, X86FeaturesEnum value);
// Returns a C string naming the given feature (the README sample output shows
// names such as "aes" and "sse4_1").
const char* GetX86FeaturesEnumName(X86FeaturesEnum);
// Returns a C string naming the given microarchitecture (the README sample
// output shows "INTEL_SNB").
const char* GetX86MicroarchitectureName(X86Microarchitecture);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_CPUINFO_X86_H_

View file

@ -0,0 +1,39 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_
#define CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Returns true when bit number `bit` of `reg` is 1, where bit 0 is the least
// significant bit. `bit` must be smaller than 32.
inline static bool IsBitSet(uint32_t reg, uint32_t bit) {
  const uint32_t shifted = reg >> bit;
  return (shifted & 0x1) != 0;
}
// Returns bits [msb:lsb] of `reg` (both bounds inclusive, bit 0 is the least
// significant bit), shifted down so that bit `lsb` ends up in bit 0 of the
// result.
// Precondition: msb >= lsb.
inline static uint32_t ExtractBitRange(uint32_t reg, uint32_t msb,
                                       uint32_t lsb) {
  // Check the precondition first: with msb < lsb the width below wraps around
  // and the shift that builds the mask would be undefined behavior, which
  // would allow the compiler to drop a check placed after it.
  assert(msb >= lsb);
  // The mask is built in 64 bits so that a full 32-bit wide range works.
  const uint64_t bits = msb - lsb + 1;
  const uint64_t mask = (1ULL << bits) - 1ULL;
  return (uint32_t)((reg >> lsb) & mask);
}
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_INTERNAL_BIT_UTILS_H_

View file

@ -0,0 +1,37 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_
#define CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_
#include <stdint.h>
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// A struct to hold the result of a call to cpuid: one 32-bit field for each of
// the eax, ebx, ecx and edx registers.
typedef struct {
uint32_t eax, ebx, ecx, edx;
} Leaf;
// Retrieves the leaf for a particular cpuid.
// NOTE(review): `leaf_id` is presumably the value placed in eax before the
// cpuid instruction is executed - confirm against the implementation.
Leaf CpuId(uint32_t leaf_id);
// Returns the eax value of the XCR0 register.
uint32_t GetXCR0Eax(void);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_INTERNAL_CPUID_X86_H_

View file

@ -0,0 +1,38 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// An interface for the filesystem that allows mocking the filesystem in
// unittests.
#ifndef CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_
#define CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_
#include <stddef.h>
#include <stdint.h>
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Same as linux "open(filename, O_RDONLY)", retries automatically on EINTR.
// The returned int is the file descriptor to pass to the two functions below
// (presumably negative on failure, as with open(2) - confirm).
int CpuFeatures_OpenFile(const char* filename);
// Same as linux "read(file_descriptor, buffer, buffer_size)", retries
// automatically on EINTR.
// Note that the result is an `int`, not the `ssize_t` returned by read(2).
int CpuFeatures_ReadFile(int file_descriptor, void* buffer, size_t buffer_size);
// Same as linux "close(file_descriptor)". No status is reported to the caller.
void CpuFeatures_CloseFile(int file_descriptor);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_INTERNAL_FILESYSTEM_H_

View file

@ -0,0 +1,131 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Interface to retrieve hardware capabilities. It relies on Linux's getauxval
// or `/proc/self/auxv` under the hood.
#ifndef CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_
#define CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_
#include <stdint.h>
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// To avoid depending on the linux kernel we reproduce the architecture specific
// constants here.
// Each constant is a single-bit mask, meant to be tested against a hardware
// capabilities word.
// AArch64 capability bits (bit 2 is not reproduced here).
// http://elixir.free-electrons.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h
#define AARCH64_HWCAP_FP (1UL << 0)
#define AARCH64_HWCAP_ASIMD (1UL << 1)
#define AARCH64_HWCAP_AES (1UL << 3)
#define AARCH64_HWCAP_PMULL (1UL << 4)
#define AARCH64_HWCAP_SHA1 (1UL << 5)
#define AARCH64_HWCAP_SHA2 (1UL << 6)
#define AARCH64_HWCAP_CRC32 (1UL << 7)
// 32-bit ARM capability bits. The ARM_HWCAP_* masks and the ARM_HWCAP2_* masks
// form two separate groups whose bit positions overlap, so each group must be
// tested against its own capabilities word (presumably
// HardwareCapabilities::hwcaps and ::hwcaps2 respectively - confirm).
// http://elixir.free-electrons.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h
#define ARM_HWCAP_VFP (1UL << 6)
#define ARM_HWCAP_IWMMXT (1UL << 9)
#define ARM_HWCAP_NEON (1UL << 12)
#define ARM_HWCAP_VFPV3 (1UL << 13)
#define ARM_HWCAP_VFPV3D16 (1UL << 14)
#define ARM_HWCAP_VFPV4 (1UL << 16)
#define ARM_HWCAP_IDIVA (1UL << 17)
#define ARM_HWCAP_IDIVT (1UL << 18)
#define ARM_HWCAP2_AES (1UL << 0)
#define ARM_HWCAP2_PMULL (1UL << 1)
#define ARM_HWCAP2_SHA1 (1UL << 2)
#define ARM_HWCAP2_SHA2 (1UL << 3)
#define ARM_HWCAP2_CRC32 (1UL << 4)
// MIPS capability bits, one single-bit mask per capability (bits 0 to 9).
// http://elixir.free-electrons.com/linux/latest/source/arch/mips/include/uapi/asm/hwcap.h
#define MIPS_HWCAP_VZ (1UL << 0)
#define MIPS_HWCAP_EVA (1UL << 1)
#define MIPS_HWCAP_HTW (1UL << 2)
#define MIPS_HWCAP_FPU (1UL << 3)
#define MIPS_HWCAP_MIPS32R2 (1UL << 4)
#define MIPS_HWCAP_MIPS32R5 (1UL << 5)
#define MIPS_HWCAP_MIPS64R6 (1UL << 6)
#define MIPS_HWCAP_DSPR1 (1UL << 7)
#define MIPS_HWCAP_DSPR2 (1UL << 8)
#define MIPS_HWCAP_MSA (1UL << 9)
// PowerPC capability bits. Unlike the other architectures above, these are
// written as full 32-bit hexadecimal masks, starting from the most significant
// bit. The whole group is skipped when _UAPI__ASM_POWERPC_CPUTABLE_H is
// already defined (presumably the include guard of the kernel header linked
// below, which provides the same names - confirm).
// http://elixir.free-electrons.com/linux/latest/source/arch/powerpc/include/uapi/asm/cputable.h
#ifndef _UAPI__ASM_POWERPC_CPUTABLE_H
/* in AT_HWCAP */
#define PPC_FEATURE_32 0x80000000
#define PPC_FEATURE_64 0x40000000
#define PPC_FEATURE_601_INSTR 0x20000000
#define PPC_FEATURE_HAS_ALTIVEC 0x10000000
#define PPC_FEATURE_HAS_FPU 0x08000000
#define PPC_FEATURE_HAS_MMU 0x04000000
#define PPC_FEATURE_HAS_4xxMAC 0x02000000
#define PPC_FEATURE_UNIFIED_CACHE 0x01000000
#define PPC_FEATURE_HAS_SPE 0x00800000
#define PPC_FEATURE_HAS_EFP_SINGLE 0x00400000
#define PPC_FEATURE_HAS_EFP_DOUBLE 0x00200000
#define PPC_FEATURE_NO_TB 0x00100000
#define PPC_FEATURE_POWER4 0x00080000
#define PPC_FEATURE_POWER5 0x00040000
#define PPC_FEATURE_POWER5_PLUS 0x00020000
#define PPC_FEATURE_CELL 0x00010000
#define PPC_FEATURE_BOOKE 0x00008000
#define PPC_FEATURE_SMT 0x00004000
#define PPC_FEATURE_ICACHE_SNOOP 0x00002000
#define PPC_FEATURE_ARCH_2_05 0x00001000
#define PPC_FEATURE_PA6T 0x00000800
#define PPC_FEATURE_HAS_DFP 0x00000400
#define PPC_FEATURE_POWER6_EXT 0x00000200
#define PPC_FEATURE_ARCH_2_06 0x00000100
#define PPC_FEATURE_HAS_VSX 0x00000080
#define PPC_FEATURE_PSERIES_PERFMON_COMPAT 0x00000040
/* Reserved - do not use 0x00000004 */
#define PPC_FEATURE_TRUE_LE 0x00000002
#define PPC_FEATURE_PPC_LE 0x00000001
/* in AT_HWCAP2 */
#define PPC_FEATURE2_ARCH_2_07 0x80000000
#define PPC_FEATURE2_HTM 0x40000000
#define PPC_FEATURE2_DSCR 0x20000000
#define PPC_FEATURE2_EBB 0x10000000
#define PPC_FEATURE2_ISEL 0x08000000
#define PPC_FEATURE2_TAR 0x04000000
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#define PPC_FEATURE2_HTM_NOSC 0x01000000
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#define PPC_FEATURE2_HAS_IEEE128 0x00400000
#define PPC_FEATURE2_DARN 0x00200000
#define PPC_FEATURE2_SCV 0x00100000
#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000
#endif
// A pair of hardware capability words.
// NOTE(review): hwcaps / hwcaps2 presumably hold the AT_HWCAP / AT_HWCAP2
// auxiliary vector values, to be tested with the *_HWCAP_* / *_HWCAP2_* masks
// above - confirm against the implementation.
typedef struct {
unsigned long hwcaps;
unsigned long hwcaps2;
} HardwareCapabilities;
// Returns the hardware capabilities by value; takes no arguments.
HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void);
// Two platform names, each in a fixed 64-byte buffer (at most 63 characters
// plus the terminator).
// NOTE(review): presumably filled from the AT_PLATFORM / AT_BASE_PLATFORM
// auxiliary vector entries - confirm against the implementation.
typedef struct {
char platform[64]; // 0 terminated string
char base_platform[64]; // 0 terminated string
} PlatformType;
// Returns the platform type by value; takes no arguments.
PlatformType CpuFeatures_GetPlatformType(void);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_INTERNAL_HWCAPS_H_

View file

@ -0,0 +1,60 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// CapabilityConfig provides a way to map cpu features to hardware caps and
// /proc/cpuinfo flags. We then provide functions to update capabilities from
// either source.
#ifndef CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_
#define CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_
#include <ctype.h>
#include <stdint.h>
#include "cpu_features_macros.h"
#include "internal/hwcaps.h"
#include "internal/string_view.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Use the following macro to declare setter functions to be used in
// CapabilityConfig.
// DECLARE_SETTER(FeatureType, FeatureName) expands to a static function named
// set_<FeatureName> taking (void* features, bool value). The function casts
// `features` to FeatureType* and assigns `value` to its FeatureName member, so
// the pointer passed in must really point to a FeatureType object.
#define DECLARE_SETTER(FeatureType, FeatureName) \
static void set_##FeatureName(void* const features, bool value) { \
((FeatureType*)features)->FeatureName = value; \
}
// Describes the relationship between hardware caps and /proc/cpuinfo flags.
// One CapabilityConfig describes one feature:
// - hwcaps_mask: the hardware capability bits associated with the feature.
// - proc_cpuinfo_flag: the flag name associated with the feature in
//   /proc/cpuinfo.
// - set_bit: function that writes the feature's bit in the feature struct; its
//   signature matches the functions generated by DECLARE_SETTER.
typedef struct {
const HardwareCapabilities hwcaps_mask;
const char* const proc_cpuinfo_flag;
void (*set_bit)(void* const, bool); // setter for the corresponding bit.
} CapabilityConfig;
// For every config, looks into flags_line for the presence of the
// corresponding proc_cpuinfo_flag, calls `set_bit` accordingly.
// Parameters:
// - configs_size: number of elements in `configs`.
// - configs: array of feature descriptions.
// - flags_line: the text in which the flags are searched.
// - features: passed as the first argument of `set_bit`.
// Note: features is a pointer to the underlying Feature struct.
void CpuFeatures_SetFromFlags(const size_t configs_size,
const CapabilityConfig* configs,
const StringView flags_line,
void* const features);
// For every config, looks into hwcaps for the presence of the feature. Calls
// `set_bit` with true if the hardware capability is found.
// Per the description above, `set_bit` is only ever called with true here, so
// a bit that is already set in `features` is not cleared by this function.
// Parameters:
// - configs_size: number of elements in `configs`.
// - configs: array of feature descriptions.
// - hwcaps: the hardware capabilities to inspect.
// - features: passed as the first argument of `set_bit`.
// Note: features is a pointer to the underlying Feature struct.
void CpuFeatures_OverrideFromHwCaps(const size_t configs_size,
const CapabilityConfig* configs,
const HardwareCapabilities hwcaps,
void* const features);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_INTERNAL_LINUX_FEATURES_AGGREGATOR_H_

View file

@ -0,0 +1,49 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Reads a file line by line and stores the data on the stack. This allows
// parsing files in one go without allocating.
#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_
#define CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_
#include <stdbool.h>
#include "cpu_features_macros.h"
#include "internal/string_view.h"
CPU_FEATURES_START_CPP_NAMESPACE
// Reader state. The line storage is an in-struct array of
// STACK_LINE_READER_BUFFER_SIZE bytes, so no separate allocation is needed.
typedef struct {
char buffer[STACK_LINE_READER_BUFFER_SIZE];  // storage for the data read.
StringView view;  // NOTE(review): presumably the not-yet-consumed part of
                  // `buffer` - confirm against the implementation.
int fd;  // descriptor of the file being read (see StackLineReader_Initialize).
bool skip_mode;  // NOTE(review): presumably set while the rest of a truncated
                 // line is being discarded - confirm against the
                 // implementation.
} StackLineReader;
// Initializes a StackLineReader.
void StackLineReader_Initialize(StackLineReader* reader, int fd);
// Value returned by StackLineReader_NextLine for each line it reads.
typedef struct {
StringView line; // A view of the line.
bool eof; // Nothing more to read, we reached EOF.
bool full_line; // If false the line was truncated to
// STACK_LINE_READER_BUFFER_SIZE.
} LineResult;
// Reads the file pointed to by fd and tries to read a full line.
LineResult StackLineReader_NextLine(StackLineReader* reader);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_INTERNAL_STACK_LINE_READER_H_

View file

@ -0,0 +1,108 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// A view over a piece of string. The view is not 0 terminated.
#ifndef CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_
#define CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// A pointer to the first character plus a character count. No terminating 0
// is implied.
typedef struct {
  const char* ptr;
  size_t size;
} StringView;

// The empty view: NULL pointer and zero size. In C the object is
// zero-initialized because it has static storage duration; C++ requires an
// explicit initializer for a const object.
#ifdef __cplusplus
static const StringView kEmptyStringView = {NULL, 0};
#else
static const StringView kEmptyStringView;
#endif

// Builds a StringView over the first `size` characters starting at `str`.
// `str` may be NULL only when `size` is 0.
static inline StringView view(const char* str, const size_t size) {
  const StringView result = {str, size};
  return result;
}

// Builds a StringView over a whole 0-terminated string; the terminator is not
// part of the view.
static inline StringView str(const char* str) {
  const size_t length = strlen(str);
  return view(str, length);
}
// Returns the index of the first occurrence of c in view or -1 if not found.
int CpuFeatures_StringView_IndexOfChar(const StringView view, char c);
// Returns the index of the first occurrence of sub_view in view or -1 if not
// found.
int CpuFeatures_StringView_IndexOf(const StringView view,
const StringView sub_view);
// Returns whether a is equal to b (same content).
bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b);
// Returns whether a starts with b.
bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b);
// Removes count characters from the beginning of view, or returns
// kEmptyStringView if count is greater than view.size.
StringView CpuFeatures_StringView_PopFront(const StringView str_view,
size_t count);
// Removes count characters from the end of view, or returns kEmptyStringView
// if count is greater than view.size.
StringView CpuFeatures_StringView_PopBack(const StringView str_view,
size_t count);
// Keeps the first count characters of view, or returns view unchanged if count
// is greater than view.size.
StringView CpuFeatures_StringView_KeepFront(const StringView str_view,
size_t count);
// Retrieves the first character of view. If view is empty the behavior is
// undefined.
char CpuFeatures_StringView_Front(const StringView view);
// Retrieves the last character of view. If view is empty the behavior is
// undefined.
char CpuFeatures_StringView_Back(const StringView view);
// Removes leading and trailing space characters.
StringView CpuFeatures_StringView_TrimWhitespace(StringView view);
// Convert StringView to positive integer. e.g. "42", "0x2a".
// Returns -1 on error.
int CpuFeatures_StringView_ParsePositiveNumber(const StringView view);
// Copies src StringView to dst buffer.
void CpuFeatures_StringView_CopyString(const StringView src, char* dst,
size_t dst_size);
// Checks if line contains the specified whitespace separated word.
bool CpuFeatures_StringView_HasWord(const StringView line,
const char* const word);
// Get key/value from line. key and value are separated by ": ".
// key and value are cleaned up from leading and trailing whitespaces.
bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line,
StringView* key,
StringView* value);
CPU_FEATURES_END_CPP_NAMESPACE
#endif // CPU_FEATURES_INCLUDE_INTERNAL_STRING_VIEW_H_

View file

@ -0,0 +1,173 @@
#!/bin/bash
# Physical (symlink-resolved) directory that contains this script.
readonly SCRIPT_FOLDER=$(cd -P -- "$(dirname -- "$0")" && pwd -P)
# Project root: the parent of the script directory.
readonly PROJECT_FOLDER="${SCRIPT_FOLDER}/.."
# Folder where archives are downloaded and unpacked.
readonly ARCHIVE_FOLDER=~/cpu_features_archives
# Install prefix of the locally built QEMU.
readonly QEMU_INSTALL=${ARCHIVE_FOLDER}/qemu
# CMake arguments passed to every configuration.
readonly DEFAULT_CMAKE_ARGS=" -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON"
# Unpacks the tarball named by the first argument in the current directory,
# choosing the tar decompression flag from the file extension. An unsupported
# extension prints a message and terminates the script with status 1.
function extract() {
  local ARCHIVE="$1"
  if [[ "${ARCHIVE}" == *.tar.bz2 ]]; then
    tar xjf "${ARCHIVE}"
  elif [[ "${ARCHIVE}" == *.tar.xz ]]; then
    tar xJf "${ARCHIVE}"
  elif [[ "${ARCHIVE}" == *.tar.gz ]]; then
    tar xzf "${ARCHIVE}"
  else
    echo "don't know how to extract '${ARCHIVE}'..."
    exit 1
  fi
}
# Downloads (unless already cached) and unpacks an archive into
# ${ARCHIVE_FOLDER}, but only when the expected destination folder is missing.
#   $1: URL of the archive; its last path component is the local file name.
#   $2: folder, relative to ${ARCHIVE_FOLDER}, that unpacking is expected to
#       create.
# Side effect: the current directory becomes ${ARCHIVE_FOLDER}.
# The script is terminated when the folder cannot be entered or the download
# fails.
function unpackifnotexists() {
  mkdir -p "${ARCHIVE_FOLDER}"
  cd "${ARCHIVE_FOLDER}" || exit
  local URL=$1
  local RELATIVE_FOLDER=$2
  local DESTINATION="${ARCHIVE_FOLDER}/${RELATIVE_FOLDER}"
  if [[ ! -d "${DESTINATION}" ]] ; then
    # Drop everything up to the last '/' with a parameter expansion: no
    # subprocesses, and the URL is never exposed to word splitting or globbing.
    local ARCHIVE_NAME="${URL##*/}"
    # A previously downloaded archive is reused. If the download fails, stop
    # here instead of running tar on a file that does not exist.
    test -f "${ARCHIVE_NAME}" || wget -q "${URL}" || exit 1
    extract "${ARCHIVE_NAME}"
  fi
}
# Builds and installs QEMU user-mode emulators into ${QEMU_INSTALL} unless the
# stamp file ${QEMU_INSTALL}/.build already records the requested version and
# target list.
# Inputs (each assigned its default when unset or empty):
#   QEMU_VERSION  QEMU release to build (default 2.11.1).
#   QEMU_ARCHES   space separated architectures.
#   QEMU_TARGETS  configure target list; derived from QEMU_ARCHES by appending
#                 "-linux-user" to every architecture.
# The script is terminated if configure, make or make install fails, so the
# stamp file is only written after a successful installation.
function installqemuifneeded() {
  local VERSION=${QEMU_VERSION:=2.11.1}
  local ARCHES=${QEMU_ARCHES:=arm aarch64 i386 x86_64 mips mipsel}
  local TARGETS=${QEMU_TARGETS:=$(echo "$ARCHES" | sed 's#$# #;s#\([^ ]*\) #\1-linux-user #g')}

  # The stamp holds "<version> <targets>"; an identical stamp means there is
  # nothing to do.
  if echo "${VERSION} ${TARGETS}" | cmp --silent "${QEMU_INSTALL}/.build" -; then
    echo "qemu ${VERSION} up to date!"
    return 0
  fi

  echo "VERSION: ${VERSION}"
  echo "TARGETS: ${TARGETS}"

  rm -rf "${QEMU_INSTALL}"

  # Checking for a tarball before downloading makes testing easier :-)
  local QEMU_URL="http://wiki.qemu-project.org/download/qemu-${VERSION}.tar.xz"
  local QEMU_FOLDER="qemu-${VERSION}"

  # unpackifnotexists leaves us in ${ARCHIVE_FOLDER}, where the sources live.
  unpackifnotexists "${QEMU_URL}" "${QEMU_FOLDER}"
  cd "${QEMU_FOLDER}" || exit

  ./configure \
    --prefix="${QEMU_INSTALL}" \
    --target-list="${TARGETS}" \
    --disable-docs \
    --disable-sdl \
    --disable-gtk \
    --disable-gnutls \
    --disable-gcrypt \
    --disable-nettle \
    --disable-curses \
    --static || exit 1

  make -j4 || exit 1
  make install || exit 1

  # Record what was installed so the next run can skip the build.
  echo "$VERSION $TARGETS" > "${QEMU_INSTALL}/.build"
}
# Terminates the script with an error message unless the variable whose NAME is
# passed as $1 is set (an empty value counts as set).
function assert_defined(){
  local NAME=${1}
  # Indirect expansion checks the variable named by $1. Checking a local copy
  # of $1 would always succeed, because the copy itself is always set.
  if [[ -z "${!NAME+set}" ]]; then
    echo "${NAME} needs to be defined" >&2
    exit 1
  fi
}
# Configures and builds the project with CMake in ${BUILD_DIR}, then runs every
# ${BUILD_DIR}/test/*_test binary and the list_cpu_features demo, through QEMU
# when QEMU_ARCH names an architecture.
# Reads BUILD_DIR, CMAKE_ADDITIONAL_ARGS, QEMU_ARCH and QEMU_ARGS, which are
# expected to be set by the caller.
# NOTE(review): the exit statuses of cmake and of the individual test binaries
# are not checked here, so a failing test does not stop the loop.
function integrate() {
cd "${PROJECT_FOLDER}" || exit
# Both argument variables are expanded unquoted, so each one splits into
# separate command-line arguments.
cmake -H. -B"${BUILD_DIR}" ${DEFAULT_CMAKE_ARGS} ${CMAKE_ADDITIONAL_ARGS}
cmake --build "${BUILD_DIR}" --target all
if [[ -n "${QEMU_ARCH}" ]]; then
if [[ "${QEMU_ARCH}" == "DISABLED" ]]; then
# After unquoted expansion this yields the command `true` with `||` and the
# binary path as ordinary arguments, so the binary is not executed.
QEMU="true || "
else
installqemuifneeded
QEMU="${QEMU_INSTALL}/bin/qemu-${QEMU_ARCH} ${QEMU_ARGS}"
fi
else
# No emulator prefix: binaries are executed directly.
QEMU=""
fi
# Run tests
for test_binary in ${BUILD_DIR}/test/*_test; do ${QEMU} ${test_binary}; done
# Run demo program
${QEMU} "${BUILD_DIR}/list_cpu_features"
}
# Prepares a cross build for ${TARGET} with the Linaro 7.2-2017.11 toolchain:
# fetches the compiler and sysroot archives, then appends the matching CMake
# arguments to CMAKE_ADDITIONAL_ARGS and the sysroot options to QEMU_ARGS.
function expand_linaro_config() {
  assert_defined TARGET
  local LINARO_ROOT_URL=https://releases.linaro.org/components/toolchain/binaries/7.2-2017.11

  # Compiler: the archive is named after the folder it unpacks to.
  local GCC_RELATIVE_FOLDER="gcc-linaro-7.2.1-2017.11-x86_64_${TARGET}"
  local GCC_URL=${LINARO_ROOT_URL}/${TARGET}/${GCC_RELATIVE_FOLDER}.tar.xz
  unpackifnotexists "${GCC_URL}" "${GCC_RELATIVE_FOLDER}"

  # Sysroot: same naming scheme.
  local SYSROOT_RELATIVE_FOLDER=sysroot-glibc-linaro-2.25-2017.11-${TARGET}
  local SYSROOT_URL=${LINARO_ROOT_URL}/${TARGET}/${SYSROOT_RELATIVE_FOLDER}.tar.xz
  unpackifnotexists "${SYSROOT_URL}" "${SYSROOT_RELATIVE_FOLDER}"

  local SYSROOT_FOLDER=${ARCHIVE_FOLDER}/${SYSROOT_RELATIVE_FOLDER}
  local GCC_FOLDER=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}

  # Every argument is appended with a leading space.
  local CMAKE_ARG
  for CMAKE_ARG in \
      "-DCMAKE_SYSROOT=${SYSROOT_FOLDER}" \
      "-DCMAKE_C_COMPILER=${GCC_FOLDER}/bin/${TARGET}-gcc" \
      "-DCMAKE_CXX_COMPILER=${GCC_FOLDER}/bin/${TARGET}-g++" \
      "-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER" \
      "-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY" \
      "-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=ONLY"; do
    CMAKE_ADDITIONAL_ARGS+=" ${CMAKE_ARG}"
  done

  QEMU_ARGS+=" -L ${SYSROOT_FOLDER} -E LD_LIBRARY_PATH=/lib"
}
# Prepares a MIPS cross build with the Codescape GNU tools package: fetches the
# package, then appends the MIPS specific CMake arguments to
# CMAKE_ADDITIONAL_ARGS and the sysroot/CPU options to QEMU_ARGS.
# Reads TARGET and QEMU_ARCH.
function expand_codescape_config() {
assert_defined TARGET
# The sysroot flavour is derived from the QEMU architecture name.
local FLAVOUR=${QEMU_ARCH}-r2-hard
local DATE=2016.05-03
local CODESCAPE_URL=http://codescape-mips-sdk.imgtec.com/components/toolchain/${DATE}/Codescape.GNU.Tools.Package.${DATE}.for.MIPS.MTI.Linux.CentOS-5.x86_64.tar.gz
local GCC_URL=${CODESCAPE_URL}
local GCC_RELATIVE_FOLDER=${TARGET}/${DATE}
unpackifnotexists "${GCC_URL}" "${GCC_RELATIVE_FOLDER}"
# The sysroot uses the same archive URL as the compiler.
local SYSROOT_URL=${CODESCAPE_URL}
local SYSROOT_FOLDER=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}/sysroot/${FLAVOUR}
# NOTE(review): SYSROOT_RELATIVE_FOLDER is not assigned anywhere in this
# function. Unless it is provided from outside it expands to an empty string,
# in which case the destination checked is ${ARCHIVE_FOLDER}/ itself, which
# exists at that point, so nothing is unpacked here. Confirm whether the
# sysroot folder was meant to be checked.
unpackifnotexists "${SYSROOT_URL}" "${SYSROOT_RELATIVE_FOLDER}"
CMAKE_ADDITIONAL_ARGS+=" -DENABLE_MSA=1"
CMAKE_ADDITIONAL_ARGS+=" -DMIPS_CPU=p5600"
CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=cmake/mips32-linux-gcc.cmake"
CMAKE_ADDITIONAL_ARGS+=" -DCROSS=${TARGET}-"
CMAKE_ADDITIONAL_ARGS+=" -DCMAKE_FIND_ROOT_PATH=${ARCHIVE_FOLDER}/${GCC_RELATIVE_FOLDER}"
QEMU_ARGS+=" -L ${SYSROOT_FOLDER}"
QEMU_ARGS+=" -E LD_LIBRARY_PATH=/lib"
QEMU_ARGS+=" -cpu P5600"
}
# Entry point for one environment: creates the per-target build directory,
# resets the argument accumulators, lets the selected toolchain fill them in
# and finally calls integrate.
# Reads TARGET and TOOLCHAIN (LINARO, CODESCAPE or NATIVE); any other toolchain
# value prints a message and terminates the script with status 1.
function expand_environment_and_integrate() {
  assert_defined PROJECT_FOLDER
  assert_defined TARGET

  BUILD_DIR="${PROJECT_FOLDER}/cmake_build/${TARGET}"
  mkdir -p "${BUILD_DIR}"

  # Start from a clean slate; the toolchain helpers only append.
  CMAKE_ADDITIONAL_ARGS=""
  QEMU_ARGS=""

  if [[ "${TOOLCHAIN}" == "LINARO" ]]; then
    expand_linaro_config
  elif [[ "${TOOLCHAIN}" == "CODESCAPE" ]]; then
    expand_codescape_config
  elif [[ "${TOOLCHAIN}" == "NATIVE" ]]; then
    QEMU_ARCH=""
  else
    echo "Unknown toolchain '${TOOLCHAIN}'..."
    exit 1
  fi

  integrate
}
# When CONTINUOUS_INTEGRATION is "true" the script runs a single environment
# taken from the variables already present in the environment (TARGET,
# TOOLCHAIN, QEMU_ARCH). QEMU_ARCHES is set to the single requested
# architecture, which is what installqemuifneeded derives its default target
# list from. Otherwise the script only defines functions.
if [ "${CONTINUOUS_INTEGRATION}" = "true" ]; then
QEMU_ARCHES=${QEMU_ARCH}
expand_environment_and_integrate
fi

View file

@ -0,0 +1,80 @@
source "$(dirname -- "$0")"/run_integration.sh
# Environment selectors. Each function below assigns the three variables that
# describe one build environment:
#   TOOLCHAIN  toolchain family (LINARO, CODESCAPE or NATIVE),
#   TARGET     target name, also used as build folder name,
#   QEMU_ARCH  QEMU architecture used to run the binaries, "DISABLED" when the
#              binaries are not run, or empty to run them directly.

# Toolchains for little-endian, 64-bit ARMv8 for GNU/Linux systems
function set_aarch64-linux-gnu() {
TOOLCHAIN=LINARO
TARGET=aarch64-linux-gnu
QEMU_ARCH=aarch64
}
# Toolchains for little-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
function set_arm-linux-gnueabihf() {
TOOLCHAIN=LINARO
TARGET=arm-linux-gnueabihf
QEMU_ARCH=arm
}
# Toolchains for little-endian, 32-bit ARMv8 for GNU/Linux systems
function set_armv8l-linux-gnueabihf() {
TOOLCHAIN=LINARO
TARGET=armv8l-linux-gnueabihf
QEMU_ARCH=arm
}
# Toolchains for little-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
function set_arm-linux-gnueabi() {
TOOLCHAIN=LINARO
TARGET=arm-linux-gnueabi
QEMU_ARCH=arm
}
# Toolchains for big-endian, 64-bit ARMv8 for GNU/Linux systems
function set_aarch64_be-linux-gnu() {
TOOLCHAIN=LINARO
TARGET=aarch64_be-linux-gnu
QEMU_ARCH="DISABLED"
}
# Toolchains for big-endian, hard-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
function set_armeb-linux-gnueabihf() {
TOOLCHAIN=LINARO
TARGET=armeb-linux-gnueabihf
QEMU_ARCH="DISABLED"
}
# Toolchains for big-endian, soft-float, 32-bit ARMv7 (and earlier) for GNU/Linux systems
function set_armeb-linux-gnueabi() {
TOOLCHAIN=LINARO
TARGET=armeb-linux-gnueabi
QEMU_ARCH="DISABLED"
}
# MIPS target built with the Codescape toolchain; binaries are not run.
function set_mips() {
TOOLCHAIN=CODESCAPE
TARGET=mips-mti-linux-gnu
QEMU_ARCH="DISABLED"
}
# Native build: NATIVE toolchain, binaries run directly without QEMU.
function set_native() {
TOOLCHAIN=NATIVE
TARGET=native
QEMU_ARCH=""
}
# Names of the selector functions to run, one per line. The list is expanded
# unquoted in the loop below, so it is split on whitespace.
ENVIRONMENTS="
set_aarch64-linux-gnu
set_arm-linux-gnueabihf
set_armv8l-linux-gnueabihf
set_arm-linux-gnueabi
set_aarch64_be-linux-gnu
set_armeb-linux-gnueabihf
set_armeb-linux-gnueabi
set_native
set_mips
"
# For every environment: call the selector to set TOOLCHAIN/TARGET/QEMU_ARCH,
# then configure, build and run it.
for SET_ENVIRONMENT in ${ENVIRONMENTS}; do
${SET_ENVIRONMENT}
expand_environment_and_integrate
done

View file

@ -0,0 +1,36 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/cpuid_x86.h"
#if defined(CPU_FEATURES_ARCH_X86)
#if defined(CPU_FEATURES_COMPILER_CLANG) || defined(CPU_FEATURES_COMPILER_GCC)
#include <cpuid.h>
// Executes CPUID for `leaf_id` with sub-leaf 0 and returns the four result
// registers.
Leaf CpuId(uint32_t leaf_id) {
  Leaf registers;
  __cpuid_count(leaf_id, 0, registers.eax, registers.ebx, registers.ecx,
                registers.edx);
  return registers;
}
// Executes XGETBV with ECX = 0 and returns the value left in EAX. The value
// left in EDX is read into a local and discarded.
uint32_t GetXCR0Eax(void) {
uint32_t eax, edx;
// Constraints: "c"(0) loads ECX with 0 as input; "=a"/"=d" bind the outputs
// to EAX and EDX.
__asm("XGETBV" : "=a"(eax), "=d"(edx) : "c"(0));
return eax;
}
#endif // defined(CPU_FEATURES_COMPILER_CLANG) ||
// defined(CPU_FEATURES_COMPILER_GCC)
#endif // defined(CPU_FEATURES_ARCH_X86)

View file

@ -0,0 +1,34 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/cpuid_x86.h"
#if defined(CPU_FEATURES_ARCH_X86) && defined(CPU_FEATURES_COMPILER_MSC)
#include <immintrin.h>
#include <intrin.h> // For __cpuidex()
// Executes CPUID for `leaf_id` through the MSVC intrinsic and returns the four
// result registers. The intrinsic fills an int[4] in the order eax, ebx, ecx,
// edx.
Leaf CpuId(uint32_t leaf_id) {
  int registers[4];
  Leaf result;
  __cpuid(registers, leaf_id);
  result.eax = registers[0];
  result.ebx = registers[1];
  result.ecx = registers[2];
  result.edx = registers[3];
  return result;
}
// Reads extended control register 0 with the _xgetbv intrinsic and returns its
// low 32 bits.
uint32_t GetXCR0Eax(void) {
  const uint32_t xcr0_low = (uint32_t)_xgetbv(0);
  return xcr0_low;
}
#endif // defined(CPU_FEATURES_ARCH_X86) && defined(CPU_FEATURES_COMPILER_MSC)

View file

@ -0,0 +1,141 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpuinfo_aarch64.h"
#include "internal/filesystem.h"
#include "internal/hwcaps.h"
#include "internal/linux_features_aggregator.h"
#include "internal/stack_line_reader.h"
#include "internal/string_view.h"
#include <ctype.h>
// One generated setter (set_<name>) per Aarch64Features member listed in
// kConfigs.
DECLARE_SETTER(Aarch64Features, fp)
DECLARE_SETTER(Aarch64Features, asimd)
DECLARE_SETTER(Aarch64Features, aes)
DECLARE_SETTER(Aarch64Features, pmull)
DECLARE_SETTER(Aarch64Features, sha1)
DECLARE_SETTER(Aarch64Features, sha2)
DECLARE_SETTER(Aarch64Features, crc32)
// Detection table. Each entry holds, in order: the hardware-capability mask,
// the flag name searched for in the /proc/cpuinfo "Features" line, and the
// setter for the matching Aarch64Features member.
static const CapabilityConfig kConfigs[] = {
{{AARCH64_HWCAP_FP, 0}, "fp", &set_fp}, //
{{AARCH64_HWCAP_ASIMD, 0}, "asimd", &set_asimd}, //
{{AARCH64_HWCAP_AES, 0}, "aes", &set_aes}, //
{{AARCH64_HWCAP_PMULL, 0}, "pmull", &set_pmull}, //
{{AARCH64_HWCAP_SHA1, 0}, "sha1", &set_sha1}, //
{{AARCH64_HWCAP_SHA2, 0}, "sha2", &set_sha2}, //
{{AARCH64_HWCAP_CRC32, 0}, "crc32", &set_crc32}, //
};
// Number of entries in kConfigs.
static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
// Interprets one /proc/cpuinfo line. Lines of the form "key: value" with a
// recognized key update `info`; every other line is ignored.
// Returns false once the reader reported end of file, true otherwise.
static bool HandleAarch64Line(const LineResult result,
                              Aarch64Info* const info) {
  const bool more_lines = !result.eof;
  StringView key, value;
  if (!CpuFeatures_StringView_GetAttributeKeyValue(result.line, &key,
                                                   &value)) {
    return more_lines;
  }
  if (CpuFeatures_StringView_IsEquals(key, str("Features"))) {
    // The value is the list of feature flags.
    CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, &info->features);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) {
    info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) {
    info->variant = CpuFeatures_StringView_ParsePositiveNumber(value);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) {
    info->part = CpuFeatures_StringView_ParsePositiveNumber(value);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) {
    info->revision = CpuFeatures_StringView_ParsePositiveNumber(value);
  }
  return more_lines;
}
// Feeds /proc/cpuinfo line by line to HandleAarch64Line. Does nothing when the
// file cannot be opened.
static void FillProcCpuInfoData(Aarch64Info* const info) {
  StackLineReader reader;
  const int fd = CpuFeatures_OpenFile("/proc/cpuinfo");
  if (fd < 0) {
    return;
  }
  StackLineReader_Initialize(&reader, fd);
  // The handler returns false for the line that carries the EOF marker.
  while (HandleAarch64Line(StackLineReader_NextLine(&reader), info)) {
  }
  CpuFeatures_CloseFile(fd);
}
static const Aarch64Info kEmptyAarch64Info;
Aarch64Info GetAarch64Info(void) {
// capabilities are fetched from both getauxval and /proc/cpuinfo so we can
// have some information if the executable is sandboxed (aka no access to
// /proc/cpuinfo).
Aarch64Info info = kEmptyAarch64Info;
FillProcCpuInfoData(&info);
CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs,
CpuFeatures_GetHardwareCapabilities(),
&info.features);
return info;
}
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// Returns the value of the `features` member selected by `value`, or false
// for AARCH64_LAST_ and for values outside the enumeration.
int GetAarch64FeaturesEnumValue(const Aarch64Features* features,
                                Aarch64FeaturesEnum value) {
  int enabled = false;
  switch (value) {
    case AARCH64_FP:
      enabled = features->fp;
      break;
    case AARCH64_ASIMD:
      enabled = features->asimd;
      break;
    case AARCH64_AES:
      enabled = features->aes;
      break;
    case AARCH64_PMULL:
      enabled = features->pmull;
      break;
    case AARCH64_SHA1:
      enabled = features->sha1;
      break;
    case AARCH64_SHA2:
      enabled = features->sha2;
      break;
    case AARCH64_CRC32:
      enabled = features->crc32;
      break;
    case AARCH64_LAST_:
      break;
  }
  return enabled;
}
// Returns the lower-case name of the feature selected by `value`, or
// "unknown feature" for AARCH64_LAST_ and for values outside the enumeration.
const char* GetAarch64FeaturesEnumName(Aarch64FeaturesEnum value) {
  const char* name = "unknown feature";
  switch (value) {
    case AARCH64_FP:
      name = "fp";
      break;
    case AARCH64_ASIMD:
      name = "asimd";
      break;
    case AARCH64_AES:
      name = "aes";
      break;
    case AARCH64_PMULL:
      name = "pmull";
      break;
    case AARCH64_SHA1:
      name = "sha1";
      break;
    case AARCH64_SHA2:
      name = "sha2";
      break;
    case AARCH64_CRC32:
      name = "crc32";
      break;
    case AARCH64_LAST_:
      break;
  }
  return name;
}

View file

@ -0,0 +1,259 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpuinfo_arm.h"
#include "internal/bit_utils.h"
#include "internal/filesystem.h"
#include "internal/hwcaps.h"
#include "internal/linux_features_aggregator.h"
#include "internal/stack_line_reader.h"
#include "internal/string_view.h"
#include <ctype.h>
// One generated setter (set_<name>) per ArmFeatures member listed in kConfigs.
DECLARE_SETTER(ArmFeatures, vfp)
DECLARE_SETTER(ArmFeatures, iwmmxt)
DECLARE_SETTER(ArmFeatures, neon)
DECLARE_SETTER(ArmFeatures, vfpv3)
DECLARE_SETTER(ArmFeatures, vfpv3d16)
DECLARE_SETTER(ArmFeatures, vfpv4)
DECLARE_SETTER(ArmFeatures, idiva)
DECLARE_SETTER(ArmFeatures, idivt)
DECLARE_SETTER(ArmFeatures, aes)
DECLARE_SETTER(ArmFeatures, pmull)
DECLARE_SETTER(ArmFeatures, sha1)
DECLARE_SETTER(ArmFeatures, sha2)
DECLARE_SETTER(ArmFeatures, crc32)
// Detection table. Each entry holds, in order: the hardware-capability mask,
// the flag name searched for in the /proc/cpuinfo "Features" line, and the
// setter for the matching ArmFeatures member. The ARM_HWCAP_* constants occupy
// the first member of the mask initializer and the ARM_HWCAP2_* constants the
// second.
static const CapabilityConfig kConfigs[] = {
{{ARM_HWCAP_VFP, 0}, "vfp", &set_vfp}, //
{{ARM_HWCAP_IWMMXT, 0}, "iwmmxt", &set_iwmmxt}, //
{{ARM_HWCAP_NEON, 0}, "neon", &set_neon}, //
{{ARM_HWCAP_VFPV3, 0}, "vfpv3", &set_vfpv3}, //
{{ARM_HWCAP_VFPV3D16, 0}, "vfpv3d16", &set_vfpv3d16}, //
{{ARM_HWCAP_VFPV4, 0}, "vfpv4", &set_vfpv4}, //
{{ARM_HWCAP_IDIVA, 0}, "idiva", &set_idiva}, //
{{ARM_HWCAP_IDIVT, 0}, "idivt", &set_idivt}, //
{{0, ARM_HWCAP2_AES}, "aes", &set_aes}, //
{{0, ARM_HWCAP2_PMULL}, "pmull", &set_pmull}, //
{{0, ARM_HWCAP2_SHA1}, "sha1", &set_sha1}, //
{{0, ARM_HWCAP2_SHA2}, "sha2", &set_sha2}, //
{{0, ARM_HWCAP2_CRC32}, "crc32", &set_crc32}, //
};
// Number of entries in kConfigs.
static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
// Extra facts collected from /proc/cpuinfo that are not part of ArmInfo; they
// are only consumed by FixErrors.
typedef struct {
bool processor_reports_armv6;  // the "Processor" value contains "(v6l)".
bool hardware_reports_goldfish;  // the "Hardware" value equals "Goldfish".
} ProcCpuInfoData;
// Returns the number of leading decimal digits in `str`, i.e. the index of
// the first character that is not a digit, or str.size when every character
// is a digit.
static int IndexOfNonDigit(StringView str) {
  size_t index = 0;
  // isdigit() is only defined for EOF and values representable as unsigned
  // char; a plain char holding a byte >= 0x80 may be negative, hence the cast.
  while (str.size &&
         isdigit((unsigned char)CpuFeatures_StringView_Front(str))) {
    str = CpuFeatures_StringView_PopFront(str, 1);
    ++index;
  }
  return index;
}
// Interprets one /proc/cpuinfo line. Lines of the form "key: value" with a
// recognized key update `info` or `proc_info`; every other line is ignored.
// Returns false once the reader reported end of file, true otherwise.
static bool HandleArmLine(const LineResult result, ArmInfo* const info,
                          ProcCpuInfoData* const proc_info) {
  const bool more_lines = !result.eof;
  StringView key, value;
  if (!CpuFeatures_StringView_GetAttributeKeyValue(result.line, &key,
                                                   &value)) {
    return more_lines;
  }
  if (CpuFeatures_StringView_IsEquals(key, str("Features"))) {
    // The value is the list of feature flags.
    CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, &info->features);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU implementer"))) {
    info->implementer = CpuFeatures_StringView_ParsePositiveNumber(value);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU variant"))) {
    info->variant = CpuFeatures_StringView_ParsePositiveNumber(value);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU part"))) {
    info->part = CpuFeatures_StringView_ParsePositiveNumber(value);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU revision"))) {
    info->revision = CpuFeatures_StringView_ParsePositiveNumber(value);
  } else if (CpuFeatures_StringView_IsEquals(key, str("CPU architecture"))) {
    // The architecture is a number optionally followed by letters, such as
    // "7" or "6TEJ": only the leading digits are parsed.
    const int digit_count = IndexOfNonDigit(value);
    info->architecture = CpuFeatures_StringView_ParsePositiveNumber(
        CpuFeatures_StringView_KeepFront(value, digit_count));
  } else if (CpuFeatures_StringView_IsEquals(key, str("Processor"))) {
    const int v6l_index = CpuFeatures_StringView_IndexOf(value, str("(v6l)"));
    proc_info->processor_reports_armv6 = v6l_index >= 0;
  } else if (CpuFeatures_StringView_IsEquals(key, str("Hardware"))) {
    proc_info->hardware_reports_goldfish =
        CpuFeatures_StringView_IsEquals(value, str("Goldfish"));
  }
  return more_lines;
}
// Packs the identification fields of `info` into one 32-bit value:
//   bits 31-24: implementer (low 8 bits)
//   bits 23-20: variant     (low 4 bits)
//   bits 15-4 : part        (low 12 bits)
//   bits  3-0 : revision    (low 4 bits)
static uint32_t GetCpuId(const ArmInfo* const info) {
  uint32_t cpu_id = 0;
  cpu_id |= ExtractBitRange(info->implementer, 7, 0) << 24;
  cpu_id |= ExtractBitRange(info->variant, 3, 0) << 20;
  cpu_id |= ExtractBitRange(info->part, 11, 0) << 4;
  cpu_id |= ExtractBitRange(info->revision, 3, 0) << 0;
  return cpu_id;
}
// Corrects known misreports of specific kernels/devices and then propagates
// implied features.
static void FixErrors(ArmInfo* const info,
                      ProcCpuInfoData* const proc_cpu_info_data) {
  uint32_t cpu_id;

  // Some Samsung kernels report an invalid architecture: trust the "(v6l)"
  // marker of the Processor line over an architecture of 7 or more.
  // http://code.google.com/p/android/issues/detail?id=10812
  if (proc_cpu_info_data->processor_reports_armv6 && info->architecture >= 7) {
    info->architecture = 6;
  }

  // Kernel configuration bugs that prevent correct reporting of CPU features,
  // keyed by CPU id.
  cpu_id = GetCpuId(info);
  if (cpu_id == 0x4100C080) {
    // The emulator-specific Android 4.2 kernel fails to report support for
    // the 32-bit ARM IDIV instruction, which is a feature of the virtual CPU
    // implemented by the emulator. Thumb IDIV could be supported in the
    // future, which would require updating this case.
    if (info->architecture >= 7 &&
        proc_cpu_info_data->hardware_reports_goldfish) {
      info->features.idiva = true;
    }
  } else if (cpu_id == 0x511004D0) {
    // https://crbug.com/341598.
    info->features.neon = false;
  } else if (cpu_id == 0x510006F2 || cpu_id == 0x510006F3) {
    // The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report
    // IDIV support.
    info->features.idiva = true;
    info->features.idivt = true;
  }

  // Implied features: vfpv4 or neon imply vfpv3, and vfpv3 implies vfp.
  if (info->features.vfpv4 || info->features.neon) {
    info->features.vfpv3 = true;
  }
  if (info->features.vfpv3) {
    info->features.vfp = true;
  }
}
// Feeds /proc/cpuinfo line by line to HandleArmLine. Does nothing when the
// file cannot be opened.
static void FillProcCpuInfoData(ArmInfo* const info,
                                ProcCpuInfoData* proc_cpu_info_data) {
  StackLineReader reader;
  const int fd = CpuFeatures_OpenFile("/proc/cpuinfo");
  if (fd < 0) {
    return;
  }
  StackLineReader_Initialize(&reader, fd);
  // The handler returns false for the line that carries the EOF marker.
  while (HandleArmLine(StackLineReader_NextLine(&reader), info,
                       proc_cpu_info_data)) {
  }
  CpuFeatures_CloseFile(fd);
}
static const ArmInfo kEmptyArmInfo;
static const ProcCpuInfoData kEmptyProcCpuInfoData;
ArmInfo GetArmInfo(void) {
// capabilities are fetched from both getauxval and /proc/cpuinfo so we can
// have some information if the executable is sandboxed (aka no access to
// /proc/cpuinfo).
ArmInfo info = kEmptyArmInfo;
ProcCpuInfoData proc_cpu_info_data = kEmptyProcCpuInfoData;
FillProcCpuInfoData(&info, &proc_cpu_info_data);
CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs,
CpuFeatures_GetHardwareCapabilities(),
&info.features);
FixErrors(&info, &proc_cpu_info_data);
return info;
}
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// Returns the value of the `features` member selected by `value`, or false
// for ARM_LAST_ and for values outside the enumeration.
int GetArmFeaturesEnumValue(const ArmFeatures* features,
                            ArmFeaturesEnum value) {
  int enabled = false;
  switch (value) {
    case ARM_VFP:
      enabled = features->vfp;
      break;
    case ARM_IWMMXT:
      enabled = features->iwmmxt;
      break;
    case ARM_NEON:
      enabled = features->neon;
      break;
    case ARM_VFPV3:
      enabled = features->vfpv3;
      break;
    case ARM_VFPV3D16:
      enabled = features->vfpv3d16;
      break;
    case ARM_VFPV4:
      enabled = features->vfpv4;
      break;
    case ARM_IDIVA:
      enabled = features->idiva;
      break;
    case ARM_IDIVT:
      enabled = features->idivt;
      break;
    case ARM_AES:
      enabled = features->aes;
      break;
    case ARM_PMULL:
      enabled = features->pmull;
      break;
    case ARM_SHA1:
      enabled = features->sha1;
      break;
    case ARM_SHA2:
      enabled = features->sha2;
      break;
    case ARM_CRC32:
      enabled = features->crc32;
      break;
    case ARM_LAST_:
      break;
  }
  return enabled;
}
// Returns the lower-case name of the feature selected by `value`, or
// "unknown feature" for ARM_LAST_ and for values outside the enumeration.
const char* GetArmFeaturesEnumName(ArmFeaturesEnum value) {
  const char* name = "unknown feature";
  switch (value) {
    case ARM_VFP:
      name = "vfp";
      break;
    case ARM_IWMMXT:
      name = "iwmmxt";
      break;
    case ARM_NEON:
      name = "neon";
      break;
    case ARM_VFPV3:
      name = "vfpv3";
      break;
    case ARM_VFPV3D16:
      name = "vfpv3d16";
      break;
    case ARM_VFPV4:
      name = "vfpv4";
      break;
    case ARM_IDIVA:
      name = "idiva";
      break;
    case ARM_IDIVT:
      name = "idivt";
      break;
    case ARM_AES:
      name = "aes";
      break;
    case ARM_PMULL:
      name = "pmull";
      break;
    case ARM_SHA1:
      name = "sha1";
      break;
    case ARM_SHA2:
      name = "sha2";
      break;
    case ARM_CRC32:
      name = "crc32";
      break;
    case ARM_LAST_:
      break;
  }
  return name;
}

View file

@ -0,0 +1,98 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpuinfo_mips.h"
#include "internal/filesystem.h"
#include "internal/linux_features_aggregator.h"
#include "internal/stack_line_reader.h"
#include "internal/string_view.h"
// One generated setter (set_<name>) per MipsFeatures member listed in
// kConfigs.
DECLARE_SETTER(MipsFeatures, msa)
DECLARE_SETTER(MipsFeatures, eva)
// Detection table. Each entry holds, in order: the hardware-capability mask,
// the flag name searched for in the /proc/cpuinfo "ASEs implemented" line, and
// the setter for the matching MipsFeatures member.
static const CapabilityConfig kConfigs[] = {
{{MIPS_HWCAP_MSA, 0}, "msa", &set_msa}, //
{{MIPS_HWCAP_EVA, 0}, "eva", &set_eva}, //
};
// Number of entries in kConfigs.
static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
// Interprets one /proc/cpuinfo line: when it is the "ASEs implemented"
// attribute, its value is used as the list of feature flags. Every other line
// is ignored.
// Returns false once the reader reported end of file, true otherwise.
static bool HandleMipsLine(const LineResult result,
                           MipsFeatures* const features) {
  StringView key, value;
  // See tests for an example of the expected line.
  const bool is_attribute =
      CpuFeatures_StringView_GetAttributeKeyValue(result.line, &key, &value);
  if (is_attribute &&
      CpuFeatures_StringView_IsEquals(key, str("ASEs implemented"))) {
    CpuFeatures_SetFromFlags(kConfigsSize, kConfigs, value, features);
  }
  return !result.eof;
}
// Feeds /proc/cpuinfo line by line to HandleMipsLine. Does nothing when the
// file cannot be opened.
static void FillProcCpuInfoData(MipsFeatures* const features) {
  StackLineReader reader;
  const int fd = CpuFeatures_OpenFile("/proc/cpuinfo");
  if (fd < 0) {
    return;
  }
  StackLineReader_Initialize(&reader, fd);
  // The handler returns false for the line that carries the EOF marker.
  while (HandleMipsLine(StackLineReader_NextLine(&reader), features)) {
  }
  CpuFeatures_CloseFile(fd);
}
static const MipsInfo kEmptyMipsInfo;
MipsInfo GetMipsInfo(void) {
// capabilities are fetched from both getauxval and /proc/cpuinfo so we can
// have some information if the executable is sandboxed (aka no access to
// /proc/cpuinfo).
MipsInfo info = kEmptyMipsInfo;
FillProcCpuInfoData(&info.features);
CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs,
CpuFeatures_GetHardwareCapabilities(),
&info.features);
return info;
}
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// Returns the value of the `features` member selected by `value`, or false
// for MIPS_LAST_ and for values outside the enumeration.
int GetMipsFeaturesEnumValue(const MipsFeatures* features,
                             MipsFeaturesEnum value) {
  int enabled = false;
  switch (value) {
    case MIPS_MSA:
      enabled = features->msa;
      break;
    case MIPS_EVA:
      enabled = features->eva;
      break;
    case MIPS_LAST_:
      break;
  }
  return enabled;
}
// Returns the lower-case name of the feature selected by `value`, or
// "unknown feature" for MIPS_LAST_ and for values outside the enumeration.
const char* GetMipsFeaturesEnumName(MipsFeaturesEnum value) {
  const char* name = "unknown feature";
  switch (value) {
    case MIPS_MSA:
      name = "msa";
      break;
    case MIPS_EVA:
      name = "eva";
      break;
    case MIPS_LAST_:
      break;
  }
  return name;
}

View file

@ -0,0 +1,358 @@
// Copyright 2018 IBM.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdbool.h>
#include <string.h>
#include "cpuinfo_ppc.h"
#include "internal/bit_utils.h"
#include "internal/filesystem.h"
#include "internal/linux_features_aggregator.h"
#include "internal/stack_line_reader.h"
#include "internal/string_view.h"
// One generated setter per PPCFeatures member. Each line defines a file-local
// function set_<member>(void* const features, bool value); the addresses of
// these functions are stored in the kConfigs table that follows.
DECLARE_SETTER(PPCFeatures, ppc32)
DECLARE_SETTER(PPCFeatures, ppc64)
DECLARE_SETTER(PPCFeatures, ppc601)
DECLARE_SETTER(PPCFeatures, altivec)
DECLARE_SETTER(PPCFeatures, fpu)
DECLARE_SETTER(PPCFeatures, mmu)
DECLARE_SETTER(PPCFeatures, mac_4xx)
DECLARE_SETTER(PPCFeatures, unifiedcache)
DECLARE_SETTER(PPCFeatures, spe)
DECLARE_SETTER(PPCFeatures, efpsingle)
DECLARE_SETTER(PPCFeatures, efpdouble)
DECLARE_SETTER(PPCFeatures, no_tb)
DECLARE_SETTER(PPCFeatures, power4)
DECLARE_SETTER(PPCFeatures, power5)
DECLARE_SETTER(PPCFeatures, power5plus)
DECLARE_SETTER(PPCFeatures, cell)
DECLARE_SETTER(PPCFeatures, booke)
DECLARE_SETTER(PPCFeatures, smt)
DECLARE_SETTER(PPCFeatures, icachesnoop)
DECLARE_SETTER(PPCFeatures, arch205)
DECLARE_SETTER(PPCFeatures, pa6t)
DECLARE_SETTER(PPCFeatures, dfp)
DECLARE_SETTER(PPCFeatures, power6ext)
DECLARE_SETTER(PPCFeatures, arch206)
DECLARE_SETTER(PPCFeatures, vsx)
DECLARE_SETTER(PPCFeatures, pseries_perfmon_compat)
DECLARE_SETTER(PPCFeatures, truele)
DECLARE_SETTER(PPCFeatures, ppcle)
DECLARE_SETTER(PPCFeatures, arch207)
DECLARE_SETTER(PPCFeatures, htm)
DECLARE_SETTER(PPCFeatures, dscr)
DECLARE_SETTER(PPCFeatures, ebb)
DECLARE_SETTER(PPCFeatures, isel)
DECLARE_SETTER(PPCFeatures, tar)
DECLARE_SETTER(PPCFeatures, vcrypto)
DECLARE_SETTER(PPCFeatures, htm_nosc)
DECLARE_SETTER(PPCFeatures, arch300)
DECLARE_SETTER(PPCFeatures, ieee128)
DECLARE_SETTER(PPCFeatures, darn)
DECLARE_SETTER(PPCFeatures, scv)
DECLARE_SETTER(PPCFeatures, htm_no_suspend)
// Table tying each PowerPC feature to the hardware-capability bits that
// announce it. Each entry is {{mask, mask2}, flag name, setter}:
//  - the inner pair is the HardwareCapabilities mask; PPC_FEATURE_* values sit
//    in the first slot and PPC_FEATURE2_* values in the second (presumably the
//    AT_HWCAP and AT_HWCAP2 words respectively -- confirm against the
//    HardwareCapabilities declaration);
//  - the string is the textual flag name for the feature;
//  - the function pointer is the setter for the matching PPCFeatures field.
// GetPPCInfo() passes this table to CpuFeatures_OverrideFromHwCaps().
static const CapabilityConfig kConfigs[] = {
{{PPC_FEATURE_32, 0}, "ppc32", &set_ppc32},
{{PPC_FEATURE_64, 0}, "ppc64", &set_ppc64},
{{PPC_FEATURE_601_INSTR, 0}, "ppc601", &set_ppc601},
{{PPC_FEATURE_HAS_ALTIVEC, 0}, "altivec", &set_altivec},
{{PPC_FEATURE_HAS_FPU, 0}, "fpu", &set_fpu},
{{PPC_FEATURE_HAS_MMU, 0}, "mmu", &set_mmu},
{{PPC_FEATURE_HAS_4xxMAC, 0}, "4xxmac", &set_mac_4xx},
{{PPC_FEATURE_UNIFIED_CACHE, 0}, "ucache", &set_unifiedcache},
{{PPC_FEATURE_HAS_SPE, 0}, "spe", &set_spe},
{{PPC_FEATURE_HAS_EFP_SINGLE, 0}, "efpsingle", &set_efpsingle},
{{PPC_FEATURE_HAS_EFP_DOUBLE, 0}, "efpdouble", &set_efpdouble},
{{PPC_FEATURE_NO_TB, 0}, "notb", &set_no_tb},
{{PPC_FEATURE_POWER4, 0}, "power4", &set_power4},
{{PPC_FEATURE_POWER5, 0}, "power5", &set_power5},
{{PPC_FEATURE_POWER5_PLUS, 0}, "power5+", &set_power5plus},
{{PPC_FEATURE_CELL, 0}, "cellbe", &set_cell},
{{PPC_FEATURE_BOOKE, 0}, "booke", &set_booke},
{{PPC_FEATURE_SMT, 0}, "smt", &set_smt},
{{PPC_FEATURE_ICACHE_SNOOP, 0}, "ic_snoop", &set_icachesnoop},
{{PPC_FEATURE_ARCH_2_05, 0}, "arch_2_05", &set_arch205},
{{PPC_FEATURE_PA6T, 0}, "pa6t", &set_pa6t},
{{PPC_FEATURE_HAS_DFP, 0}, "dfp", &set_dfp},
{{PPC_FEATURE_POWER6_EXT, 0}, "power6x", &set_power6ext},
{{PPC_FEATURE_ARCH_2_06, 0}, "arch_2_06", &set_arch206},
{{PPC_FEATURE_HAS_VSX, 0}, "vsx", &set_vsx},
{{PPC_FEATURE_PSERIES_PERFMON_COMPAT, 0},
"archpmu",
&set_pseries_perfmon_compat},
{{PPC_FEATURE_TRUE_LE, 0}, "true_le", &set_truele},
{{PPC_FEATURE_PPC_LE, 0}, "ppcle", &set_ppcle},
{{0, PPC_FEATURE2_ARCH_2_07}, "arch_2_07", &set_arch207},
{{0, PPC_FEATURE2_HTM}, "htm", &set_htm},
{{0, PPC_FEATURE2_DSCR}, "dscr", &set_dscr},
{{0, PPC_FEATURE2_EBB}, "ebb", &set_ebb},
{{0, PPC_FEATURE2_ISEL}, "isel", &set_isel},
{{0, PPC_FEATURE2_TAR}, "tar", &set_tar},
{{0, PPC_FEATURE2_VEC_CRYPTO}, "vcrypto", &set_vcrypto},
{{0, PPC_FEATURE2_HTM_NOSC}, "htm-nosc", &set_htm_nosc},
{{0, PPC_FEATURE2_ARCH_3_00}, "arch_3_00", &set_arch300},
{{0, PPC_FEATURE2_HAS_IEEE128}, "ieee128", &set_ieee128},
{{0, PPC_FEATURE2_DARN}, "darn", &set_darn},
{{0, PPC_FEATURE2_SCV}, "scv", &set_scv},
{{0, PPC_FEATURE2_HTM_NO_SUSPEND}, "htm-no-suspend", &set_htm_no_suspend},
};
// Number of entries in kConfigs.
static const size_t kConfigsSize = sizeof(kConfigs) / sizeof(CapabilityConfig);
// Handles one line read from /proc/cpuinfo (see FillProcCpuInfoData).
//
// When the line splits into a key and a value and the key is one of
// "platform", "model", "machine" or "cpu", the value is copied into the
// matching field of `strings`. Other lines are ignored.
//
// Each copy is bounded by the size of its own destination field, so the
// bounds stay correct even if the PPCPlatformStrings fields are ever given
// different sizes.
//
// Returns false once `result` reports end of file, true otherwise, so the
// caller can use the return value as its "keep reading" condition.
static bool HandlePPCLine(const LineResult result,
                          PPCPlatformStrings* const strings) {
  StringView line = result.line;
  StringView key, value;
  if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) {
    if (CpuFeatures_StringView_HasWord(key, "platform")) {
      CpuFeatures_StringView_CopyString(value, strings->platform,
                                        sizeof(strings->platform));
    } else if (CpuFeatures_StringView_IsEquals(key, str("model"))) {
      CpuFeatures_StringView_CopyString(value, strings->model,
                                        sizeof(strings->model));
    } else if (CpuFeatures_StringView_IsEquals(key, str("machine"))) {
      CpuFeatures_StringView_CopyString(value, strings->machine,
                                        sizeof(strings->machine));
    } else if (CpuFeatures_StringView_IsEquals(key, str("cpu"))) {
      CpuFeatures_StringView_CopyString(value, strings->cpu,
                                        sizeof(strings->cpu));
    }
  }
  return !result.eof;
}
// Reads /proc/cpuinfo line by line and lets HandlePPCLine store the
// interesting values into `strings`. Does nothing when the file cannot be
// opened.
static void FillProcCpuInfoData(PPCPlatformStrings* const strings) {
  StackLineReader line_reader;
  const int cpuinfo_fd = CpuFeatures_OpenFile("/proc/cpuinfo");
  if (cpuinfo_fd < 0) {
    return;
  }
  StackLineReader_Initialize(&line_reader, cpuinfo_fd);
  // HandlePPCLine returns false once the reader has reached end of file.
  while (HandlePPCLine(StackLineReader_NextLine(&line_reader), strings)) {
  }
  CpuFeatures_CloseFile(cpuinfo_fd);
}
static const PPCInfo kEmptyPPCInfo;
PPCInfo GetPPCInfo(void) {
/*
* On Power feature flags aren't currently in cpuinfo so we only look at
* the auxilary vector.
*/
PPCInfo info = kEmptyPPCInfo;
CpuFeatures_OverrideFromHwCaps(kConfigsSize, kConfigs,
CpuFeatures_GetHardwareCapabilities(),
&info.features);
return info;
}
// Zero-initialized template used as the starting value in
// GetPPCPlatformStrings.
static const PPCPlatformStrings kEmptyPPCPlatformStrings;
// Collects the platform description strings: the /proc/cpuinfo derived fields
// are filled first, then the `type` member is taken from
// CpuFeatures_GetPlatformType().
PPCPlatformStrings GetPPCPlatformStrings(void) {
  PPCPlatformStrings collected = kEmptyPPCPlatformStrings;
  FillProcCpuInfoData(&collected);
  collected.type = CpuFeatures_GetPlatformType();
  return collected;
}
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// Returns the value of the PPCFeatures field selected by `value`.
// `features` must be non-null (it is dereferenced unconditionally).
// PPC_LAST_ and any value without a case yield 0.
int GetPPCFeaturesEnumValue(const PPCFeatures* features,
PPCFeaturesEnum value) {
switch (value) {
case PPC_32:
return features->ppc32;
case PPC_64:
return features->ppc64;
case PPC_601_INSTR:
return features->ppc601;
case PPC_HAS_ALTIVEC:
return features->altivec;
case PPC_HAS_FPU:
return features->fpu;
case PPC_HAS_MMU:
return features->mmu;
case PPC_HAS_4xxMAC:
return features->mac_4xx;
case PPC_UNIFIED_CACHE:
return features->unifiedcache;
case PPC_HAS_SPE:
return features->spe;
case PPC_HAS_EFP_SINGLE:
return features->efpsingle;
case PPC_HAS_EFP_DOUBLE:
return features->efpdouble;
case PPC_NO_TB:
return features->no_tb;
case PPC_POWER4:
return features->power4;
case PPC_POWER5:
return features->power5;
case PPC_POWER5_PLUS:
return features->power5plus;
case PPC_CELL:
return features->cell;
case PPC_BOOKE:
return features->booke;
case PPC_SMT:
return features->smt;
case PPC_ICACHE_SNOOP:
return features->icachesnoop;
case PPC_ARCH_2_05:
return features->arch205;
case PPC_PA6T:
return features->pa6t;
case PPC_HAS_DFP:
return features->dfp;
case PPC_POWER6_EXT:
return features->power6ext;
case PPC_ARCH_2_06:
return features->arch206;
case PPC_HAS_VSX:
return features->vsx;
case PPC_PSERIES_PERFMON_COMPAT:
return features->pseries_perfmon_compat;
case PPC_TRUE_LE:
return features->truele;
case PPC_PPC_LE:
return features->ppcle;
case PPC_ARCH_2_07:
return features->arch207;
case PPC_HTM:
return features->htm;
case PPC_DSCR:
return features->dscr;
case PPC_EBB:
return features->ebb;
case PPC_ISEL:
return features->isel;
case PPC_TAR:
return features->tar;
case PPC_VEC_CRYPTO:
return features->vcrypto;
case PPC_HTM_NOSC:
return features->htm_nosc;
case PPC_ARCH_3_00:
return features->arch300;
case PPC_HAS_IEEE128:
return features->ieee128;
case PPC_DARN:
return features->darn;
case PPC_SCV:
return features->scv;
case PPC_HTM_NO_SUSPEND:
return features->htm_no_suspend;
// PPC_LAST_ has no backing field; it falls out of the switch to the default
// result below.
case PPC_LAST_:
break;
}
// Reached for PPC_LAST_ and for values that match no case.
return false;
}
/* Have used the same names as glibc */
// Returns a static string naming the feature selected by `value`. The strings
// are the same flag names used in the kConfigs table of this file.
// PPC_LAST_ and any value without a case yield "unknown_feature".
const char* GetPPCFeaturesEnumName(PPCFeaturesEnum value) {
switch (value) {
case PPC_32:
return "ppc32";
case PPC_64:
return "ppc64";
case PPC_601_INSTR:
return "ppc601";
case PPC_HAS_ALTIVEC:
return "altivec";
case PPC_HAS_FPU:
return "fpu";
case PPC_HAS_MMU:
return "mmu";
case PPC_HAS_4xxMAC:
return "4xxmac";
case PPC_UNIFIED_CACHE:
return "ucache";
case PPC_HAS_SPE:
return "spe";
case PPC_HAS_EFP_SINGLE:
return "efpsingle";
case PPC_HAS_EFP_DOUBLE:
return "efpdouble";
case PPC_NO_TB:
return "notb";
case PPC_POWER4:
return "power4";
case PPC_POWER5:
return "power5";
case PPC_POWER5_PLUS:
return "power5+";
case PPC_CELL:
return "cellbe";
case PPC_BOOKE:
return "booke";
case PPC_SMT:
return "smt";
case PPC_ICACHE_SNOOP:
return "ic_snoop";
case PPC_ARCH_2_05:
return "arch_2_05";
case PPC_PA6T:
return "pa6t";
case PPC_HAS_DFP:
return "dfp";
case PPC_POWER6_EXT:
return "power6x";
case PPC_ARCH_2_06:
return "arch_2_06";
case PPC_HAS_VSX:
return "vsx";
case PPC_PSERIES_PERFMON_COMPAT:
return "archpmu";
case PPC_TRUE_LE:
return "true_le";
case PPC_PPC_LE:
return "ppcle";
case PPC_ARCH_2_07:
return "arch_2_07";
case PPC_HTM:
return "htm";
case PPC_DSCR:
return "dscr";
case PPC_EBB:
return "ebb";
case PPC_ISEL:
return "isel";
case PPC_TAR:
return "tar";
case PPC_VEC_CRYPTO:
return "vcrypto";
case PPC_HTM_NOSC:
return "htm-nosc";
case PPC_ARCH_3_00:
return "arch_3_00";
case PPC_HAS_IEEE128:
return "ieee128";
case PPC_DARN:
return "darn";
case PPC_SCV:
return "scv";
case PPC_HTM_NO_SUSPEND:
return "htm-no-suspend";
// PPC_LAST_ names no feature; it falls through to the fallback string.
case PPC_LAST_:
break;
}
// Reached for PPC_LAST_ and for values that match no case.
return "unknown_feature";
}

View file

@ -0,0 +1,447 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpuinfo_x86.h"
#include "internal/bit_utils.h"
#include "internal/cpuid_x86.h"
#include <stdbool.h>
#include <string.h>
// All-zero leaf returned for CPUID leaves the processor does not support.
static const Leaf kEmptyLeaf;
// Queries CPUID leaf `leaf_id`, but only when it does not exceed
// `max_cpuid_leaf`; otherwise an all-zero leaf is returned so that every bit
// test on the result reads as "not set".
static Leaf SafeCpuId(uint32_t max_cpuid_leaf, uint32_t leaf_id) {
  if (leaf_id > max_cpuid_leaf) {
    return kEmptyLeaf;
  }
  return CpuId(leaf_id);
}
// Bit masks tested against the low 32 bits of XCR0 (see the Has*OsXSave
// helpers below). Each mask is a single bit: bit 1 (XMM), bit 2 (YMM),
// bit 5 (mask registers), bit 6 (ZMM0-15) and bit 7 (ZMM16-31).
#define MASK_XMM 0x2
#define MASK_YMM 0x4
#define MASK_MASKREG 0x20
#define MASK_ZMM0_15 0x40
#define MASK_ZMM16_31 0x80
// Returns true when every bit set in `mask` is also set in `value`.
// An empty mask (0) is always satisfied.
static bool HasMask(uint32_t value, uint32_t mask) {
  const uint32_t missing_bits = mask & ~value;
  return missing_bits == 0;
}
// Tells whether the operating system preserves the xmm registers across
// context switches, judged from the low 32 bits of XCR0.
static bool HasXmmOsXSave(uint32_t xcr0_eax) {
  const uint32_t required_state = MASK_XMM;
  return HasMask(xcr0_eax, required_state);
}
// Tells whether the operating system preserves the ymm registers across
// context switches, judged from the low 32 bits of XCR0. Both the xmm and the
// ymm state bits have to be enabled.
static bool HasYmmOsXSave(uint32_t xcr0_eax) {
  const uint32_t required_state = MASK_XMM | MASK_YMM;
  return HasMask(xcr0_eax, required_state);
}
// Tells whether the operating system preserves the zmm registers across
// context switches, judged from the low 32 bits of XCR0. The xmm, ymm, mask
// register and both zmm state bits all have to be enabled.
static bool HasZmmOsXSave(uint32_t xcr0_eax) {
  const uint32_t required_state =
      MASK_XMM | MASK_YMM | MASK_MASKREG | MASK_ZMM0_15 | MASK_ZMM16_31;
  return HasMask(xcr0_eax, required_state);
}
// Writes the 12-character vendor identification held in `leaf` (registers
// ebx, edx, ecx, in that order) into `vendor` and NUL-terminates it.
// `vendor` must have room for at least 13 bytes.
//
// The registers are copied with memcpy rather than stored through a casted
// uint32_t pointer: `vendor` is a char buffer with no alignment guarantee, and
// writing it through a uint32_t lvalue violates the C aliasing rules.
static void SetVendor(const Leaf leaf, char* const vendor) {
  const uint32_t ebx = leaf.ebx;
  const uint32_t edx = leaf.edx;
  const uint32_t ecx = leaf.ecx;
  memcpy(vendor, &ebx, sizeof(ebx));
  memcpy(vendor + 4, &edx, sizeof(edx));
  memcpy(vendor + 8, &ecx, sizeof(ecx));
  vendor[12] = '\0';
}
// Returns non-zero when the vendor identification in `leaf` equals `name`.
// `name` must point to at least 12 readable bytes; they are compared against
// the registers in ebx, edx, ecx order.
//
// The bytes of `name` are loaded with memcpy rather than read through a casted
// uint32_t pointer: string literals carry no alignment guarantee, and reading
// a char array through a uint32_t lvalue violates the C aliasing rules.
static int IsVendor(const Leaf leaf, const char* const name) {
  uint32_t ebx;
  uint32_t edx;
  uint32_t ecx;
  memcpy(&ebx, name, sizeof(ebx));
  memcpy(&edx, name + 4, sizeof(edx));
  memcpy(&ecx, name + 8, sizeof(ecx));
  return leaf.ebx == ebx && leaf.ecx == ecx && leaf.edx == edx;
}
// Reference https://en.wikipedia.org/wiki/CPUID.
// Fills family, model, stepping and the feature flags of `info` from CPUID
// leaves 1 and 7 (each read only if `max_cpuid_leaf` allows it).
//
// Flags that depend on register state being preserved by the operating system
// are written only when the corresponding XCR0 check passes; otherwise those
// fields are left untouched.
// NOTE(review): XCR0 is treated as 0 unless both XSAVE and OSXSAVE are
// reported, so in that case the SSSE3/SSE4.x flags are not written either --
// confirm this is intended.
static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info) {
  const Leaf basic = SafeCpuId(max_cpuid_leaf, 1);
  const Leaf extended = SafeCpuId(max_cpuid_leaf, 7);
  // Leaf 1 ECX bit 26 is XSAVE, bit 27 is OSXSAVE; XCR0 is read only when
  // both are set.
  const uint32_t xcr0_low =
      (IsBitSet(basic.ecx, 26) && IsBitSet(basic.ecx, 27)) ? GetXCR0Eax() : 0;
  const uint32_t base_family = ExtractBitRange(basic.eax, 11, 8);
  const uint32_t ext_family = ExtractBitRange(basic.eax, 27, 20);
  const uint32_t base_model = ExtractBitRange(basic.eax, 7, 4);
  const uint32_t ext_model = ExtractBitRange(basic.eax, 19, 16);
  X86Features* const flags = &info->features;

  info->stepping = ExtractBitRange(basic.eax, 3, 0);
  info->model = (ext_model << 4) + base_model;
  info->family = ext_family + base_family;

  // Features that do not depend on OS register-state support.
  flags->smx = IsBitSet(basic.ecx, 6);
  flags->cx16 = IsBitSet(basic.ecx, 13);
  flags->aes = IsBitSet(basic.ecx, 25);
  flags->f16c = IsBitSet(basic.ecx, 29);
  flags->sgx = IsBitSet(extended.ebx, 2);
  flags->bmi1 = IsBitSet(extended.ebx, 3);
  flags->bmi2 = IsBitSet(extended.ebx, 8);
  flags->erms = IsBitSet(extended.ebx, 9);
  flags->vpclmulqdq = IsBitSet(extended.ecx, 10);

  // Requires the OS to preserve xmm state.
  if (HasXmmOsXSave(xcr0_low)) {
    flags->ssse3 = IsBitSet(basic.ecx, 9);
    flags->sse4_1 = IsBitSet(basic.ecx, 19);
    flags->sse4_2 = IsBitSet(basic.ecx, 20);
  }

  // Requires the OS to preserve ymm state.
  if (HasYmmOsXSave(xcr0_low)) {
    flags->fma3 = IsBitSet(basic.ecx, 12);
    flags->avx = IsBitSet(basic.ecx, 28);
    flags->avx2 = IsBitSet(extended.ebx, 5);
  }

  // Requires the OS to preserve zmm and mask-register state.
  if (HasZmmOsXSave(xcr0_low)) {
    flags->avx512f = IsBitSet(extended.ebx, 16);
    flags->avx512cd = IsBitSet(extended.ebx, 28);
    flags->avx512er = IsBitSet(extended.ebx, 27);
    flags->avx512pf = IsBitSet(extended.ebx, 26);
    flags->avx512bw = IsBitSet(extended.ebx, 30);
    flags->avx512dq = IsBitSet(extended.ebx, 17);
    flags->avx512vl = IsBitSet(extended.ebx, 31);
    flags->avx512ifma = IsBitSet(extended.ebx, 21);
    flags->avx512vbmi = IsBitSet(extended.ecx, 1);
    flags->avx512vbmi2 = IsBitSet(extended.ecx, 6);
    flags->avx512vnni = IsBitSet(extended.ecx, 11);
    flags->avx512bitalg = IsBitSet(extended.ecx, 12);
    flags->avx512vpopcntdq = IsBitSet(extended.ecx, 14);
    flags->avx512_4vnniw = IsBitSet(extended.edx, 2);
    // NOTE(review): leaf 7 EDX bit 3 appears to be documented by Intel as
    // AVX512_4FMAPS; the field name suggests otherwise -- confirm.
    flags->avx512_4vbmi2 = IsBitSet(extended.edx, 3);
  }
}
// Zero-initialized template used as the starting value in GetX86Info.
static const X86Info kEmptyX86Info;
// Returns the description of the running x86 processor. The vendor string is
// always filled in; family, model, stepping and feature flags are only parsed
// for the "GenuineIntel" and "AuthenticAMD" vendors and stay zero otherwise.
X86Info GetX86Info(void) {
  // Leaf 0: eax holds the highest supported basic leaf, ebx/edx/ecx the
  // vendor identification.
  const Leaf vendor_leaf = CpuId(0);
  X86Info result = kEmptyX86Info;
  SetVendor(vendor_leaf, result.vendor);
  if (IsVendor(vendor_leaf, "GenuineIntel") ||
      IsVendor(vendor_leaf, "AuthenticAMD")) {
    ParseCpuId(vendor_leaf.eax, &result);
  }
  return result;
}
// Packs the low 8 bits of a family and of a model number into one integer
// (family in bits 15..8, model in bits 7..0) so that a family/model pair can
// be used as a single switch/case value. The arguments are parenthesized so
// that expressions such as `a | b` are masked as a whole.
#define CPUID(FAMILY, MODEL) ((((FAMILY) & 0xFF) << 8) | ((MODEL) & 0xFF))
// Maps the vendor string and family/model numbers of `info` to a
// microarchitecture identifier.
//  - "GenuineIntel": decided by the (family, model) pair; only family 0x06
//    models are listed.
//  - "AuthenticAMD": decided by the family alone.
// Any other vendor, or an unlisted family/model, yields X86_UNKNOWN.
// `info` must be non-null.
// NOTE(review): no case here returns INTEL_CFL or INTEL_CNL even though
// GetX86MicroarchitectureName handles both -- confirm whether model entries
// for them are missing.
X86Microarchitecture GetX86Microarchitecture(const X86Info* info) {
// The comparison spans the whole vendor buffer (sizeof(info->vendor)), so the
// bytes after the 12 vendor characters take part in it as well.
if (memcmp(info->vendor, "GenuineIntel", sizeof(info->vendor)) == 0) {
switch (CPUID(info->family, info->model)) {
case CPUID(0x06, 0x35):
case CPUID(0x06, 0x36):
// https://en.wikipedia.org/wiki/Bonnell_(microarchitecture)
return INTEL_ATOM_BNL;
case CPUID(0x06, 0x37):
case CPUID(0x06, 0x4C):
// https://en.wikipedia.org/wiki/Silvermont
return INTEL_ATOM_SMT;
case CPUID(0x06, 0x5C):
// https://en.wikipedia.org/wiki/Goldmont
return INTEL_ATOM_GMT;
case CPUID(0x06, 0x0F):
case CPUID(0x06, 0x16):
// https://en.wikipedia.org/wiki/Intel_Core_(microarchitecture)
return INTEL_CORE;
case CPUID(0x06, 0x17):
case CPUID(0x06, 0x1D):
// https://en.wikipedia.org/wiki/Penryn_(microarchitecture)
return INTEL_PNR;
case CPUID(0x06, 0x1A):
case CPUID(0x06, 0x1E):
case CPUID(0x06, 0x1F):
case CPUID(0x06, 0x2E):
// https://en.wikipedia.org/wiki/Nehalem_(microarchitecture)
return INTEL_NHM;
case CPUID(0x06, 0x25):
case CPUID(0x06, 0x2C):
case CPUID(0x06, 0x2F):
// https://en.wikipedia.org/wiki/Westmere_(microarchitecture)
return INTEL_WSM;
case CPUID(0x06, 0x2A):
case CPUID(0x06, 0x2D):
// https://en.wikipedia.org/wiki/Sandy_Bridge#Models_and_steppings
return INTEL_SNB;
case CPUID(0x06, 0x3A):
case CPUID(0x06, 0x3E):
// https://en.wikipedia.org/wiki/Ivy_Bridge_(microarchitecture)#Models_and_steppings
return INTEL_IVB;
case CPUID(0x06, 0x3C):
case CPUID(0x06, 0x3F):
case CPUID(0x06, 0x45):
case CPUID(0x06, 0x46):
// https://en.wikipedia.org/wiki/Haswell_(microarchitecture)
return INTEL_HSW;
case CPUID(0x06, 0x3D):
case CPUID(0x06, 0x47):
case CPUID(0x06, 0x4F):
case CPUID(0x06, 0x56):
// https://en.wikipedia.org/wiki/Broadwell_(microarchitecture)
return INTEL_BDW;
case CPUID(0x06, 0x4E):
case CPUID(0x06, 0x55):
case CPUID(0x06, 0x5E):
// https://en.wikipedia.org/wiki/Skylake_(microarchitecture)
return INTEL_SKL;
case CPUID(0x06, 0x8E):
case CPUID(0x06, 0x9E):
// https://en.wikipedia.org/wiki/Kaby_Lake
return INTEL_KBL;
default:
return X86_UNKNOWN;
}
}
if (memcmp(info->vendor, "AuthenticAMD", sizeof(info->vendor)) == 0) {
switch (info->family) {
// https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures
case 0x0F:
return AMD_HAMMER;
case 0x10:
return AMD_K10;
case 0x14:
return AMD_BOBCAT;
case 0x15:
return AMD_BULLDOZER;
case 0x16:
return AMD_JAGUAR;
case 0x17:
return AMD_ZEN;
default:
return X86_UNKNOWN;
}
}
// Neither Intel nor AMD.
return X86_UNKNOWN;
}
// Copies the raw register contents of CPUID leaf `leaf_id` into `buffer`
// (sizeof(Leaf) bytes). When `leaf_id` exceeds `max_cpuid_ext_leaf` the bytes
// written are all zero.
static void SetString(const uint32_t max_cpuid_ext_leaf, const uint32_t leaf_id,
                      char* buffer) {
  // memcpy is acceptable here: SetString is only reached when the caller asks
  // for the X86 brand string.
  const Leaf registers = SafeCpuId(max_cpuid_ext_leaf, leaf_id);
  memcpy(buffer, &registers, sizeof(registers));
}
// Writes the processor brand string into `brand_string`, which must hold at
// least 49 bytes: three consecutive 16-byte chunks taken from extended CPUID
// leaves 0x80000002..0x80000004, followed by a terminating NUL at index 48.
void FillX86BrandString(char brand_string[49]) {
  // eax of leaf 0x80000000 is the highest supported extended leaf.
  const uint32_t max_ext_leaf = CpuId(0x80000000).eax;
  uint32_t chunk;
  for (chunk = 0; chunk < 3; ++chunk) {
    SetString(max_ext_leaf, 0x80000002 + chunk, brand_string + 16 * chunk);
  }
  brand_string[48] = '\0';
}
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
// Returns the value of the X86Features field selected by `value`.
// `features` must be non-null (it is dereferenced unconditionally).
// X86_LAST_ and any value without a case yield 0.
int GetX86FeaturesEnumValue(const X86Features* features,
X86FeaturesEnum value) {
switch (value) {
case X86_AES:
return features->aes;
case X86_ERMS:
return features->erms;
case X86_F16C:
return features->f16c;
case X86_FMA3:
return features->fma3;
case X86_VPCLMULQDQ:
return features->vpclmulqdq;
case X86_BMI1:
return features->bmi1;
case X86_BMI2:
return features->bmi2;
case X86_SSSE3:
return features->ssse3;
case X86_SSE4_1:
return features->sse4_1;
case X86_SSE4_2:
return features->sse4_2;
case X86_AVX:
return features->avx;
case X86_AVX2:
return features->avx2;
case X86_AVX512F:
return features->avx512f;
case X86_AVX512CD:
return features->avx512cd;
case X86_AVX512ER:
return features->avx512er;
case X86_AVX512PF:
return features->avx512pf;
case X86_AVX512BW:
return features->avx512bw;
case X86_AVX512DQ:
return features->avx512dq;
case X86_AVX512VL:
return features->avx512vl;
case X86_AVX512IFMA:
return features->avx512ifma;
case X86_AVX512VBMI:
return features->avx512vbmi;
case X86_AVX512VBMI2:
return features->avx512vbmi2;
case X86_AVX512VNNI:
return features->avx512vnni;
case X86_AVX512BITALG:
return features->avx512bitalg;
case X86_AVX512VPOPCNTDQ:
return features->avx512vpopcntdq;
case X86_AVX512_4VNNIW:
return features->avx512_4vnniw;
case X86_AVX512_4VBMI2:
return features->avx512_4vbmi2;
case X86_SMX:
return features->smx;
case X86_SGX:
return features->sgx;
case X86_CX16:
return features->cx16;
// X86_LAST_ has no backing field; it falls out of the switch to the default
// result below.
case X86_LAST_:
break;
}
// Reached for X86_LAST_ and for values that match no case.
return false;
}
// Returns a static lower-case string naming the feature selected by `value`.
// X86_LAST_ and any value without a case yield "unknown_feature".
const char* GetX86FeaturesEnumName(X86FeaturesEnum value) {
switch (value) {
case X86_AES:
return "aes";
case X86_ERMS:
return "erms";
case X86_F16C:
return "f16c";
case X86_FMA3:
return "fma3";
case X86_VPCLMULQDQ:
return "vpclmulqdq";
case X86_BMI1:
return "bmi1";
case X86_BMI2:
return "bmi2";
case X86_SSSE3:
return "ssse3";
case X86_SSE4_1:
return "sse4_1";
case X86_SSE4_2:
return "sse4_2";
case X86_AVX:
return "avx";
case X86_AVX2:
return "avx2";
case X86_AVX512F:
return "avx512f";
case X86_AVX512CD:
return "avx512cd";
case X86_AVX512ER:
return "avx512er";
case X86_AVX512PF:
return "avx512pf";
case X86_AVX512BW:
return "avx512bw";
case X86_AVX512DQ:
return "avx512dq";
case X86_AVX512VL:
return "avx512vl";
case X86_AVX512IFMA:
return "avx512ifma";
case X86_AVX512VBMI:
return "avx512vbmi";
case X86_AVX512VBMI2:
return "avx512vbmi2";
case X86_AVX512VNNI:
return "avx512vnni";
case X86_AVX512BITALG:
return "avx512bitalg";
case X86_AVX512VPOPCNTDQ:
return "avx512vpopcntdq";
case X86_AVX512_4VNNIW:
return "avx512_4vnniw";
case X86_AVX512_4VBMI2:
return "avx512_4vbmi2";
case X86_SMX:
return "smx";
case X86_SGX:
return "sgx";
case X86_CX16:
return "cx16";
// X86_LAST_ names no feature; it falls through to the fallback string.
case X86_LAST_:
break;
}
// Reached for X86_LAST_ and for values that match no case.
return "unknown_feature";
}
// Returns a static string spelling the enumerator name of `uarch`
// (e.g. INTEL_SKL -> "INTEL_SKL"). Values without a case yield
// "unknown microarchitecture".
const char* GetX86MicroarchitectureName(X86Microarchitecture uarch) {
switch (uarch) {
case X86_UNKNOWN:
return "X86_UNKNOWN";
case INTEL_CORE:
return "INTEL_CORE";
case INTEL_PNR:
return "INTEL_PNR";
case INTEL_NHM:
return "INTEL_NHM";
case INTEL_ATOM_BNL:
return "INTEL_ATOM_BNL";
case INTEL_WSM:
return "INTEL_WSM";
case INTEL_SNB:
return "INTEL_SNB";
case INTEL_IVB:
return "INTEL_IVB";
case INTEL_ATOM_SMT:
return "INTEL_ATOM_SMT";
case INTEL_HSW:
return "INTEL_HSW";
case INTEL_BDW:
return "INTEL_BDW";
case INTEL_SKL:
return "INTEL_SKL";
case INTEL_ATOM_GMT:
return "INTEL_ATOM_GMT";
case INTEL_KBL:
return "INTEL_KBL";
case INTEL_CFL:
return "INTEL_CFL";
case INTEL_CNL:
return "INTEL_CNL";
case AMD_HAMMER:
return "AMD_HAMMER";
case AMD_K10:
return "AMD_K10";
case AMD_BOBCAT:
return "AMD_BOBCAT";
case AMD_BULLDOZER:
return "AMD_BULLDOZER";
case AMD_JAGUAR:
return "AMD_JAGUAR";
case AMD_ZEN:
return "AMD_ZEN";
}
// Reached only for values that match none of the cases above.
return "unknown microarchitecture";
}

View file

@ -0,0 +1,57 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/filesystem.h"
#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#if defined(_MSC_VER)
#include <io.h>
// MSVC variant: opens `filename` read-only via _open and returns the result
// of that call unchanged.
int CpuFeatures_OpenFile(const char* filename) {
return _open(filename, _O_RDONLY);
}
// MSVC variant: closes `file_descriptor` via _close; the result of _close is
// discarded.
void CpuFeatures_CloseFile(int file_descriptor) { _close(file_descriptor); }
// MSVC variant: reads up to `buffer_size` bytes from `file_descriptor` into
// `buffer` via _read and returns the result of that call unchanged.
int CpuFeatures_ReadFile(int file_descriptor, void* buffer,
size_t buffer_size) {
return _read(file_descriptor, buffer, buffer_size);
}
#else
#include <unistd.h>
// Opens `filename` read-only and returns the file descriptor, or -1 on
// failure. The open() call is repeated for as long as it fails with EINTR.
int CpuFeatures_OpenFile(const char* filename) {
  for (;;) {
    const int fd = open(filename, O_RDONLY);
    if (fd != -1L || errno != EINTR) {
      return fd;
    }
  }
}
// Closes `file_descriptor` via close(); the result of close() is discarded.
void CpuFeatures_CloseFile(int file_descriptor) { close(file_descriptor); }
// Reads up to `buffer_size` bytes from `file_descriptor` into `buffer`.
// Returns the number of bytes read (0 at end of file) or -1 on failure. The
// read() call is repeated for as long as it fails with EINTR.
int CpuFeatures_ReadFile(int file_descriptor, void* buffer,
                         size_t buffer_size) {
  for (;;) {
    const int bytes_read = read(file_descriptor, buffer, buffer_size);
    if (bytes_read != -1L || errno != EINTR) {
      return bytes_read;
    }
  }
}
#endif

View file

@ -0,0 +1,194 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdlib.h>
#include <string.h>
#include "cpu_features_macros.h"
#include "internal/filesystem.h"
#include "internal/hwcaps.h"
#include "internal/string_view.h"
// D(...) is a debug trace helper taking printf-style arguments.
// With NDEBUG defined it expands to nothing; otherwise it prints to stdout
// and flushes immediately.
#if defined(NDEBUG)
#define D(...)
#else
#include <stdio.h>
#define D(...) \
do { \
printf(__VA_ARGS__); \
fflush(stdout); \
} while (0)
#endif
// Selects which getauxval strategy this file compiles:
//  - HWCAPS_ANDROID_MIPS_OR_ARM: MIPS or ARM targets; getauxval is looked up
//    at run time through dlopen/dlsym.
//  - HWCAPS_REGULAR_LINUX: any other Linux/Android target; getauxval is
//    called directly.
//  - HWCAPS_SUPPORTED: defined when either of the above is; without it only
//    the empty fallback implementations at the end of the file are built.
#if defined(CPU_FEATURES_ARCH_MIPS) || defined(CPU_FEATURES_ARCH_ANY_ARM)
#define HWCAPS_ANDROID_MIPS_OR_ARM
#endif
#if defined(CPU_FEATURES_OS_LINUX_OR_ANDROID) && \
!defined(HWCAPS_ANDROID_MIPS_OR_ARM)
#define HWCAPS_REGULAR_LINUX
#endif
#if defined(HWCAPS_ANDROID_MIPS_OR_ARM) || defined(HWCAPS_REGULAR_LINUX)
#define HWCAPS_SUPPORTED
#endif
////////////////////////////////////////////////////////////////////////////////
// Implementation of GetElfHwcapFromGetauxval
////////////////////////////////////////////////////////////////////////////////
// On Linux we simply use getauxval.
#if defined(HWCAPS_REGULAR_LINUX)
#include <dlfcn.h>
#include <sys/auxv.h>
// Returns the auxiliary-vector value for `hwcap_type` as reported by
// getauxval().
static unsigned long GetElfHwcapFromGetauxval(uint32_t hwcap_type) {
  const unsigned long aux_value = getauxval(hwcap_type);
  return aux_value;
}
#endif // defined(HWCAPS_REGULAR_LINUX)
// On Android we probe the system's C library for a 'getauxval' function and
// call it if it exists, or return 0 for failure. This function is available
// since API level 20.
//
// This code does *NOT* check for '__ANDROID_API__ >= 20' to support the edge
// case where some NDK developers use headers for a platform that is newer than
// the one really targeted by their application. This is typically done to use
// newer native APIs only when running on more recent Android versions, and
// requires careful symbol management.
//
// Note that getauxval() can't really be re-implemented here, because its
// implementation does not parse /proc/self/auxv. Instead it depends on values
// that are passed by the kernel at process-init time to the C runtime
// initialization layer.
#if defined(HWCAPS_ANDROID_MIPS_OR_ARM)
#include <dlfcn.h>
#define AT_HWCAP 16
#define AT_HWCAP2 26
#define AT_PLATFORM 15
#define AT_BASE_PLATFORM 24
// Signature of the C library's getauxval().
typedef unsigned long getauxval_func_t(unsigned long);
// Looks up getauxval() in "libc.so" at run time and returns its result for
// `hwcap_type`. Returns 0 when the library cannot be opened or does not
// export getauxval.
//
// The full `unsigned long` result is returned: for AT_PLATFORM and
// AT_BASE_PLATFORM the value is a pointer (CpuFeatures_GetPlatformType casts
// it to char*), which a 32-bit return type would truncate on 64-bit targets.
static unsigned long GetElfHwcapFromGetauxval(uint32_t hwcap_type) {
  unsigned long ret = 0;
  void* libc_handle = NULL;
  getauxval_func_t* func = NULL;
  dlerror();  // Cleaning error state before calling dlopen.
  libc_handle = dlopen("libc.so", RTLD_NOW);
  if (!libc_handle) {
    D("Could not dlopen() C library: %s\n", dlerror());
    return 0;
  }
  func = (getauxval_func_t*)dlsym(libc_handle, "getauxval");
  if (!func) {
    D("Could not find getauxval() in C library\n");
  } else {
    // Note: getauxval() returns 0 on failure. Doesn't touch errno.
    ret = (*func)(hwcap_type);
  }
  dlclose(libc_handle);
  return ret;
}
#endif // defined(HWCAPS_ANDROID_MIPS_OR_ARM)
#if defined(HWCAPS_SUPPORTED)
////////////////////////////////////////////////////////////////////////////////
// Implementation of GetHardwareCapabilities for Android and Linux
////////////////////////////////////////////////////////////////////////////////
// Fallback when getauxval is not available, retrieves hwcaps from
// "/proc/self/auxv".
//
// The file is a sequence of (tag, value) pairs, each member being one
// `unsigned long` of the running process, terminated by a pair of zeros.
// Reading the members as `unsigned long` keeps the parser in step with the
// file on both 32-bit and 64-bit processes, and returning `unsigned long`
// preserves pointer-valued entries such as AT_PLATFORM.
//
// Returns the value stored for `hwcap_type`, or 0 when the file cannot be
// opened or read, or holds no such entry.
static unsigned long GetElfHwcapFromProcSelfAuxv(uint32_t hwcap_type) {
  struct {
    unsigned long tag;
    unsigned long value;
  } entry;
  unsigned long result = 0;
  const char filepath[] = "/proc/self/auxv";
  const int fd = CpuFeatures_OpenFile(filepath);
  if (fd < 0) {
    D("Could not open %s\n", filepath);
    return 0;
  }
  for (;;) {
    const int ret = CpuFeatures_ReadFile(fd, (char*)&entry, sizeof entry);
    if (ret < 0) {
      D("Error while reading %s\n", filepath);
      break;
    }
    // Detect end of list. Anything shorter than a whole entry (including the
    // 0 returned at end of file) leaves `entry` incomplete, so stop there too.
    if (ret != (int)sizeof entry || (entry.tag == 0 && entry.value == 0)) {
      break;
    }
    if (entry.tag == hwcap_type) {
      result = entry.value;
      break;
    }
  }
  CpuFeatures_CloseFile(fd);
  return result;
}
// Returns the auxiliary-vector value for `type`. getauxval is consulted
// first; when it yields 0 the value is looked up in "/proc/self/auxv"
// instead.
static unsigned long GetHardwareCapabilitiesFor(uint32_t type) {
  const unsigned long from_getauxval = GetElfHwcapFromGetauxval(type);
  if (from_getauxval) {
    return from_getauxval;
  }
  D("Parsing /proc/self/auxv to extract ELF hwcaps!\n");
  return GetElfHwcapFromProcSelfAuxv(type);
}
// Returns the AT_HWCAP and AT_HWCAP2 auxiliary-vector values of the running
// process in the `hwcaps` and `hwcaps2` members respectively.
HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) {
  const unsigned long hwcap_value = GetHardwareCapabilitiesFor(AT_HWCAP);
  const unsigned long hwcap2_value = GetHardwareCapabilitiesFor(AT_HWCAP2);
  HardwareCapabilities caps;
  caps.hwcaps = hwcap_value;
  caps.hwcaps2 = hwcap2_value;
  return caps;
}
// Zero-initialized template used as the starting value in
// CpuFeatures_GetPlatformType.
PlatformType kEmptyPlatformType;
// Returns the platform and base-platform names of the running process. The
// AT_PLATFORM and AT_BASE_PLATFORM auxiliary-vector values are treated as
// pointers to C strings; a member stays empty when its value is 0.
PlatformType CpuFeatures_GetPlatformType(void) {
  PlatformType result = kEmptyPlatformType;
  char* platform_name = (char*)GetHardwareCapabilitiesFor(AT_PLATFORM);
  char* base_platform_name =
      (char*)GetHardwareCapabilitiesFor(AT_BASE_PLATFORM);
  if (platform_name != NULL) {
    CpuFeatures_StringView_CopyString(str(platform_name), result.platform,
                                      sizeof(result.platform));
  }
  if (base_platform_name != NULL) {
    CpuFeatures_StringView_CopyString(str(base_platform_name),
                                      result.base_platform,
                                      sizeof(result.base_platform));
  }
  return result;
}
#else // (defined(HWCAPS_SUPPORTED)
// Zero-initialized value returned by the fallback
// CpuFeatures_GetPlatformType.
PlatformType kEmptyPlatformType;
// Fallback for platforms without hardware-capability support: always returns
// an empty PlatformType.
PlatformType CpuFeatures_GetPlatformType(void) {
  return kEmptyPlatformType;
}
////////////////////////////////////////////////////////////////////////////////
// Implementation of GetHardwareCapabilities for unsupported platforms.
////////////////////////////////////////////////////////////////////////////////
// Zero-initialized value returned by the fallback
// CpuFeatures_GetHardwareCapabilities.
const HardwareCapabilities kEmptyHardwareCapabilities;
// Fallback for platforms without hardware-capability support: always reports
// that no capability bit is set.
HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) {
  HardwareCapabilities none = kEmptyHardwareCapabilities;
  return none;
}
#endif

View file

@ -0,0 +1,51 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/linux_features_aggregator.h"
#include "internal/string_view.h"
// For each of the `configs_size` entries in `configs`, calls the entry's
// setter on `features` with the result of looking up the entry's
// proc_cpuinfo_flag word in `flags_line`. Every setter is called exactly
// once, so a missing flag is passed on as "not present".
void CpuFeatures_SetFromFlags(const size_t configs_size,
                              const CapabilityConfig* configs,
                              const StringView flags_line,
                              void* const features) {
  size_t index;
  for (index = 0; index != configs_size; ++index) {
    const CapabilityConfig* const entry = configs + index;
    entry->set_bit(features, CpuFeatures_StringView_HasWord(
                                 flags_line, entry->proc_cpuinfo_flag));
  }
}
// Returns true when every bit of `mask` is also set in `value`.
// A mask of 0 is always satisfied.
static bool IsSet(const uint32_t mask, const uint32_t value) {
  const uint32_t missing_bits = mask & ~value;
  return missing_bits == 0;
}
// Returns true when `hwcaps` contains every bit of `hwcaps_mask`, checked
// independently for the `hwcaps` and `hwcaps2` members.
static bool IsHwCapsSet(const HardwareCapabilities hwcaps_mask,
                        const HardwareCapabilities hwcaps) {
  if (!IsSet(hwcaps_mask.hwcaps, hwcaps.hwcaps)) {
    return false;
  }
  return IsSet(hwcaps_mask.hwcaps2, hwcaps.hwcaps2);
}
// For each of the `configs_size` entries in `configs` whose hwcaps_mask is
// fully contained in `hwcaps`, calls the entry's setter on `features` with
// `true`. Entries whose mask is not satisfied are skipped, so features that
// are already set are never cleared here.
void CpuFeatures_OverrideFromHwCaps(const size_t configs_size,
                                    const CapabilityConfig* configs,
                                    const HardwareCapabilities hwcaps,
                                    void* const features) {
  size_t index;
  for (index = 0; index != configs_size; ++index) {
    const CapabilityConfig* const entry = configs + index;
    if (!IsHwCapsSet(entry->hwcaps_mask, hwcaps)) {
      continue;
    }
    entry->set_bit(features, true);
  }
}

View file

@ -0,0 +1,131 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/stack_line_reader.h"
#include "internal/filesystem.h"
#include <assert.h>
#include <errno.h>
#include <stdio.h>
// Prepares `reader` to read lines from the file descriptor `fd`: the pending
// view is made empty at the start of the internal buffer and skip mode is
// switched off.
void StackLineReader_Initialize(StackLineReader* reader, int fd) {
  reader->fd = fd;
  reader->skip_mode = false;
  reader->view.size = 0;
  reader->view.ptr = reader->buffer;
}
// Replaces the content of buffer with bytes from the file.
// Returns the number of bytes now held in the view; 0 means end of file.
//
// A failed read is reported as end of file rather than asserted on: an I/O
// error is a run-time condition, and with NDEBUG an unchecked negative count
// would be stored into the unsigned view size as a huge length.
static int LoadFullBuffer(StackLineReader* reader) {
  int read = CpuFeatures_ReadFile(reader->fd, reader->buffer,
                                  STACK_LINE_READER_BUFFER_SIZE);
  if (read < 0) {
    read = 0;
  }
  reader->view.ptr = reader->buffer;
  reader->view.size = read;
  return read;
}
// Appends bytes from the file to buffer, filling the remaining space.
// Expects the pending view to start at the beginning of the buffer.
// Returns the number of bytes appended; 0 means end of file.
//
// A failed read is reported as end of file rather than asserted on: an I/O
// error is a run-time condition, and with NDEBUG an unchecked negative count
// would be added to the unsigned view size and corrupt it.
static int LoadMore(StackLineReader* reader) {
  char* const ptr = reader->buffer + reader->view.size;
  const size_t size_to_read = STACK_LINE_READER_BUFFER_SIZE - reader->view.size;
  int read = CpuFeatures_ReadFile(reader->fd, ptr, size_to_read);
  if (read < 0) {
    read = 0;
  }
  // Reading more than was asked for would be a bug in CpuFeatures_ReadFile.
  assert(read <= (int)size_to_read);
  reader->view.size += read;
  return read;
}
// Returns the index of the first '\n' in the reader's pending view, as
// reported by CpuFeatures_StringView_IndexOfChar (negative when there is
// none).
static int IndexOfEol(StackLineReader* reader) {
return CpuFeatures_StringView_IndexOfChar(reader->view, '\n');
}
// Moves the pending bytes to the start of the buffer (when they are not there
// already) and then tops the buffer up with bytes from the file.
// Returns the number of bytes that were appended.
static int BringToFrontAndLoadMore(StackLineReader* reader) {
  const bool has_pending = reader->view.size != 0;
  const bool is_shifted = reader->view.ptr != reader->buffer;
  if (has_pending && is_shifted) {
    // Source and destination may overlap, hence memmove.
    memmove(reader->buffer, reader->view.ptr, reader->view.size);
  }
  reader->view.ptr = reader->buffer;
  return LoadMore(reader);
}
// Discards input up to and including the next newline: the buffer is refilled
// with whole chunks from the file until one of them contains '\n' or the file
// is exhausted. When a newline is found, the view is left pointing just past
// it.
static void SkipToNextLine(StackLineReader* reader) {
  while (LoadFullBuffer(reader) != 0) {
    const int newline_at = IndexOfEol(reader);
    if (newline_at >= 0) {
      reader->view =
          CpuFeatures_StringView_PopFront(reader->view, newline_at + 1);
      return;
    }
  }
}
// Builds a LineResult from its three members: the text (`view`), whether the
// end of the file was reached (`eof`) and whether the text is a complete line
// (`full_line`).
static LineResult CreateLineResult(bool eof, bool full_line, StringView view) {
  LineResult outcome;
  outcome.line = view;
  outcome.full_line = full_line;
  outcome.eof = eof;
  return outcome;
}
// Helper methods to provide clearer semantic in StackLineReader_NextLine.
// Result for the last piece of text in the file: eof = true,
// full_line = true.
static LineResult CreateEOFLineResult(StringView view) {
return CreateLineResult(true, true, view);
}
// Result for a line that is only partially contained in `view`: eof = false,
// full_line = false.
static LineResult CreateTruncatedLineResult(StringView view) {
return CreateLineResult(false, false, view);
}
// Result for a complete line found before the end of the file.
static LineResult CreateValidLineResult(StringView view) {
  const bool eof = false;
  const bool full_line = true;
  return CreateLineResult(eof, full_line, view);
}
// Produces the next line of the file.
//  - Complete line: the text before the '\n' is returned (newline excluded)
//    and the reader advances past the newline.
//  - No newline and nothing more to read: an eof result carrying the bytes
//    that are still pending.
//  - No newline although the buffer was refilled: a truncated result carrying
//    the pending bytes; the rest of that line is dropped on the next call.
LineResult StackLineReader_NextLine(StackLineReader* reader) {
  int newline_pos;
  // The previous call handed out a truncated line: discard its remainder.
  if (reader->skip_mode) {
    SkipToNextLine(reader);
    reader->skip_mode = false;
  }
  newline_pos = IndexOfEol(reader);
  // Go back to the file only when no newline is pending and the view does not
  // already span the whole buffer.
  if (newline_pos < 0 && reader->view.size < STACK_LINE_READER_BUFFER_SIZE) {
    if (BringToFrontAndLoadMore(reader) == 0) {
      return CreateEOFLineResult(reader->view);
    }
    newline_pos = IndexOfEol(reader);
  }
  if (newline_pos >= 0) {
    const StringView line =
        CpuFeatures_StringView_KeepFront(reader->view, newline_pos);
    reader->view =
        CpuFeatures_StringView_PopFront(reader->view, newline_pos + 1);
    return CreateValidLineResult(line);
  }
  reader->skip_mode = true;
  return CreateTruncatedLineResult(reader->view);
}

View file

@ -0,0 +1,182 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/string_view.h"
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
// Returns the offset of the first occurrence of `c` in `view`, or -1 when the
// view is empty, has no storage, or does not contain `c`.
int CpuFeatures_StringView_IndexOfChar(const StringView view, char c) {
  const char* hit;
  if (!view.ptr || !view.size) {
    return -1;
  }
  hit = (const char*)memchr(view.ptr, c, view.size);
  if (!hit) {
    return -1;
  }
  return hit - view.ptr;
}
// Returns the offset of the first occurrence of `sub_view` inside `view`, or
// -1 when it does not occur. An empty `sub_view` is never found.
int CpuFeatures_StringView_IndexOf(const StringView view,
                                   const StringView sub_view) {
  StringView candidate = view;
  if (sub_view.size == 0) {
    return -1;
  }
  // Jump from one occurrence of the needle's first character to the next and
  // test each position for a full match.
  for (; candidate.size >= sub_view.size;
       candidate = CpuFeatures_StringView_PopFront(candidate, 1)) {
    const int first_char_pos =
        CpuFeatures_StringView_IndexOfChar(candidate, sub_view.ptr[0]);
    if (first_char_pos < 0) {
      return -1;
    }
    candidate = CpuFeatures_StringView_PopFront(candidate, first_char_pos);
    if (CpuFeatures_StringView_StartsWith(candidate, sub_view)) {
      return candidate.ptr - view.ptr;
    }
  }
  return -1;
}
// Returns true when both views have the same length and the same bytes.
// Two empty views are equal no matter where (or whether) they point.
bool CpuFeatures_StringView_IsEquals(const StringView a, const StringView b) {
  if (a.size != b.size) {
    return false;
  }
  // Answer the empty and same-storage cases without touching memory: an empty
  // view may carry a null ptr, and memcmp requires valid pointers even for a
  // zero length.
  if (a.size == 0 || a.ptr == b.ptr) {
    return true;
  }
  return memcmp(a.ptr, b.ptr, b.size) == 0;
}
// Returns true when `a` begins with the bytes of `b`. Always false when either
// view has no storage, when `b` is empty, or when `b` is longer than `a`.
bool CpuFeatures_StringView_StartsWith(const StringView a, const StringView b) {
  if (!a.ptr || !b.ptr) {
    return false;
  }
  if (b.size == 0 || a.size < b.size) {
    return false;
  }
  return memcmp(a.ptr, b.ptr, b.size) == 0;
}
// Returns `str_view` without its first `count` bytes. Asking for more bytes
// than the view holds yields kEmptyStringView.
StringView CpuFeatures_StringView_PopFront(const StringView str_view,
                                           size_t count) {
  return count <= str_view.size
             ? view(str_view.ptr + count, str_view.size - count)
             : kEmptyStringView;
}
// Returns `str_view` without its last `count` bytes. Asking for more bytes
// than the view holds yields kEmptyStringView.
StringView CpuFeatures_StringView_PopBack(const StringView str_view,
                                          size_t count) {
  return count <= str_view.size ? view(str_view.ptr, str_view.size - count)
                                : kEmptyStringView;
}
// Returns the first `count` bytes of `str_view`. When `count` exceeds the
// view's length the view is returned unchanged.
StringView CpuFeatures_StringView_KeepFront(const StringView str_view,
                                            size_t count) {
  if (count > str_view.size) {
    return str_view;
  }
  return view(str_view.ptr, count);
}
// Returns the first character of `view`. The view must be non-empty and backed
// by storage; both conditions are asserted.
char CpuFeatures_StringView_Front(const StringView view) {
  assert(view.size);
  assert(view.ptr);
  return *view.ptr;
}
// Returns the last character of `view`. The view must be non-empty and backed
// by storage; both conditions are asserted, mirroring
// CpuFeatures_StringView_Front.
char CpuFeatures_StringView_Back(const StringView view) {
  assert(view.size);
  assert(view.ptr);
  return view.ptr[view.size - 1];
}
// Returns `view` with leading and trailing whitespace (as classified by
// isspace) removed.
StringView CpuFeatures_StringView_TrimWhitespace(StringView view) {
  // isspace() is only defined for EOF and values representable as unsigned
  // char; a plain char may be negative (bytes >= 0x80 on signed-char
  // platforms), so convert before classifying.
  while (view.size &&
         isspace((unsigned char)CpuFeatures_StringView_Front(view)))
    view = CpuFeatures_StringView_PopFront(view, 1);
  while (view.size && isspace((unsigned char)CpuFeatures_StringView_Back(view)))
    view = CpuFeatures_StringView_PopBack(view, 1);
  return view;
}
// Maps one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its value 0..15.
// Any other character yields -1.
static int HexValue(const char c) {
  char range_start;
  int range_value;
  if (c >= '0' && c <= '9') {
    range_start = '0';
    range_value = 0;
  } else if (c >= 'a' && c <= 'f') {
    range_start = 'a';
    range_value = 10;
  } else if (c >= 'A' && c <= 'F') {
    range_start = 'A';
    range_value = 10;
  } else {
    return -1;
  }
  return c - range_start + range_value;
}
// Parses `view` as a non-negative number written in `base`, using HexValue to
// decode each digit.
// Returns -1 if view contains non digits (any character that is not a digit
// of `base`) or if the value does not fit in an int. An empty view yields 0.
static int ParsePositiveNumberWithBase(const StringView view, int base) {
  int result = 0;
  StringView remainder = view;
  for (; remainder.size;
       remainder = CpuFeatures_StringView_PopFront(remainder, 1)) {
    const int value = HexValue(CpuFeatures_StringView_Front(remainder));
    if (value < 0 || value >= base) return -1;
    // Reject the digit if result * base + value would exceed INT_MAX: signed
    // overflow is undefined behaviour. `base` is positive here because the
    // check above already returned for any base <= value.
    if (result > (INT_MAX - value) / base) return -1;
    result = (result * base) + value;
  }
  return result;
}
// Parses `view` as a non-negative integer: hexadecimal when it starts with
// "0x", decimal otherwise. Returns -1 for an empty view, or whatever
// ParsePositiveNumberWithBase reports for the digits.
int CpuFeatures_StringView_ParsePositiveNumber(const StringView view) {
  if (view.size == 0) {
    return -1;
  }
  {
    const StringView hex_prefix = str("0x");
    if (!CpuFeatures_StringView_StartsWith(view, hex_prefix)) {
      return ParsePositiveNumberWithBase(view, 10);
    }
    // Strip the prefix and decode the rest as base 16.
    return ParsePositiveNumberWithBase(
        CpuFeatures_StringView_PopFront(view, hex_prefix.size), 16);
  }
}
// Copies `src` into `dst` as a NUL-terminated C string, truncating it to fit
// `dst_size` bytes (terminator included). Does nothing when `dst_size` is 0.
void CpuFeatures_StringView_CopyString(const StringView src, char* dst,
                                       size_t dst_size) {
  if (dst_size > 0) {
    const size_t max_copy_size = dst_size - 1;
    const size_t copy_size =
        src.size > max_copy_size ? max_copy_size : src.size;
    // Skip the call for empty copies: an empty view may carry a null ptr, and
    // memcpy requires valid pointers even for a zero length.
    if (copy_size > 0) {
      memcpy(dst, src.ptr, copy_size);
    }
    dst[copy_size] = '\0';
  }
}
bool CpuFeatures_StringView_HasWord(const StringView line,
const char* const word_str) {
const StringView word = str(word_str);
StringView remainder = line;
for (;;) {
const int index_of_word = CpuFeatures_StringView_IndexOf(remainder, word);
if (index_of_word < 0) {
return false;
} else {
const StringView before =
CpuFeatures_StringView_KeepFront(line, index_of_word);
const StringView after =
CpuFeatures_StringView_PopFront(line, index_of_word + word.size);
const bool valid_before =
before.size == 0 || CpuFeatures_StringView_Back(before) == ' ';
const bool valid_after =
after.size == 0 || CpuFeatures_StringView_Front(after) == ' ';
if (valid_before && valid_after) return true;
remainder =
CpuFeatures_StringView_PopFront(remainder, index_of_word + word.size);
}
}
return false;
}
// Splits `line` at the first ": " separator. On success stores the
// whitespace-trimmed text after the separator in `*value` and the
// whitespace-trimmed text before it in `*key`, then returns true. Returns
// false, leaving both outputs untouched, when the separator is absent.
bool CpuFeatures_StringView_GetAttributeKeyValue(const StringView line,
                                                 StringView* key,
                                                 StringView* value) {
  const StringView sep = str(": ");
  const int sep_pos = CpuFeatures_StringView_IndexOf(line, sep);
  if (sep_pos >= 0) {
    const StringView raw_value =
        CpuFeatures_StringView_PopFront(line, sep_pos + sep.size);
    const StringView raw_key = CpuFeatures_StringView_KeepFront(line, sep_pos);
    *value = CpuFeatures_StringView_TrimWhitespace(raw_value);
    *key = CpuFeatures_StringView_TrimWhitespace(raw_key);
    return true;
  }
  return false;
}

View file

@ -0,0 +1,237 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cpu_features_macros.h"
#include "cpuinfo_aarch64.h"
#include "cpuinfo_arm.h"
#include "cpuinfo_mips.h"
#include "cpuinfo_ppc.h"
#include "cpuinfo_x86.h"
// Writes `str` to stdout as a double-quoted JSON string. A null `str` prints
// as an empty string.
//  - '"', '\\' and '/' are preceded by a backslash.
//  - Backspace, form feed, newline, carriage return and tab are written as
//    their two-character escapes (\b \f \n \r \t); emitting the raw control
//    byte after a backslash would not be valid JSON.
//  - Any other character below 0x20 is written as \u00XX.
//  - Everything else is copied through unchanged.
static void PrintEscapedAscii(const char* str) {
  putchar('"');
  for (; str && *str; ++str) {
    const char c = *str;
    char escape_letter = '\0';  // Character to print after the backslash.
    switch (c) {
      case '\"':
      case '\\':
      case '/':
        escape_letter = c;
        break;
      case '\b':
        escape_letter = 'b';
        break;
      case '\f':
        escape_letter = 'f';
        break;
      case '\n':
        escape_letter = 'n';
        break;
      case '\r':
        escape_letter = 'r';
        break;
      case '\t':
        escape_letter = 't';
        break;
      default:
        break;
    }
    if (escape_letter != '\0') {
      putchar('\\');
      putchar(escape_letter);
    } else if ((unsigned char)c < 0x20) {
      printf("\\u%04x", (unsigned)(unsigned char)c);
    } else {
      putchar(c);
    }
  }
  putchar('"');
}
// Single-purpose output primitives used to fill the Printer callback table.

// Prints nothing; used where a callback slot needs no output.
static void PrintVoid(void) {
}

// Writes ',' to stdout.
static void PrintComma(void) {
  putc(',', stdout);
}

// Writes a newline to stdout.
static void PrintLineFeed(void) {
  putc('\n', stdout);
}

// Writes '{' to stdout.
static void PrintOpenBrace(void) {
  putc('{', stdout);
}

// Writes '}' to stdout.
static void PrintCloseBrace(void) {
  putc('}', stdout);
}

// Writes '[' to stdout.
static void PrintOpenBracket(void) {
  putc('[', stdout);
}

// Writes ']' to stdout.
static void PrintCloseBracket(void) {
  putc(']', stdout);
}
// Writes `field` to stdout verbatim.
static void PrintString(const char* field) {
  static const char kFormat[] = "%s";
  printf(kFormat, field);
}

// Writes `field` left-aligned in a 15-character column, followed by " : ".
static void PrintAlignedHeader(const char* field) {
  static const char kFormat[] = "%-15s : ";
  printf(kFormat, field);
}

// Writes `value` to stdout in decimal.
static void PrintIntValue(int value) {
  static const char kFormat[] = "%d";
  printf(kFormat, value);
}

// Writes `value` to stdout in decimal (width 3) followed by its hexadecimal
// form in parentheses (at least two upper-case digits).
static void PrintDecHexValue(int value) {
  static const char kFormat[] = "%3d (0x%02X)";
  printf(kFormat, value, value);
}
// Writes a JSON object key: `field` as an escaped, quoted string followed by
// ':'.
static void PrintJsonHeader(const char* field) {
  PrintEscapedAscii(field);
  putc(':', stdout);
}
// Table of output callbacks describing one output format. The JSON and plain
// text formats each provide their own set (see getJsonPrinter and
// getTextPrinter).
typedef struct {
// Called once before any field is printed.
void (*Start)(void);
// Called before the first element of an array.
void (*ArrayStart)(void);
// Called between two consecutive array elements.
void (*ArraySeparator)(void);
// Called after the last element of an array.
void (*ArrayEnd)(void);
// Prints a string value.
void (*PrintString)(const char* value);
// Prints an integer value.
void (*PrintValue)(int value);
// Called after a field's value has been printed.
void (*EndField)(void);
// Prints the name of a field, before its value.
void (*StartField)(const char* field);
// Called once after all fields have been printed.
void (*End)(void);
} Printer;
static Printer getJsonPrinter(void) {
return (Printer){
.Start = &PrintOpenBrace,
.ArrayStart = &PrintOpenBracket,
.ArraySeparator = &PrintComma,
.ArrayEnd = &PrintCloseBracket,
.PrintString = &PrintEscapedAscii,
.PrintValue = &PrintIntValue,
.EndField = &PrintComma,
.StartField = &PrintJsonHeader,
.End = &PrintCloseBrace,
};
}
static Printer getTextPrinter(void) {
return (Printer){
.Start = &PrintVoid,
.ArrayStart = &PrintVoid,
.ArraySeparator = &PrintComma,
.ArrayEnd = &PrintVoid,
.PrintString = &PrintString,
.PrintValue = &PrintDecHexValue,
.EndField = &PrintLineFeed,
.StartField = &PrintAlignedHeader,
.End = &PrintVoid,
};
}
// Prints a named numeric field: the field name, the value in whatever numeric
// form the printer uses, then the printer's end-of-field marker.
static void PrintN(const Printer p, const char* field, int value) {
  (*p.StartField)(field);
  (*p.PrintValue)(value);
  (*p.EndField)();
}
// Prints a named string field: the field name, the string value, then the
// printer's end-of-field marker.
static void PrintS(const Printer p, const char* field, const char* value) {
  (*p.StartField)(field);
  (*p.PrintString)(value);
  (*p.EndField)();
}
// qsort-style comparator for an array of C strings: each argument points to
// an element of the array (a `const char*`), and the strings are ordered with
// strcmp.
static int cmp(const void* p1, const void* p2) {
  const char* const left = *(const char* const*)p1;
  const char* const right = *(const char* const*)p2;
  return strcmp(left, right);
}
// Defines `static void PrintFlags(const Printer p, const FeatureType*)` for
// one architecture.
//   HasFeature(features, i)  - non-zero when feature number `i` is set.
//   FeatureName(i)           - display name of feature number `i`.
//   FeatureType              - the architecture's feature structure.
//   LastEnum                 - number of feature values to scan (0..LastEnum-1).
// The generated function collects the names of all set features, sorts them
// with strcmp ordering, and prints them as an array in a field named "flags".
// The field is not followed by EndField.
// (Comments are kept outside the macro body: a `//` comment on a line ending
// in a backslash would swallow the following line.)
#define DEFINE_PRINT_FLAGS(HasFeature, FeatureName, FeatureType, LastEnum) \
static void PrintFlags(const Printer p, const FeatureType* features) { \
size_t i; \
const char* ptrs[LastEnum] = {0}; \
size_t count = 0; \
for (i = 0; i < LastEnum; ++i) { \
if (HasFeature(features, i)) { \
ptrs[count] = FeatureName(i); \
++count; \
} \
} \
qsort(ptrs, count, sizeof(char*), cmp); \
p.StartField("flags"); \
p.ArrayStart(); \
for (i = 0; i < count; ++i) { \
if (i > 0) p.ArraySeparator(); \
p.PrintString(ptrs[i]); \
} \
p.ArrayEnd(); \
}
// Instantiate PrintFlags for the architecture selected at compile time. At
// most one branch is active; each passes that architecture's feature
// accessor, feature-name accessor, feature type and enum end marker.
#if defined(CPU_FEATURES_ARCH_X86)
DEFINE_PRINT_FLAGS(GetX86FeaturesEnumValue, GetX86FeaturesEnumName, X86Features,
X86_LAST_)
#elif defined(CPU_FEATURES_ARCH_ARM)
DEFINE_PRINT_FLAGS(GetArmFeaturesEnumValue, GetArmFeaturesEnumName, ArmFeatures,
ARM_LAST_)
#elif defined(CPU_FEATURES_ARCH_AARCH64)
DEFINE_PRINT_FLAGS(GetAarch64FeaturesEnumValue, GetAarch64FeaturesEnumName,
Aarch64Features, AARCH64_LAST_)
#elif defined(CPU_FEATURES_ARCH_MIPS)
DEFINE_PRINT_FLAGS(GetMipsFeaturesEnumValue, GetMipsFeaturesEnumName,
MipsFeatures, MIPS_LAST_)
#elif defined(CPU_FEATURES_ARCH_PPC)
DEFINE_PRINT_FLAGS(GetPPCFeaturesEnumValue, GetPPCFeaturesEnumName, PPCFeatures,
PPC_LAST_)
#endif
// Prints every field known for the CPU this binary was built for: an "arch"
// field, the architecture-specific identification fields, and finally the
// "flags" array. Exactly one branch is compiled in, chosen by the
// CPU_FEATURES_ARCH_* macro; on an unrecognised architecture nothing is
// printed.
static void PrintFeatures(const Printer printer) {
#if defined(CPU_FEATURES_ARCH_X86)
  const X86Info x86 = GetX86Info();
  char brand[49];
  FillX86BrandString(brand);
  PrintS(printer, "arch", "x86");
  PrintS(printer, "brand", brand);
  PrintN(printer, "family", x86.family);
  PrintN(printer, "model", x86.model);
  PrintN(printer, "stepping", x86.stepping);
  PrintS(printer, "uarch",
         GetX86MicroarchitectureName(GetX86Microarchitecture(&x86)));
  PrintFlags(printer, &x86.features);
#elif defined(CPU_FEATURES_ARCH_ARM)
  const ArmInfo arm = GetArmInfo();
  PrintS(printer, "arch", "ARM");
  PrintN(printer, "implementer", arm.implementer);
  PrintN(printer, "architecture", arm.architecture);
  PrintN(printer, "variant", arm.variant);
  PrintN(printer, "part", arm.part);
  PrintN(printer, "revision", arm.revision);
  PrintFlags(printer, &arm.features);
#elif defined(CPU_FEATURES_ARCH_AARCH64)
  const Aarch64Info aarch64 = GetAarch64Info();
  PrintS(printer, "arch", "aarch64");
  PrintN(printer, "implementer", aarch64.implementer);
  PrintN(printer, "variant", aarch64.variant);
  PrintN(printer, "part", aarch64.part);
  PrintN(printer, "revision", aarch64.revision);
  PrintFlags(printer, &aarch64.features);
#elif defined(CPU_FEATURES_ARCH_MIPS)
  const MipsInfo mips = GetMipsInfo();
  PrintS(printer, "arch", "mips");
  PrintFlags(printer, &mips.features);
#elif defined(CPU_FEATURES_ARCH_PPC)
  const PPCInfo ppc = GetPPCInfo();
  const PPCPlatformStrings ppc_strings = GetPPCPlatformStrings();
  PrintS(printer, "arch", "ppc");
  PrintS(printer, "platform", ppc_strings.platform);
  PrintS(printer, "model", ppc_strings.model);
  PrintS(printer, "machine", ppc_strings.machine);
  PrintS(printer, "cpu", ppc_strings.cpu);
  PrintS(printer, "instruction set", ppc_strings.type.platform);
  PrintS(printer, "microarchitecture", ppc_strings.type.base_platform);
  PrintFlags(printer, &ppc.features);
#endif
}
// Prints the command-line help to stdout. `name` is the program name shown in
// the "Usage:" line.
static void showUsage(const char* name) {
  fputs("\n", stdout);
  printf("Usage: %s [options]\n", name);
  fputs(
      " Options:\n"
      " -h | --help Show help message.\n"
      " -j | --json Format output as json instead of plain text.\n"
      "\n",
      stdout);
}
// Entry point: prints the CPU description in plain text, or as JSON when
// -j / --json is given.
// Any other argument prints the usage text and ends the program at once:
// with EXIT_SUCCESS for -h / --help, with EXIT_FAILURE otherwise.
int main(int argc, char** argv) {
  Printer printer = getTextPrinter();
  int arg_index;
  for (arg_index = 1; arg_index < argc; ++arg_index) {
    const char* const option = argv[arg_index];
    const int wants_json =
        strcmp(option, "-j") == 0 || strcmp(option, "--json") == 0;
    if (wants_json) {
      printer = getJsonPrinter();
      continue;
    }
    showUsage(argv[0]);
    if (strcmp(option, "-h") == 0 || strcmp(option, "--help") == 0) {
      return EXIT_SUCCESS;
    }
    return EXIT_FAILURE;
  }
  printer.Start();
  PrintFeatures(printer);
  printer.End();
  PrintLineFeed();
  return EXIT_SUCCESS;
}

View file

@ -0,0 +1,79 @@
#
# libraries for tests
#
# Every target below is built from the library sources in ../src or from the
# test doubles in this directory, and is compiled with CPU_FEATURES_TEST
# defined.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF) # prefer use of -std=c++11 instead of -std=gnu++11
include_directories(../include)
add_definitions(-DCPU_FEATURES_TEST)
##------------------------------------------------------------------------------
# String view helpers.
add_library(string_view ../src/string_view.c)
##------------------------------------------------------------------------------
# Filesystem test double.
add_library(filesystem_for_testing filesystem_for_testing.cc)
##------------------------------------------------------------------------------
# Hardware capabilities test double.
add_library(hwcaps_for_testing hwcaps_for_testing.cc)
target_link_libraries(hwcaps_for_testing filesystem_for_testing)
##------------------------------------------------------------------------------
# Line reader built with a 1024-byte buffer.
add_library(stack_line_reader ../src/stack_line_reader.c)
target_compile_definitions(stack_line_reader PUBLIC STACK_LINE_READER_BUFFER_SIZE=1024)
target_link_libraries(stack_line_reader string_view)
##------------------------------------------------------------------------------
# Same line reader source built with a 16-byte buffer and linked against the
# filesystem test double; used by stack_line_reader_test.
add_library(stack_line_reader_for_test ../src/stack_line_reader.c)
target_compile_definitions(stack_line_reader_for_test PUBLIC STACK_LINE_READER_BUFFER_SIZE=16)
target_link_libraries(stack_line_reader_for_test string_view filesystem_for_testing)
##------------------------------------------------------------------------------
# Aggregate library linked by the feature aggregator and cpuinfo_* tests.
add_library(all_libraries ../src/stack_line_reader.c ../src/linux_features_aggregator.c)
target_link_libraries(all_libraries hwcaps_for_testing stack_line_reader string_view)
#
# tests
#
# link_libraries() applies to every target created after this point, so each
# test executable below is linked with gtest and gmock_main. Each executable
# is registered with CTest under its own name.
link_libraries(gtest gmock_main)
## bit_utils_test
add_executable(bit_utils_test bit_utils_test.cc)
target_link_libraries(bit_utils_test)
add_test(NAME bit_utils_test COMMAND bit_utils_test)
##------------------------------------------------------------------------------
## string_view_test
add_executable(string_view_test string_view_test.cc ../src/string_view.c)
target_link_libraries(string_view_test string_view)
add_test(NAME string_view_test COMMAND string_view_test)
##------------------------------------------------------------------------------
## stack_line_reader_test
add_executable(stack_line_reader_test stack_line_reader_test.cc)
target_link_libraries(stack_line_reader_test stack_line_reader_for_test)
add_test(NAME stack_line_reader_test COMMAND stack_line_reader_test)
##------------------------------------------------------------------------------
## linux_features_aggregator_test
add_executable(linux_features_aggregator_test linux_features_aggregator_test.cc)
target_link_libraries(linux_features_aggregator_test all_libraries)
add_test(NAME linux_features_aggregator_test COMMAND linux_features_aggregator_test)
##------------------------------------------------------------------------------
## cpuinfo_x86_test
add_executable(cpuinfo_x86_test cpuinfo_x86_test.cc ../src/cpuinfo_x86.c)
target_link_libraries(cpuinfo_x86_test all_libraries)
add_test(NAME cpuinfo_x86_test COMMAND cpuinfo_x86_test)
##------------------------------------------------------------------------------
## cpuinfo_arm_test
add_executable(cpuinfo_arm_test cpuinfo_arm_test.cc ../src/cpuinfo_arm.c)
target_link_libraries(cpuinfo_arm_test all_libraries)
add_test(NAME cpuinfo_arm_test COMMAND cpuinfo_arm_test)
##------------------------------------------------------------------------------
## cpuinfo_aarch64_test
add_executable(cpuinfo_aarch64_test cpuinfo_aarch64_test.cc ../src/cpuinfo_aarch64.c)
target_link_libraries(cpuinfo_aarch64_test all_libraries)
add_test(NAME cpuinfo_aarch64_test COMMAND cpuinfo_aarch64_test)
##------------------------------------------------------------------------------
## cpuinfo_mips_test
add_executable(cpuinfo_mips_test cpuinfo_mips_test.cc ../src/cpuinfo_mips.c)
target_link_libraries(cpuinfo_mips_test all_libraries)
add_test(NAME cpuinfo_mips_test COMMAND cpuinfo_mips_test)
##------------------------------------------------------------------------------
## cpuinfo_ppc_test
add_executable(cpuinfo_ppc_test cpuinfo_ppc_test.cc ../src/cpuinfo_ppc.c)
target_link_libraries(cpuinfo_ppc_test all_libraries)
add_test(NAME cpuinfo_ppc_test COMMAND cpuinfo_ppc_test)

View file

@ -0,0 +1,53 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/bit_utils.h"
#include "gtest/gtest.h"
namespace cpu_features {
namespace {

// IsBitSet() must report exactly the bits that are set, for single-bit
// patterns as well as for the all-zero and all-one words.
TEST(UtilsTest, IsBitSet) {
  for (size_t set_index = 0; set_index < 32; ++set_index) {
    const uint32_t single_bit = 1UL << set_index;
    for (size_t probe = 0; probe < 32; ++probe) {
      EXPECT_EQ(IsBitSet(single_bit, probe), probe == set_index);
    }
  }
  // testing 0, all bits should be 0.
  for (size_t probe = 0; probe < 32; ++probe) {
    EXPECT_FALSE(IsBitSet(0, probe));
  }
  // testing ~0, all bits should be 1.
  for (size_t probe = 0; probe < 32; ++probe) {
    EXPECT_TRUE(IsBitSet(-1, probe));
  }
}

// ExtractBitRange(value, msb, lsb) must return the bits msb..lsb of value,
// shifted down to bit 0.
TEST(UtilsTest, ExtractBitRange) {
  // Extracting all bits gives the same number.
  EXPECT_EQ(ExtractBitRange(123, 31, 0), 123);
  // Extracting 1 bit gives parity.
  EXPECT_EQ(ExtractBitRange(123, 0, 0), 1);
  EXPECT_EQ(ExtractBitRange(122, 0, 0), 0);
  // A nibble in the middle of a byte.
  EXPECT_EQ(ExtractBitRange(0xF0, 7, 4), 0xF);
  // A value shifted left by two is recovered from bits 10..2.
  EXPECT_EQ(ExtractBitRange(0x42 << 2, 10, 2), 0x42);
}

}  // namespace
}  // namespace cpu_features

View file

@ -0,0 +1,74 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpuinfo_aarch64.h"
#include "filesystem_for_testing.h"
#include "hwcaps_for_testing.h"
#include "gtest/gtest.h"
namespace cpu_features {
namespace {

// Clears both hardware capability words so that detection has to rely on the
// fake /proc/cpuinfo.
void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }

// Features must be picked up from the hardware capability bits alone.
TEST(CpuinfoAarch64Test, FromHardwareCap) {
  SetHardwareCapabilities(AARCH64_HWCAP_FP | AARCH64_HWCAP_AES, 0);
  GetEmptyFilesystem();  // disabling /proc/cpuinfo
  const Aarch64Info cpu = GetAarch64Info();
  EXPECT_TRUE(cpu.features.fp);
  EXPECT_FALSE(cpu.features.asimd);
  EXPECT_TRUE(cpu.features.aes);
  EXPECT_FALSE(cpu.features.pmull);
  EXPECT_FALSE(cpu.features.sha1);
  EXPECT_FALSE(cpu.features.sha2);
  EXPECT_FALSE(cpu.features.crc32);
}

// Identification fields and features must be parsed from /proc/cpuinfo.
TEST(CpuinfoAarch64Test, ARMCortexA53) {
  DisableHardwareCapabilities();
  GetEmptyFilesystem().CreateFile("/proc/cpuinfo",
                                  R"(Processor : AArch64 Processor rev 3 (aarch64)
processor : 0
processor : 1
processor : 2
processor : 3
processor : 4
processor : 5
processor : 6
processor : 7
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32
CPU implementer : 0x41
CPU architecture: AArch64
CPU variant : 0x0
CPU part : 0xd03
CPU revision : 3)");
  const Aarch64Info cpu = GetAarch64Info();
  EXPECT_EQ(cpu.implementer, 0x41);
  EXPECT_EQ(cpu.variant, 0x0);
  EXPECT_EQ(cpu.part, 0xd03);
  EXPECT_EQ(cpu.revision, 3);
  EXPECT_TRUE(cpu.features.fp);
  EXPECT_TRUE(cpu.features.asimd);
  EXPECT_TRUE(cpu.features.aes);
  EXPECT_TRUE(cpu.features.pmull);
  EXPECT_TRUE(cpu.features.sha1);
  EXPECT_TRUE(cpu.features.sha2);
  EXPECT_TRUE(cpu.features.crc32);
}

}  // namespace
}  // namespace cpu_features

View file

@ -0,0 +1,182 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpuinfo_arm.h"
#include "filesystem_for_testing.h"
#include "hwcaps_for_testing.h"
#include "gtest/gtest.h"
namespace cpu_features {
namespace {

// Clears both hardware capability words so that detection has to rely on the
// fake /proc/cpuinfo. Every /proc/cpuinfo based test calls this first so that
// it does not depend on the capabilities left behind by an earlier test.
void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }

// Features must be picked up from the hardware capability bits alone.
TEST(CpuinfoArmTest, FromHardwareCap) {
  SetHardwareCapabilities(ARM_HWCAP_NEON, ARM_HWCAP2_AES | ARM_HWCAP2_CRC32);
  GetEmptyFilesystem();  // disabling /proc/cpuinfo
  const auto info = GetArmInfo();
  EXPECT_TRUE(info.features.vfp);    // triggered by vfpv3
  EXPECT_TRUE(info.features.vfpv3);  // triggered by neon
  EXPECT_TRUE(info.features.neon);
  EXPECT_TRUE(info.features.aes);
  EXPECT_TRUE(info.features.crc32);
  EXPECT_FALSE(info.features.vfpv4);
  EXPECT_FALSE(info.features.iwmmxt);
  EXPECT_FALSE(info.features.vfpv3d16);
  EXPECT_FALSE(info.features.idiva);
  EXPECT_FALSE(info.features.idivt);
  EXPECT_FALSE(info.features.pmull);
  EXPECT_FALSE(info.features.sha1);
  EXPECT_FALSE(info.features.sha2);
}

// Identification fields and features must be parsed from /proc/cpuinfo.
TEST(CpuinfoArmTest, ODroidFromCpuInfo) {
  DisableHardwareCapabilities();
  auto& fs = GetEmptyFilesystem();
  fs.CreateFile("/proc/cpuinfo", R"(processor : 0
model name : ARMv7 Processor rev 3 (v71)
BogoMIPS : 120.00
Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae
CPU implementer : 0x41
CPU architecture: 7
CPU variant : 0x2
CPU part : 0xc0f
CPU revision : 3)");
  const auto info = GetArmInfo();
  EXPECT_EQ(info.implementer, 0x41);
  EXPECT_EQ(info.variant, 0x2);
  EXPECT_EQ(info.part, 0xc0f);
  EXPECT_EQ(info.revision, 3);
  EXPECT_EQ(info.architecture, 7);
  EXPECT_TRUE(info.features.vfp);
  EXPECT_FALSE(info.features.iwmmxt);
  EXPECT_TRUE(info.features.neon);
  EXPECT_TRUE(info.features.vfpv3);
  EXPECT_FALSE(info.features.vfpv3d16);
  EXPECT_TRUE(info.features.vfpv4);
  EXPECT_TRUE(info.features.idiva);
  EXPECT_TRUE(info.features.idivt);
  EXPECT_FALSE(info.features.aes);
  EXPECT_FALSE(info.features.pmull);
  EXPECT_FALSE(info.features.sha1);
  EXPECT_FALSE(info.features.sha2);
  EXPECT_FALSE(info.features.crc32);
}

// http://code.google.com/p/android/issues/detail?id=10812
// The file claims architecture 7 for an ARMv6-compatible processor; the
// reported architecture must be 6.
TEST(CpuinfoArmTest, InvalidArmv7) {
  DisableHardwareCapabilities();
  auto& fs = GetEmptyFilesystem();
  fs.CreateFile("/proc/cpuinfo",
                R"(Processor : ARMv6-compatible processor rev 6 (v6l)
BogoMIPS : 199.47
Features : swp half thumb fastmult vfp edsp java
CPU implementer : 0x41
CPU architecture: 7
CPU variant : 0x0
CPU part : 0xb76
CPU revision : 6
Hardware : SPICA
Revision : 0020
Serial : 33323613546d00ec )");
  const auto info = GetArmInfo();
  EXPECT_EQ(info.architecture, 6);
}

// https://crbug.com/341598.
// The file lists neon, yet the feature must be reported as absent.
TEST(CpuinfoArmTest, InvalidNeon) {
  // Reset the capabilities like every other /proc/cpuinfo test so the result
  // does not depend on which test ran before this one.
  DisableHardwareCapabilities();
  auto& fs = GetEmptyFilesystem();
  fs.CreateFile("/proc/cpuinfo",
                R"(Processor: ARMv7 Processory rev 0 (v71)
processor: 0
BogoMIPS: 13.50
Processor: 1
BogoMIPS: 13.50
Features: swp half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt
CPU implementer : 0x51
CPU architecture: 7
CPU variant: 0x1
CPU part: 0x04d
CPU revision: 0
Hardware: SAMSUNG M2
Revision: 0010
Serial: 00001e030000354e)");
  const auto info = GetArmInfo();
  EXPECT_FALSE(info.features.neon);
}

// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV
// support.
TEST(CpuinfoArmTest, Nexus4_0x510006f2) {
  DisableHardwareCapabilities();
  auto& fs = GetEmptyFilesystem();
  fs.CreateFile("/proc/cpuinfo",
                R"(CPU implementer : 0x51
CPU architecture: 7
CPU variant : 0x0
CPU part : 0x6f
CPU revision : 2)");
  const auto info = GetArmInfo();
  EXPECT_TRUE(info.features.idiva);
  EXPECT_TRUE(info.features.idivt);
}

// The Nexus 4 (Qualcomm Krait) kernel configuration forgets to report IDIV
// support.
TEST(CpuinfoArmTest, Nexus4_0x510006f3) {
  DisableHardwareCapabilities();
  auto& fs = GetEmptyFilesystem();
  fs.CreateFile("/proc/cpuinfo",
                R"(CPU implementer : 0x51
CPU architecture: 7
CPU variant : 0x0
CPU part : 0x6f
CPU revision : 3)");
  const auto info = GetArmInfo();
  EXPECT_TRUE(info.features.idiva);
  EXPECT_TRUE(info.features.idivt);
}

// The emulator-specific Android 4.2 kernel fails to report support for the
// 32-bit ARM IDIV instruction. Technically, this is a feature of the virtual
// CPU implemented by the emulator.
TEST(CpuinfoArmTest, EmulatorSpecificIdiv) {
  DisableHardwareCapabilities();
  auto& fs = GetEmptyFilesystem();
  fs.CreateFile("/proc/cpuinfo",
                R"(Processor : ARMv7 Processor rev 0 (v7l)
BogoMIPS : 629.14
Features : swp half thumb fastmult vfp edsp neon vfpv3
CPU implementer : 0x41
CPU architecture: 7
CPU variant : 0x0
CPU part : 0xc08
CPU revision : 0
Hardware : Goldfish
Revision : 0000
Serial : 0000000000000000)");
  const auto info = GetArmInfo();
  EXPECT_TRUE(info.features.idiva);
}

}  // namespace
}  // namespace cpu_features

View file

@ -0,0 +1,125 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpuinfo_mips.h"
#include "filesystem_for_testing.h"
#include "hwcaps_for_testing.h"
#include "internal/stack_line_reader.h"
#include "internal/string_view.h"
#include "gtest/gtest.h"
namespace cpu_features {
namespace {

// Clears both hardware capability words so that detection has to rely on the
// fake /proc/cpuinfo.
void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }

// Both features set through the hardware capability bits.
TEST(CpuinfoMipsTest, FromHardwareCapBoth) {
  SetHardwareCapabilities(MIPS_HWCAP_EVA | MIPS_HWCAP_MSA, 0);
  GetEmptyFilesystem();  // disabling /proc/cpuinfo
  const MipsInfo cpu = GetMipsInfo();
  EXPECT_TRUE(cpu.features.msa);
  EXPECT_TRUE(cpu.features.eva);
}

// Only one of the two features set through the hardware capability bits.
TEST(CpuinfoMipsTest, FromHardwareCapOnlyOne) {
  SetHardwareCapabilities(MIPS_HWCAP_MSA, 0);
  GetEmptyFilesystem();  // disabling /proc/cpuinfo
  const MipsInfo cpu = GetMipsInfo();
  EXPECT_TRUE(cpu.features.msa);
  EXPECT_FALSE(cpu.features.eva);
}

// /proc/cpuinfo whose "ASEs implemented" line lists eva but not msa.
TEST(CpuinfoMipsTest, Ci40) {
  DisableHardwareCapabilities();
  GetEmptyFilesystem().CreateFile("/proc/cpuinfo", R"(system type : IMG Pistachio SoC (B0)
machine : IMG Marduk Ci40 with cc2520
processor : 0
cpu model : MIPS interAptiv (multi) V2.0 FPU V0.0
BogoMIPS : 363.72
wait instruction : yes
microsecond timers : yes
tlb_entries : 64
extra interrupt vector : yes
hardware watchpoint : yes, count: 4, address/irw mask: [0x0ffc, 0x0ffc, 0x0ffb, 0x0ffb]
isa : mips1 mips2 mips32r1 mips32r2
ASEs implemented : mips16 dsp mt eva
shadow register sets : 1
kscratch registers : 0
package : 0
core : 0
VCED exceptions : not available
VCEI exceptions : not available
VPE : 0
)");
  const MipsInfo cpu = GetMipsInfo();
  EXPECT_FALSE(cpu.features.msa);
  EXPECT_TRUE(cpu.features.eva);
}

// /proc/cpuinfo whose "ASEs implemented" line lists neither msa nor eva.
TEST(CpuinfoMipsTest, AR7161) {
  DisableHardwareCapabilities();
  GetEmptyFilesystem().CreateFile("/proc/cpuinfo",
                                  R"(system type : Atheros AR7161 rev 2
machine : NETGEAR WNDR3700/WNDR3800/WNDRMAC
processor : 0
cpu model : MIPS 24Kc V7.4
BogoMIPS : 452.19
wait instruction : yes
microsecond timers : yes
tlb_entries : 16
extra interrupt vector : yes
hardware watchpoint : yes, count: 4, address/irw mask: [0x0000, 0x0f98, 0x0f78, 0x0df8]
ASEs implemented : mips16
shadow register sets : 1
kscratch registers : 0
core : 0
VCED exceptions : not available
VCEI exceptions : not available
)");
  const MipsInfo cpu = GetMipsInfo();
  EXPECT_FALSE(cpu.features.msa);
  EXPECT_FALSE(cpu.features.eva);
}

// /proc/cpuinfo whose "ASEs implemented" line is empty.
TEST(CpuinfoMipsTest, Goldfish) {
  DisableHardwareCapabilities();
  GetEmptyFilesystem().CreateFile("/proc/cpuinfo", R"(system type : MIPS-Goldfish
Hardware : goldfish
Revison : 1
processor : 0
cpu model : MIPS 24Kc V0.0 FPU V0.0
BogoMIPS : 1042.02
wait instruction : yes
microsecond timers : yes
tlb_entries : 16
extra interrupt vector : yes
hardware watchpoint : yes, count: 1, address/irw mask: [0x0ff8]
ASEs implemented :
shadow register sets : 1
core : 0
VCED exceptions : not available
VCEI exceptions : not available
)");
  const MipsInfo cpu = GetMipsInfo();
  EXPECT_FALSE(cpu.features.msa);
  EXPECT_FALSE(cpu.features.eva);
}

}  // namespace
}  // namespace cpu_features

View file

@ -0,0 +1,119 @@
// Copyright 2018 IBM.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cpuinfo_ppc.h"
#include "filesystem_for_testing.h"
#include "hwcaps_for_testing.h"
#include "internal/string_view.h"
#include "gtest/gtest.h"
namespace cpu_features {
namespace {
void DisableHardwareCapabilities() { SetHardwareCapabilities(0, 0); }
TEST(CpustringsPPCTest, FromHardwareCap) {
SetHardwareCapabilities(PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_VSX,
PPC_FEATURE2_ARCH_3_00);
GetEmptyFilesystem(); // disabling /proc/cpuinfo
const auto info = GetPPCInfo();
EXPECT_TRUE(info.features.fpu);
EXPECT_FALSE(info.features.mmu);
EXPECT_TRUE(info.features.vsx);
EXPECT_TRUE(info.features.arch300);
EXPECT_FALSE(info.features.power4);
EXPECT_FALSE(info.features.altivec);
EXPECT_FALSE(info.features.vcrypto);
EXPECT_FALSE(info.features.htm);
}
// Platform strings for a POWER7 blade are taken from the fake /proc/cpuinfo;
// the platform/base-platform types come from SetPlatformTypes.
TEST(CpustringsPPCTest, Blade) {
  DisableHardwareCapabilities();
  auto& filesystem = GetEmptyFilesystem();
  filesystem.CreateFile("/proc/cpuinfo",
                        R"(processor : 14
cpu : POWER7 (architected), altivec supported
clock : 3000.000000MHz
revision : 2.1 (pvr 003f 0201)
processor : 15
cpu : POWER7 (architected), altivec supported
clock : 3000.000000MHz
revision : 2.1 (pvr 003f 0201)
timebase : 512000000
platform : pSeries
model : IBM,8406-70Y
machine : CHRP IBM,8406-70Y)");
  SetPlatformTypes("power7", "power8");
  const auto platform_strings = GetPPCPlatformStrings();
  // Values parsed from the cpuinfo text.
  ASSERT_STREQ(platform_strings.platform, "pSeries");
  ASSERT_STREQ(platform_strings.model, "IBM,8406-70Y");
  ASSERT_STREQ(platform_strings.machine, "CHRP IBM,8406-70Y");
  ASSERT_STREQ(platform_strings.cpu, "POWER7 (architected), altivec supported");
  // Values injected through SetPlatformTypes.
  ASSERT_STREQ(platform_strings.type.platform, "power7");
  ASSERT_STREQ(platform_strings.type.base_platform, "power8");
}
// Platform strings for a POWER8 PowerNV machine are parsed from the fake
// /proc/cpuinfo.
TEST(CpustringsPPCTest, Firestone) {
  DisableHardwareCapabilities();
  auto& filesystem = GetEmptyFilesystem();
  filesystem.CreateFile("/proc/cpuinfo",
                        R"(processor : 126
cpu : POWER8 (raw), altivec supported
clock : 2061.000000MHz
revision : 2.0 (pvr 004d 0200)
processor : 127
cpu : POWER8 (raw), altivec supported
clock : 2061.000000MHz
revision : 2.0 (pvr 004d 0200)
timebase : 512000000
platform : PowerNV
model : 8335-GTA
machine : PowerNV 8335-GTA
firmware : OPAL v3)");
  const auto platform_strings = GetPPCPlatformStrings();
  ASSERT_STREQ(platform_strings.platform, "PowerNV");
  ASSERT_STREQ(platform_strings.model, "8335-GTA");
  ASSERT_STREQ(platform_strings.machine, "PowerNV 8335-GTA");
  ASSERT_STREQ(platform_strings.cpu, "POWER8 (raw), altivec supported");
}
// Platform strings for a POWER9 PowerNV machine are parsed from the fake
// /proc/cpuinfo, including an all-zero model string.
TEST(CpustringsPPCTest, w8) {
  DisableHardwareCapabilities();
  auto& filesystem = GetEmptyFilesystem();
  filesystem.CreateFile("/proc/cpuinfo",
                        R"(processor : 143
cpu : POWER9, altivec supported
clock : 2300.000000MHz
revision : 2.2 (pvr 004e 1202)
timebase : 512000000
platform : PowerNV
model : 0000000000000000
machine : PowerNV 0000000000000000
firmware : OPAL
MMU : Radix)");
  const auto platform_strings = GetPPCPlatformStrings();
  ASSERT_STREQ(platform_strings.platform, "PowerNV");
  ASSERT_STREQ(platform_strings.model, "0000000000000000");
  ASSERT_STREQ(platform_strings.machine, "PowerNV 0000000000000000");
  ASSERT_STREQ(platform_strings.cpu, "POWER9, altivec supported");
}
} // namespace
} // namespace cpu_features

View file

@ -0,0 +1,172 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cassert>
#include <cstdio>
#include <map>
#include "gtest/gtest.h"
#include "cpuinfo_x86.h"
#include "internal/cpuid_x86.h"
namespace cpu_features {
// Test double for the CPU: serves canned CPUID leaves and a canned XCR0 value
// to the code under test through the CpuId / GetXCR0Eax hooks.
class FakeCpu {
 public:
  // Returns the leaf registered for `leaf_id`. A missing leaf is recorded as a
  // (non-fatal) test failure and an all-zero Leaf is returned instead, so the
  // lookup never dereferences the map's end iterator.
  Leaf CpuId(uint32_t leaf_id) const {
    const auto itr = cpuid_leaves_.find(leaf_id);
    EXPECT_TRUE(itr != cpuid_leaves_.end()) << "Missing leaf " << leaf_id;
    if (itr == cpuid_leaves_.end()) {
      return Leaf{0, 0, 0, 0};
    }
    return itr->second;
  }

  // Returns the value configured through SetOsBackupsExtendedRegisters
  // (0 until that setter is called).
  uint32_t GetXCR0Eax() const { return xcr0_eax_; }

  // Replaces the whole set of known CPUID leaves.
  void SetLeaves(std::map<uint32_t, Leaf> configuration) {
    cpuid_leaves_ = std::move(configuration);
  }

  // Sets every XCR0 bit when `os_backups_extended_registers` is true and
  // clears them all otherwise.
  void SetOsBackupsExtendedRegisters(bool os_backups_extended_registers) {
    xcr0_eax_ = os_backups_extended_registers ? 0xFFFFFFFFu : 0u;
  }

 private:
  std::map<uint32_t, Leaf> cpuid_leaves_;
  // Initialized explicitly so a default-initialized FakeCpu never exposes an
  // indeterminate value.
  uint32_t xcr0_eax_ = 0;
};
auto* g_fake_cpu = new FakeCpu();
extern "C" Leaf CpuId(uint32_t leaf_id) { return g_fake_cpu->CpuId(leaf_id); }
extern "C" uint32_t GetXCR0Eax(void) { return g_fake_cpu->GetXCR0Eax(); }
namespace {
// Decodes vendor/family/model/stepping and the feature bits from a canned
// Sandy Bridge CPUID dump.
TEST(CpuidX86Test, SandyBridge) {
  g_fake_cpu->SetOsBackupsExtendedRegisters(true);
  g_fake_cpu->SetLeaves({
      {0x00000000, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}},
      {0x00000001, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}},
      {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}},
  });
  const auto x86_info = GetX86Info();
  EXPECT_STREQ(x86_info.vendor, "GenuineIntel");
  EXPECT_EQ(x86_info.family, 0x06);
  EXPECT_EQ(x86_info.model, 0x02A);
  EXPECT_EQ(x86_info.stepping, 0x06);

  const auto& flags = x86_info.features;
  // Leaf 7 is all zeroes, so no flag decoded from it may be reported.
  EXPECT_FALSE(flags.erms);
  EXPECT_FALSE(flags.avx2);
  EXPECT_FALSE(flags.avx512f);
  EXPECT_FALSE(flags.avx512cd);
  EXPECT_FALSE(flags.avx512er);
  EXPECT_FALSE(flags.avx512pf);
  EXPECT_FALSE(flags.avx512bw);
  EXPECT_FALSE(flags.avx512dq);
  EXPECT_FALSE(flags.avx512vl);
  EXPECT_FALSE(flags.avx512ifma);
  EXPECT_FALSE(flags.avx512vbmi);
  EXPECT_FALSE(flags.avx512vbmi2);
  EXPECT_FALSE(flags.avx512vnni);
  EXPECT_FALSE(flags.avx512bitalg);
  EXPECT_FALSE(flags.avx512vpopcntdq);
  EXPECT_FALSE(flags.avx512_4vnniw);
  EXPECT_FALSE(flags.avx512_4vbmi2);
  // The older features advertised by leaf 1 must all be reported.
  EXPECT_TRUE(flags.aes);
  EXPECT_TRUE(flags.ssse3);
  EXPECT_TRUE(flags.sse4_1);
  EXPECT_TRUE(flags.sse4_2);
  EXPECT_TRUE(flags.avx);
}
// The avx flag must follow the OS-support setting of the fake CPU even though
// the CPUID leaves stay the same.
TEST(CpuidX86Test, SandyBridgeTestOsSupport) {
  g_fake_cpu->SetLeaves({
      {0x00000000, Leaf{0x0000000D, 0x756E6547, 0x6C65746E, 0x49656E69}},
      {0x00000001, Leaf{0x000206A6, 0x00100800, 0x1F9AE3BF, 0xBFEBFBFF}},
      {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}},
  });

  // avx is disabled if the OS does not back up the extended registers.
  g_fake_cpu->SetOsBackupsExtendedRegisters(false);
  const auto without_os_support = GetX86Info();
  EXPECT_FALSE(without_os_support.features.avx);

  // avx is enabled once the OS does back up the extended registers.
  g_fake_cpu->SetOsBackupsExtendedRegisters(true);
  const auto with_os_support = GetX86Info();
  EXPECT_TRUE(with_os_support.features.avx);
}
// Decodes a canned Skylake CPUID dump and checks the microarchitecture lookup.
TEST(CpuidX86Test, SkyLake) {
  g_fake_cpu->SetOsBackupsExtendedRegisters(true);
  g_fake_cpu->SetLeaves({
      {0x00000000, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}},
      {0x00000001, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}},
      {0x00000007, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}},
  });
  const auto x86_info = GetX86Info();
  EXPECT_STREQ(x86_info.vendor, "GenuineIntel");
  EXPECT_EQ(x86_info.family, 0x06);
  EXPECT_EQ(x86_info.model, 0x04E);
  EXPECT_EQ(x86_info.stepping, 0x03);
  EXPECT_EQ(GetX86Microarchitecture(&x86_info),
            X86Microarchitecture::INTEL_SKL);
}
// The brand string is assembled from extended leaves 0x80000002..0x80000004.
TEST(CpuidX86Test, Branding) {
  g_fake_cpu->SetLeaves({
      {0x00000000, Leaf{0x00000016, 0x756E6547, 0x6C65746E, 0x49656E69}},
      {0x00000001, Leaf{0x000406E3, 0x00100800, 0x7FFAFBBF, 0xBFEBFBFF}},
      {0x00000007, Leaf{0x00000000, 0x029C67AF, 0x00000000, 0x00000000}},
      {0x80000000, Leaf{0x80000008, 0x00000000, 0x00000000, 0x00000000}},
      {0x80000001, Leaf{0x00000000, 0x00000000, 0x00000121, 0x2C100000}},
      {0x80000002, Leaf{0x65746E49, 0x2952286C, 0x726F4320, 0x4D542865}},
      {0x80000003, Leaf{0x37692029, 0x3035362D, 0x43205530, 0x40205550}},
      {0x80000004, Leaf{0x352E3220, 0x7A484730, 0x00000000, 0x00000000}},
  });
  // 49 bytes of output storage for the brand string.
  char brand[49];
  FillX86BrandString(brand);
  EXPECT_STREQ(brand, "Intel(R) Core(TM) i7-6500U CPU @ 2.50GHz");
}
// http://users.atw.hu/instlatx64/AuthenticAMD0630F81_K15_Godavari_CPUID.txt
// Decodes a canned AMD family 15h CPUID dump: identification fields,
// microarchitecture and brand string.
TEST(CpuidX86Test, AMD_K15) {
  g_fake_cpu->SetLeaves({
      {0x00000000, Leaf{0x0000000D, 0x68747541, 0x444D4163, 0x69746E65}},
      {0x00000001, Leaf{0x00630F81, 0x00040800, 0x3E98320B, 0x178BFBFF}},
      {0x00000007, Leaf{0x00000000, 0x00000000, 0x00000000, 0x00000000}},
      {0x80000000, Leaf{0x8000001E, 0x68747541, 0x444D4163, 0x69746E65}},
      {0x80000001, Leaf{0x00630F81, 0x10000000, 0x0FEBBFFF, 0x2FD3FBFF}},
      {0x80000002, Leaf{0x20444D41, 0x372D3841, 0x4B303736, 0x64615220}},
      {0x80000003, Leaf{0x206E6F65, 0x202C3752, 0x43203031, 0x75706D6F}},
      {0x80000004, Leaf{0x43206574, 0x7365726F, 0x2B433420, 0x00204736}},
      {0x80000005, Leaf{0xFF40FF18, 0xFF40FF30, 0x10040140, 0x60030140}},
  });
  const auto x86_info = GetX86Info();
  EXPECT_STREQ(x86_info.vendor, "AuthenticAMD");
  EXPECT_EQ(x86_info.family, 0x15);
  EXPECT_EQ(x86_info.model, 0x38);
  EXPECT_EQ(x86_info.stepping, 0x01);
  EXPECT_EQ(GetX86Microarchitecture(&x86_info),
            X86Microarchitecture::AMD_BULLDOZER);

  char brand[49];
  FillX86BrandString(brand);
  // The expected text keeps the trailing space present in the leaf data.
  EXPECT_STREQ(brand, "AMD A8-7670K Radeon R7, 10 Compute Cores 4C+6G ");
}
// TODO(user): test what happens when xsave/osxsave are not present.
// TODO(user): test what happens when OS support for xmm/ymm/zmm is not
// present.
} // namespace
} // namespace cpu_features

View file

@ -0,0 +1,103 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "filesystem_for_testing.h"
#include <cassert>
#include <climits>
#include <cstdio>
#include <cstring>
#include <utility>
namespace cpu_features {
// Stores the descriptor number and a private copy of `content`.
FakeFile::FakeFile(int file_descriptor, const char* content)
    : file_descriptor_(file_descriptor),
      content_(content) {
}
// A fake file must have been closed before it is destroyed.
FakeFile::~FakeFile() {
  assert(!opened_);
}
// Marks the file as opened; opening an already opened file trips the assert.
void FakeFile::Open() {
  assert(!opened_);
  opened_ = true;
}
// Marks the file as closed; closing a file that is not open trips the assert.
void FakeFile::Close() {
  assert(opened_);
  opened_ = false;
}
// Copies up to `count` bytes of the remaining content into `buf`, advances the
// read position and returns the number of bytes copied (0 once the content is
// exhausted). `fd` must be this file's descriptor.
int FakeFile::Read(int fd, void* buf, size_t count) {
  assert(count < INT_MAX);
  assert(fd == file_descriptor_);
  const size_t available = content_.size() - head_index_;
  // Never hand out more than what is left in the content.
  size_t read = count;
  if (read > available) {
    read = available;
  }
  memcpy(buf, content_.data() + head_index_, read);
  head_index_ += read;
  assert(read < INT_MAX);
  return static_cast<int>(read);
}
// Drops every registered fake file.
void FakeFilesystem::Reset() {
  files_.clear();
}
// Registers a fake file under `filename` (replacing any previous entry with
// that name), gives it the next descriptor number and returns a non-owning
// pointer to it.
FakeFile* FakeFilesystem::CreateFile(const std::string& filename,
                                     const char* content) {
  std::unique_ptr<FakeFile>& slot = files_[filename];
  slot.reset(new FakeFile(next_file_descriptor_++, content));
  return slot.get();
}
// Looks a file up by name; returns nullptr when no such file is registered.
FakeFile* FakeFilesystem::FindFileOrNull(const std::string& filename) const {
  const auto entry = files_.find(filename);
  if (entry == files_.end()) {
    return nullptr;
  }
  return entry->second.get();
}
// Looks a file up by descriptor number. An unknown descriptor trips the
// assert; when asserts are compiled out nullptr is returned instead.
FakeFile* FakeFilesystem::FindFileOrDie(const int file_descriptor) const {
  for (auto it = files_.begin(); it != files_.end(); ++it) {
    FakeFile* const candidate = it->second.get();
    if (candidate->GetFileDescriptor() != file_descriptor) {
      continue;
    }
    return candidate;
  }
  assert(false);
  return nullptr;
}
// The one fake filesystem backing the CpuFeatures_*File hooks in this file.
static FakeFilesystem* kFilesystem = new FakeFilesystem();

// Clears the shared fake filesystem and hands it back, ready to be populated.
FakeFilesystem& GetEmptyFilesystem() {
  FakeFilesystem& shared = *kFilesystem;
  shared.Reset();
  return shared;
}
extern "C" int CpuFeatures_OpenFile(const char* filename) {
auto* const file = kFilesystem->FindFileOrNull(filename);
if (file) {
file->Open();
return file->GetFileDescriptor();
}
return -1;
}
extern "C" void CpuFeatures_CloseFile(int file_descriptor) {
kFilesystem->FindFileOrDie(file_descriptor)->Close();
}
extern "C" int CpuFeatures_ReadFile(int file_descriptor, void* buffer,
size_t buffer_size) {
return kFilesystem->FindFileOrDie(file_descriptor)
->Read(file_descriptor, buffer, buffer_size);
}
} // namespace cpu_features

View file

@ -0,0 +1,61 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Implements a fake filesystem, useful for tests.
#ifndef CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_
#define CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_
#include <memory>
#include <string>
#include <unordered_map>
#include "internal/filesystem.h"
namespace cpu_features {
// In-memory stand-in for one file: fixed content, a descriptor number, an
// opened/closed flag and a read position.
class FakeFile {
 public:
  // `content` is copied into the object.
  explicit FakeFile(int file_descriptor, const char* content);
  ~FakeFile();

  // Toggle the opened flag.
  void Open();
  void Close();

  // Copies up to `count` bytes starting at the current read position into
  // `buf` and returns how many bytes were copied.
  int Read(int fd, void* buf, size_t count);

  int GetFileDescriptor() const { return file_descriptor_; }

 private:
  const int file_descriptor_;
  const std::string content_;
  bool opened_ = false;
  // Offset into content_ of the next byte Read() will return.
  size_t head_index_ = 0;
};
// Name -> FakeFile registry used by tests in place of the real filesystem.
class FakeFilesystem {
 public:
  // Removes every registered file.
  void Reset();

  // Registers a file with the given content and returns a pointer that stays
  // owned by the filesystem.
  FakeFile* CreateFile(const std::string& filename, const char* content);

  // Lookup by descriptor; the descriptor is expected to exist.
  FakeFile* FindFileOrDie(const int file_descriptor) const;

  // Lookup by name; nullptr when the name is unknown.
  FakeFile* FindFileOrNull(const std::string& filename) const;

 private:
  // Descriptor number handed to the next created file.
  size_t next_file_descriptor_ = 0;
  std::unordered_map<std::string, std::unique_ptr<FakeFile>> files_;
};
// Returns the shared fake filesystem after removing all of its files.
FakeFilesystem& GetEmptyFilesystem();
} // namespace cpu_features
#endif // CPU_FEATURES_TEST_FILESYSTEM_FOR_TESTING_H_

View file

@ -0,0 +1,45 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
#include "hwcaps_for_testing.h"
#include "internal/string_view.h"
namespace cpu_features {
namespace {
static auto* const g_hardware_capabilities = new HardwareCapabilities();
static auto* const g_platform_types = new PlatformType();
} // namespace
void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2) {
g_hardware_capabilities->hwcaps = hwcaps;
g_hardware_capabilities->hwcaps2 = hwcaps2;
}
// Fake implementation: returns a copy of the values last stored with
// SetHardwareCapabilities.
HardwareCapabilities CpuFeatures_GetHardwareCapabilities(void) {
  const HardwareCapabilities snapshot = *g_hardware_capabilities;
  return snapshot;
}
void SetPlatformTypes(const char* platform, const char* base_platform) {
CpuFeatures_StringView_CopyString(str(platform), g_platform_types->platform,
sizeof(g_platform_types->platform));
CpuFeatures_StringView_CopyString(str(base_platform),
g_platform_types->base_platform,
sizeof(g_platform_types->base_platform));
}
// Fake implementation: returns a copy of the values last stored with
// SetPlatformTypes.
PlatformType CpuFeatures_GetPlatformType(void) {
  const PlatformType snapshot = *g_platform_types;
  return snapshot;
}
} // namespace cpu_features

View file

@ -0,0 +1,27 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_
#define CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_
#include "internal/hwcaps.h"
namespace cpu_features {
// Sets the two hardware-capability words the test fake reports.
void SetHardwareCapabilities(uint32_t hwcaps, uint32_t hwcaps2);
// Sets the platform and base-platform strings the test fake reports.
void SetPlatformTypes(const char *platform, const char *base_platform);
} // namespace cpu_features
#endif // CPU_FEATURES_TEST_HWCAPS_FOR_TESTING_H_

View file

@ -0,0 +1,95 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <array>
#include "internal/linux_features_aggregator.h"
#include "gtest/gtest.h"
namespace cpu_features {
namespace {
// Minimal feature set used to exercise the aggregator: three independent
// flags, all cleared by default.
struct Features {
  bool a = false;
  bool b = false;
  bool c = false;
};
// One setter per Features field. NOTE(review): the macro is defined outside
// this file; judging by the &set_a / &set_b / &set_c uses in kConfigs it
// presumably generates functions named set_<field> - confirm.
DECLARE_SETTER(Features, a)
DECLARE_SETTER(Features, b)
DECLARE_SETTER(Features, c)
// Fixture providing the capability table shared by the tests below. Each entry
// pairs two capability masks with a flag name and the setter to invoke.
class LinuxFeatureAggregatorTest : public testing::Test {
 public:
  const std::array<CapabilityConfig, 3> kConfigs = {
      {{{0b0001, 0b0000}, "a", &set_a},
       {{0b0010, 0b0000}, "b", &set_b},
       {{0b0000, 0b1100}, "c", &set_c}}};
};
// An empty flag string must leave every feature cleared.
TEST_F(LinuxFeatureAggregatorTest, FromFlagsEmpty) {
  Features parsed;
  const auto flags_line = str("");
  CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), flags_line,
                           &parsed);
  EXPECT_FALSE(parsed.a);
  EXPECT_FALSE(parsed.b);
  EXPECT_FALSE(parsed.c);
}
// Every flag named in the string is set, regardless of the order of the names.
TEST_F(LinuxFeatureAggregatorTest, FromFlagsAllSet) {
  Features parsed;
  const auto flags_line = str("a c b");
  CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), flags_line,
                           &parsed);
  EXPECT_TRUE(parsed.a);
  EXPECT_TRUE(parsed.b);
  EXPECT_TRUE(parsed.c);
}
// Only the flag named in the string is set; the others stay cleared.
TEST_F(LinuxFeatureAggregatorTest, FromFlagsOnlyA) {
  Features parsed;
  const auto flags_line = str("a");
  CpuFeatures_SetFromFlags(kConfigs.size(), kConfigs.data(), flags_line,
                           &parsed);
  EXPECT_TRUE(parsed.a);
  EXPECT_FALSE(parsed.b);
  EXPECT_FALSE(parsed.c);
}
// All-zero capability words match no table entry, so nothing is set.
TEST_F(LinuxFeatureAggregatorTest, FromHwcapsNone) {
  HardwareCapabilities caps;
  caps.hwcaps = 0;   // matches none
  caps.hwcaps2 = 0;  // matches none
  Features overridden;
  CpuFeatures_OverrideFromHwCaps(kConfigs.size(), kConfigs.data(), caps,
                                 &overridden);
  EXPECT_FALSE(overridden.a);
  EXPECT_FALSE(overridden.b);
  EXPECT_FALSE(overridden.c);
}
// Capability words that cover the masks of "b" and "c" set exactly those two.
TEST_F(LinuxFeatureAggregatorTest, FromHwcapsSet) {
  HardwareCapabilities caps;
  caps.hwcaps = 0b0010;   // matches b but not a
  caps.hwcaps2 = 0b1111;  // matches c
  Features overridden;
  CpuFeatures_OverrideFromHwCaps(kConfigs.size(), kConfigs.data(), caps,
                                 &overridden);
  EXPECT_FALSE(overridden.a);
  EXPECT_TRUE(overridden.b);
  EXPECT_TRUE(overridden.c);
}
} // namespace
} // namespace cpu_features

View file

@ -0,0 +1,132 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/stack_line_reader.h"
#include "filesystem_for_testing.h"
#include "gtest/gtest.h"
namespace cpu_features {
// Lets EXPECT_EQ compare two StringViews through the library's own equality
// helper.
bool operator==(const StringView& a, const StringView& b) {
  const bool same = CpuFeatures_StringView_IsEquals(a, b);
  return same;
}
namespace {
// Builds an owning std::string from the view's pointer and size.
std::string ToString(StringView view) {
  return std::string(view.ptr, view.size);
}
// An empty file yields a single empty, complete line flagged as end of file.
TEST(StackLineReaderTest, Empty) {
  auto& filesystem = GetEmptyFilesystem();
  auto* cpuinfo = filesystem.CreateFile("/proc/cpuinfo", "");
  StackLineReader reader;
  StackLineReader_Initialize(&reader, cpuinfo->GetFileDescriptor());

  const auto only = StackLineReader_NextLine(&reader);
  EXPECT_TRUE(only.eof);
  EXPECT_TRUE(only.full_line);
  EXPECT_EQ(only.line, str(""));
}
// Three short lines come back one by one; only the last one reports eof.
TEST(StackLineReaderTest, ManySmallLines) {
  auto& filesystem = GetEmptyFilesystem();
  auto* cpuinfo = filesystem.CreateFile("/proc/cpuinfo", "a\nb\nc");
  StackLineReader reader;
  StackLineReader_Initialize(&reader, cpuinfo->GetFileDescriptor());

  const auto first = StackLineReader_NextLine(&reader);
  EXPECT_FALSE(first.eof);
  EXPECT_TRUE(first.full_line);
  EXPECT_EQ(first.line, str("a"));

  const auto second = StackLineReader_NextLine(&reader);
  EXPECT_FALSE(second.eof);
  EXPECT_TRUE(second.full_line);
  EXPECT_EQ(second.line, str("b"));

  const auto third = StackLineReader_NextLine(&reader);
  EXPECT_TRUE(third.eof);
  EXPECT_TRUE(third.full_line);
  EXPECT_EQ(third.line, str("c"));
}
// A line longer than 16 characters is returned cut to its first 16 characters
// with full_line cleared; the lines around it are unaffected.
TEST(StackLineReaderTest, TruncatedLine) {
  auto& filesystem = GetEmptyFilesystem();
  auto* cpuinfo = filesystem.CreateFile("/proc/cpuinfo", R"(First
Second
More than 16 characters, this will be truncated.
last)");
  StackLineReader reader;
  StackLineReader_Initialize(&reader, cpuinfo->GetFileDescriptor());

  const auto first = StackLineReader_NextLine(&reader);
  EXPECT_FALSE(first.eof);
  EXPECT_TRUE(first.full_line);
  EXPECT_EQ(first.line, str("First"));

  const auto second = StackLineReader_NextLine(&reader);
  EXPECT_FALSE(second.eof);
  EXPECT_TRUE(second.full_line);
  EXPECT_EQ(second.line, str("Second"));

  const auto truncated = StackLineReader_NextLine(&reader);
  EXPECT_FALSE(truncated.eof);
  EXPECT_FALSE(truncated.full_line);
  EXPECT_EQ(truncated.line, str("More than 16 cha"));

  const auto last = StackLineReader_NextLine(&reader);
  EXPECT_TRUE(last.eof);
  EXPECT_TRUE(last.full_line);
  EXPECT_EQ(last.line, str("last"));
}
// Two consecutive over-long lines are both truncated; a final call then
// returns an empty, complete line with eof set.
TEST(StackLineReaderTest, TruncatedLines) {
  auto& filesystem = GetEmptyFilesystem();
  auto* cpuinfo =
      filesystem.CreateFile("/proc/cpuinfo", R"(More than 16 characters
Another line that is too long)");
  StackLineReader reader;
  StackLineReader_Initialize(&reader, cpuinfo->GetFileDescriptor());

  const auto first = StackLineReader_NextLine(&reader);
  EXPECT_FALSE(first.eof);
  EXPECT_FALSE(first.full_line);
  EXPECT_EQ(first.line, str("More than 16 cha"));

  const auto second = StackLineReader_NextLine(&reader);
  EXPECT_FALSE(second.eof);
  EXPECT_FALSE(second.full_line);
  EXPECT_EQ(second.line, str("Another line tha"));

  const auto tail = StackLineReader_NextLine(&reader);
  EXPECT_TRUE(tail.eof);
  EXPECT_TRUE(tail.full_line);
  EXPECT_EQ(tail.line, str(""));
}
} // namespace
} // namespace cpu_features

View file

@ -0,0 +1,144 @@
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "internal/string_view.h"
#include "gtest/gtest.h"
namespace cpu_features {
// Lets EXPECT_EQ compare two StringViews through the library's own equality
// helper.
bool operator==(const StringView& a, const StringView& b) {
  const bool same = CpuFeatures_StringView_IsEquals(a, b);
  return same;
}
namespace {
// The canonical empty view has a null pointer and zero size.
TEST(StringViewTest, Empty) {
  const StringView& empty = kEmptyStringView;
  EXPECT_EQ(empty.ptr, nullptr);
  EXPECT_EQ(empty.size, 0);
}
// str() wraps a C string: first character and length are visible through the
// view.
TEST(StringViewTest, Build) {
  const auto word = str("test");
  EXPECT_EQ(word.ptr[0], 't');
  EXPECT_EQ(word.size, 4);
}
TEST(StringViewTest, CpuFeatures_StringView_IndexOfChar) {
  const auto haystack = str("test");
  // A character that occurs yields its zero-based position.
  EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(haystack, 'e'), 1);
  // A character that does not occur yields -1.
  EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(haystack, 'z'), -1);
  // Searching the empty view yields -1.
  EXPECT_EQ(CpuFeatures_StringView_IndexOfChar(kEmptyStringView, 'z'), -1);
}
TEST(StringViewTest, CpuFeatures_StringView_IndexOf) {
  const auto haystack = str("test");
  const auto absent = str("aa");
  // A sub-view that occurs yields the position of its first character.
  EXPECT_EQ(CpuFeatures_StringView_IndexOf(haystack, str("es")), 1);
  // A sub-view that does not occur yields -1.
  EXPECT_EQ(CpuFeatures_StringView_IndexOf(haystack, absent), -1);
  // An empty view on either side yields -1.
  EXPECT_EQ(CpuFeatures_StringView_IndexOf(kEmptyStringView, absent), -1);
  EXPECT_EQ(CpuFeatures_StringView_IndexOf(absent, kEmptyStringView), -1);
}
TEST(StringViewTest, CpuFeatures_StringView_StartsWith) {
  const auto word = str("test");
  // A real prefix matches.
  EXPECT_TRUE(CpuFeatures_StringView_StartsWith(word, str("te")));
  // An empty prefix, in either representation, does not match.
  EXPECT_FALSE(CpuFeatures_StringView_StartsWith(word, str("")));
  EXPECT_FALSE(CpuFeatures_StringView_StartsWith(word, kEmptyStringView));
  // An empty view does not start with a non-empty prefix.
  EXPECT_FALSE(CpuFeatures_StringView_StartsWith(kEmptyStringView, word));
}
TEST(StringViewTest, CpuFeatures_StringView_IsEquals) {
  const auto empty_literal = str("");
  const auto letter_a = str("a");
  // The null empty view and the "" literal compare equal in every combination.
  EXPECT_TRUE(
      CpuFeatures_StringView_IsEquals(kEmptyStringView, kEmptyStringView));
  EXPECT_TRUE(CpuFeatures_StringView_IsEquals(kEmptyStringView, empty_literal));
  EXPECT_TRUE(CpuFeatures_StringView_IsEquals(empty_literal, kEmptyStringView));
  // Same content is equal, different content is not.
  EXPECT_TRUE(CpuFeatures_StringView_IsEquals(letter_a, str("a")));
  EXPECT_FALSE(CpuFeatures_StringView_IsEquals(letter_a, str("b")));
  // Empty and non-empty never compare equal.
  EXPECT_FALSE(CpuFeatures_StringView_IsEquals(letter_a, kEmptyStringView));
  EXPECT_FALSE(CpuFeatures_StringView_IsEquals(kEmptyStringView, letter_a));
}
// PopFront drops the requested number of leading characters; asking for the
// whole length or more leaves an empty view.
TEST(StringViewTest, CpuFeatures_StringView_PopFront) {
  const auto word = str("test");
  const auto nothing_left = str("");
  EXPECT_EQ(CpuFeatures_StringView_PopFront(word, 2), str("st"));
  EXPECT_EQ(CpuFeatures_StringView_PopFront(word, 0), word);
  EXPECT_EQ(CpuFeatures_StringView_PopFront(word, 4), nothing_left);
  EXPECT_EQ(CpuFeatures_StringView_PopFront(word, 100), nothing_left);
}
TEST(StringViewTest, CpuFeatures_StringView_ParsePositiveNumber) {
  // Decimal and hexadecimal (either letter case) spellings of 42.
  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("42")), 42);
  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2a")), 42);
  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("0x2A")), 42);
  // Negative, non-numeric and empty inputs are rejected with -1.
  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("-0x2A")), -1);
  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("abc")), -1);
  EXPECT_EQ(CpuFeatures_StringView_ParsePositiveNumber(str("")), -1);
}
// CopyString always leaves a NUL-terminated string in the destination and
// truncates input that does not fit.
TEST(StringViewTest, CpuFeatures_StringView_CopyString) {
  char dest[4];
  // Pre-fill the first byte so the empty copy below has something to overwrite.
  dest[0] = 'X';

  // Empty source.
  CpuFeatures_StringView_CopyString(str(""), dest, sizeof(dest));
  EXPECT_STREQ(dest, "");

  // Source shorter than the destination.
  CpuFeatures_StringView_CopyString(str("a"), dest, sizeof(dest));
  EXPECT_STREQ(dest, "a");

  // Source that fits exactly, terminator included.
  CpuFeatures_StringView_CopyString(str("abc"), dest, sizeof(dest));
  EXPECT_STREQ(dest, "abc");

  // Source one character too long: the last character is dropped.
  CpuFeatures_StringView_CopyString(str("abcd"), dest, sizeof(dest));
  EXPECT_STREQ(dest, "abc");
}
TEST(StringViewTest, CpuFeatures_StringView_HasWord) {
  const auto sentence = str("first middle last");
  // Whole words are found at the beginning, in the middle and at the end.
  EXPECT_TRUE(CpuFeatures_StringView_HasWord(sentence, "first"));
  EXPECT_TRUE(CpuFeatures_StringView_HasWord(sentence, "middle"));
  EXPECT_TRUE(CpuFeatures_StringView_HasWord(sentence, "last"));
  // Fragments of a word must not match.
  EXPECT_FALSE(CpuFeatures_StringView_HasWord(sentence, "irst"));
  EXPECT_FALSE(CpuFeatures_StringView_HasWord(sentence, "mid"));
  EXPECT_FALSE(CpuFeatures_StringView_HasWord(sentence, "las"));
}
// A "key : value" line is split at the colon and both parts come back without
// the surrounding spaces.
TEST(StringViewTest, CpuFeatures_StringView_GetAttributeKeyValue) {
  const StringView raw_line = str(" key : first middle last ");
  StringView parsed_key;
  StringView parsed_value;
  EXPECT_TRUE(CpuFeatures_StringView_GetAttributeKeyValue(
      raw_line, &parsed_key, &parsed_value));
  EXPECT_EQ(parsed_key, str("key"));
  EXPECT_EQ(parsed_value, str("first middle last"));
}
// A line without a colon separator is rejected.
TEST(StringViewTest, FailingGetAttributeKeyValue) {
  const StringView raw_line = str("key first middle last");
  StringView parsed_key;
  StringView parsed_value;
  EXPECT_FALSE(CpuFeatures_StringView_GetAttributeKeyValue(
      raw_line, &parsed_key, &parsed_value));
}
} // namespace
} // namespace cpu_features

View file

@ -0,0 +1,353 @@
// Size of one compression block / staging buffer.
// NOTE(review): the update/final routines in this file index uint32_t buffers
// with this value and advance the byte counter by 4x it, so despite the name
// it appears to be a count of 32-bit words (32 words = 128 bytes) - confirm.
#define BLOCK_BYTES 32
// Upper bound on out_len for which blake2b_digestLong uses a single hash pass;
// presumably in the same unit as BLOCK_BYTES - confirm.
#define OUT_BYTES 16
// Not referenced in the visible part of this file. 120 equals 480 / 4, so the
// second constant is presumably the first one expressed in 32-bit words.
#define BLAKE_SHARED_MEM 480
#define BLAKE_SHARED_MEM_UINT 120
// BLAKE2b mixing step on the words a, b, c, d. Two message words are selected
// from m through the round schedule: blake2b_sigma[r][2*i] and
// blake2b_sigma[r][2*i + 1]. a, b, c, d must be modifiable lvalues.
// Every parameter is parenthesized so that expression arguments (for example
// `t + 4` as the index) expand with the intended precedence.
#define G(m, r, i, a, b, c, d) \
do { \
    (a) = (a) + (b) + (m)[blake2b_sigma[(r)][2 * (i) + 0]]; \
    (d) = rotr64((d) ^ (a), 32); \
    (c) = (c) + (d); \
    (b) = rotr64((b) ^ (c), 24); \
    (a) = (a) + (b) + (m)[blake2b_sigma[(r)][2 * (i) + 1]]; \
    (d) = rotr64((d) ^ (a), 16); \
    (c) = (c) + (d); \
    (b) = rotr64((b) ^ (c), 63); \
} while ((void)0, 0)
// Variant of the G mixing step in which the same value m is added as the
// message word in both halves (no schedule lookup). a, b, c, d must be
// modifiable lvalues.
// Every parameter is parenthesized so that an expression passed as m (for
// example `x | y`) is added as a whole.
#define G_S(m, a, b, c, d) \
do { \
    (a) = (a) + (b) + (m); \
    (d) = rotr64((d) ^ (a), 32); \
    (c) = (c) + (d); \
    (b) = rotr64((b) ^ (c), 24); \
    (a) = (a) + (b) + (m); \
    (d) = rotr64((d) ^ (a), 16); \
    (c) = (c) + (d); \
    (b) = rotr64((b) ^ (c), 63); \
} while ((void)0, 0)
// One BLAKE2b round for the calling thread. Expects locals v0, v1, v2, v3 in
// the expanding scope. It applies G with schedule index t, pulls v1/v2/v3 from
// the lanes t+1, t+2, t+3 of the 4-wide shuffle group, applies G with schedule
// index t+4, then shuffles with offsets 3, 2, 1 to undo the exchange.
// The t parameter is parenthesized wherever it is combined with an offset so
// that expression arguments expand with the intended precedence.
#define ROUND(m, t, r) \
do { \
    G(m, r, (t), v0, v1, v2, v3); \
    v1 = __shfl_sync(0xFFFFFFFF, v1, (t) + 1, 4); \
    v2 = __shfl_sync(0xFFFFFFFF, v2, (t) + 2, 4); \
    v3 = __shfl_sync(0xFFFFFFFF, v3, (t) + 3, 4); \
    G(m, r, ((t) + 4), v0, v1, v2, v3); \
    v1 = __shfl_sync(0xFFFFFFFF, v1, (t) + 3, 4); \
    v2 = __shfl_sync(0xFFFFFFFF, v2, (t) + 2, 4); \
    v3 = __shfl_sync(0xFFFFFFFF, v3, (t) + 1, 4); \
} while ((void)0, 0)
// One round of the single-message-value variant for the calling thread.
// Expects locals v0, v1, v2, v3 in the expanding scope. It applies G_S, pulls
// v1/v2/v3 from the lanes t+1, t+2, t+3 of the 4-wide shuffle group, applies
// G_S again, then shuffles with offsets 3, 2, 1 to undo the exchange.
// The m and t parameters are parenthesized so that expression arguments expand
// with the intended precedence.
#define ROUND_S(m, t) \
do { \
    G_S((m), v0, v1, v2, v3); \
    v1 = __shfl_sync(0xFFFFFFFF, v1, (t) + 1, 4); \
    v2 = __shfl_sync(0xFFFFFFFF, v2, (t) + 2, 4); \
    v3 = __shfl_sync(0xFFFFFFFF, v3, (t) + 3, 4); \
    G_S((m), v0, v1, v2, v3); \
    v1 = __shfl_sync(0xFFFFFFFF, v1, (t) + 3, 4); \
    v2 = __shfl_sync(0xFFFFFFFF, v2, (t) + 2, 4); \
    v3 = __shfl_sync(0xFFFFFFFF, v3, (t) + 1, 4); \
} while ((void)0, 0)
// BLAKE2b initialization vector: eight 64-bit words, declared __constant__.
// Used to seed the chaining state (blake2b_init) and the second half of the
// working vector (blake2b_compress).
__constant__ uint64_t blake2b_IV[8] = {
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
};
// Message schedule, declared __constant__: row r lists the message word
// indices consumed in round r. The G macro reads entries [r][2*i] and
// [r][2*i + 1]. Rows 10 and 11 repeat rows 0 and 1.
__constant__ uint32_t blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
};
// Rotates the 64-bit value x right by n bit positions. The callers in this
// file pass 16, 24, 32 and 63; n == 0 would shift by the full width and is not
// handled specially.
__device__ uint64_t rotr64(uint64_t x, uint32_t n)
{
    const uint64_t shifted_down = x >> n;
    const uint64_t wrapped_around = x << (64 - n);
    return shifted_down | wrapped_around;
}
// Runs the twelve BLAKE2b rounds over message block m and folds the result
// into the chaining state h. The work is shared between cooperating threads:
// each thread loads h[thr_id], h[thr_id + 4] and the two matching IV words,
// and the ROUND macro exchanges v1..v3 between threads with __shfl_sync.
//   h      - state array; h[thr_id] and h[thr_id + 4] are read and updated,
//            h[8] and h[9] are XORed into v3 of threads 0 and 1
//   m      - message words, selected through blake2b_sigma by the G macro
//   f0     - value XORed into v3 of thread 2 (blake2b_final passes all ones,
//            the update routines pass 0)
//   thr_id - index of the calling thread within its group
// NOTE(review): the h[thr_id + 4] indexing and the shuffle width of 4 suggest
// thr_id is expected to be 0..3 - confirm against the caller.
__device__ __forceinline__ void blake2b_compress(uint64_t *h, uint64_t *m, uint64_t f0, int thr_id)
{
// v0..v3 are referenced by name inside the ROUND macro.
uint64_t v0, v1, v2, v3;
v0 = h[thr_id];
v1 = h[thr_id + 4];
v2 = blake2b_IV[thr_id];
v3 = blake2b_IV[thr_id + 4];
if(thr_id == 0) v3 ^= h[8];
if(thr_id == 1) v3 ^= h[9];
if(thr_id == 2) v3 ^= f0;
// Rounds 0..11, one blake2b_sigma row each.
ROUND(m, thr_id, 0);
ROUND(m, thr_id, 1);
ROUND(m, thr_id, 2);
ROUND(m, thr_id, 3);
ROUND(m, thr_id, 4);
ROUND(m, thr_id, 5);
ROUND(m, thr_id, 6);
ROUND(m, thr_id, 7);
ROUND(m, thr_id, 8);
ROUND(m, thr_id, 9);
ROUND(m, thr_id, 10);
ROUND(m, thr_id, 11);
// Fold this thread's part of the working vector back into the state.
h[thr_id] ^= v0 ^ v2;
h[thr_id + 4] ^= v1 ^ v3;
}
// Same as blake2b_compress, but for a message block in which every 64-bit word
// has the same value m: the rounds use ROUND_S / G_S, which add m directly
// instead of looking message words up through blake2b_sigma.
//   h      - state array; h[thr_id] and h[thr_id + 4] are read and updated,
//            h[8] and h[9] are XORed into v3 of threads 0 and 1
//   m      - the repeated 64-bit message word
//   f0     - value XORed into v3 of thread 2
//   thr_id - index of the calling thread within its group
// NOTE(review): thr_id is presumably 0..3, as in blake2b_compress - confirm.
__device__ __forceinline__ void blake2b_compress_static(uint64_t *h, uint64_t m, uint64_t f0, int thr_id)
{
// v0..v3 are referenced by name inside the ROUND_S macro.
uint64_t v0, v1, v2, v3;
v0 = h[thr_id];
v1 = h[thr_id + 4];
v2 = blake2b_IV[thr_id];
v3 = blake2b_IV[thr_id + 4];
if(thr_id == 0) v3 ^= h[8];
if(thr_id == 1) v3 ^= h[9];
if(thr_id == 2) v3 ^= f0;
// Twelve identical rounds.
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
ROUND_S(m, thr_id);
// Fold this thread's part of the working vector back into the state.
h[thr_id] ^= v0 ^ v2;
h[thr_id + 4] ^= v1 ^ v3;
}
// Seeds the hash state. Each thread copies two consecutive IV words into h
// (words 2*thr_id and 2*thr_id + 1); thread 0 additionally clears the counter
// words h[8] and h[9] and rewrites h[0] as the first IV word XORed with a
// parameter value built from out_len * 4 and the bits 1 << 16 and 1 << 24.
// Always returns 0.
__device__ __forceinline__ int blake2b_init(uint64_t *h, int out_len, int thr_id)
{
    const int first_word = thr_id * 2;
    const int second_word = thr_id * 2 + 1;
    h[first_word] = blake2b_IV[first_word];
    h[second_word] = blake2b_IV[second_word];

    if (thr_id == 0) {
        h[8] = h[9] = 0;
        h[0] = 0x6A09E667F3BCC908 ^ ((out_len * 4) | (1 << 16) | (1 << 24));
    }

    return 0;
}
// Advances the two-word counter kept in h[8] (low) and h[9] (high) by inc * 4,
// carrying into h[9] when the addition to h[8] wraps around.
__device__ __forceinline__ void blake2b_incrementCounter(uint64_t *h, int inc)
{
    const int step = inc * 4;
    h[8] += step;
    // After the addition the low word is smaller than the step only if it
    // wrapped around.
    h[9] += (h[8] < step) ? 1 : 0;
}
// Feeds in_len words from `in` into the hash, compressing every time the
// staging buffer fills, and returns the number of words left buffered.
// The cooperating threads copy four words per loop iteration (one word per
// thread, indexed by thr_id); thread 0 copies the remainder and maintains the
// counter.
//   in      - input words
//   in_len  - number of input words
//   h       - hash state (see blake2b_compress)
//   buf     - staging buffer holding one block
//   buf_len - number of words already in buf
//   thr_id  - index of the calling thread within its group
// NOTE(review): lengths are indexed against uint32_t pointers, so they appear
// to be counts of 32-bit words, not bytes - confirm.
// NOTE(review): no explicit synchronization is visible between the cooperative
// buffer/counter writes and the blake2b_compress calls that read words written
// by other threads; confirm the callers guarantee the threads run in lockstep.
__device__ __forceinline__ int blake2b_update(uint32_t *in, int in_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id)
{
uint32_t *cursor_in = in;
uint32_t *cursor_out = buf + buf_len;
// Strictly greater: a block that is exactly full stays buffered.
if (buf_len + in_len > BLOCK_BYTES) {
// Top the buffer up to a full block and compress it.
int left = BLOCK_BYTES - buf_len;
for(int i=0; i < (left >> 2); i++, cursor_in += 4, cursor_out += 4) {
cursor_out[thr_id] = cursor_in[thr_id];
}
if(thr_id == 0) {
for (int i = 0; i < (left % 4); i++) {
cursor_out[i] = cursor_in[i];
}
blake2b_incrementCounter(h, BLOCK_BYTES);
}
blake2b_compress(h, (uint64_t*)buf, 0, thr_id);
buf_len = 0;
in_len -= left;
in += left;
// Compress further whole blocks while more than one block of input remains.
while (in_len > BLOCK_BYTES) {
if(thr_id == 0)
blake2b_incrementCounter(h, BLOCK_BYTES);
cursor_in = in;
cursor_out = buf;
for(int i=0; i < (BLOCK_BYTES / 4); i++, cursor_in += 4, cursor_out += 4) {
cursor_out[thr_id] = cursor_in[thr_id];
}
blake2b_compress(h, (uint64_t *)buf, 0, thr_id);
in_len -= BLOCK_BYTES;
in += BLOCK_BYTES;
}
}
// Stash whatever input is left behind the words already buffered.
cursor_in = in;
cursor_out = buf + buf_len;
for(int i=0; i < (in_len >> 2); i++, cursor_in += 4, cursor_out += 4) {
cursor_out[thr_id] = cursor_in[thr_id];
}
if(thr_id == 0) {
for (int i = 0; i < (in_len % 4); i++) {
cursor_out[i] = cursor_in[i];
}
}
return buf_len + in_len;
}
// Like blake2b_update, but the input is the single 32-bit value `in` repeated
// in_len times, so nothing is read from memory. Returns the number of words
// left buffered.
// The first block is completed in buf and compressed with blake2b_compress;
// further whole blocks consist only of `in` and are compressed with
// blake2b_compress_static using in64, which holds `in` in both 32-bit halves.
//   in      - the repeated input word
//   in_len  - number of repetitions
//   h       - hash state (see blake2b_compress)
//   buf     - staging buffer holding one block
//   buf_len - number of words already in buf
//   thr_id  - index of the calling thread within its group
// NOTE(review): lengths appear to be counts of 32-bit words - confirm.
// NOTE(review): no explicit synchronization is visible between the cooperative
// buffer/counter writes and the compression calls; confirm the callers
// guarantee the threads run in lockstep.
__device__ __forceinline__ int blake2b_update_static(uint32_t in, int in_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id)
{
// Duplicate `in` into both halves of a 64-bit message word.
uint64_t in64 = in;
in64 = in64 << 32;
in64 = in64 | in;
uint32_t *cursor_out = buf + buf_len;
// Strictly greater: a block that is exactly full stays buffered.
if (buf_len + in_len > BLOCK_BYTES) {
// Top the buffer up to a full block and compress it.
int left = BLOCK_BYTES - buf_len;
for(int i=0; i < (left >> 2); i++, cursor_out += 4) {
cursor_out[thr_id] = in;
}
if(thr_id == 0) {
for (int i = 0; i < (left % 4); i++) {
cursor_out[i] = in;
}
blake2b_incrementCounter(h, BLOCK_BYTES);
}
blake2b_compress(h, (uint64_t*)buf, 0, thr_id);
buf_len = 0;
in_len -= left;
// Whole blocks made only of `in` need no buffer at all.
while (in_len > BLOCK_BYTES) {
if(thr_id == 0)
blake2b_incrementCounter(h, BLOCK_BYTES);
blake2b_compress_static(h, in64, 0, thr_id);
in_len -= BLOCK_BYTES;
}
}
// Stash the remaining repetitions behind the words already buffered.
cursor_out = buf + buf_len;
for(int i=0; i < (in_len >> 2); i++, cursor_out += 4) {
cursor_out[thr_id] = in;
}
if(thr_id == 0) {
for (int i = 0; i < (in_len % 4); i++) {
cursor_out[i] = in;
}
}
return buf_len + in_len;
}
// Finishes the digest: zero-fills the unused tail of `buf`, adds the pending
// `buf_len` elements to the counter, runs the last compression with an
// all-ones third argument, and copies `out_len` elements of the state to `out`.
//
// Lengths are counted in uint32_t elements. Fills and copies are cooperative:
// per group of four elements the calling thread handles offset thr_id, and
// thread 0 handles the 0..3 leftover elements.
// NOTE(review): presumably called by four lockstep threads (thr_id 0..3); no
// barrier is visible around blake2b_compress - confirm at the launch site.
__device__ __forceinline__ void blake2b_final(uint32_t *out, int out_len, uint64_t *h, uint32_t *buf, int buf_len, int thr_id)
{
// Pad the remainder of the block with zeros.
int left = BLOCK_BYTES - buf_len;
uint32_t *cursor_out = buf + buf_len;
for(int i=0; i < (left >> 2); i++, cursor_out += 4) {
cursor_out[thr_id] = 0;
}
if(thr_id == 0) {
for (int i = 0; i < (left % 4); i++) {
cursor_out[i] = 0;
}
// Only the real data counts towards the total, not the padding.
blake2b_incrementCounter(h, buf_len);
}
// 0xFFFFFFFFFFFFFFFF is passed where the update paths pass 0.
blake2b_compress(h, (uint64_t*)buf, 0xFFFFFFFFFFFFFFFF, thr_id);
// Emit the leading out_len 32-bit elements of the state as the digest.
uint32_t *cursor_in = (uint32_t *)h;
cursor_out = out;
for(int i=0; i < (out_len >> 2); i++, cursor_in += 4, cursor_out += 4) {
cursor_out[thr_id] = cursor_in[thr_id];
}
if(thr_id == 0) {
for (int i = 0; i < (out_len % 4); i++) {
cursor_out[i] = cursor_in[i];
}
}
}
// Produces `out_len` elements of digest from `in_len` elements of input, using
// `shared` as scratch space.
//
// Scratch layout taken from `shared`: ten uint64_t state words (`h`), followed
// by the block buffer `buf`, followed at buf[32] by `out_buffer`, which holds
// intermediate digests.
//
// The first buffered element is out_len * 4, written by thread 0, so hashing
// always starts with buf_len == 1. When out_len <= OUT_BYTES a single hash is
// written straight to `out`. Otherwise digests of OUT_BYTES elements are
// chained: each one is hashed again to obtain the next, and the first
// OUT_BYTES / 2 elements of each are appended to `out`, until the remaining
// `to_produce` elements can be emitted by one final hash.
//
// Lengths are counted in uint32_t elements (they index uint32_t pointers).
// NOTE(review): presumably called by four lockstep threads (thr_id 0..3) and
// `shared` presumably points at shared memory - confirm at the call site.
__device__ void blake2b_digestLong(uint32_t *out, int out_len, uint32_t *in, int in_len, int thr_id, uint32_t *shared)
{
uint64_t *h = (uint64_t*)shared;
uint32_t *buf = (uint32_t*)&h[10];
uint32_t *out_buffer = &buf[32];
int buf_len;
// Length prefix occupies the first element of the block buffer.
if(thr_id == 0) buf[0] = (out_len * 4);
buf_len = 1;
if (out_len <= OUT_BYTES) {
blake2b_init(h, out_len, thr_id);
buf_len = blake2b_update(in, in_len, h, buf, buf_len, thr_id);
blake2b_final(out, out_len, h, buf, buf_len, thr_id);
} else {
uint32_t *cursor_in = out_buffer;
uint32_t *cursor_out = out;
// First link of the chain hashes the prefixed input.
blake2b_init(h, OUT_BYTES, thr_id);
buf_len = blake2b_update(in, in_len, h, buf, buf_len, thr_id);
blake2b_final(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id);
// OUT_BYTES / 8 groups of four elements == the first half of the digest.
for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) {
cursor_out[thr_id] = cursor_in[thr_id];
}
out += OUT_BYTES / 2;
int to_produce = out_len - OUT_BYTES / 2;
while (to_produce > OUT_BYTES) {
// blake2b_init returns 0, so the buffer starts empty for chained links.
buf_len = blake2b_init(h, OUT_BYTES, thr_id);
buf_len = blake2b_update(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id);
blake2b_final(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id);
cursor_out = out;
cursor_in = out_buffer;
for(int i=0; i < (OUT_BYTES / 8); i++, cursor_in += 4, cursor_out += 4) {
cursor_out[thr_id] = cursor_in[thr_id];
}
out += OUT_BYTES / 2;
to_produce -= OUT_BYTES / 2;
}
// Last link writes its whole (shorter) digest directly to the output.
buf_len = blake2b_init(h, to_produce, thr_id);
buf_len = blake2b_update(out_buffer, OUT_BYTES, h, buf, buf_len, thr_id);
blake2b_final(out, to_produce, h, buf, buf_len, thr_id);
}
}

View file

@ -0,0 +1,340 @@
//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#include <crypto/Argon2_constants.h>
#include "../../../common/common.h"
#include "crypto/argon2_hasher/hash/Hasher.h"
#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
#if defined(WITH_CUDA)
#include <cuda_runtime.h>
#include <driver_types.h>
#include "cuda_hasher.h"
#include "../../../common/DLLExport.h"
/**
 * Creates a CUDA hasher in its "not yet configured" state: descriptive labels
 * are set, intensity and thread count are zero, and no device has been probed.
 */
cuda_hasher::cuda_hasher() {
    m_type = "GPU";
    m_subType = "CUDA";
    m_shortSubType = "NVD";
    m_intensity = 0;
    m_description = "";
    m_computingThreads = 0;
    // These pointers are only assigned by initialize()/buildThreadData();
    // give them a defined value so an early compute()/parallelism() call does
    // not dereference an indeterminate pointer.
    __thread_data = nullptr;
    m_profile = nullptr;
}
/** Releases per-device resources by delegating to cleanup(). */
cuda_hasher::~cuda_hasher() {
    cleanup();
}
/**
 * Selects the Argon2 profile for the given algorithm/variant and enumerates
 * the CUDA devices.
 *
 * @return true when at least one device was found and the enumeration reported
 *         no error; otherwise false with the reason stored in m_description.
 */
bool cuda_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
    m_profile = getArgon2Profile(algorithm, variant);

    cudaError_t query_status = cudaSuccess;
    string query_failure;
    __devices = __query_cuda_devices(query_status, query_failure);

    if (query_status != cudaSuccess) {
        m_description = "No compatible GPU detected: " + query_failure;
        return false;
    }

    const bool any_device = !__devices.empty();
    if (!any_device) {
        m_description = "No compatible GPU detected.";
    }
    return any_device;
}
/**
 * Enumerates the CUDA devices and probes each one with __get_device_info().
 *
 * @param error         receives the status of the device-count query, or the
 *                      status of the last device whose probe failed
 * @param error_message receives the matching human-readable message
 * @return heap-allocated records for every device that probed successfully;
 *         the caller owns the returned pointers
 */
vector<cuda_device_info *> cuda_hasher::__query_cuda_devices(cudaError_t &error, string &error_message) {
    vector<cuda_device_info *> devices;

    int devCount = 0;
    error = cudaGetDeviceCount(&devCount);
    if (error != cudaSuccess) {
        error_message = "Error querying CUDA device count.";
        return devices;
    }

    for (int i = 0; i < devCount; ++i) {
        cuda_device_info *dev = __get_device_info(i);
        if (dev == NULL)
            continue;

        if (dev->error != cudaSuccess) {
            error = dev->error;
            error_message = dev->error_message;
            // The record is not handed to anyone else, so release it here
            // instead of leaking it.
            delete dev;
            continue;
        }

        devices.push_back(dev);
    }

    return devices;
}
/**
 * Probes one CUDA device and returns a freshly allocated description of it.
 *
 * The device is made current, then its name and memory figures are read.
 * max_allocable_mem_size is set to a quarter of the free memory and the
 * display string becomes "<name> (<total memory>GB)".
 *
 * @param device_index CUDA ordinal of the device to probe
 * @return a new cuda_device_info owned by the caller; on failure its `error`
 *         and `error_message` fields describe the step that failed
 */
cuda_device_info *cuda_hasher::__get_device_info(int device_index) {
    cuda_device_info *device_info = new cuda_device_info();
    device_info->cuda_index = device_index;

    device_info->error = cudaSetDevice(device_index);
    if (device_info->error != cudaSuccess) {
        device_info->error_message = "Error setting current device.";
        return device_info;
    }

    cudaDeviceProp devProp;
    device_info->error = cudaGetDeviceProperties(&devProp, device_index);
    if (device_info->error != cudaSuccess) {
        // Previously reported the copy-pasted "setting current device" text.
        device_info->error_message = "Error querying device properties.";
        return device_info;
    }
    device_info->device_string = devProp.name;

    size_t freemem, totalmem;
    device_info->error = cudaMemGetInfo(&freemem, &totalmem);
    if (device_info->error != cudaSuccess) {
        // Previously reported the copy-pasted "setting current device" text.
        device_info->error_message = "Error querying device memory info.";
        return device_info;
    }
    device_info->free_mem_size = freemem;
    device_info->max_allocable_mem_size = freemem / 4;

    // 1073741824 == 2^30, i.e. bytes per GiB.
    double mem_in_gb = totalmem / 1073741824.0;
    stringstream ss;
    ss << setprecision(2) << mem_in_gb;
    device_info->device_string += (" (" + ss.str() + "GB)");

    return device_info;
}
/**
 * Applies the user configuration to the devices found by initialize().
 *
 * For every device: assigns a global card index (continuing from the count
 * already stored in `config`), applies the optional GPU name filters, sizes
 * and allocates the device through __setup_device_info(), records its PCI bus
 * id, and adds it to the enabled list. A textual report is accumulated in
 * m_description.
 *
 * @return true when at least one device is enabled with a non-zero thread
 *         count and the per-thread data could be built; false otherwise
 */
bool cuda_hasher::configure(xmrig::HasherConfig &config) {
    int index = config.getGPUCardsCount();
    double intensity = 0;
    int total_threads = 0;

    intensity = config.getAverageGPUIntensity();
    if (intensity == 0) {
        m_intensity = 0;
        m_description = "Status: DISABLED - by user.";
        return false;
    }

    bool cards_selected = false;
    intensity = 0;

    for (vector<cuda_device_info *>::iterator d = __devices.begin(); d != __devices.end(); ++d, ++index) {
        stringstream ss;
        ss << "["<< (index + 1) << "] " << (*d)->device_string;
        string device_description = ss.str();
        (*d)->device_index = index;
        (*d)->profile_info.profile = m_profile;

        if (config.gpuFilter().size() > 0) {
            // A device is kept when any filter text occurs in "[n] <name>".
            bool found = false;
            for (const xmrig::GPUFilter &fit : config.gpuFilter()) {
                if (device_description.find(fit.filter) != string::npos) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                (*d)->profile_info.threads = 0;
                ss << " - DISABLED" << endl;
                m_description += ss.str();
                continue;
            }
            else {
                cards_selected = true;
            }
        }
        else {
            cards_selected = true;
        }

        ss << endl;
        double device_intensity = config.getGPUIntensity((*d)->device_index);
        m_description += ss.str();

        if (!(__setup_device_info((*d), device_intensity))) {
            m_description += (*d)->error_message;
            m_description += "\n";
            continue;
        }

        DeviceInfo device;
        char bus_id[100];
        if (cudaDeviceGetPCIBusId(bus_id, 100, (*d)->cuda_index) == cudaSuccess) {
            device.bus_id = bus_id;
            // Keep the find() result in its own unsigned type: storing it in
            // an int only compared equal to npos through an implicit
            // round-trip conversion.
            const string::size_type domain_separator = device.bus_id.find(":");
            if (domain_separator != string::npos) {
                // Drop everything up to and including the first ':'.
                device.bus_id.erase(0, domain_separator + 1);
            }
        }
        device.name = (*d)->device_string;
        device.intensity = device_intensity;
        storeDeviceInfo((*d)->device_index, device);

        __enabledDevices.push_back(*d);
        total_threads += (*d)->profile_info.threads;
        intensity += device_intensity;
    }

    // Publish how many cards this hasher consumed from the global numbering.
    config.addGPUCardsCount(index - config.getGPUCardsCount());

    if (!cards_selected) {
        m_intensity = 0;
        m_description += "Status: DISABLED - no card enabled because of filtering.";
        return false;
    }

    if (total_threads == 0) {
        m_intensity = 0;
        m_description += "Status: DISABLED - not enough resources.";
        return false;
    }

    if (!buildThreadData())
        return false;

    m_intensity = intensity / __enabledDevices.size();
    m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device
    m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";

    return true;
}
void cuda_hasher::cleanup() {
for(vector<cuda_device_info *>::iterator d = __devices.begin(); d != __devices.end(); d++) {
cuda_free(*d);
}
}
/**
 * Derives the thread count for one device from its memory figures and the
 * requested intensity, then allocates device memory via cuda_allocate().
 *
 * @param device    device record; profile_info.profile must already be set
 * @param intensity percentage (0..100 scale) of the maximum thread count to use
 * @return true when sizing succeeded and device->error is still cudaSuccess
 *         after allocation; false otherwise, with device->error_message set
 *         for the "not enough memory" cases
 */
bool cuda_hasher::__setup_device_info(cuda_device_info *device, double intensity) {
    argon2profile_info &info = device->profile_info;

    // How many hashes fit in the largest single allocation we allow.
    info.threads_per_chunk = (uint32_t)(device->max_allocable_mem_size / info.profile->memSize);
    size_t chunk_size = info.threads_per_chunk * info.profile->memSize;
    if (chunk_size == 0) {
        device->error = cudaErrorInitializationError;
        device->error_message = "Not enough memory on GPU.";
        return false;
    }

    // Upper bound on threads if all free memory were used.
    double chunks = (double)device->free_mem_size / (double)chunk_size;
    uint32_t max_threads = (uint32_t)(info.threads_per_chunk * chunks);
    if (max_threads == 0) {
        device->error = cudaErrorInitializationError;
        device->error_message = "Not enough memory on GPU.";
        return false;
    }

    uint32_t wanted = (uint32_t)(max_threads * intensity / 100.0);
    wanted -= wanted % 2; // make it divisible by 2 to allow for parallel kernel execution
    if (max_threads > 0 && wanted == 0 && intensity > 0)
        wanted = 2;
    info.threads = wanted;

    // Number of (possibly fractional) chunks actually needed for that many threads.
    chunks = (double)info.threads / (double)info.threads_per_chunk;
    cuda_allocate(device, chunks, chunk_size);

    return device->error == cudaSuccess;
}
bool cuda_hasher::buildThreadData() {
__thread_data = new cuda_gpumgmt_thread_data[__enabledDevices.size() * 2];
for(int i=0; i < __enabledDevices.size(); i++) {
cuda_device_info *device = __enabledDevices[i];
for(int threadId = 0; threadId < 2; threadId ++) {
cuda_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId];
thread_data.device = device;
thread_data.thread_id = threadId;
cudaStream_t stream;
device->error = cudaStreamCreate(&stream);
if(device->error != cudaSuccess) {
LOG("Error running kernel: (" + to_string(device->error) + ") cannot create cuda stream.");
return false;
}
thread_data.device_data = stream;
#ifdef PARALLEL_CUDA
if(threadId == 0) {
thread_data.threads_idx = 0;
thread_data.threads = device->profile_info.threads / 2;
}
else {
thread_data.threads_idx = device->profile_info.threads / 2;
thread_data.threads = device->profile_info.threads - thread_data.threads_idx;
}
#else
thread_data.threads_idx = 0;
thread_data.threads = device->profile_info.threads;
#endif
thread_data.argon2 = new Argon2(cuda_kernel_prehasher, cuda_kernel_filler, cuda_kernel_posthasher,
nullptr, &thread_data);
thread_data.argon2->setThreads(thread_data.threads);
thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4;
}
}
return true;
}
int cuda_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
cuda_gpumgmt_thread_data &threadData = __thread_data[threadIdx];
cudaSetDevice(threadData.device->cuda_index);
threadData.hashData.input = input;
threadData.hashData.inSize = size;
threadData.hashData.output = output;
int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
if(threadData.device->error != cudaSuccess) {
LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message);
return 0;
}
uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39));
(*nonce) += threadData.threads;
return hashCount;
}
/** Returns the number of GPU threads assigned to the given worker. */
size_t cuda_hasher::parallelism(int workerIdx) {
    return __thread_data[workerIdx].threads;
}
/** Returns how many devices were enabled by configure(). */
size_t cuda_hasher::deviceCount() {
    const size_t enabled = __enabledDevices.size();
    return enabled;
}
REGISTER_HASHER(cuda_hasher);
#endif //WITH_CUDA

View file

@ -0,0 +1,126 @@
//
// Created by Haifa Bogdan Adnan on 18/09/2018.
//
#ifndef ARGON2_CUDA_HASHER_H
#define ARGON2_CUDA_HASHER_H
#if defined(WITH_CUDA)
// Bundle of buffer pointers kept per CUDA device.
// NOTE(review): the members are presumably filled by cuda_allocate() and
// released by cuda_free(), both defined outside this header - confirm there.
struct cuda_kernel_arguments {
// Up to six separately allocated memory chunks.
void *memory_chunk_0;
void *memory_chunk_1;
void *memory_chunk_2;
void *memory_chunk_3;
void *memory_chunk_4;
void *memory_chunk_5;
// NOTE(review): presumably the profile's block reference / index / segment
// tables, mirroring the OpenCL hasher's buffers of the same names - confirm.
uint32_t *refs;
uint32_t *idxs;
uint32_t *segments;
// Two-element arrays: presumably one entry per worker of the device (each
// enabled device gets two workers) - confirm in the kernel source.
uint32_t *preseed_memory[2];
uint32_t *seed_memory[2];
uint32_t *out_memory[2];
uint32_t *hash_memory[2];
uint32_t *host_seed_memory[2];
};
struct argon2profile_info {
argon2profile_info() {
threads = 0;
threads_per_chunk = 0;
}
uint32_t threads;
uint32_t threads_per_chunk;
Argon2Profile *profile;
};
struct cuda_device_info {
cuda_device_info() {
device_index = 0;
device_string = "";
free_mem_size = 0;
max_allocable_mem_size = 0;
error = cudaSuccess;
error_message = "";
}
int device_index;
int cuda_index;
string device_string;
uint64_t free_mem_size;
uint64_t max_allocable_mem_size;
argon2profile_info profile_info;
cuda_kernel_arguments arguments;
mutex device_lock;
cudaError_t error;
string error_message;
};
// State of one host worker driving a CUDA device.
struct cuda_gpumgmt_thread_data {
    // Acquires the device-wide mutex; compiled to a no-op when PARALLEL_CUDA
    // is defined.
    void lock() {
#if !defined(PARALLEL_CUDA)
        device->device_lock.lock();
#endif
    }

    // Releases the device-wide mutex; compiled to a no-op when PARALLEL_CUDA
    // is defined.
    void unlock() {
#if !defined(PARALLEL_CUDA)
        device->device_lock.unlock();
#endif
    }

    int thread_id;            // 0 or 1: which of the device's two workers this is
    cuda_device_info *device; // device this worker drives (not owned)
    Argon2 *argon2;           // hashing driver bound to this worker
    HashData hashData;        // input/output descriptor for the current batch
    void *device_data;        // opaque handle; buildThreadData() stores the CUDA stream here
    int threads;              // number of GPU threads handled by this worker
    int threads_idx;          // first GPU thread index handled by this worker
};
// Hasher implementation that runs Argon2 on CUDA devices.
// Typical sequence as implemented in the .cpp: initialize() enumerates the
// devices, configure() sizes/allocates them and builds the worker records,
// compute() runs one batch on a worker, cleanup() frees device resources.
class cuda_hasher : public Hasher {
public:
cuda_hasher();
~cuda_hasher();
// Picks the Argon2 profile and enumerates CUDA devices.
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
// Applies filters/intensity from `config` and prepares enabled devices.
virtual bool configure(xmrig::HasherConfig &config);
// Calls cuda_free() on every enumerated device.
virtual void cleanup();
// Runs one batch on worker `threadIdx`; returns the number of hashes produced.
virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
// Number of GPU threads assigned to worker `workerIdx`.
virtual size_t parallelism(int workerIdx);
// Number of devices enabled by configure().
virtual size_t deviceCount();
private:
// Probes a single device; the returned record is heap-allocated.
cuda_device_info *__get_device_info(int device_index);
// Computes the thread count for `device` and allocates its memory.
bool __setup_device_info(cuda_device_info *device, double intensity);
// Enumerates all devices, reporting the last failure through the out-params.
vector<cuda_device_info*> __query_cuda_devices(cudaError_t &error, string &error_message);
// Creates two worker records (stream + Argon2 driver) per enabled device.
bool buildThreadData();
// Every device that probed successfully.
vector<cuda_device_info*> __devices;
// Subset of __devices that passed filtering and setup.
vector<cuda_device_info*> __enabledDevices;
// Array of 2 * __enabledDevices.size() worker records.
cuda_gpumgmt_thread_data *__thread_data;
// Active Argon2 profile, obtained from getArgon2Profile().
Argon2Profile *m_profile;
};
// CUDA kernel exports
// Declared here, defined outside this header (presumably in the CUDA kernel
// translation unit - confirm).
// cuda_allocate / cuda_free: called from __setup_device_info() and cleanup();
// failures are reported through device->error.
extern void cuda_allocate(cuda_device_info *device, double chunks, size_t chunk_size);
extern void cuda_free(cuda_device_info *device);
// The three callbacks below are handed to the Argon2 driver by
// buildThreadData(), with the worker record passed as `user_data`.
extern bool cuda_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
extern void *cuda_kernel_filler(int threads, Argon2Profile *profile, void *user_data);
extern bool cuda_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data);
// end CUDA kernel exports
#endif //WITH_CUDA
#endif //ARGON2_CUDA_HASHER_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,888 @@
//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#include <crypto/Argon2_constants.h>
#include "../../../common/common.h"
#include "crypto/argon2_hasher/hash/Hasher.h"
#include "crypto/argon2_hasher/hash/argon2/Argon2.h"
#include "OpenCLHasher.h"
#include "OpenCLKernel.h"
#include "crypto/argon2_hasher/common/DLLExport.h"
#if defined(WITH_OPENCL)
// Fallback definitions for vendor-specific clGetDeviceInfo() query codes, used
// only when the OpenCL headers in use do not already provide them.
// AMD: marketing board name of the device.
#ifndef CL_DEVICE_BOARD_NAME_AMD
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
#endif
// AMD: device topology (read below into device_topology_amd).
#ifndef CL_DEVICE_TOPOLOGY_AMD
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
#endif
// NVIDIA: PCI bus number of the device.
#ifndef CL_DEVICE_PCI_BUS_ID_NV
#define CL_DEVICE_PCI_BUS_ID_NV 0x4008
#endif
// NVIDIA: PCI slot number of the device.
#ifndef CL_DEVICE_PCI_SLOT_ID_NV
#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009
#endif
// Result buffer for the CL_DEVICE_TOPOLOGY_AMD query: either viewed as raw
// words or as a PCIe location (bus / device / function).
// NOTE(review): the layout presumably mirrors cl_device_topology_amd from the
// AMD OpenCL extension headers - do not reorder or resize the members.
typedef union
{
struct { cl_uint type; cl_uint data[5]; } raw;
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} device_topology_amd;
// NOTE(review): presumably the work-group size used when launching the
// kernels; its use is outside the visible part of the file - confirm.
#define KERNEL_WORKGROUP_SIZE 32
/**
 * Creates an OpenCL hasher in its "not yet configured" state: labels are set,
 * while intensity, thread count and description start out empty.
 */
opencl_hasher::opencl_hasher() {
    // Identification shown to the user.
    m_type = "GPU";
    m_subType = "OPENCL";
    m_shortSubType = "OCL";

    // Nothing is configured yet.
    m_description = "";
    m_computingThreads = 0;
    m_intensity = 0;
}
// Destructor intentionally does nothing: the cleanup() call is commented out,
// so destroying the hasher does not release any device resources here.
// NOTE(review): the reason for disabling it is not visible in this file;
// confirm before re-enabling.
opencl_hasher::~opencl_hasher() {
// this->cleanup();
}
/**
 * Selects the Argon2 profile for the given algorithm/variant and enumerates
 * the OpenCL GPU devices.
 *
 * @return true when at least one device was found and the enumeration reported
 *         no error; otherwise false with the reason stored in m_description.
 */
bool opencl_hasher::initialize(xmrig::Algo algorithm, xmrig::Variant variant) {
    m_profile = getArgon2Profile(algorithm, variant);

    cl_int query_status = CL_SUCCESS;
    string query_failure;
    __devices = __query_opencl_devices(query_status, query_failure);

    if (query_status != CL_SUCCESS) {
        m_description = "No compatible GPU detected: " + query_failure;
        return false;
    }

    const bool any_device = !__devices.empty();
    if (!any_device) {
        m_description = "No compatible GPU detected.";
    }
    return any_device;
}
/**
 * Enumerates the GPU devices of every OpenCL platform and probes each one with
 * __get_device_info().
 *
 * @param error         receives the status of the last query or probe that
 *                      failed; left untouched when everything succeeds
 * @param error_message receives the matching human-readable message
 * @return heap-allocated records for every device that probed successfully,
 *         numbered consecutively through device_index; the caller owns them
 */
vector<opencl_device_info*> opencl_hasher::__query_opencl_devices(cl_int &error, string &error_message) {
    vector<opencl_device_info*> result;

    cl_uint platform_count = 0;
    clGetPlatformIDs(0, NULL, &platform_count);
    if (platform_count == 0) {
        return result;
    }

    // vector instead of malloc/free: no unchecked allocation, no manual free
    // on each exit path.
    vector<cl_platform_id> platforms(platform_count);
    cl_int err = clGetPlatformIDs(platform_count, platforms.data(), &platform_count);
    if (err != CL_SUCCESS) {
        error = err;
        error_message = "Error querying for opencl platforms.";
        return result;
    }
    // Never iterate past what was actually allocated, even if the runtime
    // reports a larger count on the second call.
    if (platform_count > platforms.size())
        platform_count = (cl_uint)platforms.size();

    int counter = 0;
    for (cl_uint i = 0; i < platform_count; i++) {
        cl_uint device_count = 0;
        clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &device_count);
        if (device_count == 0) {
            continue;
        }

        vector<cl_device_id> devices(device_count);
        err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, device_count, devices.data(), &device_count);
        if (err != CL_SUCCESS) {
            error = err;
            error_message = "Error querying for opencl devices.";
            continue;
        }
        if (device_count > devices.size())
            device_count = (cl_uint)devices.size();

        for (cl_uint j = 0; j < device_count; j++) {
            opencl_device_info *info = __get_device_info(platforms[i], devices[j]);
            if (info->error != CL_SUCCESS) {
                error = info->error;
                error_message = info->error_message;
                // The rejected record is not returned to anyone; free it
                // instead of leaking it.
                delete info;
            }
            else {
                info->device_index = counter;
                result.push_back(info);
                counter++;
            }
        }
    }

    return result;
}
opencl_device_info *opencl_hasher::__get_device_info(cl_platform_id platform, cl_device_id device) {
opencl_device_info *device_info = new opencl_device_info(CL_SUCCESS, "");
device_info->platform = platform;
device_info->device = device;
char *buffer;
size_t sz;
// device name
string device_vendor;
sz = 0;
clGetDeviceInfo(device, CL_DEVICE_VENDOR, 0, NULL, &sz);
buffer = (char *)malloc(sz + 1);
device_info->error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sz, buffer, &sz);
if(device_info->error != CL_SUCCESS) {
free(buffer);
device_info->error_message = "Error querying device vendor.";
return device_info;
}
else {
buffer[sz] = 0;
device_vendor = buffer;
free(buffer);
}
string device_name;
cl_device_info query_type = CL_DEVICE_NAME;
if(device_vendor.find("Advanced Micro Devices") != string::npos)
query_type = CL_DEVICE_BOARD_NAME_AMD;
sz = 0;
clGetDeviceInfo(device, query_type, 0, NULL, &sz);
buffer = (char *) malloc(sz + 1);
device_info->error = clGetDeviceInfo(device, query_type, sz, buffer, &sz);
if (device_info->error != CL_SUCCESS) {
free(buffer);
device_info->error_message = "Error querying device name.";
return device_info;
} else {
buffer[sz] = 0;
device_name = buffer;
free(buffer);
}
string device_version;
sz = 0;
clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, NULL, &sz);
buffer = (char *)malloc(sz + 1);
device_info->error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sz, buffer, &sz);
if(device_info->error != CL_SUCCESS) {
free(buffer);
device_info->error_message = "Error querying device version.";
return device_info;
}
else {
buffer[sz] = 0;
device_version = buffer;
free(buffer);
}
device_info->device_string = device_vendor + " - " + device_name/* + " : " + device_version*/;
device_info->error = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(device_info->max_mem_size), &(device_info->max_mem_size), NULL);
if(device_info->error != CL_SUCCESS) {
device_info->error_message = "Error querying device global memory size.";
return device_info;
}
device_info->error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(device_info->max_allocable_mem_size), &(device_info->max_allocable_mem_size), NULL);
if(device_info->error != CL_SUCCESS) {
device_info->error_message = "Error querying device max memory allocation.";
return device_info;
}
double mem_in_gb = device_info->max_mem_size / 1073741824.0;
stringstream ss;
ss << setprecision(2) << mem_in_gb;
device_info->device_string += (" (" + ss.str() + "GB)");
return device_info;
}
/**
 * Applies the user configuration to the devices found by initialize().
 *
 * For every device: assigns a global card index (continuing from the count
 * already stored in `config`), applies the optional GPU name filters, builds
 * and allocates the device through __setup_device_info(), records its PCI bus
 * id where the vendor extension provides one, and adds it to the enabled
 * list. A textual report is accumulated in m_description.
 *
 * @return true when at least one device is enabled with a non-zero thread
 *         count; false otherwise
 */
bool opencl_hasher::configure(xmrig::HasherConfig &config) {
    int index = config.getGPUCardsCount();
    double intensity = 0;
    int total_threads = 0;

    intensity = config.getAverageGPUIntensity();
    if (intensity == 0) {
        m_intensity = 0;
        m_description = "Status: DISABLED - by user.";
        return false;
    }

    bool cards_selected = false;
    intensity = 0;

    for (vector<opencl_device_info *>::iterator d = __devices.begin(); d != __devices.end(); ++d, ++index) {
        stringstream ss;
        ss << "["<< (index + 1) << "] " << (*d)->device_string;
        string device_description = ss.str();
        (*d)->device_index = index;
        (*d)->profile_info.profile = m_profile;

        if (config.gpuFilter().size() > 0) {
            // A device is kept when any filter text occurs in "[n] <name>".
            bool found = false;
            for (const xmrig::GPUFilter &fit : config.gpuFilter()) {
                if (device_description.find(fit.filter) != string::npos) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                (*d)->profile_info.threads = 0;
                ss << " - DISABLED" << endl;
                m_description += ss.str();
                continue;
            }
            else {
                cards_selected = true;
            }
        }
        else {
            cards_selected = true;
        }

        ss << endl;
        double device_intensity = config.getGPUIntensity((*d)->device_index);
        m_description += ss.str();

        if (!(__setup_device_info((*d), device_intensity))) {
            m_description += (*d)->error_message;
            m_description += "\n";
            continue;
        }

        DeviceInfo device;

        if ((*d)->device_string.find("Advanced Micro Devices") != string::npos) {
            device_topology_amd amdtopo;
            if (clGetDeviceInfo((*d)->device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL) == CL_SUCCESS) {
                char bus_id[50];
                // The PCIe fields are signed chars: without the unsigned cast
                // a bus number >= 0x80 is sign-extended and printed as
                // "ffffffXX". snprintf also bounds the write.
                snprintf(bus_id, sizeof(bus_id), "%02x:%02x.%x",
                         (unsigned int)(unsigned char)amdtopo.pcie.bus,
                         (unsigned int)(unsigned char)amdtopo.pcie.device,
                         (unsigned int)(unsigned char)amdtopo.pcie.function);
                device.bus_id = bus_id;
            }
        }
        else if ((*d)->device_string.find("NVIDIA") != string::npos) {
            cl_uint bus;
            cl_uint slot;

            if (clGetDeviceInfo ((*d)->device, CL_DEVICE_PCI_BUS_ID_NV, sizeof(bus), &bus, NULL) == CL_SUCCESS) {
                if (clGetDeviceInfo ((*d)->device, CL_DEVICE_PCI_SLOT_ID_NV, sizeof(slot), &slot, NULL) == CL_SUCCESS) {
                    char bus_id[50];
                    snprintf(bus_id, sizeof(bus_id), "%02x:%02x.0", bus, slot);
                    device.bus_id = bus_id;
                }
            }
        }

        device.name = (*d)->device_string;
        device.intensity = device_intensity;
        storeDeviceInfo((*d)->device_index, device);

        __enabledDevices.push_back(*d);

        total_threads += (*d)->profile_info.threads;
        intensity += device_intensity;
    }

    // Publish how many cards this hasher consumed from the global numbering.
    config.addGPUCardsCount(index - config.getGPUCardsCount());

    if (!cards_selected) {
        m_intensity = 0;
        m_description += "Status: DISABLED - no card enabled because of filtering.";
        return false;
    }

    if (total_threads == 0) {
        m_intensity = 0;
        m_description += "Status: DISABLED - not enough resources.";
        return false;
    }

    buildThreadData();

    m_intensity = intensity / __enabledDevices.size();
    m_computingThreads = __enabledDevices.size() * 2; // 2 computing threads for each device
    m_description += "Status: ENABLED - with " + to_string(total_threads) + " threads.";

    return true;
}
bool opencl_hasher::__setup_device_info(opencl_device_info *device, double intensity) {
cl_int error;
cl_context_properties properties[] = {
CL_CONTEXT_PLATFORM, (cl_context_properties) device->platform,
0};
device->context = clCreateContext(properties, 1, &(device->device), NULL, NULL, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error getting device context.";
return false;
}
device->queue = clCreateCommandQueue(device->context, device->device, CL_QUEUE_PROFILING_ENABLE, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error getting device command queue.";
return false;
}
const char *srcptr[] = {OpenCLKernel.c_str()};
size_t srcsize = OpenCLKernel.size();
device->program = clCreateProgramWithSource(device->context, 1, srcptr, &srcsize, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating opencl program for device.";
return false;
}
error = clBuildProgram(device->program, 1, &device->device, "", NULL, NULL);
if (error != CL_SUCCESS) {
size_t log_size;
clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char *log = (char *) malloc(log_size + 1);
clGetProgramBuildInfo(device->program, device->device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
log[log_size] = 0;
string build_log = log;
free(log);
device->error = error;
device->error_message = "Error building opencl program for device: " + build_log;
return false;
}
device->kernel_prehash = clCreateKernel(device->program, "prehash", &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating opencl prehash kernel for device.";
return false;
}
device->kernel_fill_blocks = clCreateKernel(device->program, "fill_blocks", &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating opencl main kernel for device.";
return false;
}
device->kernel_posthash = clCreateKernel(device->program, "posthash", &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating opencl posthash kernel for device.";
return false;
}
device->profile_info.threads_per_chunk = (uint32_t) (device->max_allocable_mem_size / device->profile_info.profile->memSize);
size_t chunk_size = device->profile_info.threads_per_chunk * device->profile_info.profile->memSize;
if (chunk_size == 0) {
device->error = -1;
device->error_message = "Not enough memory on GPU.";
return false;
}
uint64_t usable_memory = device->max_mem_size;
double chunks = (double) usable_memory / (double) chunk_size;
uint32_t max_threads = (uint32_t) (device->profile_info.threads_per_chunk * chunks);
if (max_threads == 0) {
device->error = -1;
device->error_message = "Not enough memory on GPU.";
return false;
}
device->profile_info.threads = (uint32_t) (max_threads * intensity / 100.0);
device->profile_info.threads = (device->profile_info.threads / 4) * 4; // make it divisible by 4
if (max_threads > 0 && device->profile_info.threads == 0 && intensity > 0)
device->profile_info.threads = 4;
double counter = (double) device->profile_info.threads / (double) device->profile_info.threads_per_chunk;
size_t allocated_mem_for_current_chunk = 0;
if (counter > 0) {
if (counter > 1) {
allocated_mem_for_current_chunk = chunk_size;
} else {
allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
}
counter -= 1;
} else {
allocated_mem_for_current_chunk = 1;
}
device->arguments.memory_chunk_0 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
if (counter > 0) {
if (counter > 1) {
allocated_mem_for_current_chunk = chunk_size;
} else {
allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
}
counter -= 1;
} else {
allocated_mem_for_current_chunk = 1;
}
device->arguments.memory_chunk_1 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
if (counter > 0) {
if (counter > 1) {
allocated_mem_for_current_chunk = chunk_size;
} else {
allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
}
counter -= 1;
} else {
allocated_mem_for_current_chunk = 1;
}
device->arguments.memory_chunk_2 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
if (counter > 0) {
if (counter > 1) {
allocated_mem_for_current_chunk = chunk_size;
} else {
allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
}
counter -= 1;
} else {
allocated_mem_for_current_chunk = 1;
}
device->arguments.memory_chunk_3 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
if (counter > 0) {
if (counter > 1) {
allocated_mem_for_current_chunk = chunk_size;
} else {
allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
}
counter -= 1;
} else {
allocated_mem_for_current_chunk = 1;
}
device->arguments.memory_chunk_4 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
if (counter > 0) {
if (counter > 1) {
allocated_mem_for_current_chunk = chunk_size;
} else {
allocated_mem_for_current_chunk = (size_t) ceil(chunk_size * counter);
}
counter -= 1;
} else {
allocated_mem_for_current_chunk = 1;
}
device->arguments.memory_chunk_5 = clCreateBuffer(device->context, CL_MEM_READ_WRITE,
allocated_mem_for_current_chunk, NULL, &error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
device->arguments.refs = clCreateBuffer(device->context, CL_MEM_READ_ONLY,
device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL,
&error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
if (device->profile_info.profile->succesiveIdxs == 1) {
device->arguments.idxs = NULL;
}
else {
device->arguments.idxs = clCreateBuffer(device->context, CL_MEM_READ_ONLY,
device->profile_info.profile->blockRefsSize * sizeof(uint32_t), NULL,
&error);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
}
device->arguments.segments = clCreateBuffer(device->context, CL_MEM_READ_ONLY, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
size_t preseed_memory_size = device->profile_info.profile->pwdLen * 4;
size_t seed_memory_size = device->profile_info.threads * (device->profile_info.profile->thrCost * 2) * ARGON2_BLOCK_SIZE;
size_t out_memory_size = device->profile_info.threads * ARGON2_BLOCK_SIZE;
size_t hash_memory_size = device->profile_info.threads * (xmrig::ARGON2_HASHLEN + 4);
device->arguments.preseed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
device->arguments.preseed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_ONLY, preseed_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
device->arguments.seed_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
device->arguments.seed_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, seed_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
device->arguments.out_memory[0] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
device->arguments.out_memory[1] = clCreateBuffer(device->context, CL_MEM_READ_WRITE, out_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
device->arguments.hash_memory[0] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
device->arguments.hash_memory[1] = clCreateBuffer(device->context, CL_MEM_WRITE_ONLY, hash_memory_size, NULL, &error);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error creating memory buffer.";
return false;
}
//optimise address sizes
uint32_t *refs = (uint32_t *)malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t));
for(int i=0;i<device->profile_info.profile->blockRefsSize;i++) {
refs[i] = device->profile_info.profile->blockRefs[i*3 + 1];
}
error=clEnqueueWriteBuffer(device->queue, device->arguments.refs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), refs, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error writing to gpu memory.";
return false;
}
free(refs);
if(device->profile_info.profile->succesiveIdxs == 0) {
uint32_t *idxs = (uint32_t *) malloc(device->profile_info.profile->blockRefsSize * sizeof(uint32_t));
for (int i = 0; i < device->profile_info.profile->blockRefsSize; i++) {
idxs[i] = device->profile_info.profile->blockRefs[i * 3];
if (device->profile_info.profile->blockRefs[i * 3 + 2] == 1) {
idxs[i] |= 0x80000000;
}
}
error=clEnqueueWriteBuffer(device->queue, device->arguments.idxs, CL_TRUE, 0, device->profile_info.profile->blockRefsSize * sizeof(uint32_t), idxs, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error writing to gpu memory.";
return false;
}
free(idxs);
}
error=clEnqueueWriteBuffer(device->queue, device->arguments.segments, CL_TRUE, 0, device->profile_info.profile->segCount * 3 * sizeof(uint32_t), device->profile_info.profile->segments, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error writing to gpu memory.";
return false;
}
clSetKernelArg(device->kernel_fill_blocks, 0, sizeof(device->arguments.memory_chunk_0), &device->arguments.memory_chunk_0);
clSetKernelArg(device->kernel_fill_blocks, 1, sizeof(device->arguments.memory_chunk_1), &device->arguments.memory_chunk_1);
clSetKernelArg(device->kernel_fill_blocks, 2, sizeof(device->arguments.memory_chunk_2), &device->arguments.memory_chunk_2);
clSetKernelArg(device->kernel_fill_blocks, 3, sizeof(device->arguments.memory_chunk_3), &device->arguments.memory_chunk_3);
clSetKernelArg(device->kernel_fill_blocks, 4, sizeof(device->arguments.memory_chunk_4), &device->arguments.memory_chunk_4);
clSetKernelArg(device->kernel_fill_blocks, 5, sizeof(device->arguments.memory_chunk_5), &device->arguments.memory_chunk_5);
clSetKernelArg(device->kernel_fill_blocks, 8, sizeof(device->arguments.refs), &device->arguments.refs);
if(device->profile_info.profile->succesiveIdxs == 0)
clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(device->arguments.idxs), &device->arguments.idxs);
else
clSetKernelArg(device->kernel_fill_blocks, 9, sizeof(cl_mem), NULL);
clSetKernelArg(device->kernel_fill_blocks, 10, sizeof(device->arguments.segments), &device->arguments.segments);
clSetKernelArg(device->kernel_fill_blocks, 11, sizeof(int32_t), &device->profile_info.profile->memSize);
clSetKernelArg(device->kernel_fill_blocks, 12, sizeof(int32_t), &device->profile_info.profile->thrCost);
clSetKernelArg(device->kernel_fill_blocks, 13, sizeof(int32_t), &device->profile_info.profile->segSize);
clSetKernelArg(device->kernel_fill_blocks, 14, sizeof(int32_t), &device->profile_info.profile->segCount);
clSetKernelArg(device->kernel_fill_blocks, 15, sizeof(int32_t), &device->profile_info.threads_per_chunk);
clSetKernelArg(device->kernel_prehash, 2, sizeof(int32_t), &device->profile_info.profile->memCost);
clSetKernelArg(device->kernel_prehash, 3, sizeof(int32_t), &device->profile_info.profile->thrCost);
int passes = device->profile_info.profile->segCount / (4 * device->profile_info.profile->thrCost);
clSetKernelArg(device->kernel_prehash, 4, sizeof(int32_t), &passes);
clSetKernelArg(device->kernel_prehash, 6, sizeof(int32_t), &device->profile_info.profile->saltLen);
return true;
}
bool opencl_kernel_prehasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data;
opencl_device_info *device = gpumgmt_thread->device;
cl_int error;
int sessions = max(profile->thrCost * 2, (uint32_t)16);
double hashes_per_block = sessions / (profile->thrCost * 2.0);
size_t total_work_items = sessions * 4 * ceil(threads / hashes_per_block);
size_t local_work_items = sessions * 4;
device->device_lock.lock();
error = clEnqueueWriteBuffer(device->queue, device->arguments.preseed_memory[gpumgmt_thread->thread_id],
CL_FALSE, 0, gpumgmt_thread->hashData.inSize, memory, 0, NULL, NULL);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error writing to gpu memory.";
device->device_lock.unlock();
return false;
}
int inSizeInInt = gpumgmt_thread->hashData.inSize / 4;
clSetKernelArg(device->kernel_prehash, 0, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]);
clSetKernelArg(device->kernel_prehash, 1, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]);
clSetKernelArg(device->kernel_prehash, 5, sizeof(int), &inSizeInInt);
clSetKernelArg(device->kernel_prehash, 7, sizeof(int), &threads);
clSetKernelArg(device->kernel_prehash, 8, sessions * sizeof(cl_ulong) * 76, NULL); // (preseed size is 16 ulongs = 128 bytes)
error=clEnqueueNDRangeKernel(device->queue, device->kernel_prehash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error running the kernel.";
device->device_lock.unlock();
return false;
}
return true;
}
void *opencl_kernel_filler(int threads, Argon2Profile *profile, void *user_data) {
opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data;
opencl_device_info *device = gpumgmt_thread->device;
cl_int error;
size_t total_work_items = threads * KERNEL_WORKGROUP_SIZE * profile->thrCost;
size_t local_work_items = KERNEL_WORKGROUP_SIZE * profile->thrCost;
size_t shared_mem = profile->thrCost * ARGON2_QWORDS_IN_BLOCK;
clSetKernelArg(device->kernel_fill_blocks, 6, sizeof(device->arguments.seed_memory[gpumgmt_thread->thread_id]), &device->arguments.seed_memory[gpumgmt_thread->thread_id]);
clSetKernelArg(device->kernel_fill_blocks, 7, sizeof(device->arguments.out_memory[gpumgmt_thread->thread_id]), &device->arguments.out_memory[gpumgmt_thread->thread_id]);
clSetKernelArg(device->kernel_fill_blocks, 16, sizeof(cl_ulong) * shared_mem, NULL);
error=clEnqueueNDRangeKernel(device->queue, device->kernel_fill_blocks, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error running the kernel.";
device->device_lock.unlock();
return NULL;
}
return (void *)1;
}
bool opencl_kernel_posthasher(void *memory, int threads, Argon2Profile *profile, void *user_data) {
opencl_gpumgmt_thread_data *gpumgmt_thread = (opencl_gpumgmt_thread_data *)user_data;
opencl_device_info *device = gpumgmt_thread->device;
cl_int error;
size_t total_work_items = threads * 4;
size_t local_work_items = 4;
clSetKernelArg(device->kernel_posthash, 0, sizeof(device->arguments.hash_memory[gpumgmt_thread->thread_id]), &device->arguments.hash_memory[gpumgmt_thread->thread_id]);
clSetKernelArg(device->kernel_posthash, 1, sizeof(device->arguments.out_memory[gpumgmt_thread->thread_id]), &device->arguments.out_memory[gpumgmt_thread->thread_id]);
clSetKernelArg(device->kernel_posthash, 2, sizeof(device->arguments.preseed_memory[gpumgmt_thread->thread_id]), &device->arguments.preseed_memory[gpumgmt_thread->thread_id]);
clSetKernelArg(device->kernel_posthash, 3, sizeof(cl_ulong) * 60, NULL);
error=clEnqueueNDRangeKernel(device->queue, device->kernel_posthash, 1, NULL, &total_work_items, &local_work_items, 0, NULL, NULL);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error running the kernel.";
device->device_lock.unlock();
return false;
}
error = clEnqueueReadBuffer(device->queue, device->arguments.hash_memory[gpumgmt_thread->thread_id], CL_FALSE, 0, threads * (xmrig::ARGON2_HASHLEN + 4), memory, 0, NULL, NULL);
if (error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error reading gpu memory.";
device->device_lock.unlock();
return false;
}
error=clFinish(device->queue);
if(error != CL_SUCCESS) {
device->error = error;
device->error_message = "Error flushing GPU queue.";
device->device_lock.unlock();
return false;
}
device->device_lock.unlock();
return true;
}
void opencl_hasher::buildThreadData() {
__thread_data = new opencl_gpumgmt_thread_data[__enabledDevices.size() * 2];
for(int i=0; i < __enabledDevices.size(); i++) {
opencl_device_info *device = __enabledDevices[i];
for(int threadId = 0; threadId < 2; threadId ++) {
opencl_gpumgmt_thread_data &thread_data = __thread_data[i * 2 + threadId];
thread_data.device = device;
thread_data.thread_id = threadId;
thread_data.argon2 = new Argon2(opencl_kernel_prehasher, opencl_kernel_filler, opencl_kernel_posthasher,
nullptr, &thread_data);
thread_data.argon2->setThreads(device->profile_info.threads);
thread_data.hashData.outSize = xmrig::ARGON2_HASHLEN + 4;
}
}
}
int opencl_hasher::compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output) {
opencl_gpumgmt_thread_data &threadData = __thread_data[threadIdx];
threadData.hashData.input = input;
threadData.hashData.inSize = size;
threadData.hashData.output = output;
int hashCount = threadData.argon2->generateHashes(*m_profile, threadData.hashData);
if(threadData.device->error != CL_SUCCESS) {
LOG("Error running kernel: (" + to_string(threadData.device->error) + ")" + threadData.device->error_message);
return 0;
}
uint32_t *nonce = ((uint32_t *)(((uint8_t*)threadData.hashData.input) + 39));
(*nonce) += threadData.device->profile_info.threads;
return hashCount;
}
void opencl_hasher::cleanup() {
vector<cl_platform_id> platforms;
for(vector<opencl_device_info *>::iterator it=__devices.begin(); it != __devices.end(); it++) {
if ((*it)->profile_info.threads != 0) {
clReleaseMemObject((*it)->arguments.memory_chunk_0);
clReleaseMemObject((*it)->arguments.memory_chunk_1);
clReleaseMemObject((*it)->arguments.memory_chunk_2);
clReleaseMemObject((*it)->arguments.memory_chunk_3);
clReleaseMemObject((*it)->arguments.memory_chunk_4);
clReleaseMemObject((*it)->arguments.memory_chunk_5);
clReleaseMemObject((*it)->arguments.refs);
clReleaseMemObject((*it)->arguments.segments);
clReleaseMemObject((*it)->arguments.preseed_memory[0]);
clReleaseMemObject((*it)->arguments.preseed_memory[1]);
clReleaseMemObject((*it)->arguments.seed_memory[0]);
clReleaseMemObject((*it)->arguments.seed_memory[1]);
clReleaseMemObject((*it)->arguments.out_memory[0]);
clReleaseMemObject((*it)->arguments.out_memory[1]);
clReleaseMemObject((*it)->arguments.hash_memory[0]);
clReleaseMemObject((*it)->arguments.hash_memory[1]);
clReleaseKernel((*it)->kernel_prehash);
clReleaseKernel((*it)->kernel_fill_blocks);
clReleaseKernel((*it)->kernel_posthash);
clReleaseProgram((*it)->program);
clReleaseCommandQueue((*it)->queue);
clReleaseContext((*it)->context);
}
clReleaseDevice((*it)->device);
delete (*it);
}
__devices.clear();
}
/**
 * Returns the batch size (profile_info.threads) of the device that serves the given worker.
 *
 * @param workerIdx worker (computing thread) index
 * @return the device's thread count, or 0 when workerIdx does not map to an enabled device
 */
size_t opencl_hasher::parallelism(int workerIdx) {
    // there are 2 computing threads per device, so divide by 2 to get device index
    workerIdx /= 2;
    // '>=' rather than '>': an index equal to size() is already one past the last device.
    if (workerIdx < 0 || (size_t)workerIdx >= __enabledDevices.size())
        return 0;
    return __enabledDevices[workerIdx]->profile_info.threads;
}
/** Returns how many devices are currently in the enabled-device list. */
size_t opencl_hasher::deviceCount() {
    const size_t enabledCount = __enabledDevices.size();
    return enabledCount;
}
// Hands opencl_hasher to the REGISTER_HASHER macro, which is defined outside this file.
// NOTE(review): presumably this adds the backend to the list of available hashers - confirm.
REGISTER_HASHER(opencl_hasher);
#endif // WITH_OPENCL

View file

@ -0,0 +1,110 @@
//
// Created by Haifa Bogdan Adnan on 03/08/2018.
//
#ifndef ARGON2_OPENCL_HASHER_H
#define ARGON2_OPENCL_HASHER_H
#if defined(WITH_OPENCL)
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#if defined(__APPLE__) || defined(__MACOSX)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif // !__APPLE__
// Device-side buffer handles used by the three kernels of one OpenCL device.
// The two-element arrays hold one buffer per computing thread and are indexed by
// opencl_gpumgmt_thread_data::thread_id.
struct opencl_kernel_arguments {
// memory_chunk_0..5 are bound to arguments 0-5 of the fill_blocks kernel.
cl_mem memory_chunk_0;
cl_mem memory_chunk_1;
cl_mem memory_chunk_2;
cl_mem memory_chunk_3;
cl_mem memory_chunk_4;
cl_mem memory_chunk_5;
// Filled from profile->blockRefs[i*3 + 1]; fill_blocks argument 8.
cl_mem refs;
// Only uploaded when profile->succesiveIdxs == 0: blockRefs[i*3], with bit 31 set when
// blockRefs[i*3 + 2] == 1. fill_blocks argument 9 (NULL is bound otherwise).
cl_mem idxs;
// Copy of profile->segments (segCount * 3 uint32 values); fill_blocks argument 10.
cl_mem segments;
// Job input uploaded by the prehasher; prehash argument 0 and posthash argument 2.
cl_mem preseed_memory[2];
// prehash argument 1 and fill_blocks argument 6.
cl_mem seed_memory[2];
// fill_blocks argument 7 and posthash argument 1.
cl_mem out_memory[2];
// posthash argument 0; read back to the host by the posthasher.
cl_mem hash_memory[2];
};
struct argon2profile_info {
argon2profile_info() {
threads = 0;
threads_per_chunk = 0;
}
uint32_t threads;
uint32_t threads_per_chunk;
Argon2Profile *profile;
};
// Everything the hasher tracks for one OpenCL device: API handles, kernel buffers,
// hashing configuration and the last error recorded for it.
struct opencl_device_info {
    // Creates a record carrying the given error state. All handles, the buffer set and the
    // numeric fields are initialised to null / zero so that code inspecting or releasing them
    // later (e.g. cleanup) never sees indeterminate values.
    opencl_device_info(cl_int err, const string &err_msg)
        : platform(nullptr),
          device(nullptr),
          context(nullptr),
          queue(nullptr),
          program(nullptr),
          kernel_prehash(nullptr),
          kernel_fill_blocks(nullptr),
          kernel_posthash(nullptr),
          device_index(0),
          arguments(),
          max_mem_size(0),
          max_allocable_mem_size(0),
          error(err),
          error_message(err_msg) {}

    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_program program;

    // The three pipeline kernels: pre-hash, block filling, post-hash.
    cl_kernel kernel_prehash;
    cl_kernel kernel_fill_blocks;
    cl_kernel kernel_posthash;

    int device_index;

    // Buffers bound to the kernels above.
    opencl_kernel_arguments arguments;
    // Batch size and profile chosen for this device.
    argon2profile_info profile_info;

    string device_string;
    uint64_t max_mem_size;
    uint64_t max_allocable_mem_size;

    // Last OpenCL error recorded for this device and its description; CL_SUCCESS when fine.
    cl_int error;
    string error_message;

    // Serialises the two computing threads of a device: taken in opencl_kernel_prehasher and
    // released in opencl_kernel_posthasher (or by the stage that fails).
    mutex device_lock;
};
// State of one computing thread. opencl_hasher::buildThreadData() creates two of these per
// enabled device and passes each one to its Argon2 instance as callback user data.
struct opencl_gpumgmt_thread_data {
// 0 or 1; selects this worker's element in the per-worker buffer pairs of the device.
int thread_id;
// Device this worker submits its kernels to.
opencl_device_info *device;
// Argon2 driver wired to the OpenCL prehash / fill / posthash callbacks.
Argon2 *argon2;
// Input, input size and output of the current compute() call; outSize is set once in
// buildThreadData().
HashData hashData;
};
// OpenCL implementation of the Hasher interface. Each enabled device is driven by two
// computing threads (see buildThreadData / parallelism).
class opencl_hasher : public Hasher {
public:
opencl_hasher();
~opencl_hasher();
// NOTE(review): initialize/configure are implemented outside the visible part of the
// sources; their exact contract is defined by Hasher.
virtual bool initialize(xmrig::Algo algorithm, xmrig::Variant variant);
virtual bool configure(xmrig::HasherConfig &config);
// Releases the OpenCL objects of all known devices and deletes the device records.
virtual void cleanup();
// Runs one batch on worker threadIdx. Returns the hash count reported by Argon2, or 0 if the
// device has an error recorded. Advances the 32-bit nonce at byte offset 39 of input.
virtual int compute(int threadIdx, uint8_t *input, size_t size, uint8_t *output);
// Batch size (profile_info.threads) of the device that worker workerIdx belongs to;
// two workers share one device.
virtual size_t parallelism(int workerIdx);
// Number of enabled devices.
virtual size_t deviceCount();
private:
opencl_device_info *__get_device_info(cl_platform_id platform, cl_device_id device);
bool __setup_device_info(opencl_device_info *device, double intensity);
vector<opencl_device_info*> __query_opencl_devices(cl_int &error, string &error_message);
// Allocates __thread_data: two worker records per enabled device.
void buildThreadData();
// All device records; deleted in cleanup().
vector<opencl_device_info*> __devices;
// Devices used for hashing. NOTE(review): presumably a subset of __devices - confirm.
vector<opencl_device_info*> __enabledDevices;
// Array of 2 * __enabledDevices.size() worker records, allocated by buildThreadData().
opencl_gpumgmt_thread_data *__thread_data;
// Argon2 profile passed to Argon2::generateHashes in compute().
Argon2Profile *m_profile;
};
#endif //WITH_OPENCL
#endif //ARGON2_OPENCL_HASHER_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,10 @@
//
// Created by Haifa Bogdan Adnan on 06/08/2018.
//
#ifndef ARGON2_OPENCL_KERNEL_H
#define ARGON2_OPENCL_KERNEL_H

// Self-contained: include <string> and qualify the type so this header no longer depends on
// an earlier include having pulled in `using namespace std`.
#include <string>

// Declared here, defined in another translation unit.
// NOTE(review): presumably holds the OpenCL kernel source text - confirm against the definition.
extern std::string OpenCLKernel;

#endif //ARGON2_OPENCL_KERNEL_H