xmrig v6.3.0 based release
This commit is contained in:
commit
a334a9a956
25 changed files with 5022 additions and 2262 deletions
|
@ -62,6 +62,7 @@ public:
|
|||
FLAG_SSSE3,
|
||||
FLAG_XOP,
|
||||
FLAG_POPCNT,
|
||||
FLAG_CAT_L3,
|
||||
FLAG_MAX
|
||||
};
|
||||
|
||||
|
@ -79,6 +80,7 @@ public:
|
|||
virtual bool hasAVX2() const = 0;
|
||||
virtual bool hasBMI2() const = 0;
|
||||
virtual bool hasOneGbPages() const = 0;
|
||||
virtual bool hasCatL3() const = 0;
|
||||
virtual const char *backend() const = 0;
|
||||
virtual const char *brand() const = 0;
|
||||
virtual CpuThreads threads(const Algorithm &algorithm, uint32_t limit) const = 0;
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
namespace xmrig {
|
||||
|
||||
|
||||
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt" };
|
||||
// Printable names for the CPU feature flags. The array is sized by ICpuInfo::FLAG_MAX, so it
// needs one entry per flag; "cat_l3" is the entry that pairs with FLAG_CAT_L3.
// NOTE(review): presumably indexed by the ICpuInfo flag enum value, so the order here must follow the enum — confirm.
static const std::array<const char *, ICpuInfo::FLAG_MAX> flagNames = { "aes", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "xop", "popcnt", "cat_l3" };
|
||||
// Printable names for the MSR mod variants; the array is sized by ICpuInfo::MSR_MOD_MAX.
// NOTE(review): presumably indexed by the ICpuInfo MSR mod enum value — confirm the order against that enum.
static const std::array<const char *, ICpuInfo::MSR_MOD_MAX> msrNames = { "none", "ryzen", "intel", "custom" };
|
||||
|
||||
|
||||
|
@ -66,7 +66,7 @@ static inline void cpuid(uint32_t level, int32_t output[4])
|
|||
memset(output, 0, sizeof(int32_t) * 4);
|
||||
|
||||
# ifdef _MSC_VER
|
||||
__cpuid(output, static_cast<int>(level));
|
||||
__cpuidex(output, static_cast<int>(level), 0);
|
||||
# else
|
||||
__cpuid_count(level, 0, output[0], output[1], output[2], output[3]);
|
||||
# endif
|
||||
|
@ -143,6 +143,7 @@ static inline bool has_sse2() { return has_feature(PROCESSOR_INFO,
|
|||
// SSSE3 detection: tests bit 9 of ECX in the PROCESSOR_INFO CPUID leaf.
static inline bool has_ssse3()
{
    const int ssse3Mask = 1 << 9;

    return has_feature(PROCESSOR_INFO, ECX_Reg, ssse3Mask);
}
|
||||
// XOP detection: tests bit 11 of ECX in CPUID leaf 0x80000001.
static inline bool has_xop()
{
    const int xopMask = 1 << 11;

    return has_feature(0x80000001, ECX_Reg, xopMask);
}
|
||||
// POPCNT detection: tests bit 23 of ECX in the PROCESSOR_INFO CPUID leaf.
static inline bool has_popcnt()
{
    const int popcntMask = 1 << 23;

    return has_feature(PROCESSOR_INFO, ECX_Reg, popcntMask);
}
|
||||
// L3 cache allocation detection. Two CPUID bits must both be set: bit 15 of EBX in the
// EXTENDED_FEATURES leaf, and bit 1 of EBX in leaf 0x10. The second leaf is only queried
// when the first check passes.
// NOTE(review): presumably these are the Intel RDT allocation and L3 CAT enumeration bits — confirm against the Intel SDM.
static inline bool has_cat_l3()
{
    if (!has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 15)) {
        return false;
    }

    return has_feature(0x10, EBX_Reg, 1 << 1);
}
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
@ -178,6 +179,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
|||
m_flags.set(FLAG_SSSE3, has_ssse3());
|
||||
m_flags.set(FLAG_XOP, has_xop());
|
||||
m_flags.set(FLAG_POPCNT, has_popcnt());
|
||||
m_flags.set(FLAG_CAT_L3, has_cat_l3());
|
||||
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
if (hasAES()) {
|
||||
|
|
|
@ -51,6 +51,7 @@ protected:
|
|||
// Returns the state of FLAG_AVX2 as reported by has().
inline bool hasAVX2() const override
{
    return has(FLAG_AVX2);
}
|
||||
// Returns the state of FLAG_BMI2 as reported by has().
inline bool hasBMI2() const override
{
    return has(FLAG_BMI2);
}
|
||||
// 1 GB page support is derived from the "pdpe1gb" CPU flag (FLAG_PDPE1GB).
inline bool hasOneGbPages() const override
{
    return has(FLAG_PDPE1GB);
}
|
||||
// Returns the state of FLAG_CAT_L3 as reported by has().
inline bool hasCatL3() const override
{
    return has(FLAG_CAT_L3);
}
|
||||
// Accessor for the stored brand string (m_brand).
inline const char *brand() const override
{
    return m_brand;
}
|
||||
// Accessor for the stored MSR mod value (m_msrMod).
inline MsrMod msrMod() const override
{
    return m_msrMod;
}
|
||||
// This implementation does not provide a core count; it always reports 0.
inline size_t cores() const override
{
    return 0;
}
|
||||
|
|
|
@ -71,7 +71,7 @@ inline ulong getIdx()
|
|||
|
||||
|
||||
__attribute__((reqd_work_group_size(8, 8, 1)))
|
||||
__kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||
__kernel void cn0(__global ulong *input, int inlen, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||
{
|
||||
uint ExpandedKey1[40];
|
||||
__local uint AES0[256], AES1[256], AES2[256], AES3[256];
|
||||
|
@ -109,34 +109,25 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
|
|||
if (get_local_id(1) == 0) {
|
||||
__local ulong* State = State_buf + get_local_id(0) * 25;
|
||||
|
||||
((__local ulong8 *)State)[0] = vload8(0, input);
|
||||
State[8] = input[8];
|
||||
State[9] = input[9];
|
||||
State[10] = input[10];
|
||||
State[11] = input[11];
|
||||
State[12] = input[12];
|
||||
State[13] = input[13];
|
||||
State[14] = input[14];
|
||||
State[15] = input[15];
|
||||
|
||||
((__local uint *)State)[9] &= 0x00FFFFFFU;
|
||||
((__local uint *)State)[9] |= (((uint)get_global_id(0)) & 0xFF) << 24;
|
||||
((__local uint *)State)[10] &= 0xFF000000U;
|
||||
/* explicit cast to `uint` is required because some OpenCL implementations (e.g. NVIDIA)
|
||||
* handle get_global_id and get_global_offset as signed long long int and add
|
||||
* 0xFFFFFFFF... to `get_global_id` if we set on host side a 32bit offset where the first bit is `1`
|
||||
* (even if it is correctly cast to unsigned on the host)
|
||||
*/
|
||||
((__local uint *)State)[10] |= (((uint)get_global_id(0) >> 8));
|
||||
|
||||
// Last bit of padding
|
||||
State[16] = 0x8000000000000000UL;
|
||||
|
||||
for (int i = 17; i < 25; ++i) {
|
||||
State[i] = 0x00UL;
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 25; ++i) {
|
||||
State[i] = 0;
|
||||
}
|
||||
|
||||
keccakf1600_2(State);
|
||||
// Input length must be a multiple of 136 and padded on the host side
|
||||
for (int i = 0; inlen > 0; i += 17, inlen -= 136) {
|
||||
#pragma unroll
|
||||
for (int j = 0; j < 17; ++j) {
|
||||
State[j] ^= input[i + j];
|
||||
}
|
||||
if (i == 0) {
|
||||
((__local uint *)State)[9] &= 0x00FFFFFFU;
|
||||
((__local uint *)State)[9] |= (((uint)get_global_id(0)) & 0xFF) << 24;
|
||||
((__local uint *)State)[10] &= 0xFF000000U;
|
||||
((__local uint *)State)[10] |= (((uint)get_global_id(0) >> 8));
|
||||
}
|
||||
keccakf1600_2(State);
|
||||
}
|
||||
|
||||
#pragma unroll 1
|
||||
for (int i = 0; i < 25; ++i) {
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -38,10 +38,11 @@ void xmrig::Cn0Kernel::enqueue(cl_command_queue queue, uint32_t nonce, size_t th
|
|||
|
||||
|
||||
// __kernel void cn0(__global ulong *input, int inlen, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
|
||||
void xmrig::Cn0Kernel::setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads)
|
||||
void xmrig::Cn0Kernel::setArgs(cl_mem input, int inlen, cl_mem scratchpads, cl_mem states, uint32_t threads)
|
||||
{
|
||||
setArg(0, sizeof(cl_mem), &input);
|
||||
setArg(1, sizeof(cl_mem), &scratchpads);
|
||||
setArg(2, sizeof(cl_mem), &states);
|
||||
setArg(3, sizeof(uint32_t), &threads);
|
||||
setArg(1, sizeof(int), &inlen);
|
||||
setArg(2, sizeof(cl_mem), &scratchpads);
|
||||
setArg(3, sizeof(cl_mem), &states);
|
||||
setArg(4, sizeof(uint32_t), &threads);
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ public:
|
|||
inline Cn0Kernel(cl_program program) : OclKernel(program, "cn0") {}
|
||||
|
||||
void enqueue(cl_command_queue queue, uint32_t nonce, size_t threads);
|
||||
void setArgs(cl_mem input, cl_mem scratchpads, cl_mem states, uint32_t threads);
|
||||
void setArgs(cl_mem input, int inlen, cl_mem scratchpads, cl_mem states, uint32_t threads);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -122,10 +122,16 @@ void xmrig::OclCnRunner::set(const Job &job, uint8_t *blob)
|
|||
throw std::length_error("job size too big");
|
||||
}
|
||||
|
||||
blob[job.size()] = 0x01;
|
||||
memset(blob + job.size() + 1, 0, Job::kMaxBlobSize - job.size() - 1);
|
||||
const int inlen = static_cast<int>(job.size() + 136 - (job.size() % 136));
|
||||
|
||||
enqueueWriteBuffer(m_input, CL_TRUE, 0, Job::kMaxBlobSize, blob);
|
||||
blob[job.size()] = 0x01;
|
||||
memset(blob + job.size() + 1, 0, inlen - job.size() - 1);
|
||||
|
||||
blob[inlen - 1] |= 0x80;
|
||||
|
||||
enqueueWriteBuffer(m_input, CL_TRUE, 0, inlen, blob);
|
||||
|
||||
m_cn0->setArg(1, sizeof(int), &inlen);
|
||||
|
||||
if (m_algorithm == Algorithm::CN_R && m_height != job.height()) {
|
||||
delete m_cn1;
|
||||
|
@ -152,7 +158,7 @@ void xmrig::OclCnRunner::build()
|
|||
OclBaseRunner::build();
|
||||
|
||||
m_cn0 = new Cn0Kernel(m_program);
|
||||
m_cn0->setArgs(m_input, m_scratchpads, m_states, m_intensity);
|
||||
m_cn0->setArgs(m_input, 0, m_scratchpads, m_states, m_intensity);
|
||||
|
||||
m_cn2 = new Cn2Kernel(m_program);
|
||||
m_cn2->setArgs(m_scratchpads, m_states, m_branches, m_intensity);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue