diff --git a/CMakeLists.txt b/CMakeLists.txt
index b779b74d..1becac5c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,7 @@ option(WITH_SUMO "CryptoNight-Heavy support" ON)
option(WITH_HTTPD "HTTP REST API" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON)
+option(WITH_ASM "Enable ASM PoW implementations" ON)
option(BUILD_STATIC "Build static binary" OFF)
include (CheckIncludeFile)
@@ -20,6 +21,7 @@ set(HEADERS
src/common/config/ConfigLoader.h
src/common/config/ConfigWatcher.h
src/common/Console.h
+ src/common/cpu/Cpu.h
src/common/crypto/Algorithm.h
src/common/crypto/keccak.h
src/common/interfaces/IClientListener.h
@@ -27,6 +29,7 @@ set(HEADERS
src/common/interfaces/IConfigCreator.h
src/common/interfaces/IConsoleListener.h
src/common/interfaces/IControllerListener.h
+ src/common/interfaces/ICpuInfo.h
src/common/interfaces/ILogBackend.h
src/common/interfaces/IStrategy.h
src/common/interfaces/IStrategyListener.h
@@ -49,7 +52,6 @@ set(HEADERS
src/common/xmrig.h
src/core/ConfigLoader_platform.h
src/core/Controller.h
- src/Cpu.h
src/interfaces/IJobResultListener.h
src/interfaces/IThread.h
src/interfaces/IWorker.h
@@ -135,7 +137,6 @@ if (WIN32)
res/app.rc
src/App_win.cpp
src/common/Platform_win.cpp
- src/Cpu_win.cpp
src/Mem_win.cpp
)
@@ -145,14 +146,12 @@ elseif (APPLE)
set(SOURCES_OS
src/App_unix.cpp
src/common/Platform_mac.cpp
- src/Cpu_mac.cpp
src/Mem_unix.cpp
)
else()
set(SOURCES_OS
src/App_unix.cpp
src/common/Platform_unix.cpp
- src/Cpu_unix.cpp
src/Mem_unix.cpp
)
@@ -184,18 +183,20 @@ if (WITH_LIBCPUID)
include_directories(src/3rdparty/libcpuid)
set(CPUID_LIB cpuid)
- set(SOURCES_CPUID src/Cpu.cpp)
+ set(SOURCES_CPUID src/core/cpu/AdvancedCpuInfo.h src/core/cpu/AdvancedCpuInfo.cpp src/core/cpu/Cpu.cpp)
else()
add_definitions(/DXMRIG_NO_LIBCPUID)
+ set(SOURCES_CPUID src/common/cpu/BasicCpuInfo.h src/common/cpu/Cpu.cpp)
if (XMRIG_ARM)
- set(SOURCES_CPUID src/Cpu_arm.cpp)
+ set(SOURCES_CPUID ${SOURCES_CPUID} src/common/cpu/BasicCpuInfo_arm.cpp)
else()
- set(SOURCES_CPUID src/Cpu_stub.cpp)
+ set(SOURCES_CPUID ${SOURCES_CPUID} src/common/cpu/BasicCpuInfo.cpp)
endif()
endif()
include(cmake/OpenSSL.cmake)
+include(cmake/asm.cmake)
CHECK_INCLUDE_FILE (syslog.h HAVE_SYSLOG_H)
if (HAVE_SYSLOG_H)
@@ -254,5 +255,5 @@ if (WITH_DEBUG_LOG)
add_definitions(/DAPP_DEBUG)
endif()
-add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES})
-target_link_libraries(${PROJECT_NAME} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
+add_executable(${PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${HTTPD_SOURCES} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
+target_link_libraries(${PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${MHD_LIBRARY} ${EXTRA_LIBS} ${CPUID_LIB})
diff --git a/cmake/asm.cmake b/cmake/asm.cmake
new file mode 100644
index 00000000..cb50f0d9
--- /dev/null
+++ b/cmake/asm.cmake
@@ -0,0 +1,40 @@
+# Optional hand-written assembly implementation of the PoW main loop.
+#
+# Reads: WITH_ASM, XMRIG_ARM, CMAKE_SIZEOF_VOID_P, CMAKE_C_COMPILER_ID, WIN32.
+# Sets:  XMRIG_ASM_LIBRARY - static library built from the assembly source,
+#                            or "" when assembly is disabled.
+#        XMRIG_ASM_SOURCES - extra sources for the main executable, or ""
+#                            when assembly is disabled.
+# Assembly is enabled only when WITH_ASM is ON on a non-ARM target with
+# 8-byte pointers; otherwise XMRIG_NO_ASM is defined.
+if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
+    set(XMRIG_ASM_LIBRARY "xmrig-asm")
+
+    if (CMAKE_C_COMPILER_ID MATCHES MSVC)
+        enable_language(ASM_MASM)
+        set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.asm")
+        # The LANGUAGE property name is required for this to take effect.
+        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY LANGUAGE ASM_MASM)
+    else()
+        enable_language(ASM)
+
+        if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU)
+            # GCC targeting Windows uses its own variant of the source.
+            set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop_win.S")
+        else()
+            set(XMRIG_ASM_FILE "src/crypto/asm/cnv2_main_loop.S")
+        endif()
+
+        # Build the .S file with the ASM language enabled above, not as C.
+        set_property(SOURCE ${XMRIG_ASM_FILE} PROPERTY LANGUAGE ASM)
+    endif()
+
+    add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILE})
+    set(XMRIG_ASM_SOURCES src/crypto/Asm.h src/crypto/Asm.cpp)
+    # The library holds only the assembly source, so pin its linker language.
+    set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
+else()
+    set(XMRIG_ASM_SOURCES "")
+    set(XMRIG_ASM_LIBRARY "")
+    add_definitions(/DXMRIG_NO_ASM)
+endif()
diff --git a/src/3rdparty/libcpuid/asm-bits.c b/src/3rdparty/libcpuid/asm-bits.c
index b8e32284..bfabd404 100644
--- a/src/3rdparty/libcpuid/asm-bits.c
+++ b/src/3rdparty/libcpuid/asm-bits.c
@@ -1,825 +1,836 @@
-/*
- * Copyright 2008 Veselin Georgiev,
- * anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "libcpuid.h"
-#include "asm-bits.h"
-
-int cpuid_exists_by_eflags(void)
-{
-#if defined(PLATFORM_X64)
- return 1; /* CPUID is always present on the x86_64 */
-#elif defined(PLATFORM_X86)
-# if defined(COMPILER_GCC)
- int result;
- __asm __volatile(
- " pushfl\n"
- " pop %%eax\n"
- " mov %%eax, %%ecx\n"
- " xor $0x200000, %%eax\n"
- " push %%eax\n"
- " popfl\n"
- " pushfl\n"
- " pop %%eax\n"
- " xor %%ecx, %%eax\n"
- " mov %%eax, %0\n"
- " push %%ecx\n"
- " popfl\n"
- : "=m"(result)
- : :"eax", "ecx", "memory");
- return (result != 0);
-# elif defined(COMPILER_MICROSOFT)
- int result;
- __asm {
- pushfd
- pop eax
- mov ecx, eax
- xor eax, 0x200000
- push eax
- popfd
- pushfd
- pop eax
- xor eax, ecx
- mov result, eax
- push ecx
- popfd
- };
- return (result != 0);
-# else
- return 0;
-# endif /* COMPILER_MICROSOFT */
-#else
- return 0;
-#endif /* PLATFORM_X86 */
-}
-
-#ifdef INLINE_ASM_SUPPORTED
-/*
- * with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
- * are implemented in separate .asm files. Otherwise, use inline assembly
- */
-void exec_cpuid(uint32_t *regs)
-{
-#ifdef COMPILER_GCC
-# ifdef PLATFORM_X64
- __asm __volatile(
- " mov %0, %%rdi\n"
-
- " push %%rbx\n"
- " push %%rcx\n"
- " push %%rdx\n"
-
- " mov (%%rdi), %%eax\n"
- " mov 4(%%rdi), %%ebx\n"
- " mov 8(%%rdi), %%ecx\n"
- " mov 12(%%rdi), %%edx\n"
-
- " cpuid\n"
-
- " movl %%eax, (%%rdi)\n"
- " movl %%ebx, 4(%%rdi)\n"
- " movl %%ecx, 8(%%rdi)\n"
- " movl %%edx, 12(%%rdi)\n"
- " pop %%rdx\n"
- " pop %%rcx\n"
- " pop %%rbx\n"
- :
- :"m"(regs)
- :"memory", "eax", "rdi"
- );
-# else
- __asm __volatile(
- " mov %0, %%edi\n"
-
- " push %%ebx\n"
- " push %%ecx\n"
- " push %%edx\n"
-
- " mov (%%edi), %%eax\n"
- " mov 4(%%edi), %%ebx\n"
- " mov 8(%%edi), %%ecx\n"
- " mov 12(%%edi), %%edx\n"
-
- " cpuid\n"
-
- " mov %%eax, (%%edi)\n"
- " mov %%ebx, 4(%%edi)\n"
- " mov %%ecx, 8(%%edi)\n"
- " mov %%edx, 12(%%edi)\n"
- " pop %%edx\n"
- " pop %%ecx\n"
- " pop %%ebx\n"
- :
- :"m"(regs)
- :"memory", "eax", "edi"
- );
-# endif /* COMPILER_GCC */
-#else
-# ifdef COMPILER_MICROSOFT
- __asm {
- push ebx
- push ecx
- push edx
- push edi
- mov edi, regs
-
- mov eax, [edi]
- mov ebx, [edi+4]
- mov ecx, [edi+8]
- mov edx, [edi+12]
-
- cpuid
-
- mov [edi], eax
- mov [edi+4], ebx
- mov [edi+8], ecx
- mov [edi+12], edx
-
- pop edi
- pop edx
- pop ecx
- pop ebx
- }
-# else
-# error "Unsupported compiler"
-# endif /* COMPILER_MICROSOFT */
-#endif
-}
-#endif /* INLINE_ASSEMBLY_SUPPORTED */
-
-#ifdef INLINE_ASM_SUPPORTED
-void cpu_rdtsc(uint64_t* result)
-{
- uint32_t low_part, hi_part;
-#ifdef COMPILER_GCC
- __asm __volatile (
- " rdtsc\n"
- " mov %%eax, %0\n"
- " mov %%edx, %1\n"
- :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx"
- );
-#else
-# ifdef COMPILER_MICROSOFT
- __asm {
- rdtsc
- mov low_part, eax
- mov hi_part, edx
- };
-# else
-# error "Unsupported compiler"
-# endif /* COMPILER_MICROSOFT */
-#endif /* COMPILER_GCC */
- *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32);
-}
-#endif /* INLINE_ASM_SUPPORTED */
-
-#ifdef INLINE_ASM_SUPPORTED
-void busy_sse_loop(int cycles)
-{
-#ifdef COMPILER_GCC
-#ifndef __APPLE__
-# define XALIGN ".balign 16\n"
-#else
-# define XALIGN ".align 4\n"
-#endif
- __asm __volatile (
- " xorps %%xmm0, %%xmm0\n"
- " xorps %%xmm1, %%xmm1\n"
- " xorps %%xmm2, %%xmm2\n"
- " xorps %%xmm3, %%xmm3\n"
- " xorps %%xmm4, %%xmm4\n"
- " xorps %%xmm5, %%xmm5\n"
- " xorps %%xmm6, %%xmm6\n"
- " xorps %%xmm7, %%xmm7\n"
- XALIGN
- /* ".bsLoop:\n" */
- "1:\n"
- // 0:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 1:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 2:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 3:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 4:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 5:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 6:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 7:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 8:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- // 9:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //10:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //11:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //12:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //13:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //14:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //15:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //16:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //17:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //18:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //19:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //20:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //21:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //22:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //23:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //24:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //25:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //26:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //27:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //28:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //29:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //30:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
- //31:
- " addps %%xmm1, %%xmm0\n"
- " addps %%xmm2, %%xmm1\n"
- " addps %%xmm3, %%xmm2\n"
- " addps %%xmm4, %%xmm3\n"
- " addps %%xmm5, %%xmm4\n"
- " addps %%xmm6, %%xmm5\n"
- " addps %%xmm7, %%xmm6\n"
- " addps %%xmm0, %%xmm7\n"
-
- " dec %%eax\n"
- /* "jnz .bsLoop\n" */
- " jnz 1b\n"
- ::"a"(cycles)
- );
-#else
-# ifdef COMPILER_MICROSOFT
- __asm {
- mov eax, cycles
- xorps xmm0, xmm0
- xorps xmm1, xmm1
- xorps xmm2, xmm2
- xorps xmm3, xmm3
- xorps xmm4, xmm4
- xorps xmm5, xmm5
- xorps xmm6, xmm6
- xorps xmm7, xmm7
- //--
- align 16
-bsLoop:
- // 0:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 1:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 2:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 3:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 4:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 5:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 6:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 7:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 8:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 9:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 10:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 11:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 12:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 13:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 14:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 15:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 16:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 17:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 18:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 19:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 20:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 21:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 22:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 23:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 24:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 25:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 26:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 27:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 28:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 29:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 30:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- // 31:
- addps xmm0, xmm1
- addps xmm1, xmm2
- addps xmm2, xmm3
- addps xmm3, xmm4
- addps xmm4, xmm5
- addps xmm5, xmm6
- addps xmm6, xmm7
- addps xmm7, xmm0
- //----------------------
- dec eax
- jnz bsLoop
- }
-# else
-# error "Unsupported compiler"
-# endif /* COMPILER_MICROSOFT */
-#endif /* COMPILER_GCC */
-}
-#endif /* INLINE_ASSEMBLY_SUPPORTED */
+/*
+ * Copyright 2008 Veselin Georgiev,
+ * anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libcpuid.h"
+#include "asm-bits.h"
+
+int cpuid_exists_by_eflags(void) /* returns 1 when CPUID is usable, 0 otherwise */
+{
+#if defined(PLATFORM_X64)
+ return 1; /* CPUID is always present on the x86_64 */
+#elif defined(PLATFORM_X86)
+# if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
+ int result;
+ __asm __volatile(
+ " pushfl\n"
+ " pop %%eax\n" /* eax = current EFLAGS */
+ " mov %%eax, %%ecx\n" /* ecx keeps the original EFLAGS */
+ " xor $0x200000, %%eax\n" /* flip EFLAGS bit 0x200000 */
+ " push %%eax\n"
+ " popfl\n" /* try to write the modified EFLAGS */
+ " pushfl\n"
+ " pop %%eax\n" /* read EFLAGS back */
+ " xor %%ecx, %%eax\n" /* nonzero only if the bit really changed */
+ " mov %%eax, %0\n"
+ " push %%ecx\n"
+ " popfl\n" /* restore the original EFLAGS */
+ : "=m"(result)
+ : :"eax", "ecx", "memory");
+ return (result != 0);
+# elif defined(COMPILER_MICROSOFT)
+ int result;
+ __asm {
+ pushfd
+ pop eax
+ mov ecx, eax
+ xor eax, 0x200000
+ push eax
+ popfd
+ pushfd
+ pop eax
+ xor eax, ecx
+ mov result, eax
+ push ecx
+ popfd
+ };
+ return (result != 0);
+# else
+ return 0;
+# endif /* COMPILER_GCC || COMPILER_CLANG, COMPILER_MICROSOFT */
+#elif defined(PLATFORM_ARM)
+ return 0; /* ARM build: report CPUID as unavailable */
+#else
+ return 0;
+#endif /* PLATFORM_X64, PLATFORM_X86, PLATFORM_ARM */
+}
+
+#ifdef INLINE_ASM_SUPPORTED
+/*
+ * Runs CPUID with EAX..EDX = regs[0..3] and writes the results back to regs.
+ * With MSVC/AMD64, exec_cpuid() and cpu_rdtsc() are implemented in separate
+ * .asm files. Otherwise, inline assembly is used (this definition).
+ */
+void exec_cpuid(uint32_t *regs)
+{
+# if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
+# ifdef PLATFORM_X64
+ __asm __volatile(
+ " mov %0, %%rdi\n" /* rdi = regs */
+
+ " push %%rbx\n" /* rbx/rcx/rdx are saved here and restored below */
+ " push %%rcx\n"
+ " push %%rdx\n"
+
+ " mov (%%rdi), %%eax\n"
+ " mov 4(%%rdi), %%ebx\n"
+ " mov 8(%%rdi), %%ecx\n"
+ " mov 12(%%rdi), %%edx\n"
+
+ " cpuid\n"
+
+ " movl %%eax, (%%rdi)\n"
+ " movl %%ebx, 4(%%rdi)\n"
+ " movl %%ecx, 8(%%rdi)\n"
+ " movl %%edx, 12(%%rdi)\n"
+ " pop %%rdx\n"
+ " pop %%rcx\n"
+ " pop %%rbx\n"
+ :
+ :"m"(regs)
+ :"memory", "eax", "rdi"
+ );
+# elif defined(PLATFORM_X86)
+ __asm __volatile(
+ " mov %0, %%edi\n" /* edi = regs */
+
+ " push %%ebx\n" /* ebx/ecx/edx are saved here and restored below */
+ " push %%ecx\n"
+ " push %%edx\n"
+
+ " mov (%%edi), %%eax\n"
+ " mov 4(%%edi), %%ebx\n"
+ " mov 8(%%edi), %%ecx\n"
+ " mov 12(%%edi), %%edx\n"
+
+ " cpuid\n"
+
+ " mov %%eax, (%%edi)\n"
+ " mov %%ebx, 4(%%edi)\n"
+ " mov %%ecx, 8(%%edi)\n"
+ " mov %%edx, 12(%%edi)\n"
+ " pop %%edx\n"
+ " pop %%ecx\n"
+ " pop %%ebx\n"
+ :
+ :"m"(regs)
+ :"memory", "eax", "edi"
+ );
+# elif defined(PLATFORM_ARM) /* empty branch: regs is left unchanged */
+# endif /* PLATFORM_X64, PLATFORM_X86, PLATFORM_ARM */
+#else
+# ifdef COMPILER_MICROSOFT
+ __asm {
+ push ebx
+ push ecx
+ push edx
+ push edi
+ mov edi, regs
+
+ mov eax, [edi]
+ mov ebx, [edi+4]
+ mov ecx, [edi+8]
+ mov edx, [edi+12]
+
+ cpuid
+
+ mov [edi], eax
+ mov [edi+4], ebx
+ mov [edi+8], ecx
+ mov [edi+12], edx
+
+ pop edi
+ pop edx
+ pop ecx
+ pop ebx
+ }
+# else
+# error "Unsupported compiler"
+# endif /* COMPILER_MICROSOFT */
+#endif /* COMPILER_GCC || COMPILER_CLANG */
+}
+#endif /* INLINE_ASM_SUPPORTED */
+
+#ifdef INLINE_ASM_SUPPORTED
+void cpu_rdtsc(uint64_t* result) /* stores the 64-bit RDTSC value in *result */
+{
+ uint32_t low_part, hi_part;
+#if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
+#ifdef PLATFORM_ARM
+ low_part = 0; /* ARM build: no RDTSC is executed, *result becomes 0 */
+ hi_part = 0;
+#else
+ __asm __volatile (
+ " rdtsc\n"
+ " mov %%eax, %0\n" /* low_part = EAX */
+ " mov %%edx, %1\n" /* hi_part = EDX */
+ :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx"
+ );
+#endif /* PLATFORM_ARM */
+#else
+# ifdef COMPILER_MICROSOFT
+ __asm {
+ rdtsc
+ mov low_part, eax
+ mov hi_part, edx
+ };
+# else
+# error "Unsupported compiler"
+# endif /* COMPILER_MICROSOFT */
+#endif /* COMPILER_GCC || COMPILER_CLANG */
+ *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32); /* hi_part:low_part -> 64-bit */
+}
+#endif /* INLINE_ASM_SUPPORTED */
+
+#ifdef INLINE_ASM_SUPPORTED
+void busy_sse_loop(int cycles)
+{
+# if defined(COMPILER_GCC) || defined(COMPILER_CLANG)
+#ifndef __APPLE__
+# define XALIGN ".balign 16\n"
+#else
+# define XALIGN ".align 4\n"
+#endif
+#ifdef PLATFORM_ARM
+#else
+ __asm __volatile (
+ " xorps %%xmm0, %%xmm0\n"
+ " xorps %%xmm1, %%xmm1\n"
+ " xorps %%xmm2, %%xmm2\n"
+ " xorps %%xmm3, %%xmm3\n"
+ " xorps %%xmm4, %%xmm4\n"
+ " xorps %%xmm5, %%xmm5\n"
+ " xorps %%xmm6, %%xmm6\n"
+ " xorps %%xmm7, %%xmm7\n"
+ XALIGN
+ /* ".bsLoop:\n" */
+ "1:\n"
+ // 0:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 1:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 2:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 3:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 4:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 5:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 6:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 7:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 8:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ // 9:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //10:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //11:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //12:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //13:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //14:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //15:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //16:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //17:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //18:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //19:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //20:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //21:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //22:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //23:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //24:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //25:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //26:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //27:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //28:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //29:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //30:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+ //31:
+ " addps %%xmm1, %%xmm0\n"
+ " addps %%xmm2, %%xmm1\n"
+ " addps %%xmm3, %%xmm2\n"
+ " addps %%xmm4, %%xmm3\n"
+ " addps %%xmm5, %%xmm4\n"
+ " addps %%xmm6, %%xmm5\n"
+ " addps %%xmm7, %%xmm6\n"
+ " addps %%xmm0, %%xmm7\n"
+
+ " dec %%eax\n"
+ /* "jnz .bsLoop\n" */
+ " jnz 1b\n"
+ ::"a"(cycles)
+ );
+#endif
+#else
+# ifdef COMPILER_MICROSOFT
+ __asm {
+ mov eax, cycles
+ xorps xmm0, xmm0
+ xorps xmm1, xmm1
+ xorps xmm2, xmm2
+ xorps xmm3, xmm3
+ xorps xmm4, xmm4
+ xorps xmm5, xmm5
+ xorps xmm6, xmm6
+ xorps xmm7, xmm7
+ //--
+ align 16
+bsLoop:
+ // 0:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 1:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 2:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 3:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 4:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 5:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 6:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 7:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 8:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 9:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 10:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 11:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 12:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 13:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 14:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 15:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 16:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 17:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 18:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 19:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 20:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 21:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 22:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 23:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 24:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 25:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 26:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 27:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 28:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 29:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 30:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ // 31:
+ addps xmm0, xmm1
+ addps xmm1, xmm2
+ addps xmm2, xmm3
+ addps xmm3, xmm4
+ addps xmm4, xmm5
+ addps xmm5, xmm6
+ addps xmm6, xmm7
+ addps xmm7, xmm0
+ //----------------------
+ dec eax
+ jnz bsLoop
+ }
+# else
+# error "Unsupported compiler"
+# endif /* COMPILER_MICROSOFT */
+#endif /* COMPILER_GCC */
+}
+#endif /* INLINE_ASM_SUPPORTED */
\ No newline at end of file
diff --git a/src/3rdparty/libcpuid/asm-bits.h b/src/3rdparty/libcpuid/asm-bits.h
index 3a03e11c..9049e2fe 100644
--- a/src/3rdparty/libcpuid/asm-bits.h
+++ b/src/3rdparty/libcpuid/asm-bits.h
@@ -1,53 +1,71 @@
-/*
- * Copyright 2008 Veselin Georgiev,
- * anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __ASM_BITS_H__
-#define __ASM_BITS_H__
-#include "libcpuid.h"
-
-/* Determine Compiler: */
-#if defined(_MSC_VER)
-# define COMPILER_MICROSOFT
-#elif defined(__GNUC__)
-# define COMPILER_GCC
-#endif
-
-/* Determine Platform */
-#if defined(__x86_64__) || defined(_M_AMD64)
-# define PLATFORM_X64
-#elif defined(__i386__) || defined(_M_IX86)
-# define PLATFORM_X86
-#endif
-
-/* Under Windows/AMD64 with MSVC, inline assembly isn't supported */
-#if (defined(COMPILER_GCC) && defined(PLATFORM_X64)) || defined(PLATFORM_X86)
-# define INLINE_ASM_SUPPORTED
-#endif
-
-int cpuid_exists_by_eflags(void);
-void exec_cpuid(uint32_t *regs);
-void busy_sse_loop(int cycles);
-
-#endif /* __ASM_BITS_H__ */
+/*
+ * Copyright 2008 Veselin Georgiev,
+ * anrieffNOSPAM @ mgail_DOT.com (convert to gmail)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __ASM_BITS_H__
+#define __ASM_BITS_H__
+#include "libcpuid.h"
+
+/*
+ * Determine Compiler: the first branch whose predefined macro is set defines
+ * the matching COMPILER_* macro (COMPILER_MICROSOFT, COMPILER_GCC or
+ * COMPILER_CLANG). Each define is guarded, so a macro that is already
+ * defined (e.g. supplied on the command line) is not redefined.
+ * NOTE(review): Clang normally predefines __GNUC__ as well, so it is
+ * presumably classified as COMPILER_GCC here and the __clang__ branch is
+ * rarely, if ever, taken - confirm this is intended.
+ */
+#if defined(_MSC_VER)
+#if !defined(COMPILER_MICROSOFT)
+# define COMPILER_MICROSOFT
+#endif
+#elif defined(__GNUC__)
+#if !defined(COMPILER_GCC)
+# define COMPILER_GCC
+#endif
+#elif defined(__clang__)
+#if !defined(COMPILER_CLANG)
+# define COMPILER_CLANG
+#endif
+#endif
+
+/*
+ * Determine Platform: define PLATFORM_X64, PLATFORM_X86 or PLATFORM_ARM from
+ * the compiler's predefined architecture macros. As with the compiler
+ * macros, an already-defined PLATFORM_* macro is left alone.
+ * NOTE(review): ARM is recognised only through __ARMEL__; targets that do not
+ * predefine it end up with no PLATFORM_* macro at all - confirm that is
+ * acceptable for the callers of this header.
+ */
+#if defined(__x86_64__) || defined(_M_AMD64)
+#if !defined(PLATFORM_X64)
+# define PLATFORM_X64
+#endif
+#elif defined(__i386__) || defined(_M_IX86)
+#if !defined(PLATFORM_X86)
+# define PLATFORM_X86
+#endif
+#elif defined(__ARMEL__)
+#if !defined(PLATFORM_ARM)
+# define PLATFORM_ARM
+#endif
+#endif
+
+/*
+ * INLINE_ASM_SUPPORTED is defined when one of these combinations holds:
+ *   - COMPILER_GCC or COMPILER_CLANG together with PLATFORM_X64,
+ *     PLATFORM_X86 or PLATFORM_ARM;
+ *   - COMPILER_MICROSOFT together with PLATFORM_X86.
+ * Under Windows/AMD64 with MSVC, inline assembly isn't supported, which is
+ * why the Microsoft case is limited to PLATFORM_X86.
+ */
+#if (((defined(COMPILER_GCC) || defined(COMPILER_CLANG))) && \
+ (defined(PLATFORM_X64) || defined(PLATFORM_X86) || defined(PLATFORM_ARM))) || \
+ (defined(COMPILER_MICROSOFT) && defined(PLATFORM_X86))
+# define INLINE_ASM_SUPPORTED
+#endif
+
+int cpuid_exists_by_eflags(void);
+void exec_cpuid(uint32_t *regs);
+void busy_sse_loop(int cycles);
+
+#endif /* __ASM_BITS_H__ */
diff --git a/src/3rdparty/libcpuid/cpuid_main.c b/src/3rdparty/libcpuid/cpuid_main.c
index f22c7dd6..61cb638d 100644
--- a/src/3rdparty/libcpuid/cpuid_main.c
+++ b/src/3rdparty/libcpuid/cpuid_main.c
@@ -221,42 +221,42 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da
static cpu_vendor_t cpuid_vendor_identify(const uint32_t *raw_vendor, char *vendor_str)
{
- int i;
- cpu_vendor_t vendor = VENDOR_UNKNOWN;
- const struct { cpu_vendor_t vendor; char match[16]; }
- matchtable[NUM_CPU_VENDORS] = {
- /* source: http://www.sandpile.org/ia32/cpuid.htm */
- { VENDOR_INTEL , "GenuineIntel" },
- { VENDOR_AMD , "AuthenticAMD" },
- { VENDOR_CYRIX , "CyrixInstead" },
- { VENDOR_NEXGEN , "NexGenDriven" },
- { VENDOR_TRANSMETA , "GenuineTMx86" },
- { VENDOR_UMC , "UMC UMC UMC " },
- { VENDOR_CENTAUR , "CentaurHauls" },
- { VENDOR_RISE , "RiseRiseRise" },
- { VENDOR_SIS , "SiS SiS SiS " },
- { VENDOR_NSC , "Geode by NSC" },
- };
+ int i;
+ cpu_vendor_t vendor = VENDOR_UNKNOWN;
+ const struct { cpu_vendor_t vendor; char match[16]; }
+ matchtable[NUM_CPU_VENDORS] = {
+ /* source: http://www.sandpile.org/ia32/cpuid.htm */
+ { VENDOR_INTEL , "GenuineIntel" },
+ { VENDOR_AMD , "AuthenticAMD" },
+ { VENDOR_CYRIX , "CyrixInstead" },
+ { VENDOR_NEXGEN , "NexGenDriven" },
+ { VENDOR_TRANSMETA , "GenuineTMx86" },
+ { VENDOR_UMC , "UMC UMC UMC " },
+ { VENDOR_CENTAUR , "CentaurHauls" },
+ { VENDOR_RISE , "RiseRiseRise" },
+ { VENDOR_SIS , "SiS SiS SiS " },
+ { VENDOR_NSC , "Geode by NSC" },
+ };
- memcpy(vendor_str + 0, &raw_vendor[1], 4);
- memcpy(vendor_str + 4, &raw_vendor[3], 4);
- memcpy(vendor_str + 8, &raw_vendor[2], 4);
- vendor_str[12] = 0;
+ memcpy(vendor_str + 0, &raw_vendor[1], 4);
+ memcpy(vendor_str + 4, &raw_vendor[3], 4);
+ memcpy(vendor_str + 8, &raw_vendor[2], 4);
+ vendor_str[12] = 0;
- /* Determine vendor: */
- for (i = 0; i < NUM_CPU_VENDORS; i++)
- if (!strcmp(vendor_str, matchtable[i].match)) {
- vendor = matchtable[i].vendor;
- break;
- }
- return vendor;
+ /* Determine vendor: */
+ for (i = 0; i < NUM_CPU_VENDORS; i++)
+ if (!strcmp(vendor_str, matchtable[i].match)) {
+ vendor = matchtable[i].vendor;
+ break;
+ }
+ return vendor;
}
static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{
int i, j, basic, xmodel, xfamily, ext;
char brandstr[64] = {0};
- data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str);
+ data->vendor = cpuid_vendor_identify(raw->basic_cpuid[0], data->vendor_str);
if (data->vendor == VENDOR_UNKNOWN)
return set_error(ERR_CPU_UNKN);
diff --git a/src/3rdparty/libcpuid/libcpuid.h b/src/3rdparty/libcpuid/libcpuid.h
index c44990c3..847e5a4a 100644
--- a/src/3rdparty/libcpuid/libcpuid.h
+++ b/src/3rdparty/libcpuid/libcpuid.h
@@ -60,7 +60,7 @@
*/
/** @mainpage A simple libcpuid introduction
- *
+ *
* LibCPUID provides CPU identification and access to the CPUID and RDTSC
* instructions on the x86.
*
@@ -82,6 +82,7 @@
*/
/** @defgroup libcpuid LibCPUID
+ * @brief LibCPUID provides CPU identification
@{ */
/* Include some integer type specifications: */
@@ -535,23 +536,23 @@ typedef enum {
* @brief Describes common library error codes
*/
typedef enum {
- ERR_OK = 0, /*!< "No error" */
- ERR_NO_CPUID = -1, /*!< "CPUID instruction is not supported" */
- ERR_NO_RDTSC = -2, /*!< "RDTSC instruction is not supported" */
- ERR_NO_MEM = -3, /*!< "Memory allocation failed" */
- ERR_OPEN = -4, /*!< "File open operation failed" */
- ERR_BADFMT = -5, /*!< "Bad file format" */
- ERR_NOT_IMP = -6, /*!< "Not implemented" */
- ERR_CPU_UNKN = -7, /*!< "Unsupported processor" */
- ERR_NO_RDMSR = -8, /*!< "RDMSR instruction is not supported" */
- ERR_NO_DRIVER= -9, /*!< "RDMSR driver error (generic)" */
- ERR_NO_PERMS = -10, /*!< "No permissions to install RDMSR driver" */
- ERR_EXTRACT = -11, /*!< "Cannot extract RDMSR driver (read only media?)" */
- ERR_HANDLE = -12, /*!< "Bad handle" */
- ERR_INVMSR = -13, /*!< "Invalid MSR" */
- ERR_INVCNB = -14, /*!< "Invalid core number" */
- ERR_HANDLE_R = -15, /*!< "Error on handle read" */
- ERR_INVRANGE = -16, /*!< "Invalid given range" */
+ ERR_OK = 0, /*!< No error */
+ ERR_NO_CPUID = -1, /*!< CPUID instruction is not supported */
+ ERR_NO_RDTSC = -2, /*!< RDTSC instruction is not supported */
+ ERR_NO_MEM = -3, /*!< Memory allocation failed */
+ ERR_OPEN = -4, /*!< File open operation failed */
+ ERR_BADFMT = -5, /*!< Bad file format */
+ ERR_NOT_IMP = -6, /*!< Not implemented */
+ ERR_CPU_UNKN = -7, /*!< Unsupported processor */
+ ERR_NO_RDMSR = -8, /*!< RDMSR instruction is not supported */
+ ERR_NO_DRIVER= -9, /*!< RDMSR driver error (generic) */
+ ERR_NO_PERMS = -10, /*!< No permissions to install RDMSR driver */
+ ERR_EXTRACT = -11, /*!< Cannot extract RDMSR driver (read only media?) */
+ ERR_HANDLE = -12, /*!< Bad handle */
+ ERR_INVMSR = -13, /*!< Invalid MSR */
+ ERR_INVCNB = -14, /*!< Invalid core number */
+ ERR_HANDLE_R = -15, /*!< Error on handle read */
+ ERR_INVRANGE = -16, /*!< Invalid given range */
} cpu_error_t;
/**
@@ -668,7 +669,7 @@ struct cpu_epc_t cpuid_get_epc(int index, const struct cpu_raw_data_t* raw);
const char* cpuid_lib_version(void);
#ifdef __cplusplus
-}; /* extern "C" */
+} /* extern "C" */
#endif
diff --git a/src/3rdparty/libcpuid/libcpuid_internal.h b/src/3rdparty/libcpuid/libcpuid_internal.h
index 038aa209..64804616 100644
--- a/src/3rdparty/libcpuid/libcpuid_internal.h
+++ b/src/3rdparty/libcpuid/libcpuid_internal.h
@@ -75,8 +75,9 @@ enum _intel_bits_t {
_3 = LBIT( 14 ),
_5 = LBIT( 15 ),
_7 = LBIT( 16 ),
- XEON_ = LBIT( 17 ),
- ATOM_ = LBIT( 18 ),
+ _9 = LBIT( 17 ),
+ XEON_ = LBIT( 18 ),
+ ATOM_ = LBIT( 19 ),
};
typedef enum _intel_bits_t intel_bits_t;
diff --git a/src/3rdparty/libcpuid/libcpuid_types.h b/src/3rdparty/libcpuid/libcpuid_types.h
index 9e897275..0e667aa6 100644
--- a/src/3rdparty/libcpuid/libcpuid_types.h
+++ b/src/3rdparty/libcpuid/libcpuid_types.h
@@ -32,6 +32,32 @@
#ifndef __LIBCPUID_TYPES_H__
#define __LIBCPUID_TYPES_H__
-#include
+/*
+ * Fixed-width integer types.
+ * Any non-MSVC compiler, and MSVC with _MSC_VER >= 1600, takes them from the
+ * standard header; older MSVC releases get hand-written typedefs instead.
+ */
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+#  include <stdint.h>
+#else
+/* we have to provide our own: */
+#  if !defined(__int32_t_defined)
+typedef int int32_t;
+#  endif
+
+#  if !defined(__uint32_t_defined)
+typedef unsigned uint32_t;
+#  endif
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+#if (defined _MSC_VER) && (_MSC_VER <= 1300)
+	/* MSVC 6.0: no long longs ... */
+	typedef signed __int64 int64_t;
+	typedef unsigned __int64 uint64_t;
+#else
+	/* all other sane compilers: */
+	typedef signed long long int64_t;
+	typedef unsigned long long uint64_t;
+#endif
+
+#endif
#endif /* __LIBCPUID_TYPES_H__ */
diff --git a/src/3rdparty/libcpuid/recog_amd.c b/src/3rdparty/libcpuid/recog_amd.c
index 352d733b..4726f633 100644
--- a/src/3rdparty/libcpuid/recog_amd.c
+++ b/src/3rdparty/libcpuid/recog_amd.c
@@ -49,6 +49,10 @@ enum _amd_model_codes_t {
_1400,
_1500,
_1600,
+ _1900,
+ _2400,
+ _2500,
+ _2700,
};
static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
diff --git a/src/3rdparty/libcpuid/recog_intel.c b/src/3rdparty/libcpuid/recog_intel.c
index 5467c19f..397d750e 100644
--- a/src/3rdparty/libcpuid/recog_intel.c
+++ b/src/3rdparty/libcpuid/recog_intel.c
@@ -376,7 +376,7 @@ static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data)
bits |= bit_matchtable[i].bit;
}
- if ((i = match_pattern(bs, "Core(TM) [im][357]")) != 0) {
+ if ((i = match_pattern(bs, "Core(TM) [im][3579]")) != 0) {
bits |= CORE_;
i--;
switch (bs[i + 9]) {
@@ -387,6 +387,7 @@ static intel_code_and_bits_t get_brand_code_and_bits(struct cpu_id_t* data)
case '3': bits |= _3; break;
case '5': bits |= _5; break;
case '7': bits |= _7; break;
+ case '9': bits |= _9; break;
}
}
for (i = 0; i < COUNT_OF(matchtable); i++)
diff --git a/src/App.cpp b/src/App.cpp
index adcc5752..134e4ef5 100644
--- a/src/App.cpp
+++ b/src/App.cpp
@@ -29,11 +29,11 @@
#include "api/Api.h"
#include "App.h"
#include "common/Console.h"
+#include "common/cpu/Cpu.h"
#include "common/log/Log.h"
#include "common/Platform.h"
#include "core/Config.h"
#include "core/Controller.h"
-#include "Cpu.h"
#include "crypto/CryptoNight.h"
#include "Mem.h"
#include "net/Network.h"
diff --git a/src/Cpu.h b/src/Cpu.h
deleted file mode 100644
index a125bae8..00000000
--- a/src/Cpu.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* XMRig
- * Copyright 2010 Jeff Garzik
- * Copyright 2012-2014 pooler
- * Copyright 2014 Lucas Jones
- * Copyright 2014-2016 Wolf9466
- * Copyright 2016 Jay D Dee
- * Copyright 2017-2018 XMR-Stak ,
- * Copyright 2016-2018 XMRig ,
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __CPU_H__
-#define __CPU_H__
-
-
-#include
-
-
-class Cpu
-{
-public:
- enum Flags {
- X86_64 = 1,
- AES = 2,
- BMI2 = 4
- };
-
- static size_t optimalThreadsCount(size_t size, int maxCpuUsage);
- static void init();
-
- static inline bool hasAES() { return (m_flags & AES) != 0; }
- static inline bool isX64() { return (m_flags & X86_64) != 0; }
- static inline const char *brand() { return m_brand; }
- static inline int cores() { return m_totalCores; }
- static inline int l2() { return m_l2_cache; }
- static inline int l3() { return m_l3_cache; }
- static inline int sockets() { return m_sockets; }
- static inline int threads() { return m_totalThreads; }
-
-private:
- static void initCommon();
-
- static bool m_l2_exclusive;
- static char m_brand[64];
- static int m_flags;
- static int m_l2_cache;
- static int m_l3_cache;
- static int m_sockets;
- static int m_totalCores;
- static size_t m_totalThreads;
-};
-
-
-#endif /* __CPU_H__ */
diff --git a/src/Summary.cpp b/src/Summary.cpp
index de6b1234..3c1d06a7 100644
--- a/src/Summary.cpp
+++ b/src/Summary.cpp
@@ -27,16 +27,33 @@
#include
+#include "common/cpu/Cpu.h"
#include "common/log/Log.h"
#include "common/net/Pool.h"
#include "core/Config.h"
#include "core/Controller.h"
-#include "Cpu.h"
+#include "crypto/Asm.h"
#include "Mem.h"
#include "Summary.h"
#include "version.h"
+#ifndef XMRIG_NO_ASM
+// ANSI-coloured display names, indexed by the numeric value of xmrig::Assembly.
+// NOTE(review): assumes the enum order is none, auto, intel, ryzen - confirm
+// against the Assembly declaration.
+static const char *coloredAsmNames[] = {
+    "\x1B[1;31mnone\x1B[0m",
+    "auto",
+    "\x1B[1;32mintel\x1B[0m",
+    "\x1B[1;32mryzen\x1B[0m"
+};
+
+
+// Returns the name to print for `assembly`.
+// With colours enabled the coloured table entry is used; without colours, or
+// when `assembly` has no entry in the table, the plain name from
+// xmrig::Asm::toString() is returned so the table is never read out of bounds.
+inline static const char *asmName(xmrig::Assembly assembly, bool colors)
+{
+    const size_t index = static_cast<size_t>(assembly);
+    const size_t count = sizeof(coloredAsmNames) / sizeof(coloredAsmNames[0]);
+
+    if (colors && index < count) {
+        return coloredAsmNames[index];
+    }
+
+    return xmrig::Asm::toString(assembly);
+}
+#endif
+
+
static void print_memory(xmrig::Config *config) {
# ifdef _WIN32
if (config->isColors()) {
@@ -52,21 +69,23 @@ static void print_memory(xmrig::Config *config) {
static void print_cpu(xmrig::Config *config)
{
+ using namespace xmrig;
+
if (config->isColors()) {
Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s (%d)") " %sx64 %sAES",
"CPU",
- Cpu::brand(),
- Cpu::sockets(),
- Cpu::isX64() ? "\x1B[1;32m" : "\x1B[1;31m-",
- Cpu::hasAES() ? "\x1B[1;32m" : "\x1B[1;31m-");
+ Cpu::info()->brand(),
+ Cpu::info()->sockets(),
+ Cpu::info()->isX64() ? "\x1B[1;32m" : "\x1B[1;31m-",
+ Cpu::info()->hasAES() ? "\x1B[1;32m" : "\x1B[1;31m-");
# ifndef XMRIG_NO_LIBCPUID
- Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%.1f MB/%.1f MB"), "CPU L2/L3", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
+ Log::i()->text(GREEN_BOLD(" * ") WHITE_BOLD("%-13s%.1f MB/%.1f MB"), "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0);
# endif
}
else {
- Log::i()->text(" * %-13s%s (%d) %sx64 %sAES", "CPU", Cpu::brand(), Cpu::sockets(), Cpu::isX64() ? "" : "-", Cpu::hasAES() ? "" : "-");
+ Log::i()->text(" * %-13s%s (%d) %sx64 %sAES", "CPU", Cpu::info()->brand(), Cpu::info()->sockets(), Cpu::info()->isX64() ? "" : "-", Cpu::info()->hasAES() ? "" : "-");
# ifndef XMRIG_NO_LIBCPUID
- Log::i()->text(" * %-13s%.1f MB/%.1f MB", "CPU L2/L3", Cpu::l2() / 1024.0, Cpu::l3() / 1024.0);
+ Log::i()->text(" * %-13s%.1f MB/%.1f MB", "CPU L2/L3", Cpu::info()->L2() / 1024.0, Cpu::info()->L3() / 1024.0);
# endif
}
}
@@ -99,6 +118,18 @@ static void print_threads(xmrig::Config *config)
config->isColors() && config->donateLevel() == 0 ? "\x1B[1;31m" : "",
config->donateLevel());
}
+
+# ifndef XMRIG_NO_ASM
+ if (config->assembly() == xmrig::ASM_AUTO) {
+ const xmrig::Assembly assembly = xmrig::Cpu::info()->assembly();
+
+ Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13sauto:%s")
+ : " * %-13sauto:%s", "ASSEMBLY", asmName(assembly, config->isColors()));
+ }
+ else {
+ Log::i()->text(config->isColors() ? GREEN_BOLD(" * ") WHITE_BOLD("%-13s%s") : " * %-13s%s", "ASSEMBLY", asmName(config->assembly(), config->isColors()));
+ }
+# endif
}
diff --git a/src/api/ApiRouter.cpp b/src/api/ApiRouter.cpp
index 2c62696a..dd7accf6 100644
--- a/src/api/ApiRouter.cpp
+++ b/src/api/ApiRouter.cpp
@@ -35,12 +35,12 @@
#include "api/ApiRouter.h"
#include "common/api/HttpReply.h"
#include "common/api/HttpRequest.h"
+#include "common/cpu/Cpu.h"
#include "common/crypto/keccak.h"
#include "common/net/Job.h"
#include "common/Platform.h"
#include "core/Config.h"
#include "core/Controller.h"
-#include "Cpu.h"
#include "interfaces/IThread.h"
#include "rapidjson/document.h"
#include "rapidjson/prettywriter.h"
@@ -238,13 +238,14 @@ void ApiRouter::getIdentify(rapidjson::Document &doc) const
void ApiRouter::getMiner(rapidjson::Document &doc) const
{
+ using namespace xmrig;
auto &allocator = doc.GetAllocator();
rapidjson::Value cpu(rapidjson::kObjectType);
- cpu.AddMember("brand", rapidjson::StringRef(Cpu::brand()), allocator);
- cpu.AddMember("aes", Cpu::hasAES(), allocator);
- cpu.AddMember("x64", Cpu::isX64(), allocator);
- cpu.AddMember("sockets", Cpu::sockets(), allocator);
+ cpu.AddMember("brand", rapidjson::StringRef(Cpu::info()->brand()), allocator);
+ cpu.AddMember("aes", Cpu::info()->hasAES(), allocator);
+ cpu.AddMember("x64", Cpu::info()->isX64(), allocator);
+ cpu.AddMember("sockets", Cpu::info()->sockets(), allocator);
doc.AddMember("version", APP_VERSION, allocator);
doc.AddMember("kind", APP_KIND, allocator);
diff --git a/src/Cpu_stub.cpp b/src/common/cpu/BasicCpuInfo.cpp
similarity index 77%
rename from src/Cpu_stub.cpp
rename to src/common/cpu/BasicCpuInfo.cpp
index 8b27ad65..66af53cc 100644
--- a/src/Cpu_stub.cpp
+++ b/src/common/cpu/BasicCpuInfo.cpp
@@ -21,6 +21,9 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include
+#include
+
#ifdef _MSC_VER
# include
@@ -32,14 +35,8 @@
# define bit_AES (1 << 25)
#endif
-#ifndef bit_BMI2
-# define bit_BMI2 (1 << 8)
-#endif
-#include
-
-
-#include "Cpu.h"
+#include "common/cpu/BasicCpuInfo.h"
#define VENDOR_ID (0)
@@ -96,43 +93,18 @@ static inline bool has_aes_ni()
}
-static inline bool has_bmi2() {
- int cpu_info[4] = { 0 };
- cpuid(EXTENDED_FEATURES, cpu_info);
-
- return (cpu_info[EBX_Reg] & bit_BMI2) != 0;
-}
-
-
-char Cpu::m_brand[64] = { 0 };
-int Cpu::m_flags = 0;
-int Cpu::m_l2_cache = 0;
-int Cpu::m_l3_cache = 0;
-int Cpu::m_sockets = 1;
-int Cpu::m_totalCores = 0;
-size_t Cpu::m_totalThreads = 0;
-
-
-size_t Cpu::optimalThreadsCount(size_t size, int maxCpuUsage)
-{
- const size_t count = m_totalThreads / 2;
- return count < 1 ? 1 : count;
-}
-
-
-void Cpu::initCommon()
+xmrig::BasicCpuInfo::BasicCpuInfo() :
+ m_aes(has_aes_ni()),
+ m_brand(),
+ m_threads(std::thread::hardware_concurrency())
{
cpu_brand_string(m_brand);
-
-# if defined(__x86_64__) || defined(_M_AMD64)
- m_flags |= X86_64;
-# endif
-
- if (has_aes_ni()) {
- m_flags |= AES;
- }
-
- if (has_bmi2()) {
- m_flags |= BMI2;
- }
+}
+
+
+// Suggests a worker count: half of the value reported by threads(), but never
+// less than one. memSize and maxCpuUsage are not used by this implementation.
+size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const
+{
+    const size_t half = threads() / 2;
+
+    if (half < 1) {
+        return 1;
+    }
+
+    return half;
+}
diff --git a/src/common/cpu/BasicCpuInfo.h b/src/common/cpu/BasicCpuInfo.h
new file mode 100644
index 00000000..f9d710d6
--- /dev/null
+++ b/src/common/cpu/BasicCpuInfo.h
@@ -0,0 +1,70 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017-2018 XMR-Stak ,
+ * Copyright 2016-2018 XMRig ,
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_BASICCPUINFO_H
+#define XMRIG_BASICCPUINFO_H
+
+
+#include "common/interfaces/ICpuInfo.h"
+
+
+namespace xmrig {
+
+
+// ICpuInfo implementation that reports only what this class stores itself:
+// an AES flag, a brand string and a thread count. Topology queries (cores,
+// caches, nodes) answer -1, sockets() answers 1 and assembly() answers
+// ASM_NONE.
+class BasicCpuInfo : public ICpuInfo
+{
+public:
+    BasicCpuInfo();
+
+protected:
+    size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override;
+
+    // Fixed answers.
+    Assembly assembly() const override  { return ASM_NONE; }
+    bool isSupported() const override   { return true; }
+    int32_t cores() const override      { return -1; }
+    int32_t L2() const override         { return -1; }
+    int32_t L3() const override         { return -1; }
+    int32_t nodes() const override      { return -1; }
+    int32_t sockets() const override    { return 1; }
+
+    // Answers backed by the members filled in by the constructor.
+    bool hasAES() const override        { return m_aes; }
+    const char *brand() const override  { return m_brand; }
+    int32_t threads() const override    { return m_threads; }
+
+    // True when the compiler targets a 64-bit x86 or 64-bit ARM architecture;
+    // decided at compile time from the predefined macros below.
+    bool isX64() const override
+    {
+#       if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
+        return true;
+#       else
+        return false;
+#       endif
+    }
+
+private:
+    bool m_aes;
+    char m_brand[64];
+    int32_t m_threads;
+};
+
+
+} /* namespace xmrig */
+
+
+#endif /* XMRIG_BASICCPUINFO_H */
diff --git a/src/Cpu_arm.cpp b/src/common/cpu/BasicCpuInfo_arm.cpp
similarity index 73%
rename from src/Cpu_arm.cpp
rename to src/common/cpu/BasicCpuInfo_arm.cpp
index 3e259d0d..c1c127db 100644
--- a/src/Cpu_arm.cpp
+++ b/src/common/cpu/BasicCpuInfo_arm.cpp
@@ -21,37 +21,27 @@
* along with this program. If not, see .
*/
-
#include
+#include
-#include "Cpu.h"
+#include "common/cpu/BasicCpuInfo.h"
-char Cpu::m_brand[64] = { 0 };
-int Cpu::m_flags = 0;
-int Cpu::m_l2_cache = 0;
-int Cpu::m_l3_cache = 0;
-int Cpu::m_sockets = 1;
-int Cpu::m_totalCores = 0;
-size_t Cpu::m_totalThreads = 0;
-
-
-size_t Cpu::optimalThreadsCount(size_t size, int maxCpuUsage)
-{
- return m_totalThreads;
-}
-
-
-void Cpu::initCommon()
+xmrig::BasicCpuInfo::BasicCpuInfo() :
+ m_aes(false),
+ m_brand(),
+ m_threads(std::thread::hardware_concurrency())
{
memcpy(m_brand, "Unknown", 7);
-# if defined (__arm64__) || defined (__aarch64__)
- m_flags |= X86_64;
-# endif
-
# if __ARM_FEATURE_CRYPTO
- m_flags |= AES;
+ m_aes = true;
# endif
}
+
+
+size_t xmrig::BasicCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const
+{
+    return threads() > 0 ? threads() : 1; // memSize/maxCpuUsage are ignored; hardware_concurrency() may report 0, so never suggest fewer than one thread
+}
diff --git a/src/Cpu_unix.cpp b/src/common/cpu/Cpu.cpp
similarity index 71%
rename from src/Cpu_unix.cpp
rename to src/common/cpu/Cpu.cpp
index b895c897..b1bb28ac 100644
--- a/src/Cpu_unix.cpp
+++ b/src/common/cpu/Cpu.cpp
@@ -22,33 +22,36 @@
*/
-#ifdef __FreeBSD__
-# include
-# include
-# include
-# include
-#endif
+#include
-#include
-#include
-#include
-#include
+#include "common/cpu/BasicCpuInfo.h"
+#include "common/cpu/Cpu.h"
-#include "Cpu.h"
+static xmrig::ICpuInfo *cpuInfo = nullptr;
-#ifdef __FreeBSD__
-typedef cpuset_t cpu_set_t;
-#endif
-
-
-void Cpu::init()
+xmrig::ICpuInfo *xmrig::Cpu::info()
{
-# ifdef XMRIG_NO_LIBCPUID
- m_totalThreads = sysconf(_SC_NPROCESSORS_CONF);
-# endif
+ assert(cpuInfo != nullptr);
- initCommon();
+ return cpuInfo;
+}
+
+
+void xmrig::Cpu::init()
+{
+ assert(cpuInfo == nullptr);
+
+ cpuInfo = new BasicCpuInfo();
+}
+
+
+void xmrig::Cpu::release()
+{
+ assert(cpuInfo != nullptr);
+
+ delete cpuInfo;
+ cpuInfo = nullptr;
}
diff --git a/src/Cpu_mac.cpp b/src/common/cpu/Cpu.h
similarity index 75%
rename from src/Cpu_mac.cpp
rename to src/common/cpu/Cpu.h
index 085148bc..1d5a9fb1 100644
--- a/src/Cpu_mac.cpp
+++ b/src/common/cpu/Cpu.h
@@ -4,7 +4,7 @@
* Copyright 2014 Lucas Jones
* Copyright 2014-2016 Wolf9466
* Copyright 2016 Jay D Dee
- * Copyright 2016 Jay D Dee
+ * Copyright 2017-2018 XMR-Stak ,
* Copyright 2016-2018 XMRig ,
*
* This program is free software: you can redistribute it and/or modify
@@ -21,20 +21,26 @@
* along with this program. If not, see .
*/
-
-#include
-#include
-#include
+#ifndef XMRIG_CPU_H
+#define XMRIG_CPU_H
-#include "Cpu.h"
+#include "common/interfaces/ICpuInfo.h"
-void Cpu::init()
+namespace xmrig {
+
+
+class Cpu
{
-# ifdef XMRIG_NO_LIBCPUID
- m_totalThreads = sysconf(_SC_NPROCESSORS_CONF);
-# endif
+public:
+ static ICpuInfo *info();
+ static void init();
+ static void release();
+};
- initCommon();
-}
+
+} /* namespace xmrig */
+
+
+#endif /* XMRIG_CPU_H */
diff --git a/src/common/interfaces/IConfig.h b/src/common/interfaces/IConfig.h
index d3593163..0fcac2d1 100644
--- a/src/common/interfaces/IConfig.h
+++ b/src/common/interfaces/IConfig.h
@@ -80,6 +80,7 @@ public:
SafeKey = 1005,
ThreadsKey = 't',
HardwareAESKey = 1011,
+ AssemblyKey = 1015,
// xmrig amd
OclPlatformKey = 1400,
diff --git a/src/common/interfaces/ICpuInfo.h b/src/common/interfaces/ICpuInfo.h
new file mode 100644
index 00000000..267616d0
--- /dev/null
+++ b/src/common/interfaces/ICpuInfo.h
@@ -0,0 +1,60 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2016-2018 XMRig
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_CPUINFO_H
+#define XMRIG_CPUINFO_H
+
+
+#include
+#include
+
+
+#include "common/xmrig.h"
+
+
+namespace xmrig {
+
+
+class ICpuInfo // abstract, read-only description of the host CPU; the active instance is returned by xmrig::Cpu::info()
+{
+public:
+ virtual ~ICpuInfo() {} // virtual: Cpu::release() deletes the implementation through an ICpuInfo pointer
+
+ virtual bool hasAES() const = 0; // Config uses this to choose between hardware and software AES variants
+ virtual bool isSupported() const = 0;
+ virtual bool isX64() const = 0;
+ virtual const char *brand() const = 0;
+ virtual int32_t cores() const = 0; // BasicCpuInfo returns -1 for cores/L2/L3/nodes
+ virtual int32_t L2() const = 0;
+ virtual int32_t L3() const = 0;
+ virtual int32_t nodes() const = 0;
+ virtual int32_t sockets() const = 0;
+ virtual int32_t threads() const = 0; // Config uses this for "--threads all"
+ virtual size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const = 0; // AdvancedCpuInfo treats maxCpuUsage as a percentage of threads()
+ virtual xmrig::Assembly assembly() const = 0; // assembly kernel suggested for this CPU (ASM_NONE when none)
+};
+
+
+} /* namespace xmrig */
+
+
+#endif // XMRIG_CPUINFO_H
diff --git a/src/common/xmrig.h b/src/common/xmrig.h
index e1c7702e..52650f0d 100644
--- a/src/common/xmrig.h
+++ b/src/common/xmrig.h
@@ -94,6 +94,15 @@ enum OclVendor {
};
+enum Assembly {
+ ASM_NONE, // result of Asm::parse(false); serialized as JSON false by Asm::toJSON
+ ASM_AUTO, // Config default and result of Asm::parse(true); serialized as JSON true
+ ASM_INTEL, // chosen by AdvancedCpuInfo for Intel CPUs with AES and ext_model >= 42
+ ASM_RYZEN, // chosen by AdvancedCpuInfo for AMD CPUs with AES and ext_family >= 23
+ ASM_MAX // number of values above; Asm.cpp asserts it equals the size of its name table
+};
+
+
} /* namespace xmrig */
diff --git a/src/core/Config.cpp b/src/core/Config.cpp
index 0380c26d..20a3aece 100644
--- a/src/core/Config.cpp
+++ b/src/core/Config.cpp
@@ -27,9 +27,10 @@
#include "common/config/ConfigLoader.h"
+#include "common/cpu/Cpu.h"
#include "core/Config.h"
#include "core/ConfigCreator.h"
-#include "Cpu.h"
+#include "crypto/Asm.h"
#include "crypto/CryptoNight_constants.h"
#include "rapidjson/document.h"
#include "rapidjson/filewritestream.h"
@@ -43,6 +44,7 @@ static char affinity_tmp[20] = { 0 };
xmrig::Config::Config() : xmrig::CommonConfig(),
m_aesMode(AES_AUTO),
m_algoVariant(AV_AUTO),
+ m_assembly(ASM_AUTO),
m_hugePages(true),
m_safe(false),
m_maxCpuUsage(75),
@@ -51,11 +53,6 @@ xmrig::Config::Config() : xmrig::CommonConfig(),
}
-xmrig::Config::~Config()
-{
-}
-
-
bool xmrig::Config::reload(const char *json)
{
return xmrig::ConfigLoader::reload(this, json);
@@ -153,7 +150,7 @@ bool xmrig::Config::finalize()
if (!m_threads.cpu.empty()) {
m_threads.mode = Advanced;
- const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT;
+ const bool softAES = (m_aesMode == AES_AUTO ? (Cpu::info()->hasAES() ? AES_HW : AES_SOFT) : m_aesMode) == AES_SOFT;
for (size_t i = 0; i < m_threads.cpu.size(); ++i) {
m_threads.list.push_back(CpuThread::createFromData(i, m_algorithm.algo(), m_threads.cpu[i], m_priority, softAES));
@@ -168,17 +165,17 @@ bool xmrig::Config::finalize()
const size_t size = CpuThread::multiway(av) * cn_select_memory(m_algorithm.algo()) / 1024;
if (!m_threads.count) {
- m_threads.count = Cpu::optimalThreadsCount(size, m_maxCpuUsage);
+ m_threads.count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage);
}
else if (m_safe) {
- const size_t count = Cpu::optimalThreadsCount(size, m_maxCpuUsage);
+ const size_t count = Cpu::info()->optimalThreadsCount(size, m_maxCpuUsage);
if (m_threads.count > count) {
m_threads.count = count;
}
}
for (size_t i = 0; i < m_threads.count; ++i) {
- m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority));
+ m_threads.list.push_back(CpuThread::createFromAV(i, m_algorithm.algo(), av, m_threads.mask, m_priority, m_assembly));
}
return true;
@@ -204,6 +201,12 @@ bool xmrig::Config::parseBoolean(int key, bool enable)
m_aesMode = enable ? AES_HW : AES_SOFT;
break;
+# ifndef XMRIG_NO_ASM
+ case AssemblyKey:
+ m_assembly = Asm::parse(enable);
+ break;
+# endif
+
default:
break;
}
@@ -232,7 +235,7 @@ bool xmrig::Config::parseString(int key, const char *arg)
case ThreadsKey: /* --threads */
if (strncmp(arg, "all", 3) == 0) {
- m_threads.count = Cpu::threads();
+ m_threads.count = Cpu::info()->threads();
return true;
}
@@ -244,6 +247,12 @@ bool xmrig::Config::parseString(int key, const char *arg)
return parseUint64(key, p ? strtoull(p, nullptr, 16) : strtoull(arg, nullptr, 10));
}
+# ifndef XMRIG_NO_ASM
+ case AssemblyKey: /* --asm */
+ m_assembly = Asm::parse(arg);
+ break;
+# endif
+
default:
break;
}
@@ -339,10 +348,10 @@ xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const
# endif
if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) {
- return Cpu::hasAES() ? AV_SINGLE : AV_SINGLE_SOFT;
+ return Cpu::info()->hasAES() ? AV_SINGLE : AV_SINGLE_SOFT;
}
- if (m_safe && !Cpu::hasAES() && m_algoVariant <= AV_DOUBLE) {
+ if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) {
return static_cast(m_algoVariant + 2);
}
@@ -354,10 +363,10 @@ xmrig::AlgoVariant xmrig::Config::getAlgoVariant() const
xmrig::AlgoVariant xmrig::Config::getAlgoVariantLite() const
{
if (m_algoVariant <= AV_AUTO || m_algoVariant >= AV_MAX) {
- return Cpu::hasAES() ? AV_DOUBLE : AV_DOUBLE_SOFT;
+ return Cpu::info()->hasAES() ? AV_DOUBLE : AV_DOUBLE_SOFT;
}
- if (m_safe && !Cpu::hasAES() && m_algoVariant <= AV_DOUBLE) {
+ if (m_safe && !Cpu::info()->hasAES() && m_algoVariant <= AV_DOUBLE) {
return static_cast(m_algoVariant + 2);
}
diff --git a/src/core/Config.h b/src/core/Config.h
index f0f1404f..95afc34c 100644
--- a/src/core/Config.h
+++ b/src/core/Config.h
@@ -21,8 +21,8 @@
* along with this program. If not, see .
*/
-#ifndef __CONFIG_H__
-#define __CONFIG_H__
+#ifndef XMRIG_CONFIG_H
+#define XMRIG_CONFIG_H
#include
@@ -69,7 +69,6 @@ public:
Config();
- ~Config();
bool reload(const char *json);
@@ -77,6 +76,7 @@ public:
inline AesMode aesMode() const { return m_aesMode; }
inline AlgoVariant algoVariant() const { return m_algoVariant; }
+ inline Assembly assembly() const { return m_assembly; }
inline bool isHugePages() const { return m_hugePages; }
inline const std::vector &threads() const { return m_threads.list; }
inline int priority() const { return m_priority; }
@@ -116,6 +116,7 @@ private:
AesMode m_aesMode;
AlgoVariant m_algoVariant;
+ Assembly m_assembly;
bool m_hugePages;
bool m_safe;
int m_maxCpuUsage;
@@ -126,4 +127,4 @@ private:
} /* namespace xmrig */
-#endif /* __CONFIG_H__ */
+#endif /* XMRIG_CONFIG_H */
diff --git a/src/core/ConfigLoader_platform.h b/src/core/ConfigLoader_platform.h
index c034f3e7..3b95a90f 100644
--- a/src/core/ConfigLoader_platform.h
+++ b/src/core/ConfigLoader_platform.h
@@ -135,6 +135,7 @@ static struct option const options[] = {
{ "tls", 0, nullptr, xmrig::IConfig::TlsKey },
{ "tls-fingerprint", 1, nullptr, xmrig::IConfig::FingerprintKey },
{ "version", 0, nullptr, xmrig::IConfig::VersionKey },
+ { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey },
{ nullptr, 0, nullptr, 0 }
};
@@ -159,6 +160,7 @@ static struct option const config_options[] = {
{ "threads", 1, nullptr, xmrig::IConfig::ThreadsKey },
{ "user-agent", 1, nullptr, xmrig::IConfig::UserAgentKey },
{ "hw-aes", 0, nullptr, xmrig::IConfig::HardwareAESKey },
+ { "asm", 1, nullptr, xmrig::IConfig::AssemblyKey },
{ nullptr, 0, nullptr, 0 }
};
diff --git a/src/core/Controller.cpp b/src/core/Controller.cpp
index ce73f037..792ac939 100644
--- a/src/core/Controller.cpp
+++ b/src/core/Controller.cpp
@@ -26,6 +26,7 @@
#include "common/config/ConfigLoader.h"
+#include "common/cpu/Cpu.h"
#include "common/interfaces/IControllerListener.h"
#include "common/log/ConsoleLog.h"
#include "common/log/FileLog.h"
@@ -33,7 +34,6 @@
#include "common/Platform.h"
#include "core/Config.h"
#include "core/Controller.h"
-#include "Cpu.h"
#include "net/Network.h"
diff --git a/src/Cpu.cpp b/src/core/cpu/AdvancedCpuInfo.cpp
similarity index 57%
rename from src/Cpu.cpp
rename to src/core/cpu/AdvancedCpuInfo.cpp
index eebe585d..1f86a420 100644
--- a/src/Cpu.cpp
+++ b/src/core/cpu/AdvancedCpuInfo.cpp
@@ -21,65 +21,25 @@
* along with this program. If not, see .
*/
-
#include
#include
#include
+#include
-#include "Cpu.h"
+#include "core/cpu/AdvancedCpuInfo.h"
-bool Cpu::m_l2_exclusive = false;
-char Cpu::m_brand[64] = { 0 };
-int Cpu::m_flags = 0;
-int Cpu::m_l2_cache = 0;
-int Cpu::m_l3_cache = 0;
-int Cpu::m_sockets = 1;
-int Cpu::m_totalCores = 0;
-size_t Cpu::m_totalThreads = 0;
-
-
-size_t Cpu::optimalThreadsCount(size_t size, int maxCpuUsage)
-{
- if (m_totalThreads == 1) {
- return 1;
- }
-
- size_t cache = 0;
- if (m_l3_cache) {
- cache = m_l2_exclusive ? (m_l2_cache + m_l3_cache) : m_l3_cache;
- }
- else {
- cache = m_l2_cache;
- }
-
- size_t count = 0;
-
- if (cache) {
- count = cache / size;
-
- if (cache % size >= size / 2) {
- count++;
- }
- }
- else {
- count = m_totalThreads / 2;
- }
-
- if (count > m_totalThreads) {
- count = m_totalThreads;
- }
-
- if (((float) count / m_totalThreads * 100) > maxCpuUsage) {
- count = (int) ceil((float) m_totalThreads * (maxCpuUsage / 100.0));
- }
-
- return count < 1 ? 1 : count;
-}
-
-
-void Cpu::initCommon()
+xmrig::AdvancedCpuInfo::AdvancedCpuInfo() :
+ m_assembly(ASM_NONE),
+ m_aes(false),
+ m_L2_exclusive(false),
+ m_brand(),
+ m_cores(0),
+ m_L2(0),
+ m_L3(0),
+ m_sockets(1),
+ m_threads(std::thread::hardware_concurrency())
{
struct cpu_raw_data_t raw = { 0 };
struct cpu_id_t data = { 0 };
@@ -87,42 +47,78 @@ void Cpu::initCommon()
cpuid_get_raw_data(&raw);
cpu_identify(&raw, &data);
- strncpy(m_brand, data.brand_str, sizeof(m_brand) - 1);
-
- m_totalThreads = data.total_logical_cpus;
- m_sockets = m_totalThreads / data.num_logical_cpus;
+ strncpy(m_brand, data.brand_str, sizeof(m_brand) - 1); // m_brand is zero-filled by the initializer list; leaving its last byte untouched guarantees NUL termination
+ m_sockets = data.num_logical_cpus > 0 ? threads() / data.num_logical_cpus : 1; // data is zero-initialised, so num_logical_cpus is still 0 if identification did not fill it; do not divide by it
if (m_sockets == 0) {
m_sockets = 1;
}
- m_totalCores = data.num_cores * m_sockets;
- m_l3_cache = data.l3_cache > 0 ? data.l3_cache * m_sockets : 0;
+ m_cores = data.num_cores * m_sockets;
+ m_L3 = data.l3_cache > 0 ? data.l3_cache * m_sockets : 0;
// Workaround for AMD CPUs https://github.com/anrieff/libcpuid/issues/97
if (data.vendor == VENDOR_AMD && data.ext_family >= 0x15 && data.ext_family < 0x17) {
- m_l2_cache = data.l2_cache * (m_totalCores / 2) * m_sockets;
- m_l2_exclusive = true;
+ m_L2 = data.l2_cache * (cores() / 2) * m_sockets;
+ m_L2_exclusive = true;
}
// Workaround for Intel Pentium Dual-Core, Core Duo, Core 2 Duo, Core 2 Quad and their Xeon homologue
// These processors have L2 cache shared by 2 cores.
else if (data.vendor == VENDOR_INTEL && data.ext_family == 0x06 && (data.ext_model == 0x0E || data.ext_model == 0x0F || data.ext_model == 0x17)) {
- int l2_count_per_socket = m_totalCores > 1 ? m_totalCores / 2 : 1;
- m_l2_cache = data.l2_cache > 0 ? data.l2_cache * l2_count_per_socket * m_sockets : 0;
+ int l2_count_per_socket = cores() > 1 ? cores() / 2 : 1;
+ m_L2 = data.l2_cache > 0 ? data.l2_cache * l2_count_per_socket * m_sockets : 0;
}
else{
- m_l2_cache = data.l2_cache > 0 ? data.l2_cache * m_totalCores * m_sockets : 0;
+ m_L2 = data.l2_cache > 0 ? data.l2_cache * cores() * m_sockets : 0;
}
-# if defined(__x86_64__) || defined(_M_AMD64)
- m_flags |= X86_64;
-# endif
-
if (data.flags[CPU_FEATURE_AES]) {
- m_flags |= AES;
- }
+ m_aes = true;
- if (data.flags[CPU_FEATURE_BMI2]) {
- m_flags |= BMI2;
+ if (data.vendor == VENDOR_AMD && data.ext_family >= 23) {
+ m_assembly = ASM_RYZEN;
+ }
+ else if (data.vendor == VENDOR_INTEL && data.ext_model >= 42) {
+ m_assembly = ASM_INTEL;
+ }
}
}
+
+
+size_t xmrig::AdvancedCpuInfo::optimalThreadsCount(size_t memSize, int maxCpuUsage) const
+{
+    // Worker count from the cache budget, capped by the CPU count and maxCpuUsage (%); at least 1.
+    const int32_t logical = threads();
+    if (logical == 1) {
+        return 1;
+    }
+
+    // Cache budget: L3 when present (plus L2 if flagged exclusive), otherwise L2 alone.
+    size_t budget = 0;
+    if (!m_L3) {
+        budget = m_L2;
+    }
+    else if (m_L2_exclusive) {
+        budget = m_L2 + m_L3;
+    }
+    else {
+        budget = m_L3;
+    }
+
+    // One worker per memSize of cache, plus one more once the remainder reaches
+    // half of memSize; without cache data use half of the logical CPUs.
+    size_t workers = logical / 2;
+    if (budget) {
+        workers = budget / memSize + (budget % memSize >= memSize / 2 ? 1 : 0);
+    }
+
+    if (workers > (size_t) logical) {
+        workers = logical;
+    }
+
+    if (((float) workers / logical * 100) > maxCpuUsage) {
+        workers = (int) ceil((float) logical * (maxCpuUsage / 100.0));
+    }
+
+    return workers < 1 ? 1 : workers;
+}
diff --git a/src/core/cpu/AdvancedCpuInfo.h b/src/core/cpu/AdvancedCpuInfo.h
new file mode 100644
index 00000000..5e8967ad
--- /dev/null
+++ b/src/core/cpu/AdvancedCpuInfo.h
@@ -0,0 +1,75 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017-2018 XMR-Stak ,
+ * Copyright 2016-2018 XMRig ,
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_ADVANCEDCPUINFO_H
+#define XMRIG_ADVANCEDCPUINFO_H
+
+
+#include "common/interfaces/ICpuInfo.h"
+
+
+namespace xmrig {
+
+
+class AdvancedCpuInfo : public ICpuInfo // ICpuInfo implementation whose constructor fills its fields from libcpuid data
+{
+public:
+ AdvancedCpuInfo();
+
+protected: // the overrides below are reachable only through the public ICpuInfo interface
+ size_t optimalThreadsCount(size_t memSize, int maxCpuUsage) const override;
+
+ inline Assembly assembly() const override { return m_assembly; }
+ inline bool hasAES() const override { return m_aes; }
+ inline bool isSupported() const override { return true; }
+ inline const char *brand() const override { return m_brand; }
+ inline int32_t cores() const override { return m_cores; }
+ inline int32_t L2() const override { return m_L2; }
+ inline int32_t L3() const override { return m_L3; }
+ inline int32_t nodes() const override { return -1; } // fixed -1: not provided by this implementation
+ inline int32_t sockets() const override { return m_sockets; }
+ inline int32_t threads() const override { return m_threads; }
+
+# if defined(__x86_64__) || defined(_M_AMD64)
+ inline bool isX64() const override { return true; } // only x86-64 builds report true here
+# else
+ inline bool isX64() const override { return false; }
+# endif
+
+private:
+ Assembly m_assembly; // stays ASM_NONE unless the constructor sees AES plus a matching AMD/Intel model
+ bool m_aes;
+ bool m_L2_exclusive; // when true, optimalThreadsCount() adds L2 to L3 for the cache budget
+ char m_brand[64]; // fixed-size buffer returned by brand()
+ int32_t m_cores; // constructor stores data.num_cores * m_sockets
+ int32_t m_L2; // total L2 as computed by the constructor (per-core value scaled by cores and sockets)
+ int32_t m_L3; // constructor stores data.l3_cache * m_sockets, or 0 when not reported
+ int32_t m_sockets; // never 0: the constructor forces at least 1
+ int32_t m_threads; // filled from std::thread::hardware_concurrency()
+};
+
+
+} /* namespace xmrig */
+
+
+#endif /* XMRIG_ADVANCEDCPUINFO_H */
diff --git a/src/Cpu_win.cpp b/src/core/cpu/Cpu.cpp
similarity index 69%
rename from src/Cpu_win.cpp
rename to src/core/cpu/Cpu.cpp
index 7258f726..773255d2 100644
--- a/src/Cpu_win.cpp
+++ b/src/core/cpu/Cpu.cpp
@@ -22,20 +22,40 @@
*/
-#include
+#include
-#include "Cpu.h"
+#include "common/cpu/Cpu.h"
-void Cpu::init()
+#ifndef XMRIG_NO_LIBCPUID
+# include "core/cpu/AdvancedCpuInfo.h"
+#endif
+
+
+static xmrig::ICpuInfo *cpuInfo = nullptr;
+
+
+xmrig::ICpuInfo *xmrig::Cpu::info()
{
-# ifdef XMRIG_NO_LIBCPUID
- SYSTEM_INFO sysinfo;
- GetSystemInfo(&sysinfo);
+ assert(cpuInfo != nullptr);
- m_totalThreads = sysinfo.dwNumberOfProcessors;
-# endif
-
- initCommon();
+ return cpuInfo;
+}
+
+
+void xmrig::Cpu::init()
+{
+ assert(cpuInfo == nullptr);
+
+ cpuInfo = new AdvancedCpuInfo();
+}
+
+
+void xmrig::Cpu::release()
+{
+ assert(cpuInfo != nullptr);
+
+ delete cpuInfo;
+ cpuInfo = nullptr;
}
diff --git a/src/crypto/Asm.cpp b/src/crypto/Asm.cpp
new file mode 100644
index 00000000..79dd1cc9
--- /dev/null
+++ b/src/crypto/Asm.cpp
@@ -0,0 +1,100 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017-2018 XMR-Stak ,
+ * Copyright 2016-2018 XMRig ,
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include
+#include
+
+
+#ifdef _MSC_VER
+# define strncasecmp _strnicmp
+# define strcasecmp _stricmp
+#endif
+
+
+#include "crypto/Asm.h"
+#include "rapidjson/document.h"
+
+
+static const char *asmNames[] = {
+ "none",
+ "auto",
+ "intel",
+ "ryzen"
+};
+
+
+xmrig::Assembly xmrig::Asm::parse(const char *assembly, Assembly defaultValue)
+{
+    constexpr size_t size = sizeof(asmNames) / sizeof(asmNames[0]); // number of known names
+    static_assert(ASM_MAX == size, "asmNames must have one entry per Assembly value"); // enforced at compile time, also in release builds
+    assert(assembly != nullptr);
+
+    if (assembly == nullptr) { // release builds: fall back instead of dereferencing a null pointer
+        return defaultValue;
+    }
+
+    for (size_t i = 0; i < size; i++) {
+        if (strcasecmp(assembly, asmNames[i]) == 0) { // case-insensitive match; table index equals the enum value
+            return static_cast<Assembly>(i);
+        }
+    }
+
+    return defaultValue; // name not found in asmNames
+}
+
+
+xmrig::Assembly xmrig::Asm::parse(const rapidjson::Value &value, Assembly defaultValue)
+{
+    if (value.IsBool()) {
+        return parse(value.GetBool()); // use the stored flag: false -> ASM_NONE, true -> ASM_AUTO
+    }
+
+    if (value.IsString()) {
+        return parse(value.GetString(), defaultValue); // name lookup; unknown names yield defaultValue
+    }
+
+    return defaultValue; // value is neither a boolean nor a string
+}
+
+
+const char *xmrig::Asm::toString(Assembly assembly)
+{
+ return asmNames[assembly]; // unchecked table lookup: assembly must be below ASM_MAX (asmNames holds ASM_MAX entries)
+}
+
+
+rapidjson::Value xmrig::Asm::toJSON(Assembly assembly)
+{
+    // Booleans for ASM_NONE/ASM_AUTO (mirrors parse(bool)); the textual name for the rest.
+    switch (assembly) {
+    case ASM_NONE:
+        return rapidjson::Value(false);
+
+    case ASM_AUTO:
+        return rapidjson::Value(true);
+
+    default:
+        return rapidjson::Value(rapidjson::StringRef(toString(assembly)));
+    }
+}
diff --git a/src/crypto/Asm.h b/src/crypto/Asm.h
new file mode 100644
index 00000000..3b755fd6
--- /dev/null
+++ b/src/crypto/Asm.h
@@ -0,0 +1,50 @@
+/* XMRig
+ * Copyright 2010 Jeff Garzik
+ * Copyright 2012-2014 pooler
+ * Copyright 2014 Lucas Jones
+ * Copyright 2014-2016 Wolf9466
+ * Copyright 2016 Jay D Dee
+ * Copyright 2017-2018 XMR-Stak ,
+ * Copyright 2016-2018 XMRig ,
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XMRIG_ASM_H
+#define XMRIG_ASM_H
+
+
+#include "common/xmrig.h"
+#include "rapidjson/fwd.h"
+
+
+namespace xmrig {
+
+
+class Asm // static helpers converting the Assembly enum to and from text, booleans and JSON
+{
+public:
+ static Assembly parse(const char *assembly, Assembly defaultValue = ASM_AUTO); // case-insensitive name lookup; defaultValue for null or unknown names
+ static Assembly parse(const rapidjson::Value &value, Assembly defaultValue = ASM_AUTO); // accepts a JSON boolean or string; defaultValue otherwise
+ static const char *toString(Assembly assembly); // name from the table in Asm.cpp; no range check
+ static rapidjson::Value toJSON(Assembly assembly); // false for ASM_NONE, true for ASM_AUTO, otherwise the name string
+
+ inline static Assembly parse(bool enable) { return enable ? ASM_AUTO : ASM_NONE; } // boolean form: on -> ASM_AUTO, off -> ASM_NONE
+};
+
+
+} /* namespace xmrig */
+
+
+#endif /* XMRIG_ASM_H */
diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h
index e8e86dc4..680f1740 100644
--- a/src/crypto/CryptoNight.h
+++ b/src/crypto/CryptoNight.h
@@ -22,8 +22,8 @@
* along with this program. If not, see .
*/
-#ifndef __CRYPTONIGHT_H__
-#define __CRYPTONIGHT_H__
+#ifndef XMRIG_CRYPTONIGHT_H
+#define XMRIG_CRYPTONIGHT_H
#include
@@ -31,9 +31,9 @@
struct cryptonight_ctx {
- alignas(16) uint8_t state[200];
- alignas(16) uint8_t* memory;
+ alignas(16) uint8_t state[224];
+ alignas(16) uint8_t *memory;
};
-#endif /* __CRYPTONIGHT_H__ */
+#endif /* XMRIG_CRYPTONIGHT_H */
diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h
index 1cb06687..42ea37b5 100644
--- a/src/crypto/CryptoNight_x86.h
+++ b/src/crypto/CryptoNight_x86.h
@@ -561,6 +561,33 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
+#ifndef XMRIG_NO_ASM
+extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx);
+extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx);
+
+
+template // NOTE(review): the template parameter list (and the other <...> arguments below) appear to have been stripped in this copy - restore before use
+inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
+{
+ constexpr size_t MEM = xmrig::cn_select_memory();
+
+ xmrig::keccak(input, size, ctx[0]->state); // only ctx[0] is used: one hash per call
+ cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory);
+
+ if (ASM == xmrig::ASM_INTEL) { // main loop runs in an external assembly routine instead of C++
+ cnv2_mainloop_ivybridge_asm(ctx[0]);
+ }
+ else { // every value other than ASM_INTEL takes the Ryzen routine
+ cnv2_mainloop_ryzen_asm(ctx[0]);
+ }
+
+ cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state);
+ xmrig::keccakf(reinterpret_cast(ctx[0]->state), 24);
+ extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output); // final hash function picked by the low two bits of state[0]; it is given the first 200 bytes of state
+}
+#endif
+
+
template
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx)
{
diff --git a/src/crypto/asm/cnv2_main_loop.S b/src/crypto/asm/cnv2_main_loop.S
new file mode 100644
index 00000000..580a4588
--- /dev/null
+++ b/src/crypto/asm/cnv2_main_loop.S
@@ -0,0 +1,27 @@
+#define ALIGN .align
+.intel_syntax noprefix
+#ifdef __APPLE__ /* Apple targets: exported symbols get a leading underscore via FN_PREFIX */
+# define FN_PREFIX(fn) _ ## fn
+.text
+#else
+# define FN_PREFIX(fn) fn
+.section .text
+#endif
+.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
+.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
+
+ALIGN 16
+FN_PREFIX(cnv2_mainloop_ivybridge_asm):
+ sub rsp, 48 /* scratch area: the included body stores rbx at [rsp+24] before its own pushes */
+ mov rcx, rdi /* the included body reads the ctx pointer from rcx; the first argument arrives in rdi here */
+ #include "cnv2_main_loop_ivybridge.inc"
+ add rsp, 48 /* release the scratch area reserved above */
+ ret 0
+
+ALIGN 16
+FN_PREFIX(cnv2_mainloop_ryzen_asm):
+ sub rsp, 48 /* scratch area: the included body stores rbx/rbp/rsi at [rsp+16], [rsp+24], [rsp+32] */
+ mov rcx, rdi /* the included body reads the ctx pointer from rcx; the first argument arrives in rdi here */
+ #include "cnv2_main_loop_ryzen.inc"
+ add rsp, 48 /* release the scratch area reserved above */
+ ret 0
diff --git a/src/crypto/asm/cnv2_main_loop.asm b/src/crypto/asm/cnv2_main_loop.asm
new file mode 100644
index 00000000..7ec895c4
--- /dev/null
+++ b/src/crypto/asm/cnv2_main_loop.asm
@@ -0,0 +1,18 @@
+_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE ; dedicated page-aligned, executable segment for both main loops
+PUBLIC cnv2_mainloop_ivybridge_asm
+PUBLIC cnv2_mainloop_ryzen_asm
+
+ALIGN 64
+cnv2_mainloop_ivybridge_asm PROC ; no wrapper code: the included body reads the ctx pointer directly from rcx
+ INCLUDE cnv2_main_loop_ivybridge.inc
+ ret 0 ; the included body ends by jumping to its final label, which falls through to this return
+cnv2_mainloop_ivybridge_asm ENDP
+
+ALIGN 64
+cnv2_mainloop_ryzen_asm PROC ; no wrapper code: the included body reads the ctx pointer directly from rcx
+ INCLUDE cnv2_main_loop_ryzen.inc
+ ret 0
+cnv2_mainloop_ryzen_asm ENDP
+
+_TEXT_CNV2_MAINLOOP ENDS
+END
diff --git a/src/crypto/asm/cnv2_main_loop_ivybridge.inc b/src/crypto/asm/cnv2_main_loop_ivybridge.inc
new file mode 100644
index 00000000..8c2c2d3b
--- /dev/null
+++ b/src/crypto/asm/cnv2_main_loop_ivybridge.inc
@@ -0,0 +1,186 @@
+ mov QWORD PTR [rsp+24], rbx
+ push rbp
+ push rsi
+ push rdi
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp, 80
+
+ stmxcsr DWORD PTR [rsp]
+ mov DWORD PTR [rsp+4], 24448
+ ldmxcsr DWORD PTR [rsp+4]
+
+ mov rax, QWORD PTR [rcx+48]
+ mov r9, rcx
+ xor rax, QWORD PTR [rcx+16]
+ mov esi, 524288
+ mov r8, QWORD PTR [rcx+32]
+ mov r13d, -2147483647
+ xor r8, QWORD PTR [rcx]
+ mov r11, QWORD PTR [rcx+40]
+ mov r10, r8
+ mov rdx, QWORD PTR [rcx+56]
+ movq xmm4, rax
+ xor rdx, QWORD PTR [rcx+24]
+ xor r11, QWORD PTR [rcx+8]
+ mov rbx, QWORD PTR [rcx+224]
+ mov rax, QWORD PTR [r9+80]
+ xor rax, QWORD PTR [r9+64]
+ movq xmm0, rdx
+ mov rcx, QWORD PTR [rcx+88]
+ xor rcx, QWORD PTR [r9+72]
+ movq xmm3, QWORD PTR [r9+104]
+ movaps XMMWORD PTR [rsp+64], xmm6
+ movaps XMMWORD PTR [rsp+48], xmm7
+ movaps XMMWORD PTR [rsp+32], xmm8
+ and r10d, 2097136
+ movq xmm5, rax
+
+ xor eax, eax
+ mov QWORD PTR [rsp+16], rax
+
+ mov ax, 1023
+ shl rax, 52
+ movq xmm8, rax
+ mov r15, QWORD PTR [r9+96]
+ punpcklqdq xmm4, xmm0
+ movq xmm0, rcx
+ punpcklqdq xmm5, xmm0
+ movdqu xmm6, XMMWORD PTR [r10+rbx]
+
+ ALIGN 16
+main_loop_ivybridge:
+ lea rdx, QWORD PTR [r10+rbx]
+ mov ecx, r10d
+ mov eax, r10d
+ mov rdi, r15
+ xor ecx, 16
+ xor eax, 32
+ xor r10d, 48
+ movq xmm0, r11
+ movq xmm7, r8
+ punpcklqdq xmm7, xmm0
+ aesenc xmm6, xmm7
+ movq rbp, xmm6
+ mov r9, rbp
+ and r9d, 2097136
+ movdqu xmm2, XMMWORD PTR [rcx+rbx]
+ movdqu xmm1, XMMWORD PTR [rax+rbx]
+ movdqu xmm0, XMMWORD PTR [r10+rbx]
+ paddq xmm1, xmm7
+ paddq xmm0, xmm5
+ paddq xmm2, xmm4
+ movdqu XMMWORD PTR [rcx+rbx], xmm0
+ movdqu XMMWORD PTR [rax+rbx], xmm2
+ movdqu XMMWORD PTR [r10+rbx], xmm1
+ mov r10, r9
+ xor r10d, 32
+ movq rcx, xmm3
+ mov rax, rcx
+ shl rax, 32
+ xor rdi, rax
+ movdqa xmm0, xmm6
+ pxor xmm0, xmm4
+ movdqu XMMWORD PTR [rdx], xmm0
+ xor rdi, QWORD PTR [r9+rbx]
+ lea r14, QWORD PTR [r9+rbx]
+ mov r12, QWORD PTR [r14+8]
+ xor edx, edx
+ lea r9d, DWORD PTR [ecx+ecx]
+ add r9d, ebp
+ movdqa xmm0, xmm6
+ psrldq xmm0, 8
+ or r9d, r13d
+ movq rax, xmm0
+ div r9
+ xorps xmm3, xmm3
+ mov eax, eax
+ shl rdx, 32
+ add rdx, rax
+ lea r9, QWORD PTR [rdx+rbp]
+ mov r15, rdx
+ mov rax, r9
+ shr rax, 12
+ movq xmm0, rax
+ paddq xmm0, xmm8
+ sqrtsd xmm3, xmm0
+ psubq xmm3, XMMWORD PTR [rsp+16]
+ movq rdx, xmm3
+ test edx, 524287
+ je sqrt_fixup_ivybridge
+ psrlq xmm3, 19
+sqrt_fixup_ivybridge_ret:
+
+ mov ecx, r10d
+ mov rax, rdi
+ mul rbp
+ movq xmm2, rdx
+ xor rdx, [rcx+rbx]
+ add r8, rdx
+ mov QWORD PTR [r14], r8
+ xor r8, rdi
+ mov edi, r8d
+ and edi, 2097136
+ movq xmm0, rax
+ xor rax, [rcx+rbx+8]
+ add r11, rax
+ mov QWORD PTR [r14+8], r11
+ punpcklqdq xmm2, xmm0
+
+ mov r9d, r10d
+ xor r9d, 48
+ xor r10d, 16
+ pxor xmm2, XMMWORD PTR [r9+rbx]
+ movdqu xmm0, XMMWORD PTR [r10+rbx]
+ paddq xmm0, xmm5
+ movdqu xmm1, XMMWORD PTR [rcx+rbx]
+ paddq xmm2, xmm4
+ paddq xmm1, xmm7
+ movdqa xmm5, xmm4
+ movdqu XMMWORD PTR [r9+rbx], xmm0
+ movdqa xmm4, xmm6
+ movdqu XMMWORD PTR [rcx+rbx], xmm2
+ movdqu XMMWORD PTR [r10+rbx], xmm1
+ movdqu xmm6, [rdi+rbx]
+ mov r10d, edi
+ xor r11, r12
+ dec rsi
+ jne main_loop_ivybridge
+
+ ldmxcsr DWORD PTR [rsp]
+ mov rbx, QWORD PTR [rsp+160]
+ movaps xmm6, XMMWORD PTR [rsp+64]
+ movaps xmm7, XMMWORD PTR [rsp+48]
+ movaps xmm8, XMMWORD PTR [rsp+32]
+ add rsp, 80
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rdi
+ pop rsi
+ pop rbp
+ jmp cnv2_main_loop_ivybridge_endp
+
+sqrt_fixup_ivybridge:
+ dec rdx
+ mov r13d, -1022
+ shl r13, 32
+ mov rax, rdx
+ shr rdx, 19
+ shr rax, 20
+ mov rcx, rdx
+ sub rcx, rax
+ add rax, r13
+ not r13
+ sub rcx, r13
+ mov r13d, -2147483647
+ imul rcx, rax
+ sub rcx, r9
+ adc rdx, 0
+ movq xmm3, rdx
+ jmp sqrt_fixup_ivybridge_ret
+
+cnv2_main_loop_ivybridge_endp:
diff --git a/src/crypto/asm/cnv2_main_loop_ryzen.inc b/src/crypto/asm/cnv2_main_loop_ryzen.inc
new file mode 100644
index 00000000..d386aa2d
--- /dev/null
+++ b/src/crypto/asm/cnv2_main_loop_ryzen.inc
@@ -0,0 +1,179 @@
+ mov QWORD PTR [rsp+16], rbx ; /* Ryzen main-loop body, entered with rcx = state pointer. Comments use the semicolon + block-comment form so cpp+GAS and MASM-style assemblers both skip them. */
+ mov QWORD PTR [rsp+24], rbp ; /* rbx, rbp, rsi are parked in the stack slots above the return address and reloaded in the epilogue */
+ mov QWORD PTR [rsp+32], rsi
+ push rdi
+ push r12
+ push r13
+ push r14
+ push r15
+ sub rsp, 64 ; /* 64-byte frame: MXCSR save at [rsp], new MXCSR at [rsp+4], xmm8/xmm7/xmm6 at +16/+32/+48 */
+
+ stmxcsr DWORD PTR [rsp] ; /* remember the caller MXCSR, restored after the loop */
+ mov DWORD PTR [rsp+4], 24448
+ ldmxcsr DWORD PTR [rsp+4] ; /* run the loop with MXCSR = 24448 (0x5F80) */
+
+ mov rax, QWORD PTR [rcx+48]
+ mov r9, rcx ; /* keep the state pointer in r9, rcx itself is overwritten by the load from [rcx+88] below */
+ xor rax, QWORD PTR [rcx+16]
+ mov ebp, 524288 ; /* iteration counter, decremented at the bottom of the loop */
+ mov r8, QWORD PTR [rcx+32]
+ xor r8, QWORD PTR [rcx]
+ mov r11, QWORD PTR [rcx+40]
+ mov r10, r8
+ mov rdx, QWORD PTR [rcx+56]
+ movq xmm3, rax
+ xor rdx, QWORD PTR [rcx+24]
+ xor r11, QWORD PTR [rcx+8]
+ mov rbx, QWORD PTR [rcx+224] ; /* rbx = base pointer of every [offset+rbx] access below (presumably the scratchpad, TODO confirm against the context struct) */
+ mov rax, QWORD PTR [r9+80]
+ xor rax, QWORD PTR [r9+64]
+ movq xmm0, rdx
+ mov rcx, QWORD PTR [rcx+88]
+ xor rcx, QWORD PTR [r9+72]
+ mov rdi, QWORD PTR [r9+104]
+ and r10d, 2097136 ; /* 2097136 = 0x1FFFF0: 16-byte aligned offset below 2 MiB */
+ movaps XMMWORD PTR [rsp+48], xmm6 ; /* xmm6, xmm7, xmm8 are saved here and restored in the epilogue */
+ movq xmm4, rax
+ movaps XMMWORD PTR [rsp+32], xmm7
+ movaps XMMWORD PTR [rsp+16], xmm8
+ xorps xmm8, xmm8 ; /* xmm8 = 0, copied into xmm1 on every iteration */
+ mov ax, 1023
+ shl rax, 52 ; /* only the low 12 bits of rax survive the shift, so rax = 1023 << 52, the bit pattern of the double 1.0 */
+ movq xmm7, rax
+ mov r15, QWORD PTR [r9+96]
+ punpcklqdq xmm3, xmm0
+ movq xmm0, rcx
+ punpcklqdq xmm4, xmm0
+
+ ALIGN 16
+main_loop_ryzen:
+ movdqa xmm5, XMMWORD PTR [r10+rbx] ; /* load the 16-byte block at the current offset r10 */
+ movq xmm0, r11
+ movq xmm6, r8
+ punpcklqdq xmm6, xmm0 ; /* xmm6 = (r8 low, r11 high) */
+ lea rdx, QWORD PTR [r10+rbx] ; /* remember the block address, r10 is modified below */
+ lea r9, QWORD PTR [rdi+rdi]
+ shl rdi, 32
+
+ mov ecx, r10d
+ mov eax, r10d
+ xor ecx, 16
+ xor eax, 32
+ xor r10d, 48
+ aesenc xmm5, xmm6 ; /* one AES round on the loaded block with xmm6 as round key */
+ movdqa xmm2, XMMWORD PTR [rcx+rbx]
+ movdqa xmm1, XMMWORD PTR [rax+rbx]
+ movdqa xmm0, XMMWORD PTR [r10+rbx]
+ paddq xmm2, xmm3
+ paddq xmm1, xmm6
+ paddq xmm0, xmm4
+ movdqa XMMWORD PTR [rcx+rbx], xmm0 ; /* the three neighbouring chunks (offset xor 16/32/48) are written back rotated, each with its own addend */
+ movdqa XMMWORD PTR [rax+rbx], xmm2
+ movdqa XMMWORD PTR [r10+rbx], xmm1
+
+ movaps xmm1, xmm8
+ mov rsi, r15
+ xor rsi, rdi
+ movq r14, xmm5
+ movdqa xmm0, xmm5
+ pxor xmm0, xmm3
+ mov r10, r14
+ and r10d, 2097136 ; /* second offset of the iteration, derived from the low qword of the AES result */
+ movdqa XMMWORD PTR [rdx], xmm0 ; /* store AES result xor xmm3 back to the first block */
+ xor rsi, QWORD PTR [r10+rbx]
+ lea r12, QWORD PTR [r10+rbx]
+ mov r13, QWORD PTR [r10+rbx+8]
+
+ add r9d, r14d
+ or r9d, -2147483647 ; /* sets bits 31 and 0 (0x80000001), so the divisor is never zero */
+ xor edx, edx
+ movdqa xmm0, xmm5
+ psrldq xmm0, 8
+ movq rax, xmm0
+
+ div r9 ; /* unsigned rdx:rax / r9 with rdx = 0 and rax = high qword of xmm5 */
+ movq xmm0, rax
+ movq xmm1, rdx
+ punpckldq xmm0, xmm1 ; /* low qword = low32(quotient) | low32(remainder) << 32 */
+ movq r15, xmm0
+ paddq xmm0, xmm5
+ movdqa xmm2, xmm0 ; /* keep the unshifted sum, the fix-up path reads it from xmm2 */
+ psrlq xmm0, 12
+ paddq xmm0, xmm7
+ sqrtsd xmm1, xmm0 ; /* square root of the low lane interpreted as a double */
+ movq rdi, xmm1
+ test rdi, 524287 ; /* 524287 = 0x7FFFF: are the low 19 bits of the result all zero? */
+ je sqrt_fixup_ryzen
+ shr rdi, 19
+
+sqrt_fixup_ryzen_ret:
+ mov rax, rsi
+ mul r14 ; /* rdx:rax = rsi * r14, unsigned 128-bit product */
+ movq xmm1, rax
+ movq xmm0, rdx
+ punpcklqdq xmm0, xmm1 ; /* xmm0 = (product high half, product low half) */
+
+ mov r9d, r10d
+ mov ecx, r10d
+ xor r9d, 16
+ xor ecx, 32
+ xor r10d, 48
+ movdqa xmm1, XMMWORD PTR [rcx+rbx]
+ xor rdx, [rcx+rbx]
+ xor rax, [rcx+rbx+8]
+ movdqa xmm2, XMMWORD PTR [r9+rbx]
+ pxor xmm2, xmm0
+ paddq xmm4, XMMWORD PTR [r10+rbx]
+ paddq xmm2, xmm3
+ paddq xmm1, xmm6
+ movdqa XMMWORD PTR [r9+rbx], xmm4 ; /* second rotation of the three neighbouring chunks, around the second offset */
+ movdqa XMMWORD PTR [rcx+rbx], xmm2
+ movdqa XMMWORD PTR [r10+rbx], xmm1
+
+ movdqa xmm4, xmm3 ; /* xmm4 takes the value xmm3 held during this iteration */
+ add r8, rdx
+ add r11, rax
+ mov QWORD PTR [r12], r8 ; /* write the updated r8/r11 pair to the second block */
+ xor r8, rsi
+ mov QWORD PTR [r12+8], r11
+ mov r10, r8
+ xor r11, r13
+ and r10d, 2097136 ; /* offset for the next iteration */
+ movdqa xmm3, xmm5 ; /* xmm3 takes the AES result of this iteration */
+ dec ebp
+ jne main_loop_ryzen
+
+ ldmxcsr DWORD PTR [rsp] ; /* restore the caller MXCSR */
+ movaps xmm6, XMMWORD PTR [rsp+48]
+ lea r11, QWORD PTR [rsp+64] ; /* r11 = stack pointer as it was before the 64-byte frame was allocated */
+ mov rbx, QWORD PTR [r11+56] ; /* 56 = 5 pushes * 8 + 16: the slot rbx was parked in at entry */
+ mov rbp, QWORD PTR [r11+64]
+ mov rsi, QWORD PTR [r11+72]
+ movaps xmm8, XMMWORD PTR [r11-48]
+ movaps xmm7, XMMWORD PTR [rsp+32]
+ mov rsp, r11
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rdi
+ jmp cnv2_main_loop_ryzen_endp ; /* jump over the out-of-line fix-up code to the end of this body */
+
+sqrt_fixup_ryzen:
+ movq r9, xmm2 ; /* out-of-line path taken when the low 19 bits of the sqrt result are zero; r9 = the unshifted sum saved in xmm2 */
+ dec rdi
+ mov edx, -1022
+ shl rdx, 32
+ mov rax, rdi
+ shr rdi, 19
+ shr rax, 20
+ mov rcx, rdi
+ sub rcx, rax
+ lea rcx, [rcx+rdx+1]
+ add rax, rdx
+ imul rcx, rax
+ sub rcx, r9
+ adc rdi, 0 ; /* add the borrow produced by the subtraction above */
+ jmp sqrt_fixup_ryzen_ret
+
+cnv2_main_loop_ryzen_endp:
diff --git a/src/crypto/asm/cnv2_main_loop_win.S b/src/crypto/asm/cnv2_main_loop_win.S
new file mode 100644
index 00000000..3c2028b6
--- /dev/null
+++ b/src/crypto/asm/cnv2_main_loop_win.S
@@ -0,0 +1,15 @@
+#define ALIGN .align /* the included bodies write ALIGN; map it onto the GAS directive */
+.intel_syntax noprefix /* the included bodies use Intel operand order and unprefixed register names */
+.section .text
+.global cnv2_mainloop_ivybridge_asm /* export both entry points */
+.global cnv2_mainloop_ryzen_asm
+
+ALIGN 16
+cnv2_mainloop_ivybridge_asm:
+ #include "cnv2_main_loop_ivybridge.inc" /* body ends by jumping to its trailing endp label, which lands on the ret below */
+ ret 0
+
+ALIGN 16
+cnv2_mainloop_ryzen_asm:
+ #include "cnv2_main_loop_ryzen.inc" /* body ends by jumping to its trailing endp label, which lands on the ret below */
+ ret 0
diff --git a/src/workers/CpuThread.cpp b/src/workers/CpuThread.cpp
index ca7681f0..ff6be585 100644
--- a/src/workers/CpuThread.cpp
+++ b/src/workers/CpuThread.cpp
@@ -24,8 +24,10 @@
#include
+#include "common/cpu/Cpu.h"
#include "common/log/Log.h"
#include "common/net/Pool.h"
+#include "crypto/Asm.h"
#include "rapidjson/document.h"
#include "workers/CpuThread.h"
@@ -37,9 +39,10 @@
#endif
-xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch) :
+xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly) :
m_algorithm(algorithm),
m_av(av),
+ m_assembly(assembly),
m_prefetch(prefetch),
m_softAES(softAES),
m_priority(priority),
@@ -50,22 +53,23 @@ xmrig::CpuThread::CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiw
}
-xmrig::CpuThread::~CpuThread()
-{
-}
-
-
bool xmrig::CpuThread::isSoftAES(AlgoVariant av)
{
return av == AV_SINGLE_SOFT || av == AV_DOUBLE_SOFT || av > AV_PENTA;
}
-xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant av, Variant variant)
+xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly)
{
assert(variant >= VARIANT_0 && variant < VARIANT_MAX);
- static const cn_hash_fun func_table[VARIANT_MAX * 10 * 3] = {
+# ifndef XMRIG_NO_ASM
+ constexpr const size_t count = VARIANT_MAX * 10 * 3 + 2;
+# else
+ constexpr const size_t count = VARIANT_MAX * 10 * 3;
+# endif
+
+ static const cn_hash_fun func_table[count] = {
cryptonight_single_hash,
cryptonight_double_hash,
cryptonight_single_hash,
@@ -241,12 +245,15 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+# endif
+# ifndef XMRIG_NO_ASM
+ cryptonight_single_hash_asm,
+ cryptonight_single_hash_asm
# endif
};
- const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1;
-
# ifndef NDEBUG
+ const size_t index = fnIndex(algorithm, av, variant, assembly);
cn_hash_fun func = func_table[index];
assert(index < sizeof(func_table) / sizeof(func_table[0]));
@@ -254,12 +261,12 @@ xmrig::CpuThread::cn_hash_fun xmrig::CpuThread::fn(Algo algorithm, AlgoVariant a
return func;
# else
- return func_table[index];
+ return func_table[fnIndex(algorithm, av, variant, assembly)];
# endif
}
-xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority)
+xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly)
{
assert(av > AV_AUTO && av < AV_MAX);
@@ -282,7 +289,7 @@ xmrig::CpuThread *xmrig::CpuThread::createFromAV(size_t index, Algo algorithm, A
}
}
- return new CpuThread(index, algorithm, av, multiway(av), cpuId, priority, isSoftAES(av), false);
+ return new CpuThread(index, algorithm, av, multiway(av), cpuId, priority, isSoftAES(av), false, assembly);
}
@@ -300,7 +307,7 @@ xmrig::CpuThread *xmrig::CpuThread::createFromData(size_t index, Algo algorithm,
assert(av > AV_AUTO && av < AV_MAX);
- return new CpuThread(index, algorithm, static_cast(av), multiway, data.affinity, priority, softAES, false);
+ return new CpuThread(index, algorithm, static_cast(av), multiway, data.affinity, priority, softAES, false, data.assembly);
}
@@ -322,11 +329,14 @@ xmrig::CpuThread::Data xmrig::CpuThread::parse(const rapidjson::Value &object)
}
const auto &affinity = object["affine_to_cpu"];
-
if (affinity.IsUint64()) {
data.affinity = affinity.GetInt64();
}
+# ifndef XMRIG_NO_ASM
+ data.assembly = Asm::parse(object["asm"]);
+# endif
+
return data;
}
@@ -368,7 +378,11 @@ void xmrig::CpuThread::print() const
LOG_DEBUG(GREEN_BOLD("CPU thread: ") " index " WHITE_BOLD("%zu") ", multiway " WHITE_BOLD("%d") ", av " WHITE_BOLD("%d") ",",
index(), static_cast(multiway()), static_cast(m_av));
+# ifndef XMRIG_NO_ASM
+ LOG_DEBUG(" assembly: %s, affine_to_cpu: %" PRId64, Asm::toString(m_assembly), affinity());
+# else
LOG_DEBUG(" affine_to_cpu: %" PRId64, affinity());
+# endif
}
#endif
@@ -403,5 +417,35 @@ rapidjson::Value xmrig::CpuThread::toConfig(rapidjson::Document &doc) const
obj.AddMember("low_power_mode", multiway(), allocator);
obj.AddMember("affine_to_cpu", affinity() == -1L ? Value(kFalseType) : Value(affinity()), allocator);
+# ifndef XMRIG_NO_ASM
+ obj.AddMember("asm", Asm::toJSON(m_assembly), allocator);
+# endif
+
return obj;
}
+
+
+size_t xmrig::CpuThread::fnIndex(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly) // Returns the func_table slot that fn() should use for this combination.
+{
+ const size_t index = VARIANT_MAX * 10 * algorithm + 10 * variant + av - 1; // regular slot: 10 entries per variant, VARIANT_MAX * 10 per algorithm, av shifted down by one
+
+# ifndef XMRIG_NO_ASM
+ if (assembly == ASM_AUTO) { // "auto" is resolved to whatever the CPU info object reports
+ assembly = Cpu::info()->assembly();
+ }
+
+ if (assembly == ASM_NONE) { // assembly disabled: keep the regular implementation
+ return index;
+ }
+
+ constexpr const size_t offset = VARIANT_MAX * 10 * 3; // the asm entries are appended after the regular VARIANT_MAX * 10 * 3 entries of func_table
+
+ if (algorithm == CRYPTONIGHT && variant == VARIANT_2) { // only this algorithm/variant, single-hash mode, has an asm slot
+ if (av == AV_SINGLE) {
+ return offset + assembly - 2; // NOTE(review): assumes the asm-capable Assembly values start at 2 and that only two exist (two slots are appended) - confirm against the enum
+ }
+ }
+# endif
+
+ return index; // every other combination falls back to the regular slot
+}
diff --git a/src/workers/CpuThread.h b/src/workers/CpuThread.h
index 622dc3a2..29ab9696 100644
--- a/src/workers/CpuThread.h
+++ b/src/workers/CpuThread.h
@@ -40,7 +40,7 @@ class CpuThread : public IThread
public:
struct Data
{
- inline Data() : valid(false), affinity(-1L), multiway(SingleWay) {}
+ inline Data() : assembly(ASM_AUTO), valid(false), affinity(-1L), multiway(SingleWay) {}
inline void setMultiway(int value)
{
@@ -50,27 +50,27 @@ public:
}
}
+ Assembly assembly;
bool valid;
int64_t affinity;
Multiway multiway;
};
- CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch);
- ~CpuThread();
+ CpuThread(size_t index, Algo algorithm, AlgoVariant av, Multiway multiway, int64_t affinity, int priority, bool softAES, bool prefetch, Assembly assembly);
typedef void (*cn_hash_fun)(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx);
static bool isSoftAES(AlgoVariant av);
- static cn_hash_fun fn(Algo algorithm, AlgoVariant av, Variant variant);
- static CpuThread *createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority);
+ static cn_hash_fun fn(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly);
+ static CpuThread *createFromAV(size_t index, Algo algorithm, AlgoVariant av, int64_t affinity, int priority, Assembly assembly);
static CpuThread *createFromData(size_t index, Algo algorithm, const CpuThread::Data &data, int priority, bool softAES);
static Data parse(const rapidjson::Value &object);
static Multiway multiway(AlgoVariant av);
inline bool isPrefetch() const { return m_prefetch; }
inline bool isSoftAES() const { return m_softAES; }
- inline cn_hash_fun fn(Variant variant) const { return fn(m_algorithm, m_av, variant); }
+ inline cn_hash_fun fn(Variant variant) const { return fn(m_algorithm, m_av, variant, m_assembly); }
inline Algo algorithm() const override { return m_algorithm; }
inline int priority() const override { return m_priority; }
@@ -91,8 +91,11 @@ protected:
rapidjson::Value toConfig(rapidjson::Document &doc) const override;
private:
+ static size_t fnIndex(Algo algorithm, AlgoVariant av, Variant variant, Assembly assembly);
+
const Algo m_algorithm;
const AlgoVariant m_av;
+ const Assembly m_assembly;
const bool m_prefetch;
const bool m_softAES;
const int m_priority;
diff --git a/src/workers/Worker.cpp b/src/workers/Worker.cpp
index 567b3e08..c569908c 100644
--- a/src/workers/Worker.cpp
+++ b/src/workers/Worker.cpp
@@ -24,8 +24,8 @@
#include
+#include "common/cpu/Cpu.h"
#include "common/Platform.h"
-#include "Cpu.h"
#include "workers/CpuThread.h"
#include "workers/Handle.h"
#include "workers/Worker.h"
@@ -41,7 +41,7 @@ Worker::Worker(Handle *handle) :
m_sequence(0),
m_thread(static_cast(handle->config()))
{
- if (Cpu::threads() > 1 && m_thread->affinity() != -1L) {
+ if (xmrig::Cpu::info()->threads() > 1 && m_thread->affinity() != -1L) {
Platform::setThreadAffinity(m_thread->affinity());
}
diff --git a/src/workers/Worker.h b/src/workers/Worker.h
index aad9e3c5..73e25033 100644
--- a/src/workers/Worker.h
+++ b/src/workers/Worker.h
@@ -21,8 +21,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __WORKER_H__
-#define __WORKER_H__
+#ifndef XMRIG_WORKER_H
+#define XMRIG_WORKER_H
#include
@@ -33,7 +33,6 @@
#include "Mem.h"
-struct cryptonight_ctx;
class Handle;
@@ -67,4 +66,4 @@ protected:
};
-#endif /* __WORKER_H__ */
+#endif /* XMRIG_WORKER_H */