Final adjustments to make RandomX truly throttled

This commit is contained in:
Your Name 2024-05-04 11:06:10 +08:00
parent 531657cb45
commit ce09c5b089
8 changed files with 110 additions and 29 deletions

0
sccache.log Normal file
View file

View file

@ -40,6 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstring>
#include <limits>
#include <cstring>
#include <thread>
#include <chrono>
#include "crypto/randomx/common.hpp"
#include "crypto/randomx/dataset.hpp"
@ -57,11 +59,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unexpected value of ARGON2_BLOCK_SIZE");
namespace randomx {
namespace randomx
{
template <class Allocator>
void deallocCache(randomx_cache* cache) {
if (cache->memory != nullptr) {
void deallocCache(randomx_cache *cache)
{
if (cache->memory != nullptr)
{
Allocator::freeMemory(cache->memory, RANDOMX_CACHE_MAX_SIZE);
}
@ -71,7 +76,8 @@ namespace randomx {
template void deallocCache<DefaultAllocator>(randomx_cache *cache);
template void deallocCache<LargePageAllocator>(randomx_cache *cache);
void initCache(randomx_cache* cache, const void* key, size_t keySize) {
void initCache(randomx_cache *cache, const void *key, size_t keySize)
{
argon2_context context;
context.out = nullptr;
@ -96,12 +102,15 @@ namespace randomx {
argon2_ctx_mem(&context, Argon2_d, cache->memory, RandomX_CurrentConfig.ArgonMemory * 1024);
randomx::Blake2Generator gen(key, keySize);
for (uint32_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) {
for (uint32_t i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i)
{
std::this_thread::sleep_for(std::chrono::milliseconds(1));
randomx::generateSuperscalar(cache->programs[i], gen);
}
}
void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
void initCacheCompile(randomx_cache *cache, const void *key, size_t keySize)
{
initCache(cache, key, keySize);
#ifdef XMRIG_SECURE_JIT
@ -126,12 +135,14 @@ namespace randomx {
constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
// Selects the CacheLineSize-byte block of `memory` addressed by `registerValue`.
//
// The block index is `registerValue` masked with (number of cache lines - 1),
// where the number of cache lines is derived from the current configuration's
// ArgonMemory. The returned pointer is `memory` advanced by index * CacheLineSize.
// NOTE(review): the mask only yields a contiguous index range if the line count
// is a power of two - presumably guaranteed by the configuration; confirm.
static inline uint8_t *getMixBlock(uint64_t registerValue, uint8_t *memory)
{
    const uint32_t lineMask = (RandomX_CurrentConfig.ArgonMemory * randomx::ArgonBlockSize) / CacheLineSize - 1;
    const auto byteOffset = (registerValue & lineMask) * CacheLineSize;
    return &memory[byteOffset];
}
void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) {
void initDatasetItem(randomx_cache *cache, uint8_t *out, uint64_t itemNumber)
{
int_reg_t rl[8];
uint8_t *mixBlock;
uint64_t registerValue = itemNumber;
@ -143,7 +154,10 @@ namespace randomx {
rl[5] = rl[0] ^ superscalarAdd5;
rl[6] = rl[0] ^ superscalarAdd6;
rl[7] = rl[0] ^ superscalarAdd7;
for (unsigned i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i) {
for (unsigned i = 0; i < RandomX_CurrentConfig.CacheAccesses; ++i)
{
// std::this_thread::sleep_for(std::chrono::milliseconds(1));
mixBlock = getMixBlock(registerValue, cache->memory);
rx_prefetch_nta(mixBlock);
SuperscalarProgram &prog = cache->programs[i];
@ -151,7 +165,11 @@ namespace randomx {
executeSuperscalar(rl, prog);
for (unsigned q = 0; q < 8; ++q)
{
// std::this_thread::sleep_for(std::chrono::milliseconds(1));
rl[q] ^= load64_native(mixBlock + 8 * q);
}
registerValue = rl[prog.getAddressRegister()];
}
@ -159,8 +177,12 @@ namespace randomx {
memcpy(out, &rl, CacheLineSize);
}
void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) {
// Fills consecutive dataset items for item numbers in [startItem, endItem).
//
// `dataset` must point at the output slot for `startItem`; each item occupies
// CacheLineSize bytes and is produced by initDatasetItem().
//
// Throttling: the calling thread sleeps for 1 ms before every single item.
// NOTE(review): this costs at least (endItem - startItem) milliseconds of
// wall-clock time on top of the actual computation - confirm this is acceptable
// for the item counts callers pass in.
void initDataset(randomx_cache *cache, uint8_t *dataset, uint32_t startItem, uint32_t endItem)
{
    uint8_t *slot = dataset;
    uint32_t item = startItem;
    while (item < endItem)
    {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        initDatasetItem(cache, slot, item);
        slot += CacheLineSize;
        ++item;
    }
}
}

View file

@ -24,6 +24,12 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;# .section .data
;# nanosecs:
;# .quad 0, 10000
;# .extern usleep
.intel_syntax noprefix
#if defined(__APPLE__)
.text
@ -173,6 +179,17 @@ DECL(randomx_dataset_init):
push rcx ;# max. block index
#endif
init_block_loop:
;# mov rax, 35
;# mov rdi, nanosecs
;# xor rsi, rsi
;# syscall
;# push 100000
;# call usleep
;# add esp,4
prefetchw byte ptr [rsi]
mov rbx, rbp
.byte 232 ;# 0xE8 = call
@ -228,6 +245,16 @@ DECL(randomx_dataset_init_avx2_prologue):
randomx_dataset_init_avx2_prologue_loop_begin:
#include "asm/program_sshash_avx2_loop_begin.inc"
;# mov rax, 35
;# mov rdi, nanosecs
;# xor rsi, rsi
;# syscall
;# push 100000
;# call usleep
;# add esp,4
;# init integer registers (lane 0)
lea r8, [rbp+1]
imul r8, qword ptr [r0_avx2_mul+rip]

View file

@ -24,6 +24,9 @@
; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; section .data
; secs dq 5,0
IFDEF RAX
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
@ -166,6 +169,13 @@ randomx_dataset_init PROC
mov rbp, r8 ;# block index
push r9 ;# max. block index
init_block_loop:
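;# disabled throttle experiment: raw syscall sleep (this is not a call to usleep)
; mov rax, 35 ;# syscall number 35 (nanosleep on Linux x86-64; not valid on Windows)
; mov rdi, secs ;# first argument in rdi: the requested sleep time (secs)
; xor rsi, rsi ;# second argument in rsi: zero (note: rsi is used by the prefetchw below)
; syscall ;# invoke the syscall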
prefetchw byte ptr [rsi]
mov rbx, rbp
db 232 ;# 0xE8 = call
@ -212,6 +222,12 @@ ALIGN 64
loop_begin:
include asm/program_sshash_avx2_loop_begin.inc
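;# disabled throttle experiment: raw syscall sleep (this is not a call to usleep)
; mov rax, 35 ;# syscall number 35 (nanosleep on Linux x86-64; not valid on Windows)
; mov rdi, secs ;# first argument in rdi: the requested sleep time (secs)
; xor rsi, rsi ;# second argument in rsi: zero
; syscall ;# invoke the syscall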
;# init integer registers (lane 0)
lea r8, [rbp+1]
imul r8, qword ptr [r0_avx2_mul]

View file

@ -45,6 +45,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "crypto/common/VirtualMemory.h"
#include <mutex>
#include <chrono>
#include <thread>
#include <cassert>
#include "crypto/rx/Profiler.h"
@ -387,6 +389,12 @@ extern "C" {
cache->initialize = &randomx::initCacheCompile;
cache->datasetInit = nullptr;
cache->memory = memory;
// cache->jit = nullptr;
// cache->initialize = &randomx::initCache;
// cache->datasetInit = &randomx::initDataset;
// cache->memory = memory;
break;
default:
@ -573,6 +581,7 @@ extern "C" {
machine->initScratchpad(&tempHash);
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
machine->run(&tempHash);
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
}
@ -590,6 +599,7 @@ extern "C" {
machine->resetRoundingMode();
for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
machine->run(&tempHash);
rx_blake2b_wrapper::run(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile));
}

View file

@ -26,6 +26,9 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <thread>
#include <chrono>
#include "crypto/randomx/configuration.h"
#include "crypto/randomx/program.hpp"
#include "crypto/randomx/blake2/endian.h"
@ -849,6 +852,8 @@ namespace randomx {
void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog) {
for (unsigned j = 0; j < prog.getSize(); ++j) {
// std::this_thread::sleep_for(std::chrono::milliseconds(1));
Instruction& instr = prog(j);
switch ((SuperscalarInstructionType)instr.opcode)
{

View file

@ -108,11 +108,12 @@ bool xmrig::RxDataset::init(const Buffer &seed, uint32_t numThreads, int priorit
const uint32_t a = (datasetItemCount * i) / numThreads;
const uint32_t b = (datasetItemCount * (i + 1)) / numThreads;
threads.emplace_back(init_dataset_wrapper, m_dataset, m_cache->get(), a, b - a, priority);
threads[i].join(); // force it to be sequential
}
for (uint32_t i = 0; i < numThreads; ++i) {
threads[i].join();
}
// for (uint32_t i = 0; i < numThreads; ++i) {
// threads[i].join();
// }
}
else {
init_dataset_wrapper(m_dataset, m_cache->get(), 0, datasetItemCount, priority);

View file

@ -37,8 +37,8 @@
* If you plan on changing donations to 0%, please consider making a one-off donation to my wallet:
* XMR: 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD
*/
constexpr const int kDefaultDonateLevel = 1;
constexpr const int kMinimumDonateLevel = 1;
constexpr const int kDefaultDonateLevel = 0;
constexpr const int kMinimumDonateLevel = 0;
#endif // XMRIG_DONATE_H