soft_aes: fix previous optimization
Previously removed unrolled variant is faster on some CPUs Some CPUs are faster with added unrolled variant The best variant depends on number of threads on some CPUs
This commit is contained in:
parent
31e896feef
commit
5f0f2506e8
6 changed files with 116 additions and 72 deletions
|
@ -28,9 +28,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*/
|
||||
|
||||
#include "crypto/randomx/soft_aes.h"
|
||||
#include "crypto/randomx/aes_hash.hpp"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include <vector>
|
||||
|
||||
alignas(64) uint32_t lutEnc0[256];
|
||||
alignas(64) uint32_t lutEnc1[256];
|
||||
|
@ -120,47 +117,3 @@ static struct SAESInitializer
|
|||
}
|
||||
}
|
||||
} aes_initializer;
|
||||
|
||||
static uint32_t softAESImpl = 1;
|
||||
|
||||
uint32_t GetSoftAESImpl()
|
||||
{
|
||||
return softAESImpl;
|
||||
}
|
||||
|
||||
void SelectSoftAESImpl()
|
||||
{
|
||||
constexpr int test_length_ms = 100;
|
||||
double speed[2] = {};
|
||||
|
||||
for (int run = 0; run < 3; ++run) {
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
std::vector<uint8_t> scratchpad(10 * 1024);
|
||||
uint8_t hash[64] = {};
|
||||
uint8_t state[64] = {};
|
||||
|
||||
uint64_t t1, t2;
|
||||
|
||||
uint32_t count = 0;
|
||||
t1 = xmrig::Chrono::highResolutionMSecs();
|
||||
do {
|
||||
if (i == 0) {
|
||||
hashAndFillAes1Rx4<1>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||
}
|
||||
else {
|
||||
hashAndFillAes1Rx4<2>(scratchpad.data(), scratchpad.size(), hash, state);
|
||||
}
|
||||
++count;
|
||||
|
||||
t2 = xmrig::Chrono::highResolutionMSecs();
|
||||
} while (t2 - t1 < test_length_ms);
|
||||
|
||||
const double x = count * 1e3 / (t2 - t1);
|
||||
if (x > speed[i]) {
|
||||
speed[i] = x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
softAESImpl = (speed[0] > speed[1]) ? 1 : 2;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue