soft_aes: fix previous optimization
The previously removed unrolled variant is faster on some CPUs. Some CPUs are faster with the added unrolled variant. On some CPUs, the best variant depends on the number of threads.
This commit is contained in:
parent
31e896feef
commit
5f0f2506e8
6 changed files with 116 additions and 72 deletions
|
@ -30,6 +30,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include <cstddef>
|
||||
|
||||
typedef void (hashAndFillAes1Rx4_impl)(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
|
||||
extern hashAndFillAes1Rx4_impl* softAESImpl;
|
||||
|
||||
// Accessor for the global soft-AES implementation pointer: returns the
// current value of `softAESImpl` as-is (no null check is performed here).
// NOTE(review): presumably `softAESImpl` is assigned by SelectSoftAESImpl()
// based on the thread count — confirm against the implementation file.
inline hashAndFillAes1Rx4_impl* GetSoftAESImpl()
|
||||
{
|
||||
return softAESImpl;
|
||||
}
|
||||
|
||||
void SelectSoftAESImpl(size_t threadsCount);
|
||||
|
||||
template<int softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
|
@ -39,5 +50,5 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
|||
template<int softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<int softAes>
|
||||
template<int softAes, int unroll>
|
||||
void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue