soft_aes: fix previous optimization
It was faster to inline aesenc, aesdec without any modifications during previous optimization
This commit is contained in:
parent
852fe14604
commit
2b40ee88b6
1 changed files with 24 additions and 42 deletions
|
@ -46,58 +46,40 @@ template<bool soft> rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key);
|
|||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesenc<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
volatile uint8_t s[16];
|
||||
memcpy((void*) s, &in, 16);
|
||||
uint32_t s0, s1, s2, s3;
|
||||
|
||||
uint32_t s0 = lutEnc0[s[ 0]];
|
||||
uint32_t s1 = lutEnc0[s[ 4]];
|
||||
uint32_t s2 = lutEnc0[s[ 8]];
|
||||
uint32_t s3 = lutEnc0[s[12]];
|
||||
s0 = rx_vec_i128_w(in);
|
||||
s1 = rx_vec_i128_z(in);
|
||||
s2 = rx_vec_i128_y(in);
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
s0 ^= lutEnc1[s[ 5]];
|
||||
s1 ^= lutEnc1[s[ 9]];
|
||||
s2 ^= lutEnc1[s[13]];
|
||||
s3 ^= lutEnc1[s[ 1]];
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutEnc0[s0 & 0xff] ^ lutEnc1[(s3 >> 8) & 0xff] ^ lutEnc2[(s2 >> 16) & 0xff] ^ lutEnc3[s1 >> 24]),
|
||||
(lutEnc0[s1 & 0xff] ^ lutEnc1[(s0 >> 8) & 0xff] ^ lutEnc2[(s3 >> 16) & 0xff] ^ lutEnc3[s2 >> 24]),
|
||||
(lutEnc0[s2 & 0xff] ^ lutEnc1[(s1 >> 8) & 0xff] ^ lutEnc2[(s0 >> 16) & 0xff] ^ lutEnc3[s3 >> 24]),
|
||||
(lutEnc0[s3 & 0xff] ^ lutEnc1[(s2 >> 8) & 0xff] ^ lutEnc2[(s1 >> 16) & 0xff] ^ lutEnc3[s0 >> 24])
|
||||
);
|
||||
|
||||
s0 ^= lutEnc2[s[10]];
|
||||
s1 ^= lutEnc2[s[14]];
|
||||
s2 ^= lutEnc2[s[ 2]];
|
||||
s3 ^= lutEnc2[s[ 6]];
|
||||
|
||||
s0 ^= lutEnc3[s[15]];
|
||||
s1 ^= lutEnc3[s[ 3]];
|
||||
s2 ^= lutEnc3[s[ 7]];
|
||||
s3 ^= lutEnc3[s[11]];
|
||||
|
||||
return rx_xor_vec_i128(rx_set_int_vec_i128(s3, s2, s1, s0), key);
|
||||
return rx_xor_vec_i128(out, key);
|
||||
}
|
||||
|
||||
template<>
|
||||
FORCE_INLINE rx_vec_i128 aesdec<true>(rx_vec_i128 in, rx_vec_i128 key) {
|
||||
volatile uint8_t s[16];
|
||||
memcpy((void*) s, &in, 16);
|
||||
uint32_t s0, s1, s2, s3;
|
||||
|
||||
uint32_t s0 = lutDec0[s[ 0]];
|
||||
uint32_t s1 = lutDec0[s[ 4]];
|
||||
uint32_t s2 = lutDec0[s[ 8]];
|
||||
uint32_t s3 = lutDec0[s[12]];
|
||||
s0 = rx_vec_i128_w(in);
|
||||
s1 = rx_vec_i128_z(in);
|
||||
s2 = rx_vec_i128_y(in);
|
||||
s3 = rx_vec_i128_x(in);
|
||||
|
||||
s0 ^= lutDec1[s[13]];
|
||||
s1 ^= lutDec1[s[ 1]];
|
||||
s2 ^= lutDec1[s[ 5]];
|
||||
s3 ^= lutDec1[s[ 9]];
|
||||
rx_vec_i128 out = rx_set_int_vec_i128(
|
||||
(lutDec0[s0 & 0xff] ^ lutDec1[(s1 >> 8) & 0xff] ^ lutDec2[(s2 >> 16) & 0xff] ^ lutDec3[s3 >> 24]),
|
||||
(lutDec0[s1 & 0xff] ^ lutDec1[(s2 >> 8) & 0xff] ^ lutDec2[(s3 >> 16) & 0xff] ^ lutDec3[s0 >> 24]),
|
||||
(lutDec0[s2 & 0xff] ^ lutDec1[(s3 >> 8) & 0xff] ^ lutDec2[(s0 >> 16) & 0xff] ^ lutDec3[s1 >> 24]),
|
||||
(lutDec0[s3 & 0xff] ^ lutDec1[(s0 >> 8) & 0xff] ^ lutDec2[(s1 >> 16) & 0xff] ^ lutDec3[s2 >> 24])
|
||||
);
|
||||
|
||||
s0 ^= lutDec2[s[10]];
|
||||
s1 ^= lutDec2[s[14]];
|
||||
s2 ^= lutDec2[s[ 2]];
|
||||
s3 ^= lutDec2[s[ 6]];
|
||||
|
||||
s0 ^= lutDec3[s[ 7]];
|
||||
s1 ^= lutDec3[s[11]];
|
||||
s2 ^= lutDec3[s[15]];
|
||||
s3 ^= lutDec3[s[ 3]];
|
||||
|
||||
return rx_xor_vec_i128(rx_set_int_vec_i128(s3, s2, s1, s0), key);
|
||||
return rx_xor_vec_i128(out, key);
|
||||
}
|
||||
|
||||
template<>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue