Home | History | Annotate | Line # | Download | only in ref
      1 
      2 #include <stdint.h>
      3 #include <string.h>
      4 
      5 #include "blake2.h"
      6 #include "private/common.h"
      7 #include "private/sse2_64_32.h"
      8 
      9 #if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
     10 
     11 # ifdef __GNUC__
     12 #  pragma GCC target("sse2")
     13 #  pragma GCC target("ssse3")
     14 # endif
     15 
     16 # include <emmintrin.h>
     17 # include <tmmintrin.h>
     18 
     19 # include "blake2b-compress-ssse3.h"
     20 
     21 CRYPTO_ALIGN(64)
     22 static const uint64_t blake2b_IV[8] = {
     23     0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
     24     0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
     25     0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
     26 };
     27 
     28 int
     29 blake2b_compress_ssse3(blake2b_state *S,
     30                        const uint8_t  block[BLAKE2B_BLOCKBYTES])
     31 {
     32     __m128i       row1l, row1h;
     33     __m128i       row2l, row2h;
     34     __m128i       row3l, row3h;
     35     __m128i       row4l, row4h;
     36     __m128i       b0, b1;
     37     __m128i       t0, t1;
     38     const __m128i r16 =
     39         _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
     40     const __m128i r24 =
     41         _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
     42     const uint64_t m0  = ((uint64_t *) block)[0];
     43     const uint64_t m1  = ((uint64_t *) block)[1];
     44     const uint64_t m2  = ((uint64_t *) block)[2];
     45     const uint64_t m3  = ((uint64_t *) block)[3];
     46     const uint64_t m4  = ((uint64_t *) block)[4];
     47     const uint64_t m5  = ((uint64_t *) block)[5];
     48     const uint64_t m6  = ((uint64_t *) block)[6];
     49     const uint64_t m7  = ((uint64_t *) block)[7];
     50     const uint64_t m8  = ((uint64_t *) block)[8];
     51     const uint64_t m9  = ((uint64_t *) block)[9];
     52     const uint64_t m10 = ((uint64_t *) block)[10];
     53     const uint64_t m11 = ((uint64_t *) block)[11];
     54     const uint64_t m12 = ((uint64_t *) block)[12];
     55     const uint64_t m13 = ((uint64_t *) block)[13];
     56     const uint64_t m14 = ((uint64_t *) block)[14];
     57     const uint64_t m15 = ((uint64_t *) block)[15];
     58 
     59     row1l = LOADU(&S->h[0]);
     60     row1h = LOADU(&S->h[2]);
     61     row2l = LOADU(&S->h[4]);
     62     row2h = LOADU(&S->h[6]);
     63     row3l = LOADU(&blake2b_IV[0]);
     64     row3h = LOADU(&blake2b_IV[2]);
     65     row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
     66     row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
     67     ROUND(0);
     68     ROUND(1);
     69     ROUND(2);
     70     ROUND(3);
     71     ROUND(4);
     72     ROUND(5);
     73     ROUND(6);
     74     ROUND(7);
     75     ROUND(8);
     76     ROUND(9);
     77     ROUND(10);
     78     ROUND(11);
     79     row1l = _mm_xor_si128(row3l, row1l);
     80     row1h = _mm_xor_si128(row3h, row1h);
     81     STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
     82     STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
     83     row2l = _mm_xor_si128(row4l, row2l);
     84     row2h = _mm_xor_si128(row4h, row2h);
     85     STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
     86     STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
     87     return 0;
     88 }
     89 
     90 #endif
     91