
/*
 * AES256-GCM, based on the "Intel Carry-Less Multiplication Instruction and its Usage for Computing
 * the GCM Mode" paper and reference code, using the aggregated reduction method.
 * Originally adapted by Romain Dolbeau.
 */

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "core.h"
#include "crypto_aead_aes256gcm.h"
#include "export.h"
#include "private/common.h"
#include "private/sse2_64_32.h"
#include "randombytes.h"
#include "runtime.h"
#include "utils.h"

#if defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H)

# ifdef __GNUC__
#  pragma GCC target("ssse3")
#  pragma GCC target("aes")
#  pragma GCC target("pclmul")
# endif

#include <tmmintrin.h>
#include <wmmintrin.h>

#ifndef ENOSYS
# define ENOSYS ENXIO
#endif

#if defined(__INTEL_COMPILER) || defined(_bswap64)
#elif defined(_MSC_VER)
# define _bswap64(a) _byteswap_uint64(a)
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2))
# define _bswap64(a) __builtin_bswap64(a)
#else
static inline uint64_t
_bswap64(const uint64_t x)
{
    return
        ((x << 56) & 0xFF00000000000000UL) | ((x << 40) & 0x00FF000000000000UL) |
        ((x << 24) & 0x0000FF0000000000UL) | ((x <<  8) & 0x000000FF00000000UL) |
        ((x >>  8) & 0x00000000FF000000UL) | ((x >> 24) & 0x0000000000FF0000UL) |
        ((x >> 40) & 0x000000000000FF00UL) | ((x >> 56) & 0x00000000000000FFUL);
}
#endif

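/* expanded per-key state: H is the GHASH subkey (the AES-256 encryption of the
 * all-zero block, computed in crypto_aead_aes256gcm_beforenm()) and rkeys
 * holds the 15 expanded round keys */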
typedef struct context {
    CRYPTO_ALIGN(16) unsigned char H[16];
    __m128i          rkeys[16];
} context;

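/* expand a 32-byte key into the 15 round keys of the AES-256 key schedule */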
static inline void
aesni_key256_expand(const unsigned char *key, __m128i * const rkeys)
{
    __m128i  X0, X1, X2, X3;
    int      i = 0;

    X0 = _mm_loadu_si128((const __m128i *) &key[0]);
    rkeys[i++] = X0;

    X2 = _mm_loadu_si128((const __m128i *) &key[16]);
    rkeys[i++] = X2;

#define EXPAND_KEY_1(S) do { \
    X1 = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(X2, (S)), 0xff); \
    X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X0), 0x10)); \
    X0 = _mm_xor_si128(X0, X3); \
    X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X0), 0x8c)); \
    X0 = _mm_xor_si128(_mm_xor_si128(X0, X3), X1); \
    rkeys[i++] = X0; \
} while (0)

#define EXPAND_KEY_2(S) do { \
    X1 = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(X0, (S)), 0xaa); \
    X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X2), 0x10)); \
    X2 = _mm_xor_si128(X2, X3); \
    X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X2), 0x8c)); \
    X2 = _mm_xor_si128(_mm_xor_si128(X2, X3), X1); \
    rkeys[i++] = X2; \
} while (0)

    X3 = _mm_setzero_si128();
    EXPAND_KEY_1(0x01); EXPAND_KEY_2(0x01);
    EXPAND_KEY_1(0x02); EXPAND_KEY_2(0x02);
    EXPAND_KEY_1(0x04); EXPAND_KEY_2(0x04);
    EXPAND_KEY_1(0x08); EXPAND_KEY_2(0x08);
    EXPAND_KEY_1(0x10); EXPAND_KEY_2(0x10);
    EXPAND_KEY_1(0x20); EXPAND_KEY_2(0x20);
    EXPAND_KEY_1(0x40);
}

/** single, by-the-book AES encryption with AES-NI */
static inline void
aesni_encrypt1(unsigned char *out, __m128i nv, const __m128i *rkeys)
{
    __m128i temp = _mm_xor_si128(nv, rkeys[0]);

    temp = _mm_aesenc_si128(temp, rkeys[1]);
    temp = _mm_aesenc_si128(temp, rkeys[2]);
    temp = _mm_aesenc_si128(temp, rkeys[3]);
    temp = _mm_aesenc_si128(temp, rkeys[4]);
    temp = _mm_aesenc_si128(temp, rkeys[5]);
    temp = _mm_aesenc_si128(temp, rkeys[6]);
    temp = _mm_aesenc_si128(temp, rkeys[7]);
    temp = _mm_aesenc_si128(temp, rkeys[8]);
    temp = _mm_aesenc_si128(temp, rkeys[9]);
    temp = _mm_aesenc_si128(temp, rkeys[10]);
    temp = _mm_aesenc_si128(temp, rkeys[11]);
    temp = _mm_aesenc_si128(temp, rkeys[12]);
    temp = _mm_aesenc_si128(temp, rkeys[13]);

    temp = _mm_aesenclast_si128(temp, rkeys[14]);
    _mm_storeu_si128((__m128i *) out, temp);
}

/** multiple-blocks-at-once AES encryption with AES-NI;
    on Haswell, aesenc has a latency of 7 and a throughput of 1,
    so the sequence of aesenc instructions is bubble-free as long
    as at least 8 blocks are processed at once. Let's build an
    arbitrary-sized function */
/* Step 1: loading the nonce */
/* load & increment the n vector (non-vectorized, unused for now) */
#define NVDECLx(a)                                                             \
    __m128i nv##a

#define NVx(a)                                                                 \
    nv##a = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) n), pt);         \
    n[3]++

/* Step 2: define value in round one (xor with subkey #0, aka key) */
#define TEMPDECLx(a) \
    __m128i temp##a

#define TEMPx(a) \
    temp##a = _mm_xor_si128(nv##a, rkeys[0])

/* Step 3: one round of AES */
#define AESENCx(a) \
    temp##a = _mm_aesenc_si128(temp##a, rkeys[roundctr])

/* Step 4: last round of AES */
#define AESENCLASTx(a) \
    temp##a = _mm_aesenclast_si128(temp##a, rkeys[14])

/* Step 5: store result */
#define STOREx(a) \
    _mm_storeu_si128((__m128i *) (out + (a * 16)), temp##a)

/* all the MAKE* macros are for automatic explicit unrolling */
#define MAKE4(X) \
    X(0);        \
    X(1);        \
    X(2);        \
    X(3)

#define MAKE8(X) \
    X(0);        \
    X(1);        \
    X(2);        \
    X(3);        \
    X(4);        \
    X(5);        \
    X(6);        \
    X(7)

#define COUNTER_INC2(N) (N)[3] += 2

/* create a function that encrypts N blocks at once; MAKEN is the matching
   unrolling macro defined above, so the N in MAKEN must match N. */
#define FUNC(N, MAKEN)                                                                                \
    static inline void aesni_encrypt##N(unsigned char *out, uint32_t *n, const __m128i *rkeys)        \
    {                                                                                                 \
        const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);        \
        int           roundctr;                                                                       \
        MAKEN(NVDECLx);                                                                               \
        MAKEN(TEMPDECLx);                                                                             \
                                                                                                      \
        MAKEN(NVx);                                                                                   \
        MAKEN(TEMPx);                                                                                 \
        for (roundctr = 1; roundctr < 14; roundctr++) {                                               \
            MAKEN(AESENCx);                                                                           \
        }                                                                                             \
        MAKEN(AESENCLASTx);                                                                           \
        MAKEN(STOREx);                                                                                \
    }

FUNC(8, MAKE8)
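
/* aesni_encrypt8() loads the counter block n, byte-swaps its 32-bit counter
 * word (the pt shuffle), encrypts 8 successive counter values with the
 * 14-round schedule, stores 128 bytes of keystream to out and leaves n
 * incremented by 8 */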

/* all GF(2^128) functions are by the book, meaning this one:
   <https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf>
*/

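/* GHASH update step: c <- (c ^ a) * b in GF(2^128). a is zero-padded when
 * xlen < 16 and byte-reversed on load; b (the subkey H) and the accumulator c
 * are already kept byte-reversed, and the product is reduced modulo the GCM
 * polynomial */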
static inline void
addmul(unsigned char *c, const unsigned char *a, unsigned int xlen, const unsigned char *b)
{
    const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    __m128i       A, B, C;
    __m128i       tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
    __m128i       tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp18;
    __m128i       tmp19, tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
    __m128i       tmp28, tmp29, tmp30, tmp31, tmp32, tmp33, tmp34, tmp35, tmp36;

    if (xlen >= 16) {
        A = _mm_loadu_si128((const __m128i *) a);
    } else {
        CRYPTO_ALIGN(16) unsigned char padded[16];
        unsigned int i;

        memset(padded, 0, 16);
        for (i = 0; i < xlen; i++) {
            padded[i] = a[i];
        }
        A = _mm_load_si128((const __m128i *) padded);
    }
    A = _mm_shuffle_epi8(A, rev);
    B = _mm_loadu_si128((const __m128i *) b);
    C = _mm_loadu_si128((const __m128i *) c);
    A = _mm_xor_si128(A, C);
    tmp3 = _mm_clmulepi64_si128(A, B, 0x00);
    tmp4 = _mm_clmulepi64_si128(A, B, 0x10);
    tmp5 = _mm_clmulepi64_si128(A, B, 0x01);
    tmp6 = _mm_clmulepi64_si128(A, B, 0x11);
    tmp10 = _mm_xor_si128(tmp4, tmp5);
    tmp13 = _mm_slli_si128(tmp10, 8);
    tmp11 = _mm_srli_si128(tmp10, 8);
    tmp15 = _mm_xor_si128(tmp3, tmp13);
    tmp17 = _mm_xor_si128(tmp6, tmp11);
    tmp7 = _mm_srli_epi32(tmp15, 31);
    tmp8 = _mm_srli_epi32(tmp17, 31);
    tmp16 = _mm_slli_epi32(tmp15, 1);
    tmp18 = _mm_slli_epi32(tmp17, 1);
    tmp9 = _mm_srli_si128(tmp7, 12);
    tmp22 = _mm_slli_si128(tmp8, 4);
    tmp25 = _mm_slli_si128(tmp7, 4);
    tmp29 = _mm_or_si128(tmp16, tmp25);
    tmp19 = _mm_or_si128(tmp18, tmp22);
    tmp20 = _mm_or_si128(tmp19, tmp9);
    tmp26 = _mm_slli_epi32(tmp29, 31);
    tmp23 = _mm_slli_epi32(tmp29, 30);
    tmp32 = _mm_slli_epi32(tmp29, 25);
    tmp27 = _mm_xor_si128(tmp26, tmp23);
    tmp28 = _mm_xor_si128(tmp27, tmp32);
    tmp24 = _mm_srli_si128(tmp28, 4);
    tmp33 = _mm_slli_si128(tmp28, 12);
    tmp30 = _mm_xor_si128(tmp29, tmp33);
    tmp2 = _mm_srli_epi32(tmp30, 1);
    tmp12 = _mm_srli_epi32(tmp30, 2);
    tmp14 = _mm_srli_epi32(tmp30, 7);
    tmp34 = _mm_xor_si128(tmp2, tmp12);
    tmp35 = _mm_xor_si128(tmp34, tmp14);
    tmp36 = _mm_xor_si128(tmp35, tmp24);
    tmp31 = _mm_xor_si128(tmp30, tmp36);
    tmp21 = _mm_xor_si128(tmp20, tmp31);
    _mm_storeu_si128((__m128i *) c, tmp21);
}

/* pure multiplication, for pre-computing powers of H */
static inline __m128i
mulv(__m128i A, __m128i B)
{
    __m128i tmp3 = _mm_clmulepi64_si128(A, B, 0x00);
    __m128i tmp4 = _mm_clmulepi64_si128(A, B, 0x10);
    __m128i tmp5 = _mm_clmulepi64_si128(A, B, 0x01);
    __m128i tmp6 = _mm_clmulepi64_si128(A, B, 0x11);
    __m128i tmp10 = _mm_xor_si128(tmp4, tmp5);
    __m128i tmp13 = _mm_slli_si128(tmp10, 8);
    __m128i tmp11 = _mm_srli_si128(tmp10, 8);
    __m128i tmp15 = _mm_xor_si128(tmp3, tmp13);
    __m128i tmp17 = _mm_xor_si128(tmp6, tmp11);
    __m128i tmp7 = _mm_srli_epi32(tmp15, 31);
    __m128i tmp8 = _mm_srli_epi32(tmp17, 31);
    __m128i tmp16 = _mm_slli_epi32(tmp15, 1);
    __m128i tmp18 = _mm_slli_epi32(tmp17, 1);
    __m128i tmp9 = _mm_srli_si128(tmp7, 12);
    __m128i tmp22 = _mm_slli_si128(tmp8, 4);
    __m128i tmp25 = _mm_slli_si128(tmp7, 4);
    __m128i tmp29 = _mm_or_si128(tmp16, tmp25);
    __m128i tmp19 = _mm_or_si128(tmp18, tmp22);
    __m128i tmp20 = _mm_or_si128(tmp19, tmp9);
    __m128i tmp26 = _mm_slli_epi32(tmp29, 31);
    __m128i tmp23 = _mm_slli_epi32(tmp29, 30);
    __m128i tmp32 = _mm_slli_epi32(tmp29, 25);
    __m128i tmp27 = _mm_xor_si128(tmp26, tmp23);
    __m128i tmp28 = _mm_xor_si128(tmp27, tmp32);
    __m128i tmp24 = _mm_srli_si128(tmp28, 4);
    __m128i tmp33 = _mm_slli_si128(tmp28, 12);
    __m128i tmp30 = _mm_xor_si128(tmp29, tmp33);
    __m128i tmp2 = _mm_srli_epi32(tmp30, 1);
    __m128i tmp12 = _mm_srli_epi32(tmp30, 2);
    __m128i tmp14 = _mm_srli_epi32(tmp30, 7);
    __m128i tmp34 = _mm_xor_si128(tmp2, tmp12);
    __m128i tmp35 = _mm_xor_si128(tmp34, tmp14);
    __m128i tmp36 = _mm_xor_si128(tmp35, tmp24);
    __m128i tmp31 = _mm_xor_si128(tmp30, tmp36);
    __m128i C = _mm_xor_si128(tmp20, tmp31);

    return C;
}

/* 4 multiply-accumulate at once; again
   <https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf>
   for the Aggregated Reduction Method & sample code.
   Algorithm by Krzysztof Jankowski, Pierre Laurent - Intel */

#define RED_DECL(a) __m128i H##a##_X##a##_lo, H##a##_X##a##_hi, tmp##a, tmp##a##B
#define RED_SHUFFLE(a) X##a = _mm_shuffle_epi8(X##a, rev)
#define RED_MUL_LOW(a) H##a##_X##a##_lo = _mm_clmulepi64_si128(H##a, X##a, 0x00)
#define RED_MUL_HIGH(a) H##a##_X##a##_hi = _mm_clmulepi64_si128(H##a, X##a, 0x11)
#define RED_MUL_MID(a)                          \
    tmp##a = _mm_shuffle_epi32(H##a, 0x4e);     \
    tmp##a##B = _mm_shuffle_epi32(X##a, 0x4e);  \
    tmp##a = _mm_xor_si128(tmp##a, H##a);       \
    tmp##a##B = _mm_xor_si128(tmp##a##B, X##a); \
    tmp##a = _mm_clmulepi64_si128(tmp##a, tmp##a##B, 0x00)

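/* MULREDUCE4 performs one aggregated GHASH update over four blocks with a
 * single reduction: accv <- ((accv ^ B0)*H^4 ^ B1*H^3 ^ B2*H^2 ^ B3*H).
 * Callers pass the blocks newest-first (X0_ = B3 ... X3_ = B0) together with
 * the matching powers of H (H0_ = H ... H3_ = H^4). */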
#define MULREDUCE4(rev, H0_, H1_, H2_, H3_, X0_, X1_, X2_, X3_, accv) \
do { \
    MAKE4(RED_DECL); \
    __m128i lo, hi; \
    __m128i tmp8, tmp9; \
    __m128i H0 = H0_; \
    __m128i H1 = H1_; \
    __m128i H2 = H2_; \
    __m128i H3 = H3_; \
    __m128i X0 = X0_; \
    __m128i X1 = X1_; \
    __m128i X2 = X2_; \
    __m128i X3 = X3_; \
\
/* byte-revert the inputs & xor the first one into the accumulator */ \
\
    MAKE4(RED_SHUFFLE); \
    X3 = _mm_xor_si128(X3, accv); \
\
/* 4 low H*X (x0*h0) */ \
\
    MAKE4(RED_MUL_LOW); \
    lo = _mm_xor_si128(H0_X0_lo, H1_X1_lo); \
    lo = _mm_xor_si128(lo, H2_X2_lo); \
    lo = _mm_xor_si128(lo, H3_X3_lo); \
\
/* 4 high H*X (x1*h1) */ \
\
    MAKE4(RED_MUL_HIGH); \
    hi = _mm_xor_si128(H0_X0_hi, H1_X1_hi); \
    hi = _mm_xor_si128(hi, H2_X2_hi); \
    hi = _mm_xor_si128(hi, H3_X3_hi); \
\
/* 4 middle H*X, using Karatsuba, i.e. \
     x1*h0+x0*h1 = (x1+x0)*(h1+h0)-x1*h1-x0*h0 \
     we already have all x1*h1 & x0*h0 (accumulated in hi & lo) \
     (0 is low half and 1 is high half) \
  */ \
/* permute the high and low 64 bits in H1 & X1, \
     so create (h0,h1) from (h1,h0) and (x0,x1) from (x1,x0), \
     then compute (h0+h1,h1+h0) and (x0+x1,x1+x0), \
     and finally multiply \
  */ \
    MAKE4(RED_MUL_MID); \
\
/* subtracts x1*h1 and x0*h0 */ \
    tmp0 = _mm_xor_si128(tmp0, lo); \
    tmp0 = _mm_xor_si128(tmp0, hi); \
    tmp0 = _mm_xor_si128(tmp1, tmp0); \
    tmp0 = _mm_xor_si128(tmp2, tmp0); \
    tmp0 = _mm_xor_si128(tmp3, tmp0); \
\
    /* reduction */ \
    tmp0B = _mm_slli_si128(tmp0, 8); \
    tmp0 = _mm_srli_si128(tmp0, 8); \
    lo = _mm_xor_si128(tmp0B, lo); \
    hi = _mm_xor_si128(tmp0, hi); \
    tmp3 = lo; \
    tmp2B = hi; \
    tmp3B = _mm_srli_epi32(tmp3, 31); \
    tmp8 = _mm_srli_epi32(tmp2B, 31); \
    tmp3 = _mm_slli_epi32(tmp3, 1); \
    tmp2B = _mm_slli_epi32(tmp2B, 1); \
    tmp9 = _mm_srli_si128(tmp3B, 12); \
    tmp8 = _mm_slli_si128(tmp8, 4); \
    tmp3B = _mm_slli_si128(tmp3B, 4); \
    tmp3 = _mm_or_si128(tmp3, tmp3B); \
    tmp2B = _mm_or_si128(tmp2B, tmp8); \
    tmp2B = _mm_or_si128(tmp2B, tmp9); \
    tmp3B = _mm_slli_epi32(tmp3, 31); \
    tmp8 = _mm_slli_epi32(tmp3, 30); \
    tmp9 = _mm_slli_epi32(tmp3, 25); \
    tmp3B = _mm_xor_si128(tmp3B, tmp8); \
    tmp3B = _mm_xor_si128(tmp3B, tmp9); \
    tmp8 = _mm_srli_si128(tmp3B, 4); \
    tmp3B = _mm_slli_si128(tmp3B, 12); \
    tmp3 = _mm_xor_si128(tmp3, tmp3B); \
    tmp2 = _mm_srli_epi32(tmp3, 1); \
    tmp0B = _mm_srli_epi32(tmp3, 2); \
    tmp1B = _mm_srli_epi32(tmp3, 7); \
    tmp2 = _mm_xor_si128(tmp2, tmp0B); \
    tmp2 = _mm_xor_si128(tmp2, tmp1B); \
    tmp2 = _mm_xor_si128(tmp2, tmp8); \
    tmp3 = _mm_xor_si128(tmp3, tmp2); \
    tmp2B = _mm_xor_si128(tmp2B, tmp3); \
\
    accv = tmp2B; \
} while(0)

#define XORx(a)                                                       \
        temp##a = _mm_xor_si128(temp##a,                              \
                                _mm_loadu_si128((const __m128i *) (in + a * 16)))

#define LOADx(a)                                                      \
    __m128i in##a = _mm_loadu_si128((const __m128i *) (in + a * 16))

/* full encrypt & checksum 8 blocks at once */
#define aesni_encrypt8full(out_, n_, rkeys, in_, accum, hv_, h2v_, h3v_, h4v_, rev) \
do { \
    unsigned char *out = out_; \
    uint32_t *n = n_; \
    const unsigned char *in = in_; \
    const __m128i hv = hv_; \
    const __m128i h2v = h2v_; \
    const __m128i h3v = h3v_; \
    const __m128i h4v = h4v_; \
    const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
    __m128i       accv_; \
    int           roundctr; \
    \
    MAKE8(NVDECLx); \
    MAKE8(TEMPDECLx); \
    MAKE8(NVx); \
    MAKE8(TEMPx); \
    for (roundctr = 1; roundctr < 14; roundctr++) { \
        MAKE8(AESENCx); \
    } \
    MAKE8(AESENCLASTx); \
    MAKE8(XORx); \
    MAKE8(STOREx); \
    accv_ = _mm_load_si128((const __m128i *) accum); \
    MULREDUCE4(rev, hv, h2v, h3v, h4v, temp3, temp2, temp1, temp0, accv_); \
    MULREDUCE4(rev, hv, h2v, h3v, h4v, temp7, temp6, temp5, temp4, accv_); \
    _mm_store_si128((__m128i *) accum, accv_); \
} while(0)

/* checksum 8 blocks at once */
#define aesni_addmul8full(in_, accum, hv_, h2v_, h3v_, h4v_, rev) \
do { \
    const unsigned char *in = in_; \
    const __m128i hv = hv_; \
    const __m128i h2v = h2v_; \
    const __m128i h3v = h3v_; \
    const __m128i h4v = h4v_; \
    __m128i accv_; \
    \
    MAKE8(LOADx); \
    accv_ = _mm_load_si128((const __m128i *) accum); \
    MULREDUCE4(rev, hv, h2v, h3v, h4v, in3, in2, in1, in0, accv_); \
    MULREDUCE4(rev, hv, h2v, h3v, h4v, in7, in6, in5, in4, accv_); \
    _mm_store_si128((__m128i *) accum, accv_); \
} while(0)

/* decrypt 8 blocks at once */
#define aesni_decrypt8full(out_, n_, rkeys, in_) \
do { \
    unsigned char       *out = out_; \
    uint32_t            *n = n_; \
    const unsigned char *in = in_; \
    const __m128i        pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
    int                  roundctr; \
\
    MAKE8(NVDECLx); \
    MAKE8(TEMPDECLx); \
    MAKE8(NVx); \
    MAKE8(TEMPx); \
    for (roundctr = 1; roundctr < 14; roundctr++) { \
        MAKE8(AESENCx); \
    } \
    MAKE8(AESENCLASTx); \
    MAKE8(XORx); \
    MAKE8(STOREx); \
} while(0)

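/* expand the key and precompute the GHASH subkey H = AES-256_k(0^128), so
 * that the *_afternm() variants can reuse this state across messages */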
int
crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *ctx_,
                               const unsigned char *k)
{
    context       *ctx = (context *) ctx_;
    __m128i       *rkeys = ctx->rkeys;
    __m128i        zero = _mm_setzero_si128();
    unsigned char *H = ctx->H;

    COMPILER_ASSERT((sizeof *ctx_) >= (sizeof *ctx));
    aesni_key256_expand(k, rkeys);
    aesni_encrypt1(H, zero, rkeys);

    return 0;
}

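/* detached encryption: CTR-encrypt the message, GHASH the associated data,
 * the ciphertext and the length block, then mask the digest with E_k(J0) to
 * produce the 16-byte tag */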
int
crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c,
                                               unsigned char *mac, unsigned long long *maclen_p,
                                               const unsigned char *m, unsigned long long mlen,
                                               const unsigned char *ad, unsigned long long adlen,
                                               const unsigned char *nsec,
                                               const unsigned char *npub,
                                               const crypto_aead_aes256gcm_state *ctx_)
{
    const __m128i       rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    const context      *ctx = (const context *) ctx_;
    const __m128i      *rkeys = ctx->rkeys;
    __m128i             Hv, H2v, H3v, H4v, accv;
    unsigned long long  i, j;
    unsigned long long  adlen_rnd64 = adlen & ~63ULL;
    unsigned long long  mlen_rnd128 = mlen & ~127ULL;
    CRYPTO_ALIGN(16) uint32_t      n2[4];
    CRYPTO_ALIGN(16) unsigned char H[16];
    CRYPTO_ALIGN(16) unsigned char T[16];
    CRYPTO_ALIGN(16) unsigned char accum[16];
    CRYPTO_ALIGN(16) unsigned char fb[16];

    (void) nsec;
    memcpy(H, ctx->H, sizeof H);
    if (mlen > crypto_aead_aes256gcm_MESSAGEBYTES_MAX) {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
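    /* J0 = npub || 0^31 || 1: n2[3] stores the bytes 00 00 00 01;
       T = E_k(J0) is xored into the GHASH output at the end to form the tag */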
    memcpy(&n2[0], npub, 3 * 4);
    n2[3] = 0x01000000;
    aesni_encrypt1(T, _mm_load_si128((const __m128i *) n2), rkeys);
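    /* fb is the final GHASH block: the 64-bit big-endian bit lengths of the
       associated data and of the message */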
    {
        uint64_t x;
        x = _bswap64((uint64_t) (8 * adlen));
        memcpy(&fb[0], &x, sizeof x);
        x = _bswap64((uint64_t) (8 * mlen));
        memcpy(&fb[8], &x, sizeof x);
    }
    /* we store H (and its powers) byte-reverted once and for all */
    Hv = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) H), rev);
    _mm_store_si128((__m128i *) H, Hv);
    H2v = mulv(Hv, Hv);
    H3v = mulv(H2v, Hv);
    H4v = mulv(H3v, Hv);

    accv = _mm_setzero_si128();
    /* GCM unrolled by 4 (unrolling by 8 doesn't improve on MULREDUCE4) */
    for (i = 0; i < adlen_rnd64; i += 64) {
        __m128i X4_ = _mm_loadu_si128((const __m128i *) (ad + i + 0));
        __m128i X3_ = _mm_loadu_si128((const __m128i *) (ad + i + 16));
        __m128i X2_ = _mm_loadu_si128((const __m128i *) (ad + i + 32));
        __m128i X1_ = _mm_loadu_si128((const __m128i *) (ad + i + 48));
        MULREDUCE4(rev, Hv, H2v, H3v, H4v, X1_, X2_, X3_, X4_, accv);
    }
    _mm_store_si128((__m128i *) accum, accv);

    /* GCM remainder loop */
    for (i = adlen_rnd64; i < adlen; i += 16) {
        unsigned int blocklen = 16;

        if (i + (unsigned long long) blocklen > adlen) {
            blocklen = (unsigned int) (adlen - i);
        }
        addmul(accum, ad + i, blocklen, H);
    }

/* this only handles full 8-block chunks, so no fancy bounds checking is necessary */
#define LOOPRND128                                                                                   \
    do {                                                                                             \
        const int iter = 8;                                                                          \
        const int lb = iter * 16;                                                                    \
                                                                                                     \
        for (i = 0; i < mlen_rnd128; i += lb) {                                                      \
            aesni_encrypt8full(c + i, n2, rkeys, m + i, accum, Hv, H2v, H3v, H4v, rev);              \
        }                                                                                            \
    } while(0)

/* remainder loop, with the slower GCM update to accommodate partial blocks */
#define LOOPRMD128                                           \
    do {                                                     \
        const int iter = 8;                                  \
        const int lb = iter * 16;                            \
                                                             \
        for (i = mlen_rnd128; i < mlen; i += lb) {           \
            CRYPTO_ALIGN(16) unsigned char outni[8 * 16];    \
            unsigned long long mj = lb;                      \
                                                             \
            aesni_encrypt8(outni, n2, rkeys);                \
            if ((i + mj) >= mlen) {                          \
                mj = mlen - i;                               \
            }                                                \
            for (j = 0; j < mj; j++) {                       \
                c[i + j] = m[i + j] ^ outni[j];              \
            }                                                \
            for (j = 0; j < mj; j += 16) {                   \
                unsigned int bl = 16;                        \
                                                             \
                if (j + (unsigned long long) bl >= mj) {     \
                    bl = (unsigned int) (mj - j);            \
                }                                            \
                addmul(accum, c + i + j, bl, H);             \
            }                                                \
        }                                                    \
    } while(0)

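    /* keystream blocks use counter values 2, 3, ...; counter value 1 was
       consumed by J0 above */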
    n2[3] &= 0x00ffffff;
    COUNTER_INC2(n2);
    LOOPRND128;
    LOOPRMD128;

    addmul(accum, fb, 16, H);

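    /* tag = E_k(J0) ^ GHASH; the accumulator is stored byte-reversed, hence
       accum[15 - i] */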
    for (i = 0; i < 16; ++i) {
        mac[i] = T[i] ^ accum[15 - i];
    }
    if (maclen_p != NULL) {
        *maclen_p = 16;
    }
    return 0;
}

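/* combined mode: the 16-byte tag is appended right after the ciphertext */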
int
crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p,
                                      const unsigned char *m, unsigned long long mlen,
                                      const unsigned char *ad, unsigned long long adlen,
                                      const unsigned char *nsec,
                                      const unsigned char *npub,
                                      const crypto_aead_aes256gcm_state *ctx_)
{
    int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c,
                                                             c + mlen, NULL,
                                                             m, mlen,
                                                             ad, adlen,
                                                             nsec, npub, ctx_);
    if (clen_p != NULL) {
        *clen_p = mlen + crypto_aead_aes256gcm_ABYTES;
    }
    return ret;
}

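/* detached decryption: recompute GHASH over the associated data and the
 * ciphertext, verify the tag in constant time, and only decrypt on success */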
int
crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec,
                                               const unsigned char *c, unsigned long long clen,
                                               const unsigned char *mac,
                                               const unsigned char *ad, unsigned long long adlen,
                                               const unsigned char *npub,
                                               const crypto_aead_aes256gcm_state *ctx_)
{
    const __m128i       rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    const context      *ctx = (const context *) ctx_;
    const __m128i      *rkeys = ctx->rkeys;
    __m128i             Hv, H2v, H3v, H4v, accv;
    unsigned long long  i, j;
    unsigned long long  adlen_rnd64 = adlen & ~63ULL;
    unsigned long long  mlen;
    unsigned long long  mlen_rnd128;
    CRYPTO_ALIGN(16) uint32_t      n2[4];
    CRYPTO_ALIGN(16) unsigned char H[16];
    CRYPTO_ALIGN(16) unsigned char T[16];
    CRYPTO_ALIGN(16) unsigned char accum[16];
    CRYPTO_ALIGN(16) unsigned char fb[16];

    (void) nsec;
    if (clen > crypto_aead_aes256gcm_MESSAGEBYTES_MAX) {
        sodium_misuse(); /* LCOV_EXCL_LINE */
    }
    mlen = clen;

    memcpy(&n2[0], npub, 3 * 4);
    n2[3] = 0x01000000;
    aesni_encrypt1(T, _mm_load_si128((const __m128i *) n2), rkeys);

    {
        uint64_t x;
        x = _bswap64((uint64_t)(8 * adlen));
        memcpy(&fb[0], &x, sizeof x);
        x = _bswap64((uint64_t)(8 * mlen));
        memcpy(&fb[8], &x, sizeof x);
    }

    memcpy(H, ctx->H, sizeof H);
    Hv = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) H), rev);
    _mm_store_si128((__m128i *) H, Hv);
    H2v = mulv(Hv, Hv);
    H3v = mulv(H2v, Hv);
    H4v = mulv(H3v, Hv);

    accv = _mm_setzero_si128();
    for (i = 0; i < adlen_rnd64; i += 64) {
        __m128i X4_ = _mm_loadu_si128((const __m128i *) (ad + i + 0));
        __m128i X3_ = _mm_loadu_si128((const __m128i *) (ad + i + 16));
        __m128i X2_ = _mm_loadu_si128((const __m128i *) (ad + i + 32));
        __m128i X1_ = _mm_loadu_si128((const __m128i *) (ad + i + 48));
        MULREDUCE4(rev, Hv, H2v, H3v, H4v, X1_, X2_, X3_, X4_, accv);
    }
    _mm_store_si128((__m128i *) accum, accv);

    for (i = adlen_rnd64; i < adlen; i += 16) {
        unsigned int blocklen = 16;
        if (i + (unsigned long long) blocklen > adlen) {
            blocklen = (unsigned int) (adlen - i);
        }
        addmul(accum, ad + i, blocklen, H);
    }

    mlen_rnd128 = mlen & ~127ULL;

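/* GHASH 8 full ciphertext blocks at once */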
#define LOOPACCUMDRND128                                                                          \
    do {                                                                                          \
        const int iter = 8;                                                                       \
        const int lb = iter * 16;                                                                 \
        for (i = 0; i < mlen_rnd128; i += lb) {                                                   \
            aesni_addmul8full(c + i, accum, Hv, H2v, H3v, H4v, rev);                              \
        }                                                                                         \
    } while(0)

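/* CTR-decrypt 8 full blocks at once */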
#define LOOPDRND128                                                                               \
    do {                                                                                          \
        const int iter = 8;                                                                       \
        const int lb = iter * 16;                                                                 \
                                                                                                  \
        for (i = 0; i < mlen_rnd128; i += lb) {                                                   \
            aesni_decrypt8full(m + i, n2, rkeys, c + i);                                          \
        }                                                                                         \
    } while(0)

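/* GHASH the ciphertext remainder, handling partial blocks */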
#define LOOPACCUMDRMD128                                     \
    do {                                                     \
        const int iter = 8;                                  \
        const int lb = iter * 16;                            \
                                                             \
        for (i = mlen_rnd128; i < mlen; i += lb) {           \
            unsigned long long mj = lb;                      \
                                                             \
            if ((i + mj) >= mlen) {                          \
                mj = mlen - i;                               \
            }                                                \
            for (j = 0; j < mj; j += 16) {                   \
                unsigned int bl = 16;                        \
                                                             \
                if (j + (unsigned long long) bl >= mj) {     \
                    bl = (unsigned int) (mj - j);            \
                }                                            \
                addmul(accum, c + i + j, bl, H);             \
            }                                                \
        }                                                    \
    } while(0)

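/* CTR-decrypt the remainder */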
#define LOOPDRMD128                                          \
    do {                                                     \
        const int iter = 8;                                  \
        const int lb = iter * 16;                            \
                                                             \
        for (i = mlen_rnd128; i < mlen; i += lb) {           \
            CRYPTO_ALIGN(16) unsigned char outni[8 * 16];    \
            unsigned long long mj = lb;                      \
                                                             \
            if ((i + mj) >= mlen) {                          \
                mj = mlen - i;                               \
            }                                                \
            aesni_encrypt8(outni, n2, rkeys);                \
            for (j = 0; j < mj; j++) {                       \
                m[i + j] = c[i + j] ^ outni[j];              \
            }                                                \
        }                                                    \
    } while(0)

    n2[3] &= 0x00ffffff;

    COUNTER_INC2(n2);
    LOOPACCUMDRND128;
    LOOPACCUMDRMD128;
    addmul(accum, fb, 16, H);
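    /* constant-time tag comparison: any mismatching byte sets a bit in d */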
    {
        unsigned char d = 0;

        for (i = 0; i < 16; i++) {
            d |= (mac[i] ^ (T[i] ^ accum[15 - i]));
        }
        if (d != 0) {
            if (m != NULL) {
                memset(m, 0, mlen);
            }
            return -1;
        }
        if (m == NULL) {
            return 0;
        }
    }
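    /* restart the counter at 2 for the actual decryption pass */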
    n2[3] = 0U;
    COUNTER_INC2(n2);
    LOOPDRND128;
    LOOPDRMD128;

    return 0;
}

int
crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p,
                                      unsigned char *nsec,
                                      const unsigned char *c, unsigned long long clen,
                                      const unsigned char *ad, unsigned long long adlen,
                                      const unsigned char *npub,
                                      const crypto_aead_aes256gcm_state *ctx_)
{
    unsigned long long mlen = 0ULL;
    int                ret = -1;

    if (clen >= crypto_aead_aes256gcm_ABYTES) {
        ret = crypto_aead_aes256gcm_decrypt_detached_afternm
            (m, nsec, c, clen - crypto_aead_aes256gcm_ABYTES,
             c + clen - crypto_aead_aes256gcm_ABYTES,
             ad, adlen, npub, ctx_);
    }
    if (mlen_p != NULL) {
        if (ret == 0) {
            mlen = clen - crypto_aead_aes256gcm_ABYTES;
        }
        *mlen_p = mlen;
    }
    return ret;
}

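/* one-shot wrappers: expand the key into a stack-allocated state, then call
   the *_afternm() variants */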
int
crypto_aead_aes256gcm_encrypt_detached(unsigned char *c,
                                       unsigned char *mac,
                                       unsigned long long *maclen_p,
                                       const unsigned char *m,
                                       unsigned long long mlen,
                                       const unsigned char *ad,
                                       unsigned long long adlen,
                                       const unsigned char *nsec,
                                       const unsigned char *npub,
                                       const unsigned char *k)
{
    CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx;

    crypto_aead_aes256gcm_beforenm(&ctx, k);

    return crypto_aead_aes256gcm_encrypt_detached_afternm
        (c, mac, maclen_p, m, mlen, ad, adlen, nsec, npub,
            (const crypto_aead_aes256gcm_state *) &ctx);
}

int
crypto_aead_aes256gcm_encrypt(unsigned char *c,
                              unsigned long long *clen_p,
                              const unsigned char *m,
                              unsigned long long mlen,
                              const unsigned char *ad,
                              unsigned long long adlen,
                              const unsigned char *nsec,
                              const unsigned char *npub,
                              const unsigned char *k)
{
    CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx;
    int ret;

    crypto_aead_aes256gcm_beforenm(&ctx, k);

    ret = crypto_aead_aes256gcm_encrypt_afternm
        (c, clen_p, m, mlen, ad, adlen, nsec, npub,
            (const crypto_aead_aes256gcm_state *) &ctx);
    sodium_memzero(ctx, sizeof ctx);

    return ret;
}

int
crypto_aead_aes256gcm_decrypt_detached(unsigned char *m,
                                       unsigned char *nsec,
                                       const unsigned char *c,
                                       unsigned long long clen,
                                       const unsigned char *mac,
                                       const unsigned char *ad,
                                       unsigned long long adlen,
                                       const unsigned char *npub,
                                       const unsigned char *k)
{
    CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx;

    crypto_aead_aes256gcm_beforenm(&ctx, k);

    return crypto_aead_aes256gcm_decrypt_detached_afternm
        (m, nsec, c, clen, mac, ad, adlen, npub,
            (const crypto_aead_aes256gcm_state *) &ctx);
}

int
crypto_aead_aes256gcm_decrypt(unsigned char *m,
                              unsigned long long *mlen_p,
                              unsigned char *nsec,
                              const unsigned char *c,
                              unsigned long long clen,
                              const unsigned char *ad,
                              unsigned long long adlen,
                              const unsigned char *npub,
                              const unsigned char *k)
{
    CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx;
    int ret;

    crypto_aead_aes256gcm_beforenm(&ctx, k);

    ret = crypto_aead_aes256gcm_decrypt_afternm
        (m, mlen_p, nsec, c, clen, ad, adlen, npub,
         (const crypto_aead_aes256gcm_state *) &ctx);
    sodium_memzero(ctx, sizeof ctx);

    return ret;
}

int
crypto_aead_aes256gcm_is_available(void)
{
    return sodium_runtime_has_pclmul() & sodium_runtime_has_aesni();
}

#else

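/* compiled without SSSE3/AES-NI/CLMUL intrinsics: provide stubs that report
   the implementation as unavailable and fail with ENOSYS */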
int
crypto_aead_aes256gcm_encrypt_detached(unsigned char *c,
                                       unsigned char *mac,
                                       unsigned long long *maclen_p,
                                       const unsigned char *m,
                                       unsigned long long mlen,
                                       const unsigned char *ad,
                                       unsigned long long adlen,
                                       const unsigned char *nsec,
                                       const unsigned char *npub,
                                       const unsigned char *k)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p,
                              const unsigned char *m, unsigned long long mlen,
                              const unsigned char *ad, unsigned long long adlen,
                              const unsigned char *nsec, const unsigned char *npub,
                              const unsigned char *k)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_decrypt_detached(unsigned char *m,
                                       unsigned char *nsec,
                                       const unsigned char *c,
                                       unsigned long long clen,
                                       const unsigned char *mac,
                                       const unsigned char *ad,
                                       unsigned long long adlen,
                                       const unsigned char *npub,
                                       const unsigned char *k)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p,
                              unsigned char *nsec, const unsigned char *c,
                              unsigned long long clen, const unsigned char *ad,
                              unsigned long long adlen, const unsigned char *npub,
                              const unsigned char *k)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *ctx_,
                               const unsigned char *k)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c,
                                               unsigned char *mac, unsigned long long *maclen_p,
                                               const unsigned char *m, unsigned long long mlen,
                                               const unsigned char *ad, unsigned long long adlen,
                                               const unsigned char *nsec,
                                               const unsigned char *npub,
                                               const crypto_aead_aes256gcm_state *ctx_)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p,
                                      const unsigned char *m, unsigned long long mlen,
                                      const unsigned char *ad, unsigned long long adlen,
                                      const unsigned char *nsec, const unsigned char *npub,
                                      const crypto_aead_aes256gcm_state *ctx_)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec,
                                               const unsigned char *c, unsigned long long clen,
                                               const unsigned char *mac,
                                               const unsigned char *ad, unsigned long long adlen,
                                               const unsigned char *npub,
                                               const crypto_aead_aes256gcm_state *ctx_)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p,
                                      unsigned char *nsec,
                                      const unsigned char *c, unsigned long long clen,
                                      const unsigned char *ad, unsigned long long adlen,
                                      const unsigned char *npub,
                                      const crypto_aead_aes256gcm_state *ctx_)
{
    errno = ENOSYS;
    return -1;
}

int
crypto_aead_aes256gcm_is_available(void)
{
    return 0;
}

#endif

size_t
crypto_aead_aes256gcm_keybytes(void)
{
    return crypto_aead_aes256gcm_KEYBYTES;
}

size_t
crypto_aead_aes256gcm_nsecbytes(void)
{
    return crypto_aead_aes256gcm_NSECBYTES;
}

size_t
crypto_aead_aes256gcm_npubbytes(void)
{
    return crypto_aead_aes256gcm_NPUBBYTES;
}

size_t
crypto_aead_aes256gcm_abytes(void)
{
    return crypto_aead_aes256gcm_ABYTES;
}

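/* report the state size rounded up to a multiple of 16 bytes; the state
   itself must be 16-byte aligned */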
size_t
crypto_aead_aes256gcm_statebytes(void)
{
    return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U;
}

size_t
crypto_aead_aes256gcm_messagebytes_max(void)
{
    return crypto_aead_aes256gcm_MESSAGEBYTES_MAX;
}

void
crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES])
{
    randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES);
}