Home | History | Annotate | Line # | Download | only in libiberty
sha1.c revision 1.10
      1   1.1       mrg /* sha1.c - Functions to compute SHA1 message digest of files or
      2   1.1       mrg    memory blocks according to the NIST specification FIPS-180-1.
      3   1.1       mrg 
      4   1.9       mrg    Copyright (C) 2000-2024 Free Software Foundation, Inc.
      5   1.1       mrg 
      6   1.1       mrg    This program is free software; you can redistribute it and/or modify it
      7   1.1       mrg    under the terms of the GNU General Public License as published by the
      8   1.1       mrg    Free Software Foundation; either version 2, or (at your option) any
      9   1.1       mrg    later version.
     10   1.1       mrg 
     11   1.1       mrg    This program is distributed in the hope that it will be useful,
     12   1.1       mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13   1.1       mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14   1.1       mrg    GNU General Public License for more details.
     15   1.1       mrg 
     16   1.1       mrg    You should have received a copy of the GNU General Public License
     17   1.1       mrg    along with this program; if not, write to the Free Software Foundation,
     18   1.1       mrg    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
     19   1.1       mrg 
     20   1.1       mrg /* Written by Scott G. Miller
     21   1.1       mrg    Credits:
     22   1.1       mrg       Robert Klep <robert (at) ilse.nl>  -- Expansion function fix
     23   1.1       mrg */
     24   1.1       mrg 
     25   1.1       mrg #include <config.h>
     26   1.1       mrg 
     27   1.1       mrg #include "sha1.h"
     28   1.1       mrg 
     29   1.1       mrg #include <stddef.h>
     30   1.1       mrg #include <string.h>
     31   1.1       mrg 
     32   1.9       mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
     33   1.9       mrg # include <x86intrin.h>
     34   1.9       mrg # include <cpuid.h>
     35   1.9       mrg #endif
     36   1.9       mrg 
     37   1.1       mrg #if USE_UNLOCKED_IO
     38   1.1       mrg # include "unlocked-io.h"
     39   1.1       mrg #endif
     40   1.1       mrg 
/* Convert a 32-bit word between host byte order and most-significant-
   byte-first order.  On a big-endian host this is the identity; on any
   other host the four bytes are reversed.  The argument is expanded
   several times, so it must be free of side effects, and it must be an
   unsigned 32-bit value for the shifts to drop the right bits.  */
#ifdef WORDS_BIGENDIAN
# define SWAP(n) (n)
#else
# define SWAP(n) \
    (((n) >> 24) | (((n) >> 8) & 0xff00) | (((n) & 0xff00) << 8) | ((n) << 24))
#endif
     47   1.1       mrg 
/* Number of bytes sha1_stream reads from its input per chunk.  A full
   chunk is handed straight to sha1_process_block, which assumes a
   length that is a multiple of 64, so reject any other value at
   compile time.  */
#define BLOCKSIZE 4096
#if (BLOCKSIZE % 64) != 0
# error "invalid BLOCKSIZE"
#endif
     52   1.1       mrg 
/* Padding used to fill the buffer up to the next 64-byte boundary: one
   0x80 byte (a single 1 bit followed by 0 bits) and then only zero
   bytes.  sha1_finish_ctx copies a prefix of this array after the
   buffered data.  */
static const unsigned char fillbuf[64] = { 0x80 };
     56   1.1       mrg 
     57   1.1       mrg 
     58   1.1       mrg /* Take a pointer to a 160 bit block of data (five 32 bit ints) and
     59   1.1       mrg    initialize it to the start constants of the SHA1 algorithm.  This
     60   1.1       mrg    must be called before using hash in the call to sha1_hash.  */
     61   1.1       mrg void
     62   1.1       mrg sha1_init_ctx (struct sha1_ctx *ctx)
     63   1.1       mrg {
     64   1.1       mrg   ctx->A = 0x67452301;
     65   1.1       mrg   ctx->B = 0xefcdab89;
     66   1.1       mrg   ctx->C = 0x98badcfe;
     67   1.1       mrg   ctx->D = 0x10325476;
     68   1.1       mrg   ctx->E = 0xc3d2e1f0;
     69   1.1       mrg 
     70   1.1       mrg   ctx->total[0] = ctx->total[1] = 0;
     71   1.1       mrg   ctx->buflen = 0;
     72   1.1       mrg }
     73   1.1       mrg 
     74   1.1       mrg /* Put result from CTX in first 20 bytes following RESBUF.  The result
     75   1.1       mrg    must be in little endian byte order.
     76   1.1       mrg 
     77   1.1       mrg    IMPORTANT: On some systems it is required that RESBUF is correctly
     78   1.1       mrg    aligned for a 32-bit value.  */
     79   1.1       mrg void *
     80   1.1       mrg sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
     81   1.1       mrg {
     82   1.1       mrg   ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A);
     83   1.1       mrg   ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B);
     84   1.1       mrg   ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C);
     85   1.1       mrg   ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D);
     86   1.1       mrg   ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E);
     87   1.1       mrg 
     88   1.1       mrg   return resbuf;
     89   1.1       mrg }
     90   1.1       mrg 
     91   1.1       mrg /* Process the remaining bytes in the internal buffer and the usual
     92   1.1       mrg    prolog according to the standard and write the result to RESBUF.
     93   1.1       mrg 
     94   1.1       mrg    IMPORTANT: On some systems it is required that RESBUF is correctly
     95   1.1       mrg    aligned for a 32-bit value.  */
     96   1.1       mrg void *
     97   1.1       mrg sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
     98   1.1       mrg {
     99   1.1       mrg   /* Take yet unprocessed bytes into account.  */
    100   1.1       mrg   sha1_uint32 bytes = ctx->buflen;
    101   1.1       mrg   size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4;
    102   1.1       mrg 
    103   1.1       mrg   /* Now count remaining bytes.  */
    104   1.1       mrg   ctx->total[0] += bytes;
    105   1.1       mrg   if (ctx->total[0] < bytes)
    106   1.1       mrg     ++ctx->total[1];
    107   1.1       mrg 
    108   1.1       mrg   /* Put the 64-bit file length in *bits* at the end of the buffer.  */
    109   1.1       mrg   ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
    110   1.1       mrg   ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3);
    111   1.1       mrg 
    112   1.1       mrg   memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes);
    113   1.1       mrg 
    114   1.1       mrg   /* Process last bytes.  */
    115   1.1       mrg   sha1_process_block (ctx->buffer, size * 4, ctx);
    116   1.1       mrg 
    117   1.1       mrg   return sha1_read_ctx (ctx, resbuf);
    118   1.1       mrg }
    119   1.1       mrg 
    120   1.1       mrg /* Compute SHA1 message digest for bytes read from STREAM.  The
    121   1.1       mrg    resulting message digest number will be written into the 16 bytes
    122   1.1       mrg    beginning at RESBLOCK.  */
    123   1.1       mrg int
    124   1.1       mrg sha1_stream (FILE *stream, void *resblock)
    125   1.1       mrg {
    126   1.1       mrg   struct sha1_ctx ctx;
    127   1.1       mrg   char buffer[BLOCKSIZE + 72];
    128   1.1       mrg   size_t sum;
    129   1.1       mrg 
    130   1.1       mrg   /* Initialize the computation context.  */
    131   1.1       mrg   sha1_init_ctx (&ctx);
    132   1.1       mrg 
    133   1.1       mrg   /* Iterate over full file contents.  */
    134   1.1       mrg   while (1)
    135   1.1       mrg     {
    136   1.1       mrg       /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
    137   1.1       mrg 	 computation function processes the whole buffer so that with the
    138   1.1       mrg 	 next round of the loop another block can be read.  */
    139   1.1       mrg       size_t n;
    140   1.1       mrg       sum = 0;
    141   1.1       mrg 
    142   1.1       mrg       /* Read block.  Take care for partial reads.  */
    143   1.1       mrg       while (1)
    144   1.1       mrg 	{
    145   1.1       mrg 	  n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
    146   1.1       mrg 
    147   1.1       mrg 	  sum += n;
    148   1.1       mrg 
    149   1.1       mrg 	  if (sum == BLOCKSIZE)
    150   1.1       mrg 	    break;
    151   1.1       mrg 
    152   1.1       mrg 	  if (n == 0)
    153   1.1       mrg 	    {
    154   1.1       mrg 	      /* Check for the error flag IFF N == 0, so that we don't
    155   1.1       mrg 		 exit the loop after a partial read due to e.g., EAGAIN
    156   1.1       mrg 		 or EWOULDBLOCK.  */
    157   1.1       mrg 	      if (ferror (stream))
    158   1.1       mrg 		return 1;
    159   1.1       mrg 	      goto process_partial_block;
    160   1.1       mrg 	    }
    161   1.1       mrg 
    162   1.1       mrg 	  /* We've read at least one byte, so ignore errors.  But always
    163   1.1       mrg 	     check for EOF, since feof may be true even though N > 0.
    164   1.1       mrg 	     Otherwise, we could end up calling fread after EOF.  */
    165   1.1       mrg 	  if (feof (stream))
    166   1.1       mrg 	    goto process_partial_block;
    167   1.1       mrg 	}
    168   1.1       mrg 
    169   1.1       mrg       /* Process buffer with BLOCKSIZE bytes.  Note that
    170   1.1       mrg 			BLOCKSIZE % 64 == 0
    171   1.1       mrg        */
    172   1.1       mrg       sha1_process_block (buffer, BLOCKSIZE, &ctx);
    173   1.1       mrg     }
    174   1.1       mrg 
    175   1.1       mrg  process_partial_block:;
    176   1.1       mrg 
    177   1.1       mrg   /* Process any remaining bytes.  */
    178   1.1       mrg   if (sum > 0)
    179   1.1       mrg     sha1_process_bytes (buffer, sum, &ctx);
    180   1.1       mrg 
    181   1.1       mrg   /* Construct result in desired memory.  */
    182   1.1       mrg   sha1_finish_ctx (&ctx, resblock);
    183   1.1       mrg   return 0;
    184   1.1       mrg }
    185   1.1       mrg 
    186   1.1       mrg /* Compute SHA1 message digest for LEN bytes beginning at BUFFER.  The
    187   1.1       mrg    result is always in little endian byte order, so that a byte-wise
    188   1.1       mrg    output yields to the wanted ASCII representation of the message
    189   1.1       mrg    digest.  */
    190   1.1       mrg void *
    191   1.1       mrg sha1_buffer (const char *buffer, size_t len, void *resblock)
    192   1.1       mrg {
    193   1.1       mrg   struct sha1_ctx ctx;
    194   1.1       mrg 
    195   1.1       mrg   /* Initialize the computation context.  */
    196   1.1       mrg   sha1_init_ctx (&ctx);
    197   1.1       mrg 
    198   1.1       mrg   /* Process whole buffer but last len % 64 bytes.  */
    199   1.1       mrg   sha1_process_bytes (buffer, len, &ctx);
    200   1.1       mrg 
    201   1.1       mrg   /* Put result in desired memory area.  */
    202   1.1       mrg   return sha1_finish_ctx (&ctx, resblock);
    203   1.1       mrg }
    204   1.1       mrg 
    205   1.1       mrg void
    206   1.1       mrg sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
    207   1.1       mrg {
    208   1.1       mrg   /* When we already have some bits in our internal buffer concatenate
    209   1.1       mrg      both inputs first.  */
    210   1.1       mrg   if (ctx->buflen != 0)
    211   1.1       mrg     {
    212   1.1       mrg       size_t left_over = ctx->buflen;
    213   1.1       mrg       size_t add = 128 - left_over > len ? len : 128 - left_over;
    214   1.1       mrg 
    215   1.1       mrg       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
    216   1.1       mrg       ctx->buflen += add;
    217   1.1       mrg 
    218   1.1       mrg       if (ctx->buflen > 64)
    219   1.1       mrg 	{
    220   1.1       mrg 	  sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
    221   1.1       mrg 
    222   1.1       mrg 	  ctx->buflen &= 63;
    223   1.1       mrg 	  /* The regions in the following copy operation cannot overlap.  */
    224   1.1       mrg 	  memcpy (ctx->buffer,
    225   1.1       mrg 		  &((char *) ctx->buffer)[(left_over + add) & ~63],
    226   1.1       mrg 		  ctx->buflen);
    227   1.1       mrg 	}
    228   1.1       mrg 
    229   1.1       mrg       buffer = (const char *) buffer + add;
    230   1.1       mrg       len -= add;
    231   1.1       mrg     }
    232   1.1       mrg 
    233   1.1       mrg   /* Process available complete blocks.  */
    234   1.1       mrg   if (len >= 64)
    235   1.1       mrg     {
    236   1.1       mrg #if !_STRING_ARCH_unaligned
    237   1.5  christos # if defined(__clang__) || defined(__GNUC__)
    238   1.2  christos # define alignof(type) __alignof__(type)
    239   1.2  christos # else
    240   1.1       mrg # define alignof(type) offsetof (struct { char c; type x; }, x)
    241   1.2  christos # endif
    242   1.1       mrg # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
    243   1.1       mrg       if (UNALIGNED_P (buffer))
    244   1.1       mrg 	while (len > 64)
    245   1.1       mrg 	  {
    246   1.1       mrg 	    sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
    247   1.1       mrg 	    buffer = (const char *) buffer + 64;
    248   1.1       mrg 	    len -= 64;
    249   1.1       mrg 	  }
    250   1.1       mrg       else
    251   1.1       mrg #endif
    252   1.1       mrg 	{
    253   1.1       mrg 	  sha1_process_block (buffer, len & ~63, ctx);
    254   1.1       mrg 	  buffer = (const char *) buffer + (len & ~63);
    255   1.1       mrg 	  len &= 63;
    256   1.1       mrg 	}
    257   1.1       mrg     }
    258   1.1       mrg 
    259   1.1       mrg   /* Move remaining bytes in internal buffer.  */
    260   1.1       mrg   if (len > 0)
    261   1.1       mrg     {
    262   1.1       mrg       size_t left_over = ctx->buflen;
    263   1.1       mrg 
    264   1.1       mrg       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
    265   1.1       mrg       left_over += len;
    266   1.1       mrg       if (left_over >= 64)
    267   1.1       mrg 	{
    268   1.1       mrg 	  sha1_process_block (ctx->buffer, 64, ctx);
    269   1.1       mrg 	  left_over -= 64;
    270   1.8       mrg 	  memmove (ctx->buffer, &ctx->buffer[16], left_over);
    271   1.1       mrg 	}
    272   1.1       mrg       ctx->buflen = left_over;
    273   1.1       mrg     }
    274   1.1       mrg }
    275   1.1       mrg 
    276   1.1       mrg /* --- Code below is the primary difference between md5.c and sha1.c --- */
    277   1.1       mrg 
/* SHA1 round constants; sha1_process_block uses each one for a run of
   20 consecutive rounds.  */
#define K1 0x5a827999
#define K2 0x6ed9eba1
#define K3 0x8f1bbcdc
#define K4 0xca62c1d6

/* Round functions.  Note that F2 is the same as F4.  F1 selects bits
   of C where B is set and bits of D elsewhere, F2/F4 are the parity of
   the three inputs, and F3 is their bitwise majority.  Every parameter
   is parenthesized so that an argument containing a lower-precedence
   operator (for instance `x | y') is still treated as a single
   operand.  Arguments are expanded more than once and must not have
   side effects.  */
#define F1(B,C,D) ( (D) ^ ( (B) & ( (C) ^ (D) ) ) )
#define F2(B,C,D) ( (B) ^ (C) ^ (D) )
#define F3(B,C,D) ( ( (B) & (C) ) | ( (D) & ( (B) | (C) ) ) )
#define F4(B,C,D) ( (B) ^ (C) ^ (D) )
    289   1.1       mrg 
    290   1.1       mrg /* Process LEN bytes of BUFFER, accumulating context into CTX.
    291   1.1       mrg    It is assumed that LEN % 64 == 0.
    292   1.1       mrg    Most of this code comes from GnuPG's cipher/sha1.c.  */
    293   1.1       mrg 
    294   1.1       mrg void
    295   1.1       mrg sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
    296   1.1       mrg {
    297   1.1       mrg   const sha1_uint32 *words = (const sha1_uint32*) buffer;
    298   1.1       mrg   size_t nwords = len / sizeof (sha1_uint32);
    299   1.1       mrg   const sha1_uint32 *endp = words + nwords;
    300   1.1       mrg   sha1_uint32 x[16];
    301   1.1       mrg   sha1_uint32 a = ctx->A;
    302   1.1       mrg   sha1_uint32 b = ctx->B;
    303   1.1       mrg   sha1_uint32 c = ctx->C;
    304   1.1       mrg   sha1_uint32 d = ctx->D;
    305   1.1       mrg   sha1_uint32 e = ctx->E;
    306   1.1       mrg 
    307   1.1       mrg   /* First increment the byte count.  RFC 1321 specifies the possible
    308   1.1       mrg      length of the file up to 2^64 bits.  Here we only compute the
    309   1.1       mrg      number of bytes.  Do a double word increment.  */
    310   1.1       mrg   ctx->total[0] += len;
    311   1.2  christos   ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
    312   1.1       mrg 
    313   1.1       mrg #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))
    314   1.1       mrg 
    315   1.1       mrg #define M(I) ( tm =   x[I&0x0f] ^ x[(I-14)&0x0f] \
    316   1.1       mrg 		    ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
    317   1.1       mrg 	       , (x[I&0x0f] = rol(tm, 1)) )
    318   1.1       mrg 
    319   1.1       mrg #define R(A,B,C,D,E,F,K,M)  do { E += rol( A, 5 )     \
    320   1.1       mrg 				      + F( B, C, D )  \
    321   1.1       mrg 				      + K	      \
    322   1.1       mrg 				      + M;	      \
    323   1.1       mrg 				 B = rol( B, 30 );    \
    324   1.1       mrg 			       } while(0)
    325   1.1       mrg 
    326   1.1       mrg   while (words < endp)
    327   1.1       mrg     {
    328   1.1       mrg       sha1_uint32 tm;
    329   1.1       mrg       int t;
    330   1.1       mrg       for (t = 0; t < 16; t++)
    331   1.1       mrg 	{
    332   1.1       mrg 	  x[t] = SWAP (*words);
    333   1.1       mrg 	  words++;
    334   1.1       mrg 	}
    335   1.1       mrg 
    336   1.1       mrg       R( a, b, c, d, e, F1, K1, x[ 0] );
    337   1.1       mrg       R( e, a, b, c, d, F1, K1, x[ 1] );
    338   1.1       mrg       R( d, e, a, b, c, F1, K1, x[ 2] );
    339   1.1       mrg       R( c, d, e, a, b, F1, K1, x[ 3] );
    340   1.1       mrg       R( b, c, d, e, a, F1, K1, x[ 4] );
    341   1.1       mrg       R( a, b, c, d, e, F1, K1, x[ 5] );
    342   1.1       mrg       R( e, a, b, c, d, F1, K1, x[ 6] );
    343   1.1       mrg       R( d, e, a, b, c, F1, K1, x[ 7] );
    344   1.1       mrg       R( c, d, e, a, b, F1, K1, x[ 8] );
    345   1.1       mrg       R( b, c, d, e, a, F1, K1, x[ 9] );
    346   1.1       mrg       R( a, b, c, d, e, F1, K1, x[10] );
    347   1.1       mrg       R( e, a, b, c, d, F1, K1, x[11] );
    348   1.1       mrg       R( d, e, a, b, c, F1, K1, x[12] );
    349   1.1       mrg       R( c, d, e, a, b, F1, K1, x[13] );
    350   1.1       mrg       R( b, c, d, e, a, F1, K1, x[14] );
    351   1.1       mrg       R( a, b, c, d, e, F1, K1, x[15] );
    352   1.1       mrg       R( e, a, b, c, d, F1, K1, M(16) );
    353   1.1       mrg       R( d, e, a, b, c, F1, K1, M(17) );
    354   1.1       mrg       R( c, d, e, a, b, F1, K1, M(18) );
    355   1.1       mrg       R( b, c, d, e, a, F1, K1, M(19) );
    356   1.1       mrg       R( a, b, c, d, e, F2, K2, M(20) );
    357   1.1       mrg       R( e, a, b, c, d, F2, K2, M(21) );
    358   1.1       mrg       R( d, e, a, b, c, F2, K2, M(22) );
    359   1.1       mrg       R( c, d, e, a, b, F2, K2, M(23) );
    360   1.1       mrg       R( b, c, d, e, a, F2, K2, M(24) );
    361   1.1       mrg       R( a, b, c, d, e, F2, K2, M(25) );
    362   1.1       mrg       R( e, a, b, c, d, F2, K2, M(26) );
    363   1.1       mrg       R( d, e, a, b, c, F2, K2, M(27) );
    364   1.1       mrg       R( c, d, e, a, b, F2, K2, M(28) );
    365   1.1       mrg       R( b, c, d, e, a, F2, K2, M(29) );
    366   1.1       mrg       R( a, b, c, d, e, F2, K2, M(30) );
    367   1.1       mrg       R( e, a, b, c, d, F2, K2, M(31) );
    368   1.1       mrg       R( d, e, a, b, c, F2, K2, M(32) );
    369   1.1       mrg       R( c, d, e, a, b, F2, K2, M(33) );
    370   1.1       mrg       R( b, c, d, e, a, F2, K2, M(34) );
    371   1.1       mrg       R( a, b, c, d, e, F2, K2, M(35) );
    372   1.1       mrg       R( e, a, b, c, d, F2, K2, M(36) );
    373   1.1       mrg       R( d, e, a, b, c, F2, K2, M(37) );
    374   1.1       mrg       R( c, d, e, a, b, F2, K2, M(38) );
    375   1.1       mrg       R( b, c, d, e, a, F2, K2, M(39) );
    376   1.1       mrg       R( a, b, c, d, e, F3, K3, M(40) );
    377   1.1       mrg       R( e, a, b, c, d, F3, K3, M(41) );
    378   1.1       mrg       R( d, e, a, b, c, F3, K3, M(42) );
    379   1.1       mrg       R( c, d, e, a, b, F3, K3, M(43) );
    380   1.1       mrg       R( b, c, d, e, a, F3, K3, M(44) );
    381   1.1       mrg       R( a, b, c, d, e, F3, K3, M(45) );
    382   1.1       mrg       R( e, a, b, c, d, F3, K3, M(46) );
    383   1.1       mrg       R( d, e, a, b, c, F3, K3, M(47) );
    384   1.1       mrg       R( c, d, e, a, b, F3, K3, M(48) );
    385   1.1       mrg       R( b, c, d, e, a, F3, K3, M(49) );
    386   1.1       mrg       R( a, b, c, d, e, F3, K3, M(50) );
    387   1.1       mrg       R( e, a, b, c, d, F3, K3, M(51) );
    388   1.1       mrg       R( d, e, a, b, c, F3, K3, M(52) );
    389   1.1       mrg       R( c, d, e, a, b, F3, K3, M(53) );
    390   1.1       mrg       R( b, c, d, e, a, F3, K3, M(54) );
    391   1.1       mrg       R( a, b, c, d, e, F3, K3, M(55) );
    392   1.1       mrg       R( e, a, b, c, d, F3, K3, M(56) );
    393   1.1       mrg       R( d, e, a, b, c, F3, K3, M(57) );
    394   1.1       mrg       R( c, d, e, a, b, F3, K3, M(58) );
    395   1.1       mrg       R( b, c, d, e, a, F3, K3, M(59) );
    396   1.1       mrg       R( a, b, c, d, e, F4, K4, M(60) );
    397   1.1       mrg       R( e, a, b, c, d, F4, K4, M(61) );
    398   1.1       mrg       R( d, e, a, b, c, F4, K4, M(62) );
    399   1.1       mrg       R( c, d, e, a, b, F4, K4, M(63) );
    400   1.1       mrg       R( b, c, d, e, a, F4, K4, M(64) );
    401   1.1       mrg       R( a, b, c, d, e, F4, K4, M(65) );
    402   1.1       mrg       R( e, a, b, c, d, F4, K4, M(66) );
    403   1.1       mrg       R( d, e, a, b, c, F4, K4, M(67) );
    404   1.1       mrg       R( c, d, e, a, b, F4, K4, M(68) );
    405   1.1       mrg       R( b, c, d, e, a, F4, K4, M(69) );
    406   1.1       mrg       R( a, b, c, d, e, F4, K4, M(70) );
    407   1.1       mrg       R( e, a, b, c, d, F4, K4, M(71) );
    408   1.1       mrg       R( d, e, a, b, c, F4, K4, M(72) );
    409   1.1       mrg       R( c, d, e, a, b, F4, K4, M(73) );
    410   1.1       mrg       R( b, c, d, e, a, F4, K4, M(74) );
    411   1.1       mrg       R( a, b, c, d, e, F4, K4, M(75) );
    412   1.1       mrg       R( e, a, b, c, d, F4, K4, M(76) );
    413   1.1       mrg       R( d, e, a, b, c, F4, K4, M(77) );
    414   1.1       mrg       R( c, d, e, a, b, F4, K4, M(78) );
    415   1.1       mrg       R( b, c, d, e, a, F4, K4, M(79) );
    416   1.1       mrg 
    417   1.1       mrg       a = ctx->A += a;
    418   1.1       mrg       b = ctx->B += b;
    419   1.1       mrg       c = ctx->C += c;
    420   1.1       mrg       d = ctx->D += d;
    421   1.1       mrg       e = ctx->E += e;
    422   1.1       mrg     }
    423   1.1       mrg }
    424   1.9       mrg 
    425   1.9       mrg #if defined(HAVE_X86_SHA1_HW_SUPPORT)
    426   1.9       mrg /* HW specific version of sha1_process_bytes.  */
    427   1.9       mrg 
    428   1.9       mrg static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *);
    429   1.9       mrg 
    430   1.9       mrg static void
    431   1.9       mrg sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
    432   1.9       mrg {
    433   1.9       mrg   /* When we already have some bits in our internal buffer concatenate
    434   1.9       mrg      both inputs first.  */
    435   1.9       mrg   if (ctx->buflen != 0)
    436   1.9       mrg     {
    437   1.9       mrg       size_t left_over = ctx->buflen;
    438   1.9       mrg       size_t add = 128 - left_over > len ? len : 128 - left_over;
    439   1.9       mrg 
    440   1.9       mrg       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
    441   1.9       mrg       ctx->buflen += add;
    442   1.9       mrg 
    443   1.9       mrg       if (ctx->buflen > 64)
    444   1.9       mrg 	{
    445   1.9       mrg 	  sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
    446   1.9       mrg 
    447   1.9       mrg 	  ctx->buflen &= 63;
    448   1.9       mrg 	  /* The regions in the following copy operation cannot overlap.  */
    449   1.9       mrg 	  memcpy (ctx->buffer,
    450   1.9       mrg 		  &((char *) ctx->buffer)[(left_over + add) & ~63],
    451   1.9       mrg 		  ctx->buflen);
    452   1.9       mrg 	}
    453   1.9       mrg 
    454   1.9       mrg       buffer = (const char *) buffer + add;
    455   1.9       mrg       len -= add;
    456   1.9       mrg     }
    457   1.9       mrg 
    458   1.9       mrg   /* Process available complete blocks.  */
    459   1.9       mrg   if (len >= 64)
    460   1.9       mrg     {
    461   1.9       mrg #if !_STRING_ARCH_unaligned
    462  1.10       mrg #if 0 /* XXXMRG defined above */
    463   1.9       mrg # define alignof(type) offsetof (struct { char c; type x; }, x)
    464   1.9       mrg # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
    465  1.10       mrg #endif
    466   1.9       mrg       if (UNALIGNED_P (buffer))
    467   1.9       mrg 	while (len > 64)
    468   1.9       mrg 	  {
    469   1.9       mrg 	    sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
    470   1.9       mrg 	    buffer = (const char *) buffer + 64;
    471   1.9       mrg 	    len -= 64;
    472   1.9       mrg 	  }
    473   1.9       mrg       else
    474   1.9       mrg #endif
    475   1.9       mrg 	{
    476   1.9       mrg 	  sha1_hw_process_block (buffer, len & ~63, ctx);
    477   1.9       mrg 	  buffer = (const char *) buffer + (len & ~63);
    478   1.9       mrg 	  len &= 63;
    479   1.9       mrg 	}
    480   1.9       mrg     }
    481   1.9       mrg 
    482   1.9       mrg   /* Move remaining bytes in internal buffer.  */
    483   1.9       mrg   if (len > 0)
    484   1.9       mrg     {
    485   1.9       mrg       size_t left_over = ctx->buflen;
    486   1.9       mrg 
    487   1.9       mrg       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
    488   1.9       mrg       left_over += len;
    489   1.9       mrg       if (left_over >= 64)
    490   1.9       mrg 	{
    491   1.9       mrg 	  sha1_hw_process_block (ctx->buffer, 64, ctx);
    492   1.9       mrg 	  left_over -= 64;
    493   1.9       mrg 	  memmove (ctx->buffer, &ctx->buffer[16], left_over);
    494   1.9       mrg 	}
    495   1.9       mrg       ctx->buflen = left_over;
    496   1.9       mrg     }
    497   1.9       mrg }
    498   1.9       mrg 
    499   1.9       mrg /* Process LEN bytes of BUFFER, accumulating context into CTX.
    500   1.9       mrg    Using CPU specific intrinsics.  */
    501   1.9       mrg 
    502   1.9       mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
    503   1.9       mrg __attribute__((__target__ ("sse4.1,sha")))
    504   1.9       mrg #endif
    505   1.9       mrg static void
    506   1.9       mrg sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
    507   1.9       mrg {
    508   1.9       mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
    509   1.9       mrg   /* Implemented from
    510   1.9       mrg      https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html  */
    511   1.9       mrg   const __m128i *words = (const __m128i *) buffer;
    512   1.9       mrg   const __m128i *endp = (const __m128i *) ((const char *) buffer + len);
    513   1.9       mrg   __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3;
    514   1.9       mrg   const __m128i shuf_mask
    515   1.9       mrg     = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
    516   1.9       mrg   char check[((offsetof (struct sha1_ctx, B)
    517   1.9       mrg 	     == offsetof (struct sha1_ctx, A) + sizeof (ctx->A))
    518   1.9       mrg 		   && (offsetof (struct sha1_ctx, C)
    519   1.9       mrg 		       == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A))
    520   1.9       mrg 		   && (offsetof (struct sha1_ctx, D)
    521   1.9       mrg 		       == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A)))
    522   1.9       mrg 		  ? 1 : -1];
    523   1.9       mrg 
    524   1.9       mrg   /* First increment the byte count.  RFC 1321 specifies the possible
    525   1.9       mrg      length of the file up to 2^64 bits.  Here we only compute the
    526   1.9       mrg      number of bytes.  Do a double word increment.  */
    527   1.9       mrg   ctx->total[0] += len;
    528   1.9       mrg   ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
    529   1.9       mrg 
    530   1.9       mrg   (void) &check[0];
    531   1.9       mrg   abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A);
    532   1.9       mrg   e0 = _mm_set_epi32 (ctx->E, 0, 0, 0);
    533   1.9       mrg   abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
    534   1.9       mrg 
    535   1.9       mrg   while (words < endp)
    536   1.9       mrg     {
    537   1.9       mrg       abcd_save = abcd;
    538   1.9       mrg       e0_save = e0;
    539   1.9       mrg 
    540   1.9       mrg       /* 0..3 */
    541   1.9       mrg       msg0 = _mm_loadu_si128 (words);
    542   1.9       mrg       msg0 = _mm_shuffle_epi8 (msg0, shuf_mask);
    543   1.9       mrg       e0 = _mm_add_epi32 (e0, msg0);
    544   1.9       mrg       e1 = abcd;
    545   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
    546   1.9       mrg 
    547   1.9       mrg       /* 4..7 */
    548   1.9       mrg       msg1 = _mm_loadu_si128 (words + 1);
    549   1.9       mrg       msg1 = _mm_shuffle_epi8 (msg1, shuf_mask);
    550   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    551   1.9       mrg       e0 = abcd;
    552   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
    553   1.9       mrg       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
    554   1.9       mrg 
    555   1.9       mrg       /* 8..11 */
    556   1.9       mrg       msg2 = _mm_loadu_si128 (words + 2);
    557   1.9       mrg       msg2 = _mm_shuffle_epi8 (msg2, shuf_mask);
    558   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    559   1.9       mrg       e1 = abcd;
    560   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
    561   1.9       mrg       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
    562   1.9       mrg       msg0 = _mm_xor_si128 (msg0, msg2);
    563   1.9       mrg 
    564   1.9       mrg       /* 12..15 */
    565   1.9       mrg       msg3 = _mm_loadu_si128 (words + 3);
    566   1.9       mrg       msg3 = _mm_shuffle_epi8 (msg3, shuf_mask);
    567   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    568   1.9       mrg       e0 = abcd;
    569   1.9       mrg       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
    570   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
    571   1.9       mrg       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
    572   1.9       mrg       msg1 = _mm_xor_si128 (msg1, msg3);
    573   1.9       mrg 
    574   1.9       mrg       /* 16..19 */
    575   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg0);
    576   1.9       mrg       e1 = abcd;
    577   1.9       mrg       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
    578   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
    579   1.9       mrg       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
    580   1.9       mrg       msg2 = _mm_xor_si128 (msg2, msg0);
    581   1.9       mrg 
    582   1.9       mrg       /* 20..23 */
    583   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    584   1.9       mrg       e0 = abcd;
    585   1.9       mrg       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
    586   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
    587   1.9       mrg       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
    588   1.9       mrg       msg3 = _mm_xor_si128 (msg3, msg1);
    589   1.9       mrg 
    590   1.9       mrg       /* 24..27 */
    591   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    592   1.9       mrg       e1 = abcd;
    593   1.9       mrg       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
    594   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
    595   1.9       mrg       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
    596   1.9       mrg       msg0 = _mm_xor_si128 (msg0, msg2);
    597   1.9       mrg 
    598   1.9       mrg       /* 28..31 */
    599   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    600   1.9       mrg       e0 = abcd;
    601   1.9       mrg       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
    602   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
    603   1.9       mrg       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
    604   1.9       mrg       msg1 = _mm_xor_si128 (msg1, msg3);
    605   1.9       mrg 
    606   1.9       mrg       /* 32..35 */
    607   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg0);
    608   1.9       mrg       e1 = abcd;
    609   1.9       mrg       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
    610   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
    611   1.9       mrg       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
    612   1.9       mrg       msg2 = _mm_xor_si128 (msg2, msg0);
    613   1.9       mrg 
    614   1.9       mrg       /* 36..39 */
    615   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    616   1.9       mrg       e0 = abcd;
    617   1.9       mrg       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
    618   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
    619   1.9       mrg       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
    620   1.9       mrg       msg3 = _mm_xor_si128 (msg3, msg1);
    621   1.9       mrg 
    622   1.9       mrg       /* 40..43 */
    623   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    624   1.9       mrg       e1 = abcd;
    625   1.9       mrg       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
    626   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
    627   1.9       mrg       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
    628   1.9       mrg       msg0 = _mm_xor_si128 (msg0, msg2);
    629   1.9       mrg 
    630   1.9       mrg       /* 44..47 */
    631   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    632   1.9       mrg       e0 = abcd;
    633   1.9       mrg       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
    634   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
    635   1.9       mrg       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
    636   1.9       mrg       msg1 = _mm_xor_si128 (msg1, msg3);
    637   1.9       mrg 
    638   1.9       mrg       /* 48..51 */
    639   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg0);
    640   1.9       mrg       e1 = abcd;
    641   1.9       mrg       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
    642   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
    643   1.9       mrg       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
    644   1.9       mrg       msg2 = _mm_xor_si128 (msg2, msg0);
    645   1.9       mrg 
    646   1.9       mrg       /* 52..55 */
    647   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    648   1.9       mrg       e0 = abcd;
    649   1.9       mrg       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
    650   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
    651   1.9       mrg       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
    652   1.9       mrg       msg3 = _mm_xor_si128 (msg3, msg1);
    653   1.9       mrg 
    654   1.9       mrg       /* 56..59 */
    655   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    656   1.9       mrg       e1 = abcd;
    657   1.9       mrg       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
    658   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
    659   1.9       mrg       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
    660   1.9       mrg       msg0 = _mm_xor_si128 (msg0, msg2);
    661   1.9       mrg 
    662   1.9       mrg       /* 60..63 */
    663   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    664   1.9       mrg       e0 = abcd;
    665   1.9       mrg       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
    666   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
    667   1.9       mrg       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
    668   1.9       mrg       msg1 = _mm_xor_si128 (msg1, msg3);
    669   1.9       mrg 
    670   1.9       mrg       /* 64..67 */
    671   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg0);
    672   1.9       mrg       e1 = abcd;
    673   1.9       mrg       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
    674   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
    675   1.9       mrg       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
    676   1.9       mrg       msg2 = _mm_xor_si128 (msg2, msg0);
    677   1.9       mrg 
    678   1.9       mrg       /* 68..71 */
    679   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    680   1.9       mrg       e0 = abcd;
    681   1.9       mrg       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
    682   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
    683   1.9       mrg       msg3 = _mm_xor_si128 (msg3, msg1);
    684   1.9       mrg 
    685   1.9       mrg       /* 72..75 */
    686   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    687   1.9       mrg       e1 = abcd;
    688   1.9       mrg       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
    689   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
    690   1.9       mrg 
    691   1.9       mrg       /* 76..79 */
    692   1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    693   1.9       mrg       e0 = abcd;
    694   1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
    695   1.9       mrg 
    696   1.9       mrg       /* Finalize. */
    697   1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, e0_save);
    698   1.9       mrg       abcd = _mm_add_epi32 (abcd, abcd_save);
    699   1.9       mrg 
    700   1.9       mrg       words = words + 4;
    701   1.9       mrg     }
    702   1.9       mrg 
    703   1.9       mrg   abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
    704   1.9       mrg   _mm_storeu_si128 ((__m128i *) &ctx->A, abcd);
    705   1.9       mrg   ctx->E = _mm_extract_epi32 (e0, 3);
    706   1.9       mrg #endif
    707   1.9       mrg }
    708   1.9       mrg #endif
    709   1.9       mrg 
    710   1.9       mrg /* Return sha1_process_bytes or some hardware optimized version thereof
    711   1.9       mrg    depending on current CPU.  */
    712   1.9       mrg 
    713   1.9       mrg sha1_process_bytes_fn
    714   1.9       mrg sha1_choose_process_bytes (void)
    715   1.9       mrg {
    716   1.9       mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
    717   1.9       mrg   unsigned int eax, ebx, ecx, edx;
    718   1.9       mrg   if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)
    719   1.9       mrg       && (ebx & bit_SHA) != 0
    720   1.9       mrg       && __get_cpuid (1, &eax, &ebx, &ecx, &edx)
    721   1.9       mrg       && (ecx & bit_SSE4_1) != 0)
    722   1.9       mrg     return sha1_hw_process_bytes;
    723   1.9       mrg #endif
    724   1.9       mrg   return sha1_process_bytes;
    725   1.9       mrg }
    726