Home | History | Annotate | Line # | Download | only in libiberty
sha1.c revision 1.10
      1   1.1  christos /* sha1.c - Functions to compute SHA1 message digest of files or
      2   1.1  christos    memory blocks according to the NIST specification FIPS-180-1.
      3   1.1  christos 
      4  1.10  christos    Copyright (C) 2000-2024 Free Software Foundation, Inc.
      5   1.1  christos 
      6   1.1  christos    This program is free software; you can redistribute it and/or modify it
      7   1.1  christos    under the terms of the GNU General Public License as published by the
      8   1.1  christos    Free Software Foundation; either version 2, or (at your option) any
      9   1.1  christos    later version.
     10   1.1  christos 
     11   1.1  christos    This program is distributed in the hope that it will be useful,
     12   1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13   1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14   1.1  christos    GNU General Public License for more details.
     15   1.1  christos 
     16   1.1  christos    You should have received a copy of the GNU General Public License
     17   1.1  christos    along with this program; if not, write to the Free Software Foundation,
     18   1.1  christos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
     19   1.1  christos 
     20   1.1  christos /* Written by Scott G. Miller
     21   1.1  christos    Credits:
     22   1.1  christos       Robert Klep <robert (at) ilse.nl>  -- Expansion function fix
     23   1.1  christos */
     24   1.1  christos 
     25   1.1  christos #include <config.h>
     26   1.1  christos 
     27   1.1  christos #include "sha1.h"
     28   1.1  christos 
     29   1.1  christos #include <stddef.h>
     30   1.1  christos #include <string.h>
     31   1.1  christos 
     32  1.10  christos #ifdef HAVE_X86_SHA1_HW_SUPPORT
     33  1.10  christos # include <x86intrin.h>
     34  1.10  christos # include <cpuid.h>
     35  1.10  christos #endif
     36  1.10  christos 
     37   1.1  christos #if USE_UNLOCKED_IO
     38   1.1  christos # include "unlocked-io.h"
     39   1.1  christos #endif
     40   1.1  christos 
/* SWAP (N) converts the 32-bit value N between host byte order and
   big-endian byte order.  On big-endian hosts it is the identity;
   elsewhere it reverses the four bytes.  N is evaluated several times,
   so it must be free of side effects.  The expression assumes N has an
   unsigned type exactly 32 bits wide -- TODO confirm against the
   definition of sha1_uint32 in sha1.h.  */
#ifdef WORDS_BIGENDIAN
# define SWAP(n) (n)
#else
# define SWAP(n) \
    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
#endif

/* Number of bytes sha1_stream reads before handing the data to
   sha1_process_block.  It has to be a multiple of the 64-byte SHA1
   block size, which the check below enforces at compile time.  */
#define BLOCKSIZE 4096
#if BLOCKSIZE % 64 != 0
# error "invalid BLOCKSIZE"
#endif

/* This array contains the bytes used to pad the buffer to the next
   64-byte boundary: one 0x80 byte followed by zero bytes.
   (RFC 1321, 3.1: Step 1)  */
static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */ };
     56   1.1  christos 
     57   1.1  christos 
     58   1.1  christos /* Take a pointer to a 160 bit block of data (five 32 bit ints) and
     59   1.1  christos    initialize it to the start constants of the SHA1 algorithm.  This
     60   1.1  christos    must be called before using hash in the call to sha1_hash.  */
     61   1.1  christos void
     62   1.1  christos sha1_init_ctx (struct sha1_ctx *ctx)
     63   1.1  christos {
     64   1.1  christos   ctx->A = 0x67452301;
     65   1.1  christos   ctx->B = 0xefcdab89;
     66   1.1  christos   ctx->C = 0x98badcfe;
     67   1.1  christos   ctx->D = 0x10325476;
     68   1.1  christos   ctx->E = 0xc3d2e1f0;
     69   1.1  christos 
     70   1.1  christos   ctx->total[0] = ctx->total[1] = 0;
     71   1.1  christos   ctx->buflen = 0;
     72   1.1  christos }
     73   1.1  christos 
     74   1.1  christos /* Put result from CTX in first 20 bytes following RESBUF.  The result
     75   1.1  christos    must be in little endian byte order.
     76   1.1  christos 
     77   1.1  christos    IMPORTANT: On some systems it is required that RESBUF is correctly
     78   1.1  christos    aligned for a 32-bit value.  */
     79   1.1  christos void *
     80   1.1  christos sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
     81   1.1  christos {
     82   1.1  christos   ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A);
     83   1.1  christos   ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B);
     84   1.1  christos   ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C);
     85   1.1  christos   ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D);
     86   1.1  christos   ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E);
     87   1.1  christos 
     88   1.1  christos   return resbuf;
     89   1.1  christos }
     90   1.1  christos 
     91   1.1  christos /* Process the remaining bytes in the internal buffer and the usual
     92   1.1  christos    prolog according to the standard and write the result to RESBUF.
     93   1.1  christos 
     94   1.1  christos    IMPORTANT: On some systems it is required that RESBUF is correctly
     95   1.1  christos    aligned for a 32-bit value.  */
     96   1.1  christos void *
     97   1.1  christos sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
     98   1.1  christos {
     99   1.1  christos   /* Take yet unprocessed bytes into account.  */
    100   1.1  christos   sha1_uint32 bytes = ctx->buflen;
    101   1.1  christos   size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4;
    102   1.1  christos 
    103   1.1  christos   /* Now count remaining bytes.  */
    104   1.1  christos   ctx->total[0] += bytes;
    105   1.1  christos   if (ctx->total[0] < bytes)
    106   1.1  christos     ++ctx->total[1];
    107   1.1  christos 
    108   1.1  christos   /* Put the 64-bit file length in *bits* at the end of the buffer.  */
    109   1.1  christos   ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
    110   1.1  christos   ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3);
    111   1.1  christos 
    112   1.1  christos   memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes);
    113   1.1  christos 
    114   1.1  christos   /* Process last bytes.  */
    115   1.1  christos   sha1_process_block (ctx->buffer, size * 4, ctx);
    116   1.1  christos 
    117   1.1  christos   return sha1_read_ctx (ctx, resbuf);
    118   1.1  christos }
    119   1.1  christos 
    120   1.1  christos /* Compute SHA1 message digest for bytes read from STREAM.  The
    121   1.1  christos    resulting message digest number will be written into the 16 bytes
    122   1.1  christos    beginning at RESBLOCK.  */
    123   1.1  christos int
    124   1.1  christos sha1_stream (FILE *stream, void *resblock)
    125   1.1  christos {
    126   1.1  christos   struct sha1_ctx ctx;
    127   1.1  christos   char buffer[BLOCKSIZE + 72];
    128   1.1  christos   size_t sum;
    129   1.1  christos 
    130   1.1  christos   /* Initialize the computation context.  */
    131   1.1  christos   sha1_init_ctx (&ctx);
    132   1.1  christos 
    133   1.1  christos   /* Iterate over full file contents.  */
    134   1.1  christos   while (1)
    135   1.1  christos     {
    136   1.1  christos       /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
    137   1.1  christos 	 computation function processes the whole buffer so that with the
    138   1.1  christos 	 next round of the loop another block can be read.  */
    139   1.1  christos       size_t n;
    140   1.1  christos       sum = 0;
    141   1.1  christos 
    142   1.1  christos       /* Read block.  Take care for partial reads.  */
    143   1.1  christos       while (1)
    144   1.1  christos 	{
    145   1.1  christos 	  n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
    146   1.1  christos 
    147   1.1  christos 	  sum += n;
    148   1.1  christos 
    149   1.1  christos 	  if (sum == BLOCKSIZE)
    150   1.1  christos 	    break;
    151   1.1  christos 
    152   1.1  christos 	  if (n == 0)
    153   1.1  christos 	    {
    154   1.1  christos 	      /* Check for the error flag IFF N == 0, so that we don't
    155   1.1  christos 		 exit the loop after a partial read due to e.g., EAGAIN
    156   1.1  christos 		 or EWOULDBLOCK.  */
    157   1.1  christos 	      if (ferror (stream))
    158   1.1  christos 		return 1;
    159   1.1  christos 	      goto process_partial_block;
    160   1.1  christos 	    }
    161   1.1  christos 
    162   1.1  christos 	  /* We've read at least one byte, so ignore errors.  But always
    163   1.1  christos 	     check for EOF, since feof may be true even though N > 0.
    164   1.1  christos 	     Otherwise, we could end up calling fread after EOF.  */
    165   1.1  christos 	  if (feof (stream))
    166   1.1  christos 	    goto process_partial_block;
    167   1.1  christos 	}
    168   1.1  christos 
    169   1.1  christos       /* Process buffer with BLOCKSIZE bytes.  Note that
    170   1.1  christos 			BLOCKSIZE % 64 == 0
    171   1.1  christos        */
    172   1.1  christos       sha1_process_block (buffer, BLOCKSIZE, &ctx);
    173   1.1  christos     }
    174   1.1  christos 
    175   1.1  christos  process_partial_block:;
    176   1.1  christos 
    177   1.1  christos   /* Process any remaining bytes.  */
    178   1.1  christos   if (sum > 0)
    179   1.1  christos     sha1_process_bytes (buffer, sum, &ctx);
    180   1.1  christos 
    181   1.1  christos   /* Construct result in desired memory.  */
    182   1.1  christos   sha1_finish_ctx (&ctx, resblock);
    183   1.1  christos   return 0;
    184   1.1  christos }
    185   1.1  christos 
    186   1.1  christos /* Compute SHA1 message digest for LEN bytes beginning at BUFFER.  The
    187   1.1  christos    result is always in little endian byte order, so that a byte-wise
    188   1.1  christos    output yields to the wanted ASCII representation of the message
    189   1.1  christos    digest.  */
    190   1.1  christos void *
    191   1.1  christos sha1_buffer (const char *buffer, size_t len, void *resblock)
    192   1.1  christos {
    193   1.1  christos   struct sha1_ctx ctx;
    194   1.1  christos 
    195   1.1  christos   /* Initialize the computation context.  */
    196   1.1  christos   sha1_init_ctx (&ctx);
    197   1.1  christos 
    198   1.1  christos   /* Process whole buffer but last len % 64 bytes.  */
    199   1.1  christos   sha1_process_bytes (buffer, len, &ctx);
    200   1.1  christos 
    201   1.1  christos   /* Put result in desired memory area.  */
    202   1.1  christos   return sha1_finish_ctx (&ctx, resblock);
    203   1.1  christos }
    204   1.1  christos 
    205   1.1  christos void
    206   1.1  christos sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
    207   1.1  christos {
    208   1.1  christos   /* When we already have some bits in our internal buffer concatenate
    209   1.1  christos      both inputs first.  */
    210   1.1  christos   if (ctx->buflen != 0)
    211   1.1  christos     {
    212   1.1  christos       size_t left_over = ctx->buflen;
    213   1.1  christos       size_t add = 128 - left_over > len ? len : 128 - left_over;
    214   1.1  christos 
    215   1.1  christos       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
    216   1.1  christos       ctx->buflen += add;
    217   1.1  christos 
    218   1.1  christos       if (ctx->buflen > 64)
    219   1.1  christos 	{
    220   1.1  christos 	  sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
    221   1.1  christos 
    222   1.1  christos 	  ctx->buflen &= 63;
    223   1.1  christos 	  /* The regions in the following copy operation cannot overlap.  */
    224   1.1  christos 	  memcpy (ctx->buffer,
    225   1.1  christos 		  &((char *) ctx->buffer)[(left_over + add) & ~63],
    226   1.1  christos 		  ctx->buflen);
    227   1.1  christos 	}
    228   1.1  christos 
    229   1.1  christos       buffer = (const char *) buffer + add;
    230   1.1  christos       len -= add;
    231   1.1  christos     }
    232   1.1  christos 
    233   1.1  christos   /* Process available complete blocks.  */
    234   1.1  christos   if (len >= 64)
    235   1.1  christos     {
    236   1.1  christos #if !_STRING_ARCH_unaligned
    237   1.1  christos # define alignof(type) offsetof (struct { char c; type x; }, x)
    238   1.1  christos # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
    239   1.1  christos       if (UNALIGNED_P (buffer))
    240   1.1  christos 	while (len > 64)
    241   1.1  christos 	  {
    242   1.1  christos 	    sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
    243   1.1  christos 	    buffer = (const char *) buffer + 64;
    244   1.1  christos 	    len -= 64;
    245   1.1  christos 	  }
    246   1.1  christos       else
    247   1.1  christos #endif
    248   1.1  christos 	{
    249   1.1  christos 	  sha1_process_block (buffer, len & ~63, ctx);
    250   1.1  christos 	  buffer = (const char *) buffer + (len & ~63);
    251   1.1  christos 	  len &= 63;
    252   1.1  christos 	}
    253   1.1  christos     }
    254   1.1  christos 
    255   1.1  christos   /* Move remaining bytes in internal buffer.  */
    256   1.1  christos   if (len > 0)
    257   1.1  christos     {
    258   1.1  christos       size_t left_over = ctx->buflen;
    259   1.1  christos 
    260   1.1  christos       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
    261   1.1  christos       left_over += len;
    262   1.1  christos       if (left_over >= 64)
    263   1.1  christos 	{
    264   1.1  christos 	  sha1_process_block (ctx->buffer, 64, ctx);
    265   1.1  christos 	  left_over -= 64;
    266   1.9  christos 	  memmove (ctx->buffer, &ctx->buffer[16], left_over);
    267   1.1  christos 	}
    268   1.1  christos       ctx->buflen = left_over;
    269   1.1  christos     }
    270   1.1  christos }
    271   1.1  christos 
/* --- Code below is the primary difference between md5.c and sha1.c --- */

/* SHA1 round constants.  sha1_process_block adds K1 in rounds 0..19,
   K2 in rounds 20..39, K3 in rounds 40..59 and K4 in rounds 60..79.  */
#define K1 0x5a827999
#define K2 0x6ed9eba1
#define K3 0x8f1bbcdc
#define K4 0xca62c1d6

/* Round functions.  Note that F2 is the same as F4.
   F1 picks, bit by bit, C where B is set and D where B is clear.
   F2 and F4 are the exclusive or of all three inputs.
   F3 is the bitwise majority of B, C and D.
   The arguments are not parenthesized, so pass plain identifiers.  */
#define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) )
#define F2(B,C,D) (B ^ C ^ D)
#define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) )
#define F4(B,C,D) (B ^ C ^ D)
    285   1.1  christos 
    286   1.1  christos /* Process LEN bytes of BUFFER, accumulating context into CTX.
    287   1.1  christos    It is assumed that LEN % 64 == 0.
    288   1.1  christos    Most of this code comes from GnuPG's cipher/sha1.c.  */
    289   1.1  christos 
    290   1.1  christos void
    291   1.1  christos sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
    292   1.1  christos {
    293   1.1  christos   const sha1_uint32 *words = (const sha1_uint32*) buffer;
    294   1.1  christos   size_t nwords = len / sizeof (sha1_uint32);
    295   1.1  christos   const sha1_uint32 *endp = words + nwords;
    296   1.1  christos   sha1_uint32 x[16];
    297   1.1  christos   sha1_uint32 a = ctx->A;
    298   1.1  christos   sha1_uint32 b = ctx->B;
    299   1.1  christos   sha1_uint32 c = ctx->C;
    300   1.1  christos   sha1_uint32 d = ctx->D;
    301   1.1  christos   sha1_uint32 e = ctx->E;
    302   1.1  christos 
    303   1.1  christos   /* First increment the byte count.  RFC 1321 specifies the possible
    304   1.1  christos      length of the file up to 2^64 bits.  Here we only compute the
    305   1.1  christos      number of bytes.  Do a double word increment.  */
    306   1.1  christos   ctx->total[0] += len;
    307   1.1  christos   ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
    308   1.1  christos 
    309   1.1  christos #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))
    310   1.1  christos 
    311   1.1  christos #define M(I) ( tm =   x[I&0x0f] ^ x[(I-14)&0x0f] \
    312   1.1  christos 		    ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
    313   1.1  christos 	       , (x[I&0x0f] = rol(tm, 1)) )
    314   1.1  christos 
    315   1.1  christos #define R(A,B,C,D,E,F,K,M)  do { E += rol( A, 5 )     \
    316   1.1  christos 				      + F( B, C, D )  \
    317   1.1  christos 				      + K	      \
    318   1.1  christos 				      + M;	      \
    319   1.1  christos 				 B = rol( B, 30 );    \
    320   1.1  christos 			       } while(0)
    321   1.1  christos 
    322   1.1  christos   while (words < endp)
    323   1.1  christos     {
    324   1.1  christos       sha1_uint32 tm;
    325   1.1  christos       int t;
    326   1.1  christos       for (t = 0; t < 16; t++)
    327   1.1  christos 	{
    328   1.1  christos 	  x[t] = SWAP (*words);
    329   1.1  christos 	  words++;
    330   1.1  christos 	}
    331   1.1  christos 
    332   1.1  christos       R( a, b, c, d, e, F1, K1, x[ 0] );
    333   1.1  christos       R( e, a, b, c, d, F1, K1, x[ 1] );
    334   1.1  christos       R( d, e, a, b, c, F1, K1, x[ 2] );
    335   1.1  christos       R( c, d, e, a, b, F1, K1, x[ 3] );
    336   1.1  christos       R( b, c, d, e, a, F1, K1, x[ 4] );
    337   1.1  christos       R( a, b, c, d, e, F1, K1, x[ 5] );
    338   1.1  christos       R( e, a, b, c, d, F1, K1, x[ 6] );
    339   1.1  christos       R( d, e, a, b, c, F1, K1, x[ 7] );
    340   1.1  christos       R( c, d, e, a, b, F1, K1, x[ 8] );
    341   1.1  christos       R( b, c, d, e, a, F1, K1, x[ 9] );
    342   1.1  christos       R( a, b, c, d, e, F1, K1, x[10] );
    343   1.1  christos       R( e, a, b, c, d, F1, K1, x[11] );
    344   1.1  christos       R( d, e, a, b, c, F1, K1, x[12] );
    345   1.1  christos       R( c, d, e, a, b, F1, K1, x[13] );
    346   1.1  christos       R( b, c, d, e, a, F1, K1, x[14] );
    347   1.1  christos       R( a, b, c, d, e, F1, K1, x[15] );
    348   1.1  christos       R( e, a, b, c, d, F1, K1, M(16) );
    349   1.1  christos       R( d, e, a, b, c, F1, K1, M(17) );
    350   1.1  christos       R( c, d, e, a, b, F1, K1, M(18) );
    351   1.1  christos       R( b, c, d, e, a, F1, K1, M(19) );
    352   1.1  christos       R( a, b, c, d, e, F2, K2, M(20) );
    353   1.1  christos       R( e, a, b, c, d, F2, K2, M(21) );
    354   1.1  christos       R( d, e, a, b, c, F2, K2, M(22) );
    355   1.1  christos       R( c, d, e, a, b, F2, K2, M(23) );
    356   1.1  christos       R( b, c, d, e, a, F2, K2, M(24) );
    357   1.1  christos       R( a, b, c, d, e, F2, K2, M(25) );
    358   1.1  christos       R( e, a, b, c, d, F2, K2, M(26) );
    359   1.1  christos       R( d, e, a, b, c, F2, K2, M(27) );
    360   1.1  christos       R( c, d, e, a, b, F2, K2, M(28) );
    361   1.1  christos       R( b, c, d, e, a, F2, K2, M(29) );
    362   1.1  christos       R( a, b, c, d, e, F2, K2, M(30) );
    363   1.1  christos       R( e, a, b, c, d, F2, K2, M(31) );
    364   1.1  christos       R( d, e, a, b, c, F2, K2, M(32) );
    365   1.1  christos       R( c, d, e, a, b, F2, K2, M(33) );
    366   1.1  christos       R( b, c, d, e, a, F2, K2, M(34) );
    367   1.1  christos       R( a, b, c, d, e, F2, K2, M(35) );
    368   1.1  christos       R( e, a, b, c, d, F2, K2, M(36) );
    369   1.1  christos       R( d, e, a, b, c, F2, K2, M(37) );
    370   1.1  christos       R( c, d, e, a, b, F2, K2, M(38) );
    371   1.1  christos       R( b, c, d, e, a, F2, K2, M(39) );
    372   1.1  christos       R( a, b, c, d, e, F3, K3, M(40) );
    373   1.1  christos       R( e, a, b, c, d, F3, K3, M(41) );
    374   1.1  christos       R( d, e, a, b, c, F3, K3, M(42) );
    375   1.1  christos       R( c, d, e, a, b, F3, K3, M(43) );
    376   1.1  christos       R( b, c, d, e, a, F3, K3, M(44) );
    377   1.1  christos       R( a, b, c, d, e, F3, K3, M(45) );
    378   1.1  christos       R( e, a, b, c, d, F3, K3, M(46) );
    379   1.1  christos       R( d, e, a, b, c, F3, K3, M(47) );
    380   1.1  christos       R( c, d, e, a, b, F3, K3, M(48) );
    381   1.1  christos       R( b, c, d, e, a, F3, K3, M(49) );
    382   1.1  christos       R( a, b, c, d, e, F3, K3, M(50) );
    383   1.1  christos       R( e, a, b, c, d, F3, K3, M(51) );
    384   1.1  christos       R( d, e, a, b, c, F3, K3, M(52) );
    385   1.1  christos       R( c, d, e, a, b, F3, K3, M(53) );
    386   1.1  christos       R( b, c, d, e, a, F3, K3, M(54) );
    387   1.1  christos       R( a, b, c, d, e, F3, K3, M(55) );
    388   1.1  christos       R( e, a, b, c, d, F3, K3, M(56) );
    389   1.1  christos       R( d, e, a, b, c, F3, K3, M(57) );
    390   1.1  christos       R( c, d, e, a, b, F3, K3, M(58) );
    391   1.1  christos       R( b, c, d, e, a, F3, K3, M(59) );
    392   1.1  christos       R( a, b, c, d, e, F4, K4, M(60) );
    393   1.1  christos       R( e, a, b, c, d, F4, K4, M(61) );
    394   1.1  christos       R( d, e, a, b, c, F4, K4, M(62) );
    395   1.1  christos       R( c, d, e, a, b, F4, K4, M(63) );
    396   1.1  christos       R( b, c, d, e, a, F4, K4, M(64) );
    397   1.1  christos       R( a, b, c, d, e, F4, K4, M(65) );
    398   1.1  christos       R( e, a, b, c, d, F4, K4, M(66) );
    399   1.1  christos       R( d, e, a, b, c, F4, K4, M(67) );
    400   1.1  christos       R( c, d, e, a, b, F4, K4, M(68) );
    401   1.1  christos       R( b, c, d, e, a, F4, K4, M(69) );
    402   1.1  christos       R( a, b, c, d, e, F4, K4, M(70) );
    403   1.1  christos       R( e, a, b, c, d, F4, K4, M(71) );
    404   1.1  christos       R( d, e, a, b, c, F4, K4, M(72) );
    405   1.1  christos       R( c, d, e, a, b, F4, K4, M(73) );
    406   1.1  christos       R( b, c, d, e, a, F4, K4, M(74) );
    407   1.1  christos       R( a, b, c, d, e, F4, K4, M(75) );
    408   1.1  christos       R( e, a, b, c, d, F4, K4, M(76) );
    409   1.1  christos       R( d, e, a, b, c, F4, K4, M(77) );
    410   1.1  christos       R( c, d, e, a, b, F4, K4, M(78) );
    411   1.1  christos       R( b, c, d, e, a, F4, K4, M(79) );
    412   1.1  christos 
    413   1.1  christos       a = ctx->A += a;
    414   1.1  christos       b = ctx->B += b;
    415   1.1  christos       c = ctx->C += c;
    416   1.1  christos       d = ctx->D += d;
    417   1.1  christos       e = ctx->E += e;
    418   1.1  christos     }
    419   1.1  christos }
    420  1.10  christos 
    421  1.10  christos #if defined(HAVE_X86_SHA1_HW_SUPPORT)
    422  1.10  christos /* HW specific version of sha1_process_bytes.  */
    423  1.10  christos 
    424  1.10  christos static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *);
    425  1.10  christos 
    426  1.10  christos static void
    427  1.10  christos sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
    428  1.10  christos {
    429  1.10  christos   /* When we already have some bits in our internal buffer concatenate
    430  1.10  christos      both inputs first.  */
    431  1.10  christos   if (ctx->buflen != 0)
    432  1.10  christos     {
    433  1.10  christos       size_t left_over = ctx->buflen;
    434  1.10  christos       size_t add = 128 - left_over > len ? len : 128 - left_over;
    435  1.10  christos 
    436  1.10  christos       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
    437  1.10  christos       ctx->buflen += add;
    438  1.10  christos 
    439  1.10  christos       if (ctx->buflen > 64)
    440  1.10  christos 	{
    441  1.10  christos 	  sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
    442  1.10  christos 
    443  1.10  christos 	  ctx->buflen &= 63;
    444  1.10  christos 	  /* The regions in the following copy operation cannot overlap.  */
    445  1.10  christos 	  memcpy (ctx->buffer,
    446  1.10  christos 		  &((char *) ctx->buffer)[(left_over + add) & ~63],
    447  1.10  christos 		  ctx->buflen);
    448  1.10  christos 	}
    449  1.10  christos 
    450  1.10  christos       buffer = (const char *) buffer + add;
    451  1.10  christos       len -= add;
    452  1.10  christos     }
    453  1.10  christos 
    454  1.10  christos   /* Process available complete blocks.  */
    455  1.10  christos   if (len >= 64)
    456  1.10  christos     {
    457  1.10  christos #if !_STRING_ARCH_unaligned
    458  1.10  christos # define alignof(type) offsetof (struct { char c; type x; }, x)
    459  1.10  christos # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
    460  1.10  christos       if (UNALIGNED_P (buffer))
    461  1.10  christos 	while (len > 64)
    462  1.10  christos 	  {
    463  1.10  christos 	    sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
    464  1.10  christos 	    buffer = (const char *) buffer + 64;
    465  1.10  christos 	    len -= 64;
    466  1.10  christos 	  }
    467  1.10  christos       else
    468  1.10  christos #endif
    469  1.10  christos 	{
    470  1.10  christos 	  sha1_hw_process_block (buffer, len & ~63, ctx);
    471  1.10  christos 	  buffer = (const char *) buffer + (len & ~63);
    472  1.10  christos 	  len &= 63;
    473  1.10  christos 	}
    474  1.10  christos     }
    475  1.10  christos 
    476  1.10  christos   /* Move remaining bytes in internal buffer.  */
    477  1.10  christos   if (len > 0)
    478  1.10  christos     {
    479  1.10  christos       size_t left_over = ctx->buflen;
    480  1.10  christos 
    481  1.10  christos       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
    482  1.10  christos       left_over += len;
    483  1.10  christos       if (left_over >= 64)
    484  1.10  christos 	{
    485  1.10  christos 	  sha1_hw_process_block (ctx->buffer, 64, ctx);
    486  1.10  christos 	  left_over -= 64;
    487  1.10  christos 	  memmove (ctx->buffer, &ctx->buffer[16], left_over);
    488  1.10  christos 	}
    489  1.10  christos       ctx->buflen = left_over;
    490  1.10  christos     }
    491  1.10  christos }
    492  1.10  christos 
    493  1.10  christos /* Process LEN bytes of BUFFER, accumulating context into CTX.
    494  1.10  christos    Using CPU specific intrinsics.  */
    495  1.10  christos 
    496  1.10  christos #ifdef HAVE_X86_SHA1_HW_SUPPORT
    497  1.10  christos __attribute__((__target__ ("sse4.1,sha")))
    498  1.10  christos #endif
    499  1.10  christos static void
    500  1.10  christos sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
    501  1.10  christos {
    502  1.10  christos #ifdef HAVE_X86_SHA1_HW_SUPPORT
    503  1.10  christos   /* Implemented from
    504  1.10  christos      https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html  */
    505  1.10  christos   const __m128i *words = (const __m128i *) buffer;
    506  1.10  christos   const __m128i *endp = (const __m128i *) ((const char *) buffer + len);
    507  1.10  christos   __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3;
    508  1.10  christos   const __m128i shuf_mask
    509  1.10  christos     = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
    510  1.10  christos   char check[((offsetof (struct sha1_ctx, B)
    511  1.10  christos 	     == offsetof (struct sha1_ctx, A) + sizeof (ctx->A))
    512  1.10  christos 		   && (offsetof (struct sha1_ctx, C)
    513  1.10  christos 		       == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A))
    514  1.10  christos 		   && (offsetof (struct sha1_ctx, D)
    515  1.10  christos 		       == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A)))
    516  1.10  christos 		  ? 1 : -1];
    517  1.10  christos 
    518  1.10  christos   /* First increment the byte count.  RFC 1321 specifies the possible
    519  1.10  christos      length of the file up to 2^64 bits.  Here we only compute the
    520  1.10  christos      number of bytes.  Do a double word increment.  */
    521  1.10  christos   ctx->total[0] += len;
    522  1.10  christos   ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
    523  1.10  christos 
    524  1.10  christos   (void) &check[0];
    525  1.10  christos   abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A);
    526  1.10  christos   e0 = _mm_set_epi32 (ctx->E, 0, 0, 0);
    527  1.10  christos   abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
    528  1.10  christos 
    529  1.10  christos   while (words < endp)
    530  1.10  christos     {
    531  1.10  christos       abcd_save = abcd;
    532  1.10  christos       e0_save = e0;
    533  1.10  christos 
    534  1.10  christos       /* 0..3 */
    535  1.10  christos       msg0 = _mm_loadu_si128 (words);
    536  1.10  christos       msg0 = _mm_shuffle_epi8 (msg0, shuf_mask);
    537  1.10  christos       e0 = _mm_add_epi32 (e0, msg0);
    538  1.10  christos       e1 = abcd;
    539  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
    540  1.10  christos 
    541  1.10  christos       /* 4..7 */
    542  1.10  christos       msg1 = _mm_loadu_si128 (words + 1);
    543  1.10  christos       msg1 = _mm_shuffle_epi8 (msg1, shuf_mask);
    544  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    545  1.10  christos       e0 = abcd;
    546  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
    547  1.10  christos       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
    548  1.10  christos 
    549  1.10  christos       /* 8..11 */
    550  1.10  christos       msg2 = _mm_loadu_si128 (words + 2);
    551  1.10  christos       msg2 = _mm_shuffle_epi8 (msg2, shuf_mask);
    552  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    553  1.10  christos       e1 = abcd;
    554  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
    555  1.10  christos       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
    556  1.10  christos       msg0 = _mm_xor_si128 (msg0, msg2);
    557  1.10  christos 
    558  1.10  christos       /* 12..15 */
    559  1.10  christos       msg3 = _mm_loadu_si128 (words + 3);
    560  1.10  christos       msg3 = _mm_shuffle_epi8 (msg3, shuf_mask);
    561  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    562  1.10  christos       e0 = abcd;
    563  1.10  christos       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
    564  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
    565  1.10  christos       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
    566  1.10  christos       msg1 = _mm_xor_si128 (msg1, msg3);
    567  1.10  christos 
    568  1.10  christos       /* 16..19 */
    569  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg0);
    570  1.10  christos       e1 = abcd;
    571  1.10  christos       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
    572  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
    573  1.10  christos       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
    574  1.10  christos       msg2 = _mm_xor_si128 (msg2, msg0);
    575  1.10  christos 
    576  1.10  christos       /* 20..23 */
    577  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    578  1.10  christos       e0 = abcd;
    579  1.10  christos       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
    580  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
    581  1.10  christos       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
    582  1.10  christos       msg3 = _mm_xor_si128 (msg3, msg1);
    583  1.10  christos 
    584  1.10  christos       /* 24..27 */
    585  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    586  1.10  christos       e1 = abcd;
    587  1.10  christos       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
    588  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
    589  1.10  christos       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
    590  1.10  christos       msg0 = _mm_xor_si128 (msg0, msg2);
    591  1.10  christos 
    592  1.10  christos       /* 28..31 */
    593  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    594  1.10  christos       e0 = abcd;
    595  1.10  christos       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
    596  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
    597  1.10  christos       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
    598  1.10  christos       msg1 = _mm_xor_si128 (msg1, msg3);
    599  1.10  christos 
    600  1.10  christos       /* 32..35 */
    601  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg0);
    602  1.10  christos       e1 = abcd;
    603  1.10  christos       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
    604  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
    605  1.10  christos       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
    606  1.10  christos       msg2 = _mm_xor_si128 (msg2, msg0);
    607  1.10  christos 
    608  1.10  christos       /* 36..39 */
    609  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    610  1.10  christos       e0 = abcd;
    611  1.10  christos       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
    612  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
    613  1.10  christos       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
    614  1.10  christos       msg3 = _mm_xor_si128 (msg3, msg1);
    615  1.10  christos 
    616  1.10  christos       /* 40..43 */
    617  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    618  1.10  christos       e1 = abcd;
    619  1.10  christos       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
    620  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
    621  1.10  christos       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
    622  1.10  christos       msg0 = _mm_xor_si128 (msg0, msg2);
    623  1.10  christos 
    624  1.10  christos       /* 44..47 */
    625  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    626  1.10  christos       e0 = abcd;
    627  1.10  christos       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
    628  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
    629  1.10  christos       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
    630  1.10  christos       msg1 = _mm_xor_si128 (msg1, msg3);
    631  1.10  christos 
    632  1.10  christos       /* 48..51 */
    633  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg0);
    634  1.10  christos       e1 = abcd;
    635  1.10  christos       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
    636  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
    637  1.10  christos       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
    638  1.10  christos       msg2 = _mm_xor_si128 (msg2, msg0);
    639  1.10  christos 
    640  1.10  christos       /* 52..55 */
    641  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    642  1.10  christos       e0 = abcd;
    643  1.10  christos       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
    644  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
    645  1.10  christos       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
    646  1.10  christos       msg3 = _mm_xor_si128 (msg3, msg1);
    647  1.10  christos 
    648  1.10  christos       /* 56..59 */
    649  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    650  1.10  christos       e1 = abcd;
    651  1.10  christos       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
    652  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
    653  1.10  christos       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
    654  1.10  christos       msg0 = _mm_xor_si128 (msg0, msg2);
    655  1.10  christos 
    656  1.10  christos       /* 60..63 */
    657  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    658  1.10  christos       e0 = abcd;
    659  1.10  christos       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
    660  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
    661  1.10  christos       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
    662  1.10  christos       msg1 = _mm_xor_si128 (msg1, msg3);
    663  1.10  christos 
    664  1.10  christos       /* 64..67 */
    665  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg0);
    666  1.10  christos       e1 = abcd;
    667  1.10  christos       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
    668  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
    669  1.10  christos       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
    670  1.10  christos       msg2 = _mm_xor_si128 (msg2, msg0);
    671  1.10  christos 
    672  1.10  christos       /* 68..71 */
    673  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg1);
    674  1.10  christos       e0 = abcd;
    675  1.10  christos       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
    676  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
    677  1.10  christos       msg3 = _mm_xor_si128 (msg3, msg1);
    678  1.10  christos 
    679  1.10  christos       /* 72..75 */
    680  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, msg2);
    681  1.10  christos       e1 = abcd;
    682  1.10  christos       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
    683  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
    684  1.10  christos 
    685  1.10  christos       /* 76..79 */
    686  1.10  christos       e1 = _mm_sha1nexte_epu32 (e1, msg3);
    687  1.10  christos       e0 = abcd;
    688  1.10  christos       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
    689  1.10  christos 
    690  1.10  christos       /* Finalize. */
    691  1.10  christos       e0 = _mm_sha1nexte_epu32 (e0, e0_save);
    692  1.10  christos       abcd = _mm_add_epi32 (abcd, abcd_save);
    693  1.10  christos 
    694  1.10  christos       words = words + 4;
    695  1.10  christos     }
    696  1.10  christos 
    697  1.10  christos   abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
    698  1.10  christos   _mm_storeu_si128 ((__m128i *) &ctx->A, abcd);
    699  1.10  christos   ctx->E = _mm_extract_epi32 (e0, 3);
    700  1.10  christos #endif
    701  1.10  christos }
    702  1.10  christos #endif
    703  1.10  christos 
    704  1.10  christos /* Return sha1_process_bytes or some hardware optimized version thereof
    705  1.10  christos    depending on current CPU.  */
    706  1.10  christos 
    707  1.10  christos sha1_process_bytes_fn
    708  1.10  christos sha1_choose_process_bytes (void)
    709  1.10  christos {
    710  1.10  christos #ifdef HAVE_X86_SHA1_HW_SUPPORT
    711  1.10  christos   unsigned int eax, ebx, ecx, edx;
    712  1.10  christos   if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)
    713  1.10  christos       && (ebx & bit_SHA) != 0
    714  1.10  christos       && __get_cpuid (1, &eax, &ebx, &ecx, &edx)
    715  1.10  christos       && (ecx & bit_SSE4_1) != 0)
    716  1.10  christos     return sha1_hw_process_bytes;
    717  1.10  christos #endif
    718  1.10  christos   return sha1_process_bytes;
    719  1.10  christos }
    720