dist/libiberty/sha1.c

1.1       mrg /* sha1.c - Functions to compute SHA1 message digest of files or
1.1       mrg    memory blocks according to the NIST specification FIPS-180-1.
1.1       mrg
1.9       mrg    Copyright (C) 2000-2024 Free Software Foundation, Inc.
1.1       mrg
1.1       mrg    This program is free software; you can redistribute it and/or modify it
1.1       mrg    under the terms of the GNU General Public License as published by the
1.1       mrg    Free Software Foundation; either version 2, or (at your option) any
1.1       mrg    later version.
1.1       mrg
1.1       mrg    This program is distributed in the hope that it will be useful,
1.1       mrg    but WITHOUT ANY WARRANTY; without even the implied warranty of
1.1       mrg    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1.1       mrg    GNU General Public License for more details.
1.1       mrg
1.1       mrg    You should have received a copy of the GNU General Public License
1.1       mrg    along with this program; if not, write to the Free Software Foundation,
1.1       mrg    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
1.1       mrg
1.1       mrg /* Written by Scott G. Miller
1.1       mrg    Credits:
1.1       mrg       Robert Klep <robert (at) ilse.nl>  -- Expansion function fix
1.1       mrg */
1.1       mrg
1.1       mrg #include <config.h>
1.1       mrg
1.1       mrg #include "sha1.h"
1.1       mrg
1.1       mrg #include <stddef.h>
1.1       mrg #include <string.h>
1.1       mrg
1.9       mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
1.9       mrg # include <x86intrin.h>
1.9       mrg # include <cpuid.h>
1.9       mrg #endif
1.9       mrg
1.1       mrg #if USE_UNLOCKED_IO
1.1       mrg # include "unlocked-io.h"
1.1       mrg #endif
1.1       mrg
/* SWAP (n) converts a 32-bit word between host order and the byte order
   used while hashing: it is the identity when WORDS_BIGENDIAN is
   defined and a four-byte reversal otherwise.  In the reversing form N
   is expanded four times, so the argument must be free of side effects.
   NOTE(review): the reversing form relies on N having an unsigned
   32-bit type so that bits shifted above bit 31 are dropped; presumably
   that is sha1_uint32 -- confirm against sha1.h.  */
1.1       mrg #ifdef WORDS_BIGENDIAN
1.1       mrg # define SWAP(n) (n)
1.1       mrg #else
1.1       mrg # define SWAP(n) \
1.1       mrg     (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
1.1       mrg #endif
1.1       mrg
/* Number of bytes sha1_stream tries to read before handing a chunk to
   sha1_process_block.  That function assumes a length that is a
   multiple of 64, hence the compile-time check below.  */
1.1       mrg #define BLOCKSIZE 4096
1.1       mrg #if BLOCKSIZE % 64 != 0
1.1       mrg # error "invalid BLOCKSIZE"
1.1       mrg #endif
1.1       mrg
1.1       mrg /* This array contains the bytes used to pad the buffer to the next
1.1       mrg    64-byte boundary.  (RFC 1321, 3.1: Step 1)  */
/* Only the first byte (0x80) is non-zero; the remaining 63 elements are
   zero-initialized.  sha1_finish_ctx copies a prefix of this array
   behind the buffered message bytes.  */
1.1       mrg static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ...  */ };
1.1       mrg
1.1       mrg
1.1       mrg /* Take a pointer to a 160 bit block of data (five 32 bit ints) and
1.1       mrg    initialize it to the start constants of the SHA1 algorithm.  This
1.1       mrg    must be called before using hash in the call to sha1_hash.  */
1.1       mrg void
1.1       mrg sha1_init_ctx (struct sha1_ctx *ctx)
1.1       mrg {
1.1       mrg   ctx->A = 0x67452301;
1.1       mrg   ctx->B = 0xefcdab89;
1.1       mrg   ctx->C = 0x98badcfe;
1.1       mrg   ctx->D = 0x10325476;
1.1       mrg   ctx->E = 0xc3d2e1f0;
1.1       mrg
1.1       mrg   ctx->total[0] = ctx->total[1] = 0;
1.1       mrg   ctx->buflen = 0;
1.1       mrg }
1.1       mrg
1.1       mrg /* Put result from CTX in first 20 bytes following RESBUF.  The result
1.1       mrg    must be in little endian byte order.
1.1       mrg
1.1       mrg    IMPORTANT: On some systems it is required that RESBUF is correctly
1.1       mrg    aligned for a 32-bit value.  */
1.1       mrg void *
1.1       mrg sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
1.1       mrg {
1.1       mrg   ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A);
1.1       mrg   ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B);
1.1       mrg   ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C);
1.1       mrg   ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D);
1.1       mrg   ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E);
1.1       mrg
1.1       mrg   return resbuf;
1.1       mrg }
1.1       mrg
1.1       mrg /* Process the remaining bytes in the internal buffer and the usual
1.1       mrg    prolog according to the standard and write the result to RESBUF.
1.1       mrg
1.1       mrg    IMPORTANT: On some systems it is required that RESBUF is correctly
1.1       mrg    aligned for a 32-bit value.  */
1.1       mrg void *
1.1       mrg sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
1.1       mrg {
1.1       mrg   /* Take yet unprocessed bytes into account.  */
1.1       mrg   sha1_uint32 bytes = ctx->buflen;
1.1       mrg   size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4;
1.1       mrg
1.1       mrg   /* Now count remaining bytes.  */
1.1       mrg   ctx->total[0] += bytes;
1.1       mrg   if (ctx->total[0] < bytes)
1.1       mrg     ++ctx->total[1];
1.1       mrg
1.1       mrg   /* Put the 64-bit file length in *bits* at the end of the buffer.  */
1.1       mrg   ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
1.1       mrg   ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3);
1.1       mrg
1.1       mrg   memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes);
1.1       mrg
1.1       mrg   /* Process last bytes.  */
1.1       mrg   sha1_process_block (ctx->buffer, size * 4, ctx);
1.1       mrg
1.1       mrg   return sha1_read_ctx (ctx, resbuf);
1.1       mrg }
1.1       mrg
1.1       mrg /* Compute SHA1 message digest for bytes read from STREAM.  The
1.1       mrg    resulting message digest number will be written into the 16 bytes
1.1       mrg    beginning at RESBLOCK.  */
1.1       mrg int
1.1       mrg sha1_stream (FILE *stream, void *resblock)
1.1       mrg {
1.1       mrg   struct sha1_ctx ctx;
1.1       mrg   char buffer[BLOCKSIZE + 72];
1.1       mrg   size_t sum;
1.1       mrg
1.1       mrg   /* Initialize the computation context.  */
1.1       mrg   sha1_init_ctx (&ctx);
1.1       mrg
1.1       mrg   /* Iterate over full file contents.  */
1.1       mrg   while (1)
1.1       mrg     {
1.1       mrg       /* We read the file in blocks of BLOCKSIZE bytes.  One call of the
1.1       mrg 	 computation function processes the whole buffer so that with the
1.1       mrg 	 next round of the loop another block can be read.  */
1.1       mrg       size_t n;
1.1       mrg       sum = 0;
1.1       mrg
1.1       mrg       /* Read block.  Take care for partial reads.  */
1.1       mrg       while (1)
1.1       mrg 	{
1.1       mrg 	  n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
1.1       mrg
1.1       mrg 	  sum += n;
1.1       mrg
1.1       mrg 	  if (sum == BLOCKSIZE)
1.1       mrg 	    break;
1.1       mrg
1.1       mrg 	  if (n == 0)
1.1       mrg 	    {
1.1       mrg 	      /* Check for the error flag IFF N == 0, so that we don't
1.1       mrg 		 exit the loop after a partial read due to e.g., EAGAIN
1.1       mrg 		 or EWOULDBLOCK.  */
1.1       mrg 	      if (ferror (stream))
1.1       mrg 		return 1;
1.1       mrg 	      goto process_partial_block;
1.1       mrg 	    }
1.1       mrg
1.1       mrg 	  /* We've read at least one byte, so ignore errors.  But always
1.1       mrg 	     check for EOF, since feof may be true even though N > 0.
1.1       mrg 	     Otherwise, we could end up calling fread after EOF.  */
1.1       mrg 	  if (feof (stream))
1.1       mrg 	    goto process_partial_block;
1.1       mrg 	}
1.1       mrg
1.1       mrg       /* Process buffer with BLOCKSIZE bytes.  Note that
1.1       mrg 			BLOCKSIZE % 64 == 0
1.1       mrg        */
1.1       mrg       sha1_process_block (buffer, BLOCKSIZE, &ctx);
1.1       mrg     }
1.1       mrg
1.1       mrg  process_partial_block:;
1.1       mrg
1.1       mrg   /* Process any remaining bytes.  */
1.1       mrg   if (sum > 0)
1.1       mrg     sha1_process_bytes (buffer, sum, &ctx);
1.1       mrg
1.1       mrg   /* Construct result in desired memory.  */
1.1       mrg   sha1_finish_ctx (&ctx, resblock);
1.1       mrg   return 0;
1.1       mrg }
1.1       mrg
1.1       mrg /* Compute SHA1 message digest for LEN bytes beginning at BUFFER.  The
1.1       mrg    result is always in little endian byte order, so that a byte-wise
1.1       mrg    output yields to the wanted ASCII representation of the message
1.1       mrg    digest.  */
1.1       mrg void *
1.1       mrg sha1_buffer (const char *buffer, size_t len, void *resblock)
1.1       mrg {
1.1       mrg   struct sha1_ctx ctx;
1.1       mrg
1.1       mrg   /* Initialize the computation context.  */
1.1       mrg   sha1_init_ctx (&ctx);
1.1       mrg
1.1       mrg   /* Process whole buffer but last len % 64 bytes.  */
1.1       mrg   sha1_process_bytes (buffer, len, &ctx);
1.1       mrg
1.1       mrg   /* Put result in desired memory area.  */
1.1       mrg   return sha1_finish_ctx (&ctx, resblock);
1.1       mrg }
1.1       mrg
1.1       mrg void
1.1       mrg sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
1.1       mrg {
1.1       mrg   /* When we already have some bits in our internal buffer concatenate
1.1       mrg      both inputs first.  */
1.1       mrg   if (ctx->buflen != 0)
1.1       mrg     {
1.1       mrg       size_t left_over = ctx->buflen;
1.1       mrg       size_t add = 128 - left_over > len ? len : 128 - left_over;
1.1       mrg
1.1       mrg       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
1.1       mrg       ctx->buflen += add;
1.1       mrg
1.1       mrg       if (ctx->buflen > 64)
1.1       mrg 	{
1.1       mrg 	  sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
1.1       mrg
1.1       mrg 	  ctx->buflen &= 63;
1.1       mrg 	  /* The regions in the following copy operation cannot overlap.  */
1.1       mrg 	  memcpy (ctx->buffer,
1.1       mrg 		  &((char *) ctx->buffer)[(left_over + add) & ~63],
1.1       mrg 		  ctx->buflen);
1.1       mrg 	}
1.1       mrg
1.1       mrg       buffer = (const char *) buffer + add;
1.1       mrg       len -= add;
1.1       mrg     }
1.1       mrg
1.1       mrg   /* Process available complete blocks.  */
1.1       mrg   if (len >= 64)
1.1       mrg     {
1.1       mrg #if !_STRING_ARCH_unaligned
1.5  christos # if defined(__clang__) || defined(__GNUC__)
1.2  christos # define alignof(type) __alignof__(type)
1.2  christos # else
1.1       mrg # define alignof(type) offsetof (struct { char c; type x; }, x)
1.2  christos # endif
1.1       mrg # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
1.1       mrg       if (UNALIGNED_P (buffer))
1.1       mrg 	while (len > 64)
1.1       mrg 	  {
1.1       mrg 	    sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
1.1       mrg 	    buffer = (const char *) buffer + 64;
1.1       mrg 	    len -= 64;
1.1       mrg 	  }
1.1       mrg       else
1.1       mrg #endif
1.1       mrg 	{
1.1       mrg 	  sha1_process_block (buffer, len & ~63, ctx);
1.1       mrg 	  buffer = (const char *) buffer + (len & ~63);
1.1       mrg 	  len &= 63;
1.1       mrg 	}
1.1       mrg     }
1.1       mrg
1.1       mrg   /* Move remaining bytes in internal buffer.  */
1.1       mrg   if (len > 0)
1.1       mrg     {
1.1       mrg       size_t left_over = ctx->buflen;
1.1       mrg
1.1       mrg       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
1.1       mrg       left_over += len;
1.1       mrg       if (left_over >= 64)
1.1       mrg 	{
1.1       mrg 	  sha1_process_block (ctx->buffer, 64, ctx);
1.1       mrg 	  left_over -= 64;
1.8       mrg 	  memmove (ctx->buffer, &ctx->buffer[16], left_over);
1.1       mrg 	}
1.1       mrg       ctx->buflen = left_over;
1.1       mrg     }
1.1       mrg }
1.1       mrg
1.1       mrg /* --- Code below is the primary difference between md5.c and sha1.c --- */
1.1       mrg
1.1       mrg /* SHA1 round constants */
/* sha1_process_block uses K1 in rounds 0..19, K2 in rounds 20..39,
   K3 in rounds 40..59 and K4 in rounds 60..79.  */
1.1       mrg #define K1 0x5a827999
1.1       mrg #define K2 0x6ed9eba1
1.1       mrg #define K3 0x8f1bbcdc
1.1       mrg #define K4 0xca62c1d6
1.1       mrg
1.1       mrg /* Round functions.  Note that F2 is the same as F4.  */
/* Each takes three words and combines them bitwise; F1..F4 are paired
   with K1..K4 in sha1_process_block.  The arguments are not
   parenthesized in the expansions, so pass only simple operands (this
   file passes plain identifiers).  */
1.1       mrg #define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) )
1.1       mrg #define F2(B,C,D) (B ^ C ^ D)
1.1       mrg #define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) )
1.1       mrg #define F4(B,C,D) (B ^ C ^ D)
1.1       mrg
1.1       mrg /* Process LEN bytes of BUFFER, accumulating context into CTX.
1.1       mrg    It is assumed that LEN % 64 == 0.
1.1       mrg    Most of this code comes from GnuPG's cipher/sha1.c.  */
/* BUFFER is read through a sha1_uint32 pointer, sixteen words (64
   bytes) per loop iteration, each word going through SWAP first.
   CTX->total[] is advanced by LEN before any data is looked at, and
   CTX->A..E are updated once per 64-byte block.  The helper macros
   rol, M and R defined inside the body stay defined for the rest of
   the translation unit.  */
1.1       mrg
1.1       mrg void
1.1       mrg sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
1.1       mrg {
1.1       mrg   const sha1_uint32 *words = (const sha1_uint32*) buffer;
1.1       mrg   size_t nwords = len / sizeof (sha1_uint32);
1.1       mrg   const sha1_uint32 *endp = words + nwords;
  /* Message schedule, kept as a 16-entry ring: M() below indexes it
     modulo 16 and overwrites entries in place.  */
1.1       mrg   sha1_uint32 x[16];
  /* Working copies of the five state words.  */
1.1       mrg   sha1_uint32 a = ctx->A;
1.1       mrg   sha1_uint32 b = ctx->B;
1.1       mrg   sha1_uint32 c = ctx->C;
1.1       mrg   sha1_uint32 d = ctx->D;
1.1       mrg   sha1_uint32 e = ctx->E;
1.1       mrg
1.1       mrg   /* First increment the byte count.  RFC 1321 specifies the possible
1.1       mrg      length of the file up to 2^64 bits.  Here we only compute the
1.1       mrg      number of bytes.  Do a double word increment.  */
1.1       mrg   ctx->total[0] += len;
  /* The high word receives the part of LEN above bit 31 plus the carry
     out of the low word.  The shift is split in two ((len >> 31) >> 1)
     so that the shift count stays below the width of size_t even when
     size_t has only 32 bits.  */
1.2  christos   ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
1.1       mrg
/* rol (x, n): rotate the 32-bit value X left by N bits.  */
1.1       mrg #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))
1.1       mrg
/* M (I): produce schedule word I for I >= 16.  XORs the ring entries
   for I, I-14, I-8 and I-3 (all taken modulo 16), rotates the result
   left by one bit, stores it back into entry I mod 16 and yields that
   value.  Uses the local variable TM as scratch.  */
1.1       mrg #define M(I) ( tm =   x[I&0x0f] ^ x[(I-14)&0x0f] \
1.1       mrg 		    ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
1.1       mrg 	       , (x[I&0x0f] = rol(tm, 1)) )
1.1       mrg
/* R (A, B, C, D, E, F, K, M): one round.  Adds rol (A, 5) + F (B, C, D)
   + K + M to E and rotates B left by 30.  Nothing else is written;
   the rotation of roles between rounds is expressed by permuting the
   first five arguments from one invocation to the next.  */
1.1       mrg #define R(A,B,C,D,E,F,K,M)  do { E += rol( A, 5 )     \
1.1       mrg 				      + F( B, C, D )  \
1.1       mrg 				      + K	      \
1.1       mrg 				      + M;	      \
1.1       mrg 				 B = rol( B, 30 );    \
1.1       mrg 			       } while(0)
1.1       mrg
  /* One iteration per 64-byte block.  */
1.1       mrg   while (words < endp)
1.1       mrg     {
1.1       mrg       sha1_uint32 tm;
1.1       mrg       int t;
      /* Load the sixteen words of this block into the schedule ring.  */
1.1       mrg       for (t = 0; t < 16; t++)
1.1       mrg 	{
1.1       mrg 	  x[t] = SWAP (*words);
1.1       mrg 	  words++;
1.1       mrg 	}
1.1       mrg
      /* Rounds 0..19 (F1, K1).  The first sixteen use the loaded words
	 directly, the rest call M() to extend the schedule.  */
1.1       mrg       R( a, b, c, d, e, F1, K1, x[ 0] );
1.1       mrg       R( e, a, b, c, d, F1, K1, x[ 1] );
1.1       mrg       R( d, e, a, b, c, F1, K1, x[ 2] );
1.1       mrg       R( c, d, e, a, b, F1, K1, x[ 3] );
1.1       mrg       R( b, c, d, e, a, F1, K1, x[ 4] );
1.1       mrg       R( a, b, c, d, e, F1, K1, x[ 5] );
1.1       mrg       R( e, a, b, c, d, F1, K1, x[ 6] );
1.1       mrg       R( d, e, a, b, c, F1, K1, x[ 7] );
1.1       mrg       R( c, d, e, a, b, F1, K1, x[ 8] );
1.1       mrg       R( b, c, d, e, a, F1, K1, x[ 9] );
1.1       mrg       R( a, b, c, d, e, F1, K1, x[10] );
1.1       mrg       R( e, a, b, c, d, F1, K1, x[11] );
1.1       mrg       R( d, e, a, b, c, F1, K1, x[12] );
1.1       mrg       R( c, d, e, a, b, F1, K1, x[13] );
1.1       mrg       R( b, c, d, e, a, F1, K1, x[14] );
1.1       mrg       R( a, b, c, d, e, F1, K1, x[15] );
1.1       mrg       R( e, a, b, c, d, F1, K1, M(16) );
1.1       mrg       R( d, e, a, b, c, F1, K1, M(17) );
1.1       mrg       R( c, d, e, a, b, F1, K1, M(18) );
1.1       mrg       R( b, c, d, e, a, F1, K1, M(19) );
      /* Rounds 20..39 (F2, K2).  */
1.1       mrg       R( a, b, c, d, e, F2, K2, M(20) );
1.1       mrg       R( e, a, b, c, d, F2, K2, M(21) );
1.1       mrg       R( d, e, a, b, c, F2, K2, M(22) );
1.1       mrg       R( c, d, e, a, b, F2, K2, M(23) );
1.1       mrg       R( b, c, d, e, a, F2, K2, M(24) );
1.1       mrg       R( a, b, c, d, e, F2, K2, M(25) );
1.1       mrg       R( e, a, b, c, d, F2, K2, M(26) );
1.1       mrg       R( d, e, a, b, c, F2, K2, M(27) );
1.1       mrg       R( c, d, e, a, b, F2, K2, M(28) );
1.1       mrg       R( b, c, d, e, a, F2, K2, M(29) );
1.1       mrg       R( a, b, c, d, e, F2, K2, M(30) );
1.1       mrg       R( e, a, b, c, d, F2, K2, M(31) );
1.1       mrg       R( d, e, a, b, c, F2, K2, M(32) );
1.1       mrg       R( c, d, e, a, b, F2, K2, M(33) );
1.1       mrg       R( b, c, d, e, a, F2, K2, M(34) );
1.1       mrg       R( a, b, c, d, e, F2, K2, M(35) );
1.1       mrg       R( e, a, b, c, d, F2, K2, M(36) );
1.1       mrg       R( d, e, a, b, c, F2, K2, M(37) );
1.1       mrg       R( c, d, e, a, b, F2, K2, M(38) );
1.1       mrg       R( b, c, d, e, a, F2, K2, M(39) );
      /* Rounds 40..59 (F3, K3).  */
1.1       mrg       R( a, b, c, d, e, F3, K3, M(40) );
1.1       mrg       R( e, a, b, c, d, F3, K3, M(41) );
1.1       mrg       R( d, e, a, b, c, F3, K3, M(42) );
1.1       mrg       R( c, d, e, a, b, F3, K3, M(43) );
1.1       mrg       R( b, c, d, e, a, F3, K3, M(44) );
1.1       mrg       R( a, b, c, d, e, F3, K3, M(45) );
1.1       mrg       R( e, a, b, c, d, F3, K3, M(46) );
1.1       mrg       R( d, e, a, b, c, F3, K3, M(47) );
1.1       mrg       R( c, d, e, a, b, F3, K3, M(48) );
1.1       mrg       R( b, c, d, e, a, F3, K3, M(49) );
1.1       mrg       R( a, b, c, d, e, F3, K3, M(50) );
1.1       mrg       R( e, a, b, c, d, F3, K3, M(51) );
1.1       mrg       R( d, e, a, b, c, F3, K3, M(52) );
1.1       mrg       R( c, d, e, a, b, F3, K3, M(53) );
1.1       mrg       R( b, c, d, e, a, F3, K3, M(54) );
1.1       mrg       R( a, b, c, d, e, F3, K3, M(55) );
1.1       mrg       R( e, a, b, c, d, F3, K3, M(56) );
1.1       mrg       R( d, e, a, b, c, F3, K3, M(57) );
1.1       mrg       R( c, d, e, a, b, F3, K3, M(58) );
1.1       mrg       R( b, c, d, e, a, F3, K3, M(59) );
      /* Rounds 60..79 (F4, K4).  */
1.1       mrg       R( a, b, c, d, e, F4, K4, M(60) );
1.1       mrg       R( e, a, b, c, d, F4, K4, M(61) );
1.1       mrg       R( d, e, a, b, c, F4, K4, M(62) );
1.1       mrg       R( c, d, e, a, b, F4, K4, M(63) );
1.1       mrg       R( b, c, d, e, a, F4, K4, M(64) );
1.1       mrg       R( a, b, c, d, e, F4, K4, M(65) );
1.1       mrg       R( e, a, b, c, d, F4, K4, M(66) );
1.1       mrg       R( d, e, a, b, c, F4, K4, M(67) );
1.1       mrg       R( c, d, e, a, b, F4, K4, M(68) );
1.1       mrg       R( b, c, d, e, a, F4, K4, M(69) );
1.1       mrg       R( a, b, c, d, e, F4, K4, M(70) );
1.1       mrg       R( e, a, b, c, d, F4, K4, M(71) );
1.1       mrg       R( d, e, a, b, c, F4, K4, M(72) );
1.1       mrg       R( c, d, e, a, b, F4, K4, M(73) );
1.1       mrg       R( b, c, d, e, a, F4, K4, M(74) );
1.1       mrg       R( a, b, c, d, e, F4, K4, M(75) );
1.1       mrg       R( e, a, b, c, d, F4, K4, M(76) );
1.1       mrg       R( d, e, a, b, c, F4, K4, M(77) );
1.1       mrg       R( c, d, e, a, b, F4, K4, M(78) );
1.1       mrg       R( b, c, d, e, a, F4, K4, M(79) );
1.1       mrg
      /* Add this block's result into the context and reload the
	 working copies from the updated state for the next block.  */
1.1       mrg       a = ctx->A += a;
1.1       mrg       b = ctx->B += b;
1.1       mrg       c = ctx->C += c;
1.1       mrg       d = ctx->D += d;
1.1       mrg       e = ctx->E += e;
1.1       mrg     }
1.1       mrg }
1.9       mrg
1.9       mrg #if defined(HAVE_X86_SHA1_HW_SUPPORT)
1.9       mrg /* HW specific version of sha1_process_bytes.  */
1.9       mrg
1.9       mrg static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *);
1.9       mrg
1.9       mrg static void
1.9       mrg sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
1.9       mrg {
1.9       mrg   /* When we already have some bits in our internal buffer concatenate
1.9       mrg      both inputs first.  */
1.9       mrg   if (ctx->buflen != 0)
1.9       mrg     {
1.9       mrg       size_t left_over = ctx->buflen;
1.9       mrg       size_t add = 128 - left_over > len ? len : 128 - left_over;
1.9       mrg
1.9       mrg       memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
1.9       mrg       ctx->buflen += add;
1.9       mrg
1.9       mrg       if (ctx->buflen > 64)
1.9       mrg 	{
1.9       mrg 	  sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
1.9       mrg
1.9       mrg 	  ctx->buflen &= 63;
1.9       mrg 	  /* The regions in the following copy operation cannot overlap.  */
1.9       mrg 	  memcpy (ctx->buffer,
1.9       mrg 		  &((char *) ctx->buffer)[(left_over + add) & ~63],
1.9       mrg 		  ctx->buflen);
1.9       mrg 	}
1.9       mrg
1.9       mrg       buffer = (const char *) buffer + add;
1.9       mrg       len -= add;
1.9       mrg     }
1.9       mrg
1.9       mrg   /* Process available complete blocks.  */
1.9       mrg   if (len >= 64)
1.9       mrg     {
1.9       mrg #if !_STRING_ARCH_unaligned
1.9       mrg # define alignof(type) offsetof (struct { char c; type x; }, x)
1.9       mrg # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
1.9       mrg       if (UNALIGNED_P (buffer))
1.9       mrg 	while (len > 64)
1.9       mrg 	  {
1.9       mrg 	    sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
1.9       mrg 	    buffer = (const char *) buffer + 64;
1.9       mrg 	    len -= 64;
1.9       mrg 	  }
1.9       mrg       else
1.9       mrg #endif
1.9       mrg 	{
1.9       mrg 	  sha1_hw_process_block (buffer, len & ~63, ctx);
1.9       mrg 	  buffer = (const char *) buffer + (len & ~63);
1.9       mrg 	  len &= 63;
1.9       mrg 	}
1.9       mrg     }
1.9       mrg
1.9       mrg   /* Move remaining bytes in internal buffer.  */
1.9       mrg   if (len > 0)
1.9       mrg     {
1.9       mrg       size_t left_over = ctx->buflen;
1.9       mrg
1.9       mrg       memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
1.9       mrg       left_over += len;
1.9       mrg       if (left_over >= 64)
1.9       mrg 	{
1.9       mrg 	  sha1_hw_process_block (ctx->buffer, 64, ctx);
1.9       mrg 	  left_over -= 64;
1.9       mrg 	  memmove (ctx->buffer, &ctx->buffer[16], left_over);
1.9       mrg 	}
1.9       mrg       ctx->buflen = left_over;
1.9       mrg     }
1.9       mrg }
1.9       mrg
1.9       mrg /* Process LEN bytes of BUFFER, accumulating context into CTX.
1.9       mrg    Using CPU specific intrinsics.  */
/* The loop below consumes four __m128i values (64 bytes) per iteration
   and runs until BUFFER + LEN is reached, so LEN is expected to be a
   multiple of 64 (every caller in this file passes such a length).
   Input is read with unaligned loads.  CTX->total[] is advanced by LEN
   and CTX->A..E are rewritten once, after the last block.  */
1.9       mrg
1.9       mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
1.9       mrg __attribute__((__target__ ("sse4.1,sha")))
1.9       mrg #endif
1.9       mrg static void
1.9       mrg sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
1.9       mrg {
1.9       mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
1.9       mrg   /* Implemented from
1.9       mrg      https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html  */
1.9       mrg   const __m128i *words = (const __m128i *) buffer;
1.9       mrg   const __m128i *endp = (const __m128i *) ((const char *) buffer + len);
1.9       mrg   __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3;
  /* Byte-shuffle control whose bytes run 0x0f..0x00 from the lowest to
     the highest position: applied with _mm_shuffle_epi8 it reverses
     all sixteen bytes of a vector.  */
1.9       mrg   const __m128i shuf_mask
1.9       mrg     = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  /* Compile-time layout check: the array size becomes -1 (an error)
     unless A, B, C and D are laid out back to back in struct sha1_ctx,
     which the 128-bit load and store of &ctx->A below rely on.  */
1.9       mrg   char check[((offsetof (struct sha1_ctx, B)
1.9       mrg 	     == offsetof (struct sha1_ctx, A) + sizeof (ctx->A))
1.9       mrg 		   && (offsetof (struct sha1_ctx, C)
1.9       mrg 		       == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A))
1.9       mrg 		   && (offsetof (struct sha1_ctx, D)
1.9       mrg 		       == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A)))
1.9       mrg 		  ? 1 : -1];
1.9       mrg
1.9       mrg   /* First increment the byte count.  RFC 1321 specifies the possible
1.9       mrg      length of the file up to 2^64 bits.  Here we only compute the
1.9       mrg      number of bytes.  Do a double word increment.  */
1.9       mrg   ctx->total[0] += len;
1.9       mrg   ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
1.9       mrg
  /* Reference CHECK so it does not count as unused.  */
1.9       mrg   (void) &check[0];
  /* Load A..D as one vector and reverse the order of its four 32-bit
     lanes (shuffle control 0x1b); E goes into the highest lane of E0
     with the other lanes zero.  */
1.9       mrg   abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A);
1.9       mrg   e0 = _mm_set_epi32 (ctx->E, 0, 0, 0);
1.9       mrg   abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
1.9       mrg
  /* One iteration per 64-byte block.  The "N..M" comments give the
     round numbers handled by each group; every group performs four
     rounds with one _mm_sha1rnds4_epu32, whose last argument steps
     through 0, 1, 2, 3, changing every five groups.  E0 and E1
     alternate between groups, and MSG0..MSG3 hold the sixteen input
     words, which the sha1msg1/xor/sha1msg2 steps extend for later
     groups.  */
1.9       mrg   while (words < endp)
1.9       mrg     {
      /* Keep the incoming state for the addition at the end.  */
1.9       mrg       abcd_save = abcd;
1.9       mrg       e0_save = e0;
1.9       mrg
1.9       mrg       /* 0..3 */
1.9       mrg       msg0 = _mm_loadu_si128 (words);
1.9       mrg       msg0 = _mm_shuffle_epi8 (msg0, shuf_mask);
1.9       mrg       e0 = _mm_add_epi32 (e0, msg0);
1.9       mrg       e1 = abcd;
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
1.9       mrg
1.9       mrg       /* 4..7 */
1.9       mrg       msg1 = _mm_loadu_si128 (words + 1);
1.9       mrg       msg1 = _mm_shuffle_epi8 (msg1, shuf_mask);
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
1.9       mrg       e0 = abcd;
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
1.9       mrg       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
1.9       mrg
1.9       mrg       /* 8..11 */
1.9       mrg       msg2 = _mm_loadu_si128 (words + 2);
1.9       mrg       msg2 = _mm_shuffle_epi8 (msg2, shuf_mask);
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
1.9       mrg       e1 = abcd;
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
1.9       mrg       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
1.9       mrg       msg0 = _mm_xor_si128 (msg0, msg2);
1.9       mrg
1.9       mrg       /* 12..15 */
1.9       mrg       msg3 = _mm_loadu_si128 (words + 3);
1.9       mrg       msg3 = _mm_shuffle_epi8 (msg3, shuf_mask);
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
1.9       mrg       e0 = abcd;
1.9       mrg       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
1.9       mrg       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
1.9       mrg       msg1 = _mm_xor_si128 (msg1, msg3);
1.9       mrg
1.9       mrg       /* 16..19 */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg0);
1.9       mrg       e1 = abcd;
1.9       mrg       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
1.9       mrg       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
1.9       mrg       msg2 = _mm_xor_si128 (msg2, msg0);
1.9       mrg
1.9       mrg       /* 20..23 */
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
1.9       mrg       e0 = abcd;
1.9       mrg       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
1.9       mrg       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
1.9       mrg       msg3 = _mm_xor_si128 (msg3, msg1);
1.9       mrg
1.9       mrg       /* 24..27 */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
1.9       mrg       e1 = abcd;
1.9       mrg       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
1.9       mrg       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
1.9       mrg       msg0 = _mm_xor_si128 (msg0, msg2);
1.9       mrg
1.9       mrg       /* 28..31 */
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
1.9       mrg       e0 = abcd;
1.9       mrg       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
1.9       mrg       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
1.9       mrg       msg1 = _mm_xor_si128 (msg1, msg3);
1.9       mrg
1.9       mrg       /* 32..35 */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg0);
1.9       mrg       e1 = abcd;
1.9       mrg       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
1.9       mrg       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
1.9       mrg       msg2 = _mm_xor_si128 (msg2, msg0);
1.9       mrg
1.9       mrg       /* 36..39 */
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
1.9       mrg       e0 = abcd;
1.9       mrg       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
1.9       mrg       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
1.9       mrg       msg3 = _mm_xor_si128 (msg3, msg1);
1.9       mrg
1.9       mrg       /* 40..43 */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
1.9       mrg       e1 = abcd;
1.9       mrg       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
1.9       mrg       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
1.9       mrg       msg0 = _mm_xor_si128 (msg0, msg2);
1.9       mrg
1.9       mrg       /* 44..47 */
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
1.9       mrg       e0 = abcd;
1.9       mrg       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
1.9       mrg       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
1.9       mrg       msg1 = _mm_xor_si128 (msg1, msg3);
1.9       mrg
1.9       mrg       /* 48..51 */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg0);
1.9       mrg       e1 = abcd;
1.9       mrg       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
1.9       mrg       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
1.9       mrg       msg2 = _mm_xor_si128 (msg2, msg0);
1.9       mrg
1.9       mrg       /* 52..55 */
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
1.9       mrg       e0 = abcd;
1.9       mrg       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
1.9       mrg       msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
1.9       mrg       msg3 = _mm_xor_si128 (msg3, msg1);
1.9       mrg
1.9       mrg       /* 56..59 */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
1.9       mrg       e1 = abcd;
1.9       mrg       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
1.9       mrg       msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
1.9       mrg       msg0 = _mm_xor_si128 (msg0, msg2);
1.9       mrg
1.9       mrg       /* 60..63 */
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
1.9       mrg       e0 = abcd;
1.9       mrg       msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
1.9       mrg       msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
1.9       mrg       msg1 = _mm_xor_si128 (msg1, msg3);
1.9       mrg
1.9       mrg       /* 64..67 */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg0);
1.9       mrg       e1 = abcd;
1.9       mrg       msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
1.9       mrg       msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
1.9       mrg       msg2 = _mm_xor_si128 (msg2, msg0);
1.9       mrg
1.9       mrg       /* 68..71 */
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg1);
1.9       mrg       e0 = abcd;
1.9       mrg       msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
1.9       mrg       msg3 = _mm_xor_si128 (msg3, msg1);
1.9       mrg
1.9       mrg       /* 72..75 */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, msg2);
1.9       mrg       e1 = abcd;
1.9       mrg       msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
1.9       mrg
1.9       mrg       /* 76..79 */
1.9       mrg       e1 = _mm_sha1nexte_epu32 (e1, msg3);
1.9       mrg       e0 = abcd;
1.9       mrg       abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
1.9       mrg
1.9       mrg       /* Finalize. */
      /* Combine the result of this block with the state saved at the
	 top of the iteration.  */
1.9       mrg       e0 = _mm_sha1nexte_epu32 (e0, e0_save);
1.9       mrg       abcd = _mm_add_epi32 (abcd, abcd_save);
1.9       mrg
      /* Advance by four vectors, i.e. 64 bytes.  */
1.9       mrg       words = words + 4;
1.9       mrg     }
1.9       mrg
  /* Undo the lane reversal applied on entry, write A..D back with one
     store and take E from the highest lane of E0.  */
1.9       mrg   abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
1.9       mrg   _mm_storeu_si128 ((__m128i *) &ctx->A, abcd);
1.9       mrg   ctx->E = _mm_extract_epi32 (e0, 3);
1.9       mrg #endif
1.9       mrg }
1.9       mrg #endif
1.9       mrg
1.9       mrg /* Return sha1_process_bytes or some hardware optimized version thereof
1.9       mrg    depending on current CPU.  */
1.9       mrg
1.9       mrg sha1_process_bytes_fn
1.9       mrg sha1_choose_process_bytes (void)
1.9       mrg {
1.9       mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
1.9       mrg   unsigned int eax, ebx, ecx, edx;
1.9       mrg   if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)
1.9       mrg       && (ebx & bit_SHA) != 0
1.9       mrg       && __get_cpuid (1, &eax, &ebx, &ecx, &edx)
1.9       mrg       && (ecx & bit_SSE4_1) != 0)
1.9       mrg     return sha1_hw_process_bytes;
1.9       mrg #endif
1.9       mrg   return sha1_process_bytes;
1.9       mrg }