1 1.1 mrg /* sha1.c - Functions to compute SHA1 message digest of files or 2 1.1 mrg memory blocks according to the NIST specification FIPS-180-1. 3 1.1 mrg 4 1.9 mrg Copyright (C) 2000-2024 Free Software Foundation, Inc. 5 1.1 mrg 6 1.1 mrg This program is free software; you can redistribute it and/or modify it 7 1.1 mrg under the terms of the GNU General Public License as published by the 8 1.1 mrg Free Software Foundation; either version 2, or (at your option) any 9 1.1 mrg later version. 10 1.1 mrg 11 1.1 mrg This program is distributed in the hope that it will be useful, 12 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 13 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 1.1 mrg GNU General Public License for more details. 15 1.1 mrg 16 1.1 mrg You should have received a copy of the GNU General Public License 17 1.1 mrg along with this program; if not, write to the Free Software Foundation, 18 1.1 mrg Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 1.1 mrg 20 1.1 mrg /* Written by Scott G. Miller 21 1.1 mrg Credits: 22 1.1 mrg Robert Klep <robert (at) ilse.nl> -- Expansion function fix 23 1.1 mrg */ 24 1.1 mrg 25 1.1 mrg #include <config.h> 26 1.1 mrg 27 1.1 mrg #include "sha1.h" 28 1.1 mrg 29 1.1 mrg #include <stddef.h> 30 1.1 mrg #include <string.h> 31 1.1 mrg 32 1.9 mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT 33 1.9 mrg # include <x86intrin.h> 34 1.9 mrg # include <cpuid.h> 35 1.9 mrg #endif 36 1.9 mrg 37 1.1 mrg #if USE_UNLOCKED_IO 38 1.1 mrg # include "unlocked-io.h" 39 1.1 mrg #endif 40 1.1 mrg 41 1.1 mrg #ifdef WORDS_BIGENDIAN 42 1.1 mrg # define SWAP(n) (n) 43 1.1 mrg #else 44 1.1 mrg # define SWAP(n) \ 45 1.1 mrg (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) 46 1.1 mrg #endif 47 1.1 mrg 48 1.1 mrg #define BLOCKSIZE 4096 49 1.1 mrg #if BLOCKSIZE % 64 != 0 50 1.1 mrg # error "invalid BLOCKSIZE" 51 1.1 mrg #endif 52 1.1 mrg 53 1.1 mrg /* This array contains the bytes used to pad the buffer to the next 54 1.1 mrg 64-byte boundary. (RFC 1321, 3.1: Step 1) */ 55 1.1 mrg static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; 56 1.1 mrg 57 1.1 mrg 58 1.1 mrg /* Take a pointer to a 160 bit block of data (five 32 bit ints) and 59 1.1 mrg initialize it to the start constants of the SHA1 algorithm. This 60 1.1 mrg must be called before using hash in the call to sha1_hash. */ 61 1.1 mrg void 62 1.1 mrg sha1_init_ctx (struct sha1_ctx *ctx) 63 1.1 mrg { 64 1.1 mrg ctx->A = 0x67452301; 65 1.1 mrg ctx->B = 0xefcdab89; 66 1.1 mrg ctx->C = 0x98badcfe; 67 1.1 mrg ctx->D = 0x10325476; 68 1.1 mrg ctx->E = 0xc3d2e1f0; 69 1.1 mrg 70 1.1 mrg ctx->total[0] = ctx->total[1] = 0; 71 1.1 mrg ctx->buflen = 0; 72 1.1 mrg } 73 1.1 mrg 74 1.1 mrg /* Put result from CTX in first 20 bytes following RESBUF. The result 75 1.1 mrg must be in little endian byte order. 76 1.1 mrg 77 1.1 mrg IMPORTANT: On some systems it is required that RESBUF is correctly 78 1.1 mrg aligned for a 32-bit value. */ 79 1.1 mrg void * 80 1.1 mrg sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf) 81 1.1 mrg { 82 1.1 mrg ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A); 83 1.1 mrg ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B); 84 1.1 mrg ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C); 85 1.1 mrg ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D); 86 1.1 mrg ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E); 87 1.1 mrg 88 1.1 mrg return resbuf; 89 1.1 mrg } 90 1.1 mrg 91 1.1 mrg /* Process the remaining bytes in the internal buffer and the usual 92 1.1 mrg prolog according to the standard and write the result to RESBUF. 93 1.1 mrg 94 1.1 mrg IMPORTANT: On some systems it is required that RESBUF is correctly 95 1.1 mrg aligned for a 32-bit value. */ 96 1.1 mrg void * 97 1.1 mrg sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf) 98 1.1 mrg { 99 1.1 mrg /* Take yet unprocessed bytes into account. */ 100 1.1 mrg sha1_uint32 bytes = ctx->buflen; 101 1.1 mrg size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4; 102 1.1 mrg 103 1.1 mrg /* Now count remaining bytes. */ 104 1.1 mrg ctx->total[0] += bytes; 105 1.1 mrg if (ctx->total[0] < bytes) 106 1.1 mrg ++ctx->total[1]; 107 1.1 mrg 108 1.1 mrg /* Put the 64-bit file length in *bits* at the end of the buffer. */ 109 1.1 mrg ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29)); 110 1.1 mrg ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3); 111 1.1 mrg 112 1.1 mrg memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes); 113 1.1 mrg 114 1.1 mrg /* Process last bytes. */ 115 1.1 mrg sha1_process_block (ctx->buffer, size * 4, ctx); 116 1.1 mrg 117 1.1 mrg return sha1_read_ctx (ctx, resbuf); 118 1.1 mrg } 119 1.1 mrg 120 1.1 mrg /* Compute SHA1 message digest for bytes read from STREAM. The 121 1.1 mrg resulting message digest number will be written into the 16 bytes 122 1.1 mrg beginning at RESBLOCK. */ 123 1.1 mrg int 124 1.1 mrg sha1_stream (FILE *stream, void *resblock) 125 1.1 mrg { 126 1.1 mrg struct sha1_ctx ctx; 127 1.1 mrg char buffer[BLOCKSIZE + 72]; 128 1.1 mrg size_t sum; 129 1.1 mrg 130 1.1 mrg /* Initialize the computation context. */ 131 1.1 mrg sha1_init_ctx (&ctx); 132 1.1 mrg 133 1.1 mrg /* Iterate over full file contents. */ 134 1.1 mrg while (1) 135 1.1 mrg { 136 1.1 mrg /* We read the file in blocks of BLOCKSIZE bytes. One call of the 137 1.1 mrg computation function processes the whole buffer so that with the 138 1.1 mrg next round of the loop another block can be read. */ 139 1.1 mrg size_t n; 140 1.1 mrg sum = 0; 141 1.1 mrg 142 1.1 mrg /* Read block. Take care for partial reads. */ 143 1.1 mrg while (1) 144 1.1 mrg { 145 1.1 mrg n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream); 146 1.1 mrg 147 1.1 mrg sum += n; 148 1.1 mrg 149 1.1 mrg if (sum == BLOCKSIZE) 150 1.1 mrg break; 151 1.1 mrg 152 1.1 mrg if (n == 0) 153 1.1 mrg { 154 1.1 mrg /* Check for the error flag IFF N == 0, so that we don't 155 1.1 mrg exit the loop after a partial read due to e.g., EAGAIN 156 1.1 mrg or EWOULDBLOCK. */ 157 1.1 mrg if (ferror (stream)) 158 1.1 mrg return 1; 159 1.1 mrg goto process_partial_block; 160 1.1 mrg } 161 1.1 mrg 162 1.1 mrg /* We've read at least one byte, so ignore errors. But always 163 1.1 mrg check for EOF, since feof may be true even though N > 0. 164 1.1 mrg Otherwise, we could end up calling fread after EOF. */ 165 1.1 mrg if (feof (stream)) 166 1.1 mrg goto process_partial_block; 167 1.1 mrg } 168 1.1 mrg 169 1.1 mrg /* Process buffer with BLOCKSIZE bytes. Note that 170 1.1 mrg BLOCKSIZE % 64 == 0 171 1.1 mrg */ 172 1.1 mrg sha1_process_block (buffer, BLOCKSIZE, &ctx); 173 1.1 mrg } 174 1.1 mrg 175 1.1 mrg process_partial_block:; 176 1.1 mrg 177 1.1 mrg /* Process any remaining bytes. */ 178 1.1 mrg if (sum > 0) 179 1.1 mrg sha1_process_bytes (buffer, sum, &ctx); 180 1.1 mrg 181 1.1 mrg /* Construct result in desired memory. */ 182 1.1 mrg sha1_finish_ctx (&ctx, resblock); 183 1.1 mrg return 0; 184 1.1 mrg } 185 1.1 mrg 186 1.1 mrg /* Compute SHA1 message digest for LEN bytes beginning at BUFFER. The 187 1.1 mrg result is always in little endian byte order, so that a byte-wise 188 1.1 mrg output yields to the wanted ASCII representation of the message 189 1.1 mrg digest. */ 190 1.1 mrg void * 191 1.1 mrg sha1_buffer (const char *buffer, size_t len, void *resblock) 192 1.1 mrg { 193 1.1 mrg struct sha1_ctx ctx; 194 1.1 mrg 195 1.1 mrg /* Initialize the computation context. */ 196 1.1 mrg sha1_init_ctx (&ctx); 197 1.1 mrg 198 1.1 mrg /* Process whole buffer but last len % 64 bytes. */ 199 1.1 mrg sha1_process_bytes (buffer, len, &ctx); 200 1.1 mrg 201 1.1 mrg /* Put result in desired memory area. */ 202 1.1 mrg return sha1_finish_ctx (&ctx, resblock); 203 1.1 mrg } 204 1.1 mrg 205 1.1 mrg void 206 1.1 mrg sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) 207 1.1 mrg { 208 1.1 mrg /* When we already have some bits in our internal buffer concatenate 209 1.1 mrg both inputs first. */ 210 1.1 mrg if (ctx->buflen != 0) 211 1.1 mrg { 212 1.1 mrg size_t left_over = ctx->buflen; 213 1.1 mrg size_t add = 128 - left_over > len ? len : 128 - left_over; 214 1.1 mrg 215 1.1 mrg memcpy (&((char *) ctx->buffer)[left_over], buffer, add); 216 1.1 mrg ctx->buflen += add; 217 1.1 mrg 218 1.1 mrg if (ctx->buflen > 64) 219 1.1 mrg { 220 1.1 mrg sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx); 221 1.1 mrg 222 1.1 mrg ctx->buflen &= 63; 223 1.1 mrg /* The regions in the following copy operation cannot overlap. */ 224 1.1 mrg memcpy (ctx->buffer, 225 1.1 mrg &((char *) ctx->buffer)[(left_over + add) & ~63], 226 1.1 mrg ctx->buflen); 227 1.1 mrg } 228 1.1 mrg 229 1.1 mrg buffer = (const char *) buffer + add; 230 1.1 mrg len -= add; 231 1.1 mrg } 232 1.1 mrg 233 1.1 mrg /* Process available complete blocks. */ 234 1.1 mrg if (len >= 64) 235 1.1 mrg { 236 1.1 mrg #if !_STRING_ARCH_unaligned 237 1.5 christos # if defined(__clang__) || defined(__GNUC__) 238 1.2 christos # define alignof(type) __alignof__(type) 239 1.2 christos # else 240 1.1 mrg # define alignof(type) offsetof (struct { char c; type x; }, x) 241 1.2 christos # endif 242 1.1 mrg # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0) 243 1.1 mrg if (UNALIGNED_P (buffer)) 244 1.1 mrg while (len > 64) 245 1.1 mrg { 246 1.1 mrg sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); 247 1.1 mrg buffer = (const char *) buffer + 64; 248 1.1 mrg len -= 64; 249 1.1 mrg } 250 1.1 mrg else 251 1.1 mrg #endif 252 1.1 mrg { 253 1.1 mrg sha1_process_block (buffer, len & ~63, ctx); 254 1.1 mrg buffer = (const char *) buffer + (len & ~63); 255 1.1 mrg len &= 63; 256 1.1 mrg } 257 1.1 mrg } 258 1.1 mrg 259 1.1 mrg /* Move remaining bytes in internal buffer. */ 260 1.1 mrg if (len > 0) 261 1.1 mrg { 262 1.1 mrg size_t left_over = ctx->buflen; 263 1.1 mrg 264 1.1 mrg memcpy (&((char *) ctx->buffer)[left_over], buffer, len); 265 1.1 mrg left_over += len; 266 1.1 mrg if (left_over >= 64) 267 1.1 mrg { 268 1.1 mrg sha1_process_block (ctx->buffer, 64, ctx); 269 1.1 mrg left_over -= 64; 270 1.8 mrg memmove (ctx->buffer, &ctx->buffer[16], left_over); 271 1.1 mrg } 272 1.1 mrg ctx->buflen = left_over; 273 1.1 mrg } 274 1.1 mrg } 275 1.1 mrg 276 1.1 mrg /* --- Code below is the primary difference between md5.c and sha1.c --- */ 277 1.1 mrg 278 1.1 mrg /* SHA1 round constants */ 279 1.1 mrg #define K1 0x5a827999 280 1.1 mrg #define K2 0x6ed9eba1 281 1.1 mrg #define K3 0x8f1bbcdc 282 1.1 mrg #define K4 0xca62c1d6 283 1.1 mrg 284 1.1 mrg /* Round functions. Note that F2 is the same as F4. */ 285 1.1 mrg #define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) ) 286 1.1 mrg #define F2(B,C,D) (B ^ C ^ D) 287 1.1 mrg #define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) ) 288 1.1 mrg #define F4(B,C,D) (B ^ C ^ D) 289 1.1 mrg 290 1.1 mrg /* Process LEN bytes of BUFFER, accumulating context into CTX. 291 1.1 mrg It is assumed that LEN % 64 == 0. 292 1.1 mrg Most of this code comes from GnuPG's cipher/sha1.c. */ 293 1.1 mrg 294 1.1 mrg void 295 1.1 mrg sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) 296 1.1 mrg { 297 1.1 mrg const sha1_uint32 *words = (const sha1_uint32*) buffer; 298 1.1 mrg size_t nwords = len / sizeof (sha1_uint32); 299 1.1 mrg const sha1_uint32 *endp = words + nwords; 300 1.1 mrg sha1_uint32 x[16]; 301 1.1 mrg sha1_uint32 a = ctx->A; 302 1.1 mrg sha1_uint32 b = ctx->B; 303 1.1 mrg sha1_uint32 c = ctx->C; 304 1.1 mrg sha1_uint32 d = ctx->D; 305 1.1 mrg sha1_uint32 e = ctx->E; 306 1.1 mrg 307 1.1 mrg /* First increment the byte count. RFC 1321 specifies the possible 308 1.1 mrg length of the file up to 2^64 bits. Here we only compute the 309 1.1 mrg number of bytes. Do a double word increment. */ 310 1.1 mrg ctx->total[0] += len; 311 1.2 christos ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len); 312 1.1 mrg 313 1.1 mrg #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n)))) 314 1.1 mrg 315 1.1 mrg #define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \ 316 1.1 mrg ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \ 317 1.1 mrg , (x[I&0x0f] = rol(tm, 1)) ) 318 1.1 mrg 319 1.1 mrg #define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \ 320 1.1 mrg + F( B, C, D ) \ 321 1.1 mrg + K \ 322 1.1 mrg + M; \ 323 1.1 mrg B = rol( B, 30 ); \ 324 1.1 mrg } while(0) 325 1.1 mrg 326 1.1 mrg while (words < endp) 327 1.1 mrg { 328 1.1 mrg sha1_uint32 tm; 329 1.1 mrg int t; 330 1.1 mrg for (t = 0; t < 16; t++) 331 1.1 mrg { 332 1.1 mrg x[t] = SWAP (*words); 333 1.1 mrg words++; 334 1.1 mrg } 335 1.1 mrg 336 1.1 mrg R( a, b, c, d, e, F1, K1, x[ 0] ); 337 1.1 mrg R( e, a, b, c, d, F1, K1, x[ 1] ); 338 1.1 mrg R( d, e, a, b, c, F1, K1, x[ 2] ); 339 1.1 mrg R( c, d, e, a, b, F1, K1, x[ 3] ); 340 1.1 mrg R( b, c, d, e, a, F1, K1, x[ 4] ); 341 1.1 mrg R( a, b, c, d, e, F1, K1, x[ 5] ); 342 1.1 mrg R( e, a, b, c, d, F1, K1, x[ 6] ); 343 1.1 mrg R( d, e, a, b, c, F1, K1, x[ 7] ); 344 1.1 mrg R( c, d, e, a, b, F1, K1, x[ 8] ); 345 1.1 mrg R( b, c, d, e, a, F1, K1, x[ 9] ); 346 1.1 mrg R( a, b, c, d, e, F1, K1, x[10] ); 347 1.1 mrg R( e, a, b, c, d, F1, K1, x[11] ); 348 1.1 mrg R( d, e, a, b, c, F1, K1, x[12] ); 349 1.1 mrg R( c, d, e, a, b, F1, K1, x[13] ); 350 1.1 mrg R( b, c, d, e, a, F1, K1, x[14] ); 351 1.1 mrg R( a, b, c, d, e, F1, K1, x[15] ); 352 1.1 mrg R( e, a, b, c, d, F1, K1, M(16) ); 353 1.1 mrg R( d, e, a, b, c, F1, K1, M(17) ); 354 1.1 mrg R( c, d, e, a, b, F1, K1, M(18) ); 355 1.1 mrg R( b, c, d, e, a, F1, K1, M(19) ); 356 1.1 mrg R( a, b, c, d, e, F2, K2, M(20) ); 357 1.1 mrg R( e, a, b, c, d, F2, K2, M(21) ); 358 1.1 mrg R( d, e, a, b, c, F2, K2, M(22) ); 359 1.1 mrg R( c, d, e, a, b, F2, K2, M(23) ); 360 1.1 mrg R( b, c, d, e, a, F2, K2, M(24) ); 361 1.1 mrg R( a, b, c, d, e, F2, K2, M(25) ); 362 1.1 mrg R( e, a, b, c, d, F2, K2, M(26) ); 363 1.1 mrg R( d, e, a, b, c, F2, K2, M(27) ); 364 1.1 mrg R( c, d, e, a, b, F2, K2, M(28) ); 365 1.1 mrg R( b, c, d, e, a, F2, K2, M(29) ); 366 1.1 mrg R( a, b, c, d, e, F2, K2, M(30) ); 367 1.1 mrg R( e, a, b, c, d, F2, K2, M(31) ); 368 1.1 mrg R( d, e, a, b, c, F2, K2, M(32) ); 369 1.1 mrg R( c, d, e, a, b, F2, K2, M(33) ); 370 1.1 mrg R( b, c, d, e, a, F2, K2, M(34) ); 371 1.1 mrg R( a, b, c, d, e, F2, K2, M(35) ); 372 1.1 mrg R( e, a, b, c, d, F2, K2, M(36) ); 373 1.1 mrg R( d, e, a, b, c, F2, K2, M(37) ); 374 1.1 mrg R( c, d, e, a, b, F2, K2, M(38) ); 375 1.1 mrg R( b, c, d, e, a, F2, K2, M(39) ); 376 1.1 mrg R( a, b, c, d, e, F3, K3, M(40) ); 377 1.1 mrg R( e, a, b, c, d, F3, K3, M(41) ); 378 1.1 mrg R( d, e, a, b, c, F3, K3, M(42) ); 379 1.1 mrg R( c, d, e, a, b, F3, K3, M(43) ); 380 1.1 mrg R( b, c, d, e, a, F3, K3, M(44) ); 381 1.1 mrg R( a, b, c, d, e, F3, K3, M(45) ); 382 1.1 mrg R( e, a, b, c, d, F3, K3, M(46) ); 383 1.1 mrg R( d, e, a, b, c, F3, K3, M(47) ); 384 1.1 mrg R( c, d, e, a, b, F3, K3, M(48) ); 385 1.1 mrg R( b, c, d, e, a, F3, K3, M(49) ); 386 1.1 mrg R( a, b, c, d, e, F3, K3, M(50) ); 387 1.1 mrg R( e, a, b, c, d, F3, K3, M(51) ); 388 1.1 mrg R( d, e, a, b, c, F3, K3, M(52) ); 389 1.1 mrg R( c, d, e, a, b, F3, K3, M(53) ); 390 1.1 mrg R( b, c, d, e, a, F3, K3, M(54) ); 391 1.1 mrg R( a, b, c, d, e, F3, K3, M(55) ); 392 1.1 mrg R( e, a, b, c, d, F3, K3, M(56) ); 393 1.1 mrg R( d, e, a, b, c, F3, K3, M(57) ); 394 1.1 mrg R( c, d, e, a, b, F3, K3, M(58) ); 395 1.1 mrg R( b, c, d, e, a, F3, K3, M(59) ); 396 1.1 mrg R( a, b, c, d, e, F4, K4, M(60) ); 397 1.1 mrg R( e, a, b, c, d, F4, K4, M(61) ); 398 1.1 mrg R( d, e, a, b, c, F4, K4, M(62) ); 399 1.1 mrg R( c, d, e, a, b, F4, K4, M(63) ); 400 1.1 mrg R( b, c, d, e, a, F4, K4, M(64) ); 401 1.1 mrg R( a, b, c, d, e, F4, K4, M(65) ); 402 1.1 mrg R( e, a, b, c, d, F4, K4, M(66) ); 403 1.1 mrg R( d, e, a, b, c, F4, K4, M(67) ); 404 1.1 mrg R( c, d, e, a, b, F4, K4, M(68) ); 405 1.1 mrg R( b, c, d, e, a, F4, K4, M(69) ); 406 1.1 mrg R( a, b, c, d, e, F4, K4, M(70) ); 407 1.1 mrg R( e, a, b, c, d, F4, K4, M(71) ); 408 1.1 mrg R( d, e, a, b, c, F4, K4, M(72) ); 409 1.1 mrg R( c, d, e, a, b, F4, K4, M(73) ); 410 1.1 mrg R( b, c, d, e, a, F4, K4, M(74) ); 411 1.1 mrg R( a, b, c, d, e, F4, K4, M(75) ); 412 1.1 mrg R( e, a, b, c, d, F4, K4, M(76) ); 413 1.1 mrg R( d, e, a, b, c, F4, K4, M(77) ); 414 1.1 mrg R( c, d, e, a, b, F4, K4, M(78) ); 415 1.1 mrg R( b, c, d, e, a, F4, K4, M(79) ); 416 1.1 mrg 417 1.1 mrg a = ctx->A += a; 418 1.1 mrg b = ctx->B += b; 419 1.1 mrg c = ctx->C += c; 420 1.1 mrg d = ctx->D += d; 421 1.1 mrg e = ctx->E += e; 422 1.1 mrg } 423 1.1 mrg } 424 1.9 mrg 425 1.9 mrg #if defined(HAVE_X86_SHA1_HW_SUPPORT) 426 1.9 mrg /* HW specific version of sha1_process_bytes. */ 427 1.9 mrg 428 1.9 mrg static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *); 429 1.9 mrg 430 1.9 mrg static void 431 1.9 mrg sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) 432 1.9 mrg { 433 1.9 mrg /* When we already have some bits in our internal buffer concatenate 434 1.9 mrg both inputs first. */ 435 1.9 mrg if (ctx->buflen != 0) 436 1.9 mrg { 437 1.9 mrg size_t left_over = ctx->buflen; 438 1.9 mrg size_t add = 128 - left_over > len ? len : 128 - left_over; 439 1.9 mrg 440 1.9 mrg memcpy (&((char *) ctx->buffer)[left_over], buffer, add); 441 1.9 mrg ctx->buflen += add; 442 1.9 mrg 443 1.9 mrg if (ctx->buflen > 64) 444 1.9 mrg { 445 1.9 mrg sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx); 446 1.9 mrg 447 1.9 mrg ctx->buflen &= 63; 448 1.9 mrg /* The regions in the following copy operation cannot overlap. */ 449 1.9 mrg memcpy (ctx->buffer, 450 1.9 mrg &((char *) ctx->buffer)[(left_over + add) & ~63], 451 1.9 mrg ctx->buflen); 452 1.9 mrg } 453 1.9 mrg 454 1.9 mrg buffer = (const char *) buffer + add; 455 1.9 mrg len -= add; 456 1.9 mrg } 457 1.9 mrg 458 1.9 mrg /* Process available complete blocks. */ 459 1.9 mrg if (len >= 64) 460 1.9 mrg { 461 1.9 mrg #if !_STRING_ARCH_unaligned 462 1.10 mrg #if 0 /* XXXMRG defined above */ 463 1.9 mrg # define alignof(type) offsetof (struct { char c; type x; }, x) 464 1.9 mrg # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0) 465 1.10 mrg #endif 466 1.9 mrg if (UNALIGNED_P (buffer)) 467 1.9 mrg while (len > 64) 468 1.9 mrg { 469 1.9 mrg sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); 470 1.9 mrg buffer = (const char *) buffer + 64; 471 1.9 mrg len -= 64; 472 1.9 mrg } 473 1.9 mrg else 474 1.9 mrg #endif 475 1.9 mrg { 476 1.9 mrg sha1_hw_process_block (buffer, len & ~63, ctx); 477 1.9 mrg buffer = (const char *) buffer + (len & ~63); 478 1.9 mrg len &= 63; 479 1.9 mrg } 480 1.9 mrg } 481 1.9 mrg 482 1.9 mrg /* Move remaining bytes in internal buffer. */ 483 1.9 mrg if (len > 0) 484 1.9 mrg { 485 1.9 mrg size_t left_over = ctx->buflen; 486 1.9 mrg 487 1.9 mrg memcpy (&((char *) ctx->buffer)[left_over], buffer, len); 488 1.9 mrg left_over += len; 489 1.9 mrg if (left_over >= 64) 490 1.9 mrg { 491 1.9 mrg sha1_hw_process_block (ctx->buffer, 64, ctx); 492 1.9 mrg left_over -= 64; 493 1.9 mrg memmove (ctx->buffer, &ctx->buffer[16], left_over); 494 1.9 mrg } 495 1.9 mrg ctx->buflen = left_over; 496 1.9 mrg } 497 1.9 mrg } 498 1.9 mrg 499 1.9 mrg /* Process LEN bytes of BUFFER, accumulating context into CTX. 500 1.9 mrg Using CPU specific intrinsics. */ 501 1.9 mrg 502 1.9 mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT 503 1.9 mrg __attribute__((__target__ ("sse4.1,sha"))) 504 1.9 mrg #endif 505 1.9 mrg static void 506 1.9 mrg sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) 507 1.9 mrg { 508 1.9 mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT 509 1.9 mrg /* Implemented from 510 1.9 mrg https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html */ 511 1.9 mrg const __m128i *words = (const __m128i *) buffer; 512 1.9 mrg const __m128i *endp = (const __m128i *) ((const char *) buffer + len); 513 1.9 mrg __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3; 514 1.9 mrg const __m128i shuf_mask 515 1.9 mrg = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL); 516 1.9 mrg char check[((offsetof (struct sha1_ctx, B) 517 1.9 mrg == offsetof (struct sha1_ctx, A) + sizeof (ctx->A)) 518 1.9 mrg && (offsetof (struct sha1_ctx, C) 519 1.9 mrg == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A)) 520 1.9 mrg && (offsetof (struct sha1_ctx, D) 521 1.9 mrg == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A))) 522 1.9 mrg ? 1 : -1]; 523 1.9 mrg 524 1.9 mrg /* First increment the byte count. RFC 1321 specifies the possible 525 1.9 mrg length of the file up to 2^64 bits. Here we only compute the 526 1.9 mrg number of bytes. Do a double word increment. */ 527 1.9 mrg ctx->total[0] += len; 528 1.9 mrg ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len); 529 1.9 mrg 530 1.9 mrg (void) &check[0]; 531 1.9 mrg abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A); 532 1.9 mrg e0 = _mm_set_epi32 (ctx->E, 0, 0, 0); 533 1.9 mrg abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */ 534 1.9 mrg 535 1.9 mrg while (words < endp) 536 1.9 mrg { 537 1.9 mrg abcd_save = abcd; 538 1.9 mrg e0_save = e0; 539 1.9 mrg 540 1.9 mrg /* 0..3 */ 541 1.9 mrg msg0 = _mm_loadu_si128 (words); 542 1.9 mrg msg0 = _mm_shuffle_epi8 (msg0, shuf_mask); 543 1.9 mrg e0 = _mm_add_epi32 (e0, msg0); 544 1.9 mrg e1 = abcd; 545 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); 546 1.9 mrg 547 1.9 mrg /* 4..7 */ 548 1.9 mrg msg1 = _mm_loadu_si128 (words + 1); 549 1.9 mrg msg1 = _mm_shuffle_epi8 (msg1, shuf_mask); 550 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1); 551 1.9 mrg e0 = abcd; 552 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0); 553 1.9 mrg msg0 = _mm_sha1msg1_epu32 (msg0, msg1); 554 1.9 mrg 555 1.9 mrg /* 8..11 */ 556 1.9 mrg msg2 = _mm_loadu_si128 (words + 2); 557 1.9 mrg msg2 = _mm_shuffle_epi8 (msg2, shuf_mask); 558 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2); 559 1.9 mrg e1 = abcd; 560 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); 561 1.9 mrg msg1 = _mm_sha1msg1_epu32 (msg1, msg2); 562 1.9 mrg msg0 = _mm_xor_si128 (msg0, msg2); 563 1.9 mrg 564 1.9 mrg /* 12..15 */ 565 1.9 mrg msg3 = _mm_loadu_si128 (words + 3); 566 1.9 mrg msg3 = _mm_shuffle_epi8 (msg3, shuf_mask); 567 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3); 568 1.9 mrg e0 = abcd; 569 1.9 mrg msg0 = _mm_sha1msg2_epu32 (msg0, msg3); 570 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0); 571 1.9 mrg msg2 = _mm_sha1msg1_epu32 (msg2, msg3); 572 1.9 mrg msg1 = _mm_xor_si128 (msg1, msg3); 573 1.9 mrg 574 1.9 mrg /* 16..19 */ 575 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg0); 576 1.9 mrg e1 = abcd; 577 1.9 mrg msg1 = _mm_sha1msg2_epu32 (msg1, msg0); 578 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0); 579 1.9 mrg msg3 = _mm_sha1msg1_epu32 (msg3, msg0); 580 1.9 mrg msg2 = _mm_xor_si128 (msg2, msg0); 581 1.9 mrg 582 1.9 mrg /* 20..23 */ 583 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1); 584 1.9 mrg e0 = abcd; 585 1.9 mrg msg2 = _mm_sha1msg2_epu32 (msg2, msg1); 586 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); 587 1.9 mrg msg0 = _mm_sha1msg1_epu32 (msg0, msg1); 588 1.9 mrg msg3 = _mm_xor_si128 (msg3, msg1); 589 1.9 mrg 590 1.9 mrg /* 24..27 */ 591 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2); 592 1.9 mrg e1 = abcd; 593 1.9 mrg msg3 = _mm_sha1msg2_epu32 (msg3, msg2); 594 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1); 595 1.9 mrg msg1 = _mm_sha1msg1_epu32 (msg1, msg2); 596 1.9 mrg msg0 = _mm_xor_si128 (msg0, msg2); 597 1.9 mrg 598 1.9 mrg /* 28..31 */ 599 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3); 600 1.9 mrg e0 = abcd; 601 1.9 mrg msg0 = _mm_sha1msg2_epu32 (msg0, msg3); 602 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); 603 1.9 mrg msg2 = _mm_sha1msg1_epu32 (msg2, msg3); 604 1.9 mrg msg1 = _mm_xor_si128 (msg1, msg3); 605 1.9 mrg 606 1.9 mrg /* 32..35 */ 607 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg0); 608 1.9 mrg e1 = abcd; 609 1.9 mrg msg1 = _mm_sha1msg2_epu32 (msg1, msg0); 610 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1); 611 1.9 mrg msg3 = _mm_sha1msg1_epu32 (msg3, msg0); 612 1.9 mrg msg2 = _mm_xor_si128 (msg2, msg0); 613 1.9 mrg 614 1.9 mrg /* 36..39 */ 615 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1); 616 1.9 mrg e0 = abcd; 617 1.9 mrg msg2 = _mm_sha1msg2_epu32 (msg2, msg1); 618 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1); 619 1.9 mrg msg0 = _mm_sha1msg1_epu32 (msg0, msg1); 620 1.9 mrg msg3 = _mm_xor_si128 (msg3, msg1); 621 1.9 mrg 622 1.9 mrg /* 40..43 */ 623 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2); 624 1.9 mrg e1 = abcd; 625 1.9 mrg msg3 = _mm_sha1msg2_epu32 (msg3, msg2); 626 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); 627 1.9 mrg msg1 = _mm_sha1msg1_epu32 (msg1, msg2); 628 1.9 mrg msg0 = _mm_xor_si128 (msg0, msg2); 629 1.9 mrg 630 1.9 mrg /* 44..47 */ 631 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3); 632 1.9 mrg e0 = abcd; 633 1.9 mrg msg0 = _mm_sha1msg2_epu32 (msg0, msg3); 634 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2); 635 1.9 mrg msg2 = _mm_sha1msg1_epu32 (msg2, msg3); 636 1.9 mrg msg1 = _mm_xor_si128 (msg1, msg3); 637 1.9 mrg 638 1.9 mrg /* 48..51 */ 639 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg0); 640 1.9 mrg e1 = abcd; 641 1.9 mrg msg1 = _mm_sha1msg2_epu32 (msg1, msg0); 642 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); 643 1.9 mrg msg3 = _mm_sha1msg1_epu32 (msg3, msg0); 644 1.9 mrg msg2 = _mm_xor_si128 (msg2, msg0); 645 1.9 mrg 646 1.9 mrg /* 52..55 */ 647 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1); 648 1.9 mrg e0 = abcd; 649 1.9 mrg msg2 = _mm_sha1msg2_epu32 (msg2, msg1); 650 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2); 651 1.9 mrg msg0 = _mm_sha1msg1_epu32 (msg0, msg1); 652 1.9 mrg msg3 = _mm_xor_si128 (msg3, msg1); 653 1.9 mrg 654 1.9 mrg /* 56..59 */ 655 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2); 656 1.9 mrg e1 = abcd; 657 1.9 mrg msg3 = _mm_sha1msg2_epu32 (msg3, msg2); 658 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2); 659 1.9 mrg msg1 = _mm_sha1msg1_epu32 (msg1, msg2); 660 1.9 mrg msg0 = _mm_xor_si128 (msg0, msg2); 661 1.9 mrg 662 1.9 mrg /* 60..63 */ 663 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3); 664 1.9 mrg e0 = abcd; 665 1.9 mrg msg0 = _mm_sha1msg2_epu32 (msg0, msg3); 666 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); 667 1.9 mrg msg2 = _mm_sha1msg1_epu32 (msg2, msg3); 668 1.9 mrg msg1 = _mm_xor_si128 (msg1, msg3); 669 1.9 mrg 670 1.9 mrg /* 64..67 */ 671 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg0); 672 1.9 mrg e1 = abcd; 673 1.9 mrg msg1 = _mm_sha1msg2_epu32 (msg1, msg0); 674 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3); 675 1.9 mrg msg3 = _mm_sha1msg1_epu32 (msg3, msg0); 676 1.9 mrg msg2 = _mm_xor_si128 (msg2, msg0); 677 1.9 mrg 678 1.9 mrg /* 68..71 */ 679 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1); 680 1.9 mrg e0 = abcd; 681 1.9 mrg msg2 = _mm_sha1msg2_epu32 (msg2, msg1); 682 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); 683 1.9 mrg msg3 = _mm_xor_si128 (msg3, msg1); 684 1.9 mrg 685 1.9 mrg /* 72..75 */ 686 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2); 687 1.9 mrg e1 = abcd; 688 1.9 mrg msg3 = _mm_sha1msg2_epu32 (msg3, msg2); 689 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3); 690 1.9 mrg 691 1.9 mrg /* 76..79 */ 692 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3); 693 1.9 mrg e0 = abcd; 694 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3); 695 1.9 mrg 696 1.9 mrg /* Finalize. */ 697 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, e0_save); 698 1.9 mrg abcd = _mm_add_epi32 (abcd, abcd_save); 699 1.9 mrg 700 1.9 mrg words = words + 4; 701 1.9 mrg } 702 1.9 mrg 703 1.9 mrg abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */ 704 1.9 mrg _mm_storeu_si128 ((__m128i *) &ctx->A, abcd); 705 1.9 mrg ctx->E = _mm_extract_epi32 (e0, 3); 706 1.9 mrg #endif 707 1.9 mrg } 708 1.9 mrg #endif 709 1.9 mrg 710 1.9 mrg /* Return sha1_process_bytes or some hardware optimized version thereof 711 1.9 mrg depending on current CPU. */ 712 1.9 mrg 713 1.9 mrg sha1_process_bytes_fn 714 1.9 mrg sha1_choose_process_bytes (void) 715 1.9 mrg { 716 1.9 mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT 717 1.9 mrg unsigned int eax, ebx, ecx, edx; 718 1.9 mrg if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx) 719 1.9 mrg && (ebx & bit_SHA) != 0 720 1.9 mrg && __get_cpuid (1, &eax, &ebx, &ecx, &edx) 721 1.9 mrg && (ecx & bit_SSE4_1) != 0) 722 1.9 mrg return sha1_hw_process_bytes; 723 1.9 mrg #endif 724 1.9 mrg return sha1_process_bytes; 725 1.9 mrg } 726