sha1.c revision 1.9 1 1.1 mrg /* sha1.c - Functions to compute SHA1 message digest of files or
2 1.1 mrg memory blocks according to the NIST specification FIPS-180-1.
3 1.1 mrg
4 1.9 mrg Copyright (C) 2000-2024 Free Software Foundation, Inc.
5 1.1 mrg
6 1.1 mrg This program is free software; you can redistribute it and/or modify it
7 1.1 mrg under the terms of the GNU General Public License as published by the
8 1.1 mrg Free Software Foundation; either version 2, or (at your option) any
9 1.1 mrg later version.
10 1.1 mrg
11 1.1 mrg This program is distributed in the hope that it will be useful,
12 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
13 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 1.1 mrg GNU General Public License for more details.
15 1.1 mrg
16 1.1 mrg You should have received a copy of the GNU General Public License
17 1.1 mrg along with this program; if not, write to the Free Software Foundation,
18 1.1 mrg Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 1.1 mrg
20 1.1 mrg /* Written by Scott G. Miller
21 1.1 mrg Credits:
22 1.1 mrg Robert Klep <robert (at) ilse.nl> -- Expansion function fix
23 1.1 mrg */
24 1.1 mrg
25 1.1 mrg #include <config.h>
26 1.1 mrg
27 1.1 mrg #include "sha1.h"
28 1.1 mrg
29 1.1 mrg #include <stddef.h>
30 1.1 mrg #include <string.h>
31 1.1 mrg
32 1.9 mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
33 1.9 mrg # include <x86intrin.h>
34 1.9 mrg # include <cpuid.h>
35 1.9 mrg #endif
36 1.9 mrg
37 1.1 mrg #if USE_UNLOCKED_IO
38 1.1 mrg # include "unlocked-io.h"
39 1.1 mrg #endif
40 1.1 mrg
/* SWAP (n) converts a 32-bit word between host order and the
   most-significant-byte-first order used for the message words and
   the digest.  N is evaluated several times, so it must be free of
   side effects.  */
#ifndef WORDS_BIGENDIAN
# define SWAP(n) \
    (((n) >> 24) | (((n) >> 8) & 0xff00) | (((n) & 0xff00) << 8) | ((n) << 24))
#else
# define SWAP(n) (n)
#endif

/* Number of bytes sha1_stream reads per chunk; it has to be a whole
   number of 64-byte hash blocks.  */
#define BLOCKSIZE 4096
#if (BLOCKSIZE % 64) != 0
# error "invalid BLOCKSIZE"
#endif

/* Padding bytes appended after the message: a single 0x80 byte
   followed by zeros, enough to reach the next 64-byte boundary.  */
static const unsigned char fillbuf[64] = { 0x80 };
56 1.1 mrg
57 1.1 mrg
58 1.1 mrg /* Take a pointer to a 160 bit block of data (five 32 bit ints) and
59 1.1 mrg initialize it to the start constants of the SHA1 algorithm. This
60 1.1 mrg must be called before using hash in the call to sha1_hash. */
61 1.1 mrg void
62 1.1 mrg sha1_init_ctx (struct sha1_ctx *ctx)
63 1.1 mrg {
64 1.1 mrg ctx->A = 0x67452301;
65 1.1 mrg ctx->B = 0xefcdab89;
66 1.1 mrg ctx->C = 0x98badcfe;
67 1.1 mrg ctx->D = 0x10325476;
68 1.1 mrg ctx->E = 0xc3d2e1f0;
69 1.1 mrg
70 1.1 mrg ctx->total[0] = ctx->total[1] = 0;
71 1.1 mrg ctx->buflen = 0;
72 1.1 mrg }
73 1.1 mrg
74 1.1 mrg /* Put result from CTX in first 20 bytes following RESBUF. The result
75 1.1 mrg must be in little endian byte order.
76 1.1 mrg
77 1.1 mrg IMPORTANT: On some systems it is required that RESBUF is correctly
78 1.1 mrg aligned for a 32-bit value. */
79 1.1 mrg void *
80 1.1 mrg sha1_read_ctx (const struct sha1_ctx *ctx, void *resbuf)
81 1.1 mrg {
82 1.1 mrg ((sha1_uint32 *) resbuf)[0] = SWAP (ctx->A);
83 1.1 mrg ((sha1_uint32 *) resbuf)[1] = SWAP (ctx->B);
84 1.1 mrg ((sha1_uint32 *) resbuf)[2] = SWAP (ctx->C);
85 1.1 mrg ((sha1_uint32 *) resbuf)[3] = SWAP (ctx->D);
86 1.1 mrg ((sha1_uint32 *) resbuf)[4] = SWAP (ctx->E);
87 1.1 mrg
88 1.1 mrg return resbuf;
89 1.1 mrg }
90 1.1 mrg
91 1.1 mrg /* Process the remaining bytes in the internal buffer and the usual
92 1.1 mrg prolog according to the standard and write the result to RESBUF.
93 1.1 mrg
94 1.1 mrg IMPORTANT: On some systems it is required that RESBUF is correctly
95 1.1 mrg aligned for a 32-bit value. */
96 1.1 mrg void *
97 1.1 mrg sha1_finish_ctx (struct sha1_ctx *ctx, void *resbuf)
98 1.1 mrg {
99 1.1 mrg /* Take yet unprocessed bytes into account. */
100 1.1 mrg sha1_uint32 bytes = ctx->buflen;
101 1.1 mrg size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4;
102 1.1 mrg
103 1.1 mrg /* Now count remaining bytes. */
104 1.1 mrg ctx->total[0] += bytes;
105 1.1 mrg if (ctx->total[0] < bytes)
106 1.1 mrg ++ctx->total[1];
107 1.1 mrg
108 1.1 mrg /* Put the 64-bit file length in *bits* at the end of the buffer. */
109 1.1 mrg ctx->buffer[size - 2] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29));
110 1.1 mrg ctx->buffer[size - 1] = SWAP (ctx->total[0] << 3);
111 1.1 mrg
112 1.1 mrg memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes);
113 1.1 mrg
114 1.1 mrg /* Process last bytes. */
115 1.1 mrg sha1_process_block (ctx->buffer, size * 4, ctx);
116 1.1 mrg
117 1.1 mrg return sha1_read_ctx (ctx, resbuf);
118 1.1 mrg }
119 1.1 mrg
120 1.1 mrg /* Compute SHA1 message digest for bytes read from STREAM. The
121 1.1 mrg resulting message digest number will be written into the 16 bytes
122 1.1 mrg beginning at RESBLOCK. */
123 1.1 mrg int
124 1.1 mrg sha1_stream (FILE *stream, void *resblock)
125 1.1 mrg {
126 1.1 mrg struct sha1_ctx ctx;
127 1.1 mrg char buffer[BLOCKSIZE + 72];
128 1.1 mrg size_t sum;
129 1.1 mrg
130 1.1 mrg /* Initialize the computation context. */
131 1.1 mrg sha1_init_ctx (&ctx);
132 1.1 mrg
133 1.1 mrg /* Iterate over full file contents. */
134 1.1 mrg while (1)
135 1.1 mrg {
136 1.1 mrg /* We read the file in blocks of BLOCKSIZE bytes. One call of the
137 1.1 mrg computation function processes the whole buffer so that with the
138 1.1 mrg next round of the loop another block can be read. */
139 1.1 mrg size_t n;
140 1.1 mrg sum = 0;
141 1.1 mrg
142 1.1 mrg /* Read block. Take care for partial reads. */
143 1.1 mrg while (1)
144 1.1 mrg {
145 1.1 mrg n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
146 1.1 mrg
147 1.1 mrg sum += n;
148 1.1 mrg
149 1.1 mrg if (sum == BLOCKSIZE)
150 1.1 mrg break;
151 1.1 mrg
152 1.1 mrg if (n == 0)
153 1.1 mrg {
154 1.1 mrg /* Check for the error flag IFF N == 0, so that we don't
155 1.1 mrg exit the loop after a partial read due to e.g., EAGAIN
156 1.1 mrg or EWOULDBLOCK. */
157 1.1 mrg if (ferror (stream))
158 1.1 mrg return 1;
159 1.1 mrg goto process_partial_block;
160 1.1 mrg }
161 1.1 mrg
162 1.1 mrg /* We've read at least one byte, so ignore errors. But always
163 1.1 mrg check for EOF, since feof may be true even though N > 0.
164 1.1 mrg Otherwise, we could end up calling fread after EOF. */
165 1.1 mrg if (feof (stream))
166 1.1 mrg goto process_partial_block;
167 1.1 mrg }
168 1.1 mrg
169 1.1 mrg /* Process buffer with BLOCKSIZE bytes. Note that
170 1.1 mrg BLOCKSIZE % 64 == 0
171 1.1 mrg */
172 1.1 mrg sha1_process_block (buffer, BLOCKSIZE, &ctx);
173 1.1 mrg }
174 1.1 mrg
175 1.1 mrg process_partial_block:;
176 1.1 mrg
177 1.1 mrg /* Process any remaining bytes. */
178 1.1 mrg if (sum > 0)
179 1.1 mrg sha1_process_bytes (buffer, sum, &ctx);
180 1.1 mrg
181 1.1 mrg /* Construct result in desired memory. */
182 1.1 mrg sha1_finish_ctx (&ctx, resblock);
183 1.1 mrg return 0;
184 1.1 mrg }
185 1.1 mrg
186 1.1 mrg /* Compute SHA1 message digest for LEN bytes beginning at BUFFER. The
187 1.1 mrg result is always in little endian byte order, so that a byte-wise
188 1.1 mrg output yields to the wanted ASCII representation of the message
189 1.1 mrg digest. */
190 1.1 mrg void *
191 1.1 mrg sha1_buffer (const char *buffer, size_t len, void *resblock)
192 1.1 mrg {
193 1.1 mrg struct sha1_ctx ctx;
194 1.1 mrg
195 1.1 mrg /* Initialize the computation context. */
196 1.1 mrg sha1_init_ctx (&ctx);
197 1.1 mrg
198 1.1 mrg /* Process whole buffer but last len % 64 bytes. */
199 1.1 mrg sha1_process_bytes (buffer, len, &ctx);
200 1.1 mrg
201 1.1 mrg /* Put result in desired memory area. */
202 1.1 mrg return sha1_finish_ctx (&ctx, resblock);
203 1.1 mrg }
204 1.1 mrg
205 1.1 mrg void
206 1.1 mrg sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
207 1.1 mrg {
208 1.1 mrg /* When we already have some bits in our internal buffer concatenate
209 1.1 mrg both inputs first. */
210 1.1 mrg if (ctx->buflen != 0)
211 1.1 mrg {
212 1.1 mrg size_t left_over = ctx->buflen;
213 1.1 mrg size_t add = 128 - left_over > len ? len : 128 - left_over;
214 1.1 mrg
215 1.1 mrg memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
216 1.1 mrg ctx->buflen += add;
217 1.1 mrg
218 1.1 mrg if (ctx->buflen > 64)
219 1.1 mrg {
220 1.1 mrg sha1_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
221 1.1 mrg
222 1.1 mrg ctx->buflen &= 63;
223 1.1 mrg /* The regions in the following copy operation cannot overlap. */
224 1.1 mrg memcpy (ctx->buffer,
225 1.1 mrg &((char *) ctx->buffer)[(left_over + add) & ~63],
226 1.1 mrg ctx->buflen);
227 1.1 mrg }
228 1.1 mrg
229 1.1 mrg buffer = (const char *) buffer + add;
230 1.1 mrg len -= add;
231 1.1 mrg }
232 1.1 mrg
233 1.1 mrg /* Process available complete blocks. */
234 1.1 mrg if (len >= 64)
235 1.1 mrg {
236 1.1 mrg #if !_STRING_ARCH_unaligned
237 1.5 christos # if defined(__clang__) || defined(__GNUC__)
238 1.2 christos # define alignof(type) __alignof__(type)
239 1.2 christos # else
240 1.1 mrg # define alignof(type) offsetof (struct { char c; type x; }, x)
241 1.2 christos # endif
242 1.1 mrg # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
243 1.1 mrg if (UNALIGNED_P (buffer))
244 1.1 mrg while (len > 64)
245 1.1 mrg {
246 1.1 mrg sha1_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
247 1.1 mrg buffer = (const char *) buffer + 64;
248 1.1 mrg len -= 64;
249 1.1 mrg }
250 1.1 mrg else
251 1.1 mrg #endif
252 1.1 mrg {
253 1.1 mrg sha1_process_block (buffer, len & ~63, ctx);
254 1.1 mrg buffer = (const char *) buffer + (len & ~63);
255 1.1 mrg len &= 63;
256 1.1 mrg }
257 1.1 mrg }
258 1.1 mrg
259 1.1 mrg /* Move remaining bytes in internal buffer. */
260 1.1 mrg if (len > 0)
261 1.1 mrg {
262 1.1 mrg size_t left_over = ctx->buflen;
263 1.1 mrg
264 1.1 mrg memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
265 1.1 mrg left_over += len;
266 1.1 mrg if (left_over >= 64)
267 1.1 mrg {
268 1.1 mrg sha1_process_block (ctx->buffer, 64, ctx);
269 1.1 mrg left_over -= 64;
270 1.8 mrg memmove (ctx->buffer, &ctx->buffer[16], left_over);
271 1.1 mrg }
272 1.1 mrg ctx->buflen = left_over;
273 1.1 mrg }
274 1.1 mrg }
275 1.1 mrg
276 1.1 mrg /* --- Code below is the primary difference between md5.c and sha1.c --- */
277 1.1 mrg
/* SHA1 round constants, one per group of 20 rounds.  */
#define K1 0x5a827999
#define K2 0x6ed9eba1
#define K3 0x8f1bbcdc
#define K4 0xca62c1d6

/* Round functions, one per group of 20 rounds.  Note that F2 is the
   same as F4.  F1 yields the bits of C where B is set and the bits of
   D elsewhere; F2 and F4 are the bitwise parity of the three inputs;
   F3 is their bitwise majority.

   Every argument is parenthesized so that the macros also expand
   correctly when an argument is an expression containing operators of
   lower precedence than & and ^.  Arguments may be evaluated more
   than once and must therefore be free of side effects.  */
#define F1(B,C,D) ((D) ^ ((B) & ((C) ^ (D))))
#define F2(B,C,D) ((B) ^ (C) ^ (D))
#define F3(B,C,D) (((B) & (C)) | ((D) & ((B) | (C))))
#define F4(B,C,D) ((B) ^ (C) ^ (D))
289 1.1 mrg
290 1.1 mrg /* Process LEN bytes of BUFFER, accumulating context into CTX.
291 1.1 mrg It is assumed that LEN % 64 == 0.
292 1.1 mrg Most of this code comes from GnuPG's cipher/sha1.c.

   BUFFER is read as consecutive sha1_uint32 words, sixteen per
   64-byte chunk, each passed through SWAP.  For every chunk the 80
   unrolled rounds below update working copies of CTX->A..E, which are
   then added back into CTX.  LEN is also added to the byte counter
   CTX->total[].  */
293 1.1 mrg
294 1.1 mrg void
295 1.1 mrg sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
296 1.1 mrg {
297 1.1 mrg const sha1_uint32 *words = (const sha1_uint32*) buffer;
298 1.1 mrg size_t nwords = len / sizeof (sha1_uint32);
299 1.1 mrg const sha1_uint32 *endp = words + nwords;
  /* x[] holds the sixteen most recent message-schedule words.  */
300 1.1 mrg sha1_uint32 x[16];
  /* Working copies of the chaining words.  */
301 1.1 mrg sha1_uint32 a = ctx->A;
302 1.1 mrg sha1_uint32 b = ctx->B;
303 1.1 mrg sha1_uint32 c = ctx->C;
304 1.1 mrg sha1_uint32 d = ctx->D;
305 1.1 mrg sha1_uint32 e = ctx->E;
306 1.1 mrg
307 1.1 mrg /* First increment the byte count. RFC 1321 specifies the possible
308 1.1 mrg length of the file up to 2^64 bits. Here we only compute the
309 1.1 mrg number of bytes. Do a double word increment. */
310 1.1 mrg ctx->total[0] += len;
  /* (len >> 31) >> 1 is LEN shifted right by 32 bits, written as two
     shifts so that the shift count stays below the width of size_t
     when size_t has only 32 bits (the result is then 0).  The
     comparison adds the carry out of total[0].
     NOTE(review): this assumes the elements of total[] are 32 bits
     wide; their declaration is not visible here -- confirm in
     sha1.h.  */
311 1.2 christos ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
312 1.1 mrg
  /* Rotate X left by N bits, treating X as a 32-bit value.  N is
     always 1, 5 or 30 here.  */
313 1.1 mrg #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))
314 1.1 mrg
  /* Compute message-schedule word I in place.  x[] is used as a
     circular window of sixteen words: the slot x[I & 15] still holds
     word I-16, which is XORed with words I-14, I-8 and I-3, rotated
     left by one bit, stored back into the same slot and yielded as
     the value of the macro.  The local TM is used as scratch.  */
315 1.1 mrg #define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \
316 1.1 mrg ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
317 1.1 mrg , (x[I&0x0f] = rol(tm, 1)) )
318 1.1 mrg
  /* One round: E += rol (A, 5) + F (B, C, D) + K + M, after which B
     is rotated left by 30 bits.  Instead of moving values between the
     five variables after each round, the invocations below rotate
     the order in which a..e are passed.  */
319 1.1 mrg #define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \
320 1.1 mrg + F( B, C, D ) \
321 1.1 mrg + K \
322 1.1 mrg + M; \
323 1.1 mrg B = rol( B, 30 ); \
324 1.1 mrg } while(0)
325 1.1 mrg
  /* One iteration per 64-byte chunk.  */
326 1.1 mrg while (words < endp)
327 1.1 mrg {
328 1.1 mrg sha1_uint32 tm;
329 1.1 mrg int t;
  /* Load the sixteen words of this chunk, converted with SWAP.  */
330 1.1 mrg for (t = 0; t < 16; t++)
331 1.1 mrg {
332 1.1 mrg x[t] = SWAP (*words);
333 1.1 mrg words++;
334 1.1 mrg }
335 1.1 mrg
  /* Rounds 0..19 use F1/K1; the first sixteen take the loaded words
     directly, the remaining ones take schedule words from M.  */
336 1.1 mrg R( a, b, c, d, e, F1, K1, x[ 0] );
337 1.1 mrg R( e, a, b, c, d, F1, K1, x[ 1] );
338 1.1 mrg R( d, e, a, b, c, F1, K1, x[ 2] );
339 1.1 mrg R( c, d, e, a, b, F1, K1, x[ 3] );
340 1.1 mrg R( b, c, d, e, a, F1, K1, x[ 4] );
341 1.1 mrg R( a, b, c, d, e, F1, K1, x[ 5] );
342 1.1 mrg R( e, a, b, c, d, F1, K1, x[ 6] );
343 1.1 mrg R( d, e, a, b, c, F1, K1, x[ 7] );
344 1.1 mrg R( c, d, e, a, b, F1, K1, x[ 8] );
345 1.1 mrg R( b, c, d, e, a, F1, K1, x[ 9] );
346 1.1 mrg R( a, b, c, d, e, F1, K1, x[10] );
347 1.1 mrg R( e, a, b, c, d, F1, K1, x[11] );
348 1.1 mrg R( d, e, a, b, c, F1, K1, x[12] );
349 1.1 mrg R( c, d, e, a, b, F1, K1, x[13] );
350 1.1 mrg R( b, c, d, e, a, F1, K1, x[14] );
351 1.1 mrg R( a, b, c, d, e, F1, K1, x[15] );
352 1.1 mrg R( e, a, b, c, d, F1, K1, M(16) );
353 1.1 mrg R( d, e, a, b, c, F1, K1, M(17) );
354 1.1 mrg R( c, d, e, a, b, F1, K1, M(18) );
355 1.1 mrg R( b, c, d, e, a, F1, K1, M(19) );
  /* Rounds 20..39 use F2/K2.  */
356 1.1 mrg R( a, b, c, d, e, F2, K2, M(20) );
357 1.1 mrg R( e, a, b, c, d, F2, K2, M(21) );
358 1.1 mrg R( d, e, a, b, c, F2, K2, M(22) );
359 1.1 mrg R( c, d, e, a, b, F2, K2, M(23) );
360 1.1 mrg R( b, c, d, e, a, F2, K2, M(24) );
361 1.1 mrg R( a, b, c, d, e, F2, K2, M(25) );
362 1.1 mrg R( e, a, b, c, d, F2, K2, M(26) );
363 1.1 mrg R( d, e, a, b, c, F2, K2, M(27) );
364 1.1 mrg R( c, d, e, a, b, F2, K2, M(28) );
365 1.1 mrg R( b, c, d, e, a, F2, K2, M(29) );
366 1.1 mrg R( a, b, c, d, e, F2, K2, M(30) );
367 1.1 mrg R( e, a, b, c, d, F2, K2, M(31) );
368 1.1 mrg R( d, e, a, b, c, F2, K2, M(32) );
369 1.1 mrg R( c, d, e, a, b, F2, K2, M(33) );
370 1.1 mrg R( b, c, d, e, a, F2, K2, M(34) );
371 1.1 mrg R( a, b, c, d, e, F2, K2, M(35) );
372 1.1 mrg R( e, a, b, c, d, F2, K2, M(36) );
373 1.1 mrg R( d, e, a, b, c, F2, K2, M(37) );
374 1.1 mrg R( c, d, e, a, b, F2, K2, M(38) );
375 1.1 mrg R( b, c, d, e, a, F2, K2, M(39) );
  /* Rounds 40..59 use F3/K3.  */
376 1.1 mrg R( a, b, c, d, e, F3, K3, M(40) );
377 1.1 mrg R( e, a, b, c, d, F3, K3, M(41) );
378 1.1 mrg R( d, e, a, b, c, F3, K3, M(42) );
379 1.1 mrg R( c, d, e, a, b, F3, K3, M(43) );
380 1.1 mrg R( b, c, d, e, a, F3, K3, M(44) );
381 1.1 mrg R( a, b, c, d, e, F3, K3, M(45) );
382 1.1 mrg R( e, a, b, c, d, F3, K3, M(46) );
383 1.1 mrg R( d, e, a, b, c, F3, K3, M(47) );
384 1.1 mrg R( c, d, e, a, b, F3, K3, M(48) );
385 1.1 mrg R( b, c, d, e, a, F3, K3, M(49) );
386 1.1 mrg R( a, b, c, d, e, F3, K3, M(50) );
387 1.1 mrg R( e, a, b, c, d, F3, K3, M(51) );
388 1.1 mrg R( d, e, a, b, c, F3, K3, M(52) );
389 1.1 mrg R( c, d, e, a, b, F3, K3, M(53) );
390 1.1 mrg R( b, c, d, e, a, F3, K3, M(54) );
391 1.1 mrg R( a, b, c, d, e, F3, K3, M(55) );
392 1.1 mrg R( e, a, b, c, d, F3, K3, M(56) );
393 1.1 mrg R( d, e, a, b, c, F3, K3, M(57) );
394 1.1 mrg R( c, d, e, a, b, F3, K3, M(58) );
395 1.1 mrg R( b, c, d, e, a, F3, K3, M(59) );
  /* Rounds 60..79 use F4/K4.  */
396 1.1 mrg R( a, b, c, d, e, F4, K4, M(60) );
397 1.1 mrg R( e, a, b, c, d, F4, K4, M(61) );
398 1.1 mrg R( d, e, a, b, c, F4, K4, M(62) );
399 1.1 mrg R( c, d, e, a, b, F4, K4, M(63) );
400 1.1 mrg R( b, c, d, e, a, F4, K4, M(64) );
401 1.1 mrg R( a, b, c, d, e, F4, K4, M(65) );
402 1.1 mrg R( e, a, b, c, d, F4, K4, M(66) );
403 1.1 mrg R( d, e, a, b, c, F4, K4, M(67) );
404 1.1 mrg R( c, d, e, a, b, F4, K4, M(68) );
405 1.1 mrg R( b, c, d, e, a, F4, K4, M(69) );
406 1.1 mrg R( a, b, c, d, e, F4, K4, M(70) );
407 1.1 mrg R( e, a, b, c, d, F4, K4, M(71) );
408 1.1 mrg R( d, e, a, b, c, F4, K4, M(72) );
409 1.1 mrg R( c, d, e, a, b, F4, K4, M(73) );
410 1.1 mrg R( b, c, d, e, a, F4, K4, M(74) );
411 1.1 mrg R( a, b, c, d, e, F4, K4, M(75) );
412 1.1 mrg R( e, a, b, c, d, F4, K4, M(76) );
413 1.1 mrg R( d, e, a, b, c, F4, K4, M(77) );
414 1.1 mrg R( c, d, e, a, b, F4, K4, M(78) );
415 1.1 mrg R( b, c, d, e, a, F4, K4, M(79) );
416 1.1 mrg
  /* Add the result of this chunk into the chaining words and reload
     the working copies with the sums for the next chunk.  */
417 1.1 mrg a = ctx->A += a;
418 1.1 mrg b = ctx->B += b;
419 1.1 mrg c = ctx->C += c;
420 1.1 mrg d = ctx->D += d;
421 1.1 mrg e = ctx->E += e;
422 1.1 mrg }
423 1.1 mrg }
424 1.9 mrg
425 1.9 mrg #if defined(HAVE_X86_SHA1_HW_SUPPORT)
426 1.9 mrg /* HW specific version of sha1_process_bytes. */
427 1.9 mrg
428 1.9 mrg static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx *);
429 1.9 mrg
430 1.9 mrg static void
431 1.9 mrg sha1_hw_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx)
432 1.9 mrg {
433 1.9 mrg /* When we already have some bits in our internal buffer concatenate
434 1.9 mrg both inputs first. */
435 1.9 mrg if (ctx->buflen != 0)
436 1.9 mrg {
437 1.9 mrg size_t left_over = ctx->buflen;
438 1.9 mrg size_t add = 128 - left_over > len ? len : 128 - left_over;
439 1.9 mrg
440 1.9 mrg memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
441 1.9 mrg ctx->buflen += add;
442 1.9 mrg
443 1.9 mrg if (ctx->buflen > 64)
444 1.9 mrg {
445 1.9 mrg sha1_hw_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
446 1.9 mrg
447 1.9 mrg ctx->buflen &= 63;
448 1.9 mrg /* The regions in the following copy operation cannot overlap. */
449 1.9 mrg memcpy (ctx->buffer,
450 1.9 mrg &((char *) ctx->buffer)[(left_over + add) & ~63],
451 1.9 mrg ctx->buflen);
452 1.9 mrg }
453 1.9 mrg
454 1.9 mrg buffer = (const char *) buffer + add;
455 1.9 mrg len -= add;
456 1.9 mrg }
457 1.9 mrg
458 1.9 mrg /* Process available complete blocks. */
459 1.9 mrg if (len >= 64)
460 1.9 mrg {
461 1.9 mrg #if !_STRING_ARCH_unaligned
462 1.9 mrg # define alignof(type) offsetof (struct { char c; type x; }, x)
463 1.9 mrg # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
464 1.9 mrg if (UNALIGNED_P (buffer))
465 1.9 mrg while (len > 64)
466 1.9 mrg {
467 1.9 mrg sha1_hw_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
468 1.9 mrg buffer = (const char *) buffer + 64;
469 1.9 mrg len -= 64;
470 1.9 mrg }
471 1.9 mrg else
472 1.9 mrg #endif
473 1.9 mrg {
474 1.9 mrg sha1_hw_process_block (buffer, len & ~63, ctx);
475 1.9 mrg buffer = (const char *) buffer + (len & ~63);
476 1.9 mrg len &= 63;
477 1.9 mrg }
478 1.9 mrg }
479 1.9 mrg
480 1.9 mrg /* Move remaining bytes in internal buffer. */
481 1.9 mrg if (len > 0)
482 1.9 mrg {
483 1.9 mrg size_t left_over = ctx->buflen;
484 1.9 mrg
485 1.9 mrg memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
486 1.9 mrg left_over += len;
487 1.9 mrg if (left_over >= 64)
488 1.9 mrg {
489 1.9 mrg sha1_hw_process_block (ctx->buffer, 64, ctx);
490 1.9 mrg left_over -= 64;
491 1.9 mrg memmove (ctx->buffer, &ctx->buffer[16], left_over);
492 1.9 mrg }
493 1.9 mrg ctx->buflen = left_over;
494 1.9 mrg }
495 1.9 mrg }
496 1.9 mrg
497 1.9 mrg /* Process LEN bytes of BUFFER, accumulating context into CTX.
498 1.9 mrg Using CPU specific intrinsics.

   The function is compiled for the "sse4.1" and "sha" targets (see
   the attribute below); sha1_choose_process_bytes only hands out the
   hardware path after checking the corresponding CPUID bits.  Input
   is consumed in 64-byte steps (four 16-byte vectors per loop
   iteration) with unaligned loads.  As in sha1_process_block, LEN is
   added to the byte counter CTX->total[], and CTX->A..E are updated
   once all chunks are done.  */
499 1.9 mrg
  /* NOTE(review): this #ifdef and the one inside the body test the
     same macro as the enclosing #if at the start of the HW section,
     so within this file they look redundant.  */
500 1.9 mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
501 1.9 mrg __attribute__((__target__ ("sse4.1,sha")))
502 1.9 mrg #endif
503 1.9 mrg static void
504 1.9 mrg sha1_hw_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx)
505 1.9 mrg {
506 1.9 mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
507 1.9 mrg /* Implemented from
508 1.9 mrg https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html */
509 1.9 mrg const __m128i *words = (const __m128i *) buffer;
510 1.9 mrg const __m128i *endp = (const __m128i *) ((const char *) buffer + len);
511 1.9 mrg __m128i abcd, abcd_save, e0, e0_save, e1, msg0, msg1, msg2, msg3;
  /* Shuffle control that reverses all sixteen bytes of a vector
     (destination byte i takes source byte 15 - i).  Applied to each
     loaded 16-byte piece of the message.  */
512 1.9 mrg const __m128i shuf_mask
513 1.9 mrg = _mm_set_epi64x (0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  /* Compile-time check: the array size becomes -1, which does not
     compile, unless A, B, C and D are laid out back to back in
     struct sha1_ctx.  The 128-bit load and store of &ctx->A below
     rely on that layout.  */
514 1.9 mrg char check[((offsetof (struct sha1_ctx, B)
515 1.9 mrg == offsetof (struct sha1_ctx, A) + sizeof (ctx->A))
516 1.9 mrg && (offsetof (struct sha1_ctx, C)
517 1.9 mrg == offsetof (struct sha1_ctx, A) + 2 * sizeof (ctx->A))
518 1.9 mrg && (offsetof (struct sha1_ctx, D)
519 1.9 mrg == offsetof (struct sha1_ctx, A) + 3 * sizeof (ctx->A)))
520 1.9 mrg ? 1 : -1];
521 1.9 mrg
522 1.9 mrg /* First increment the byte count. RFC 1321 specifies the possible
523 1.9 mrg length of the file up to 2^64 bits. Here we only compute the
524 1.9 mrg number of bytes. Do a double word increment. */
525 1.9 mrg ctx->total[0] += len;
  /* (len >> 31) >> 1 is LEN shifted right by 32 bits, written as two
     shifts; the comparison adds the carry out of total[0].  */
526 1.9 mrg ctx->total[1] += ((len >> 31) >> 1) + (ctx->total[0] < len);
527 1.9 mrg
  /* Reference CHECK once so that it does not count as unused.  */
528 1.9 mrg (void) &check[0];
  /* Load A..D with one unaligned 128-bit load and reverse the order
     of the four 32-bit lanes (shuffle control 0x1b), so that A ends
     up in the highest lane.  E is placed in the highest lane of its
     own vector with the other lanes zero.  */
529 1.9 mrg abcd = _mm_loadu_si128 ((const __m128i *) &ctx->A);
530 1.9 mrg e0 = _mm_set_epi32 (ctx->E, 0, 0, 0);
531 1.9 mrg abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
532 1.9 mrg
  /* One iteration per 64-byte chunk.  Each group below performs four
     rounds with _mm_sha1rnds4_epu32, whose last argument is 0 for
     rounds 0..19, 1 for 20..39, 2 for 40..59 and 3 for 60..79.  E0
     and E1 alternate as the E operand of consecutive groups, and
     MSG0..MSG3 are recycled to build the next message-schedule words
     with the sha1msg1 / xor / sha1msg2 sequence.  */
533 1.9 mrg while (words < endp)
534 1.9 mrg {
  /* Remember the state on entry; it is added back in at the end.  */
535 1.9 mrg abcd_save = abcd;
536 1.9 mrg e0_save = e0;
537 1.9 mrg
538 1.9 mrg /* 0..3 */
539 1.9 mrg msg0 = _mm_loadu_si128 (words);
540 1.9 mrg msg0 = _mm_shuffle_epi8 (msg0, shuf_mask);
541 1.9 mrg e0 = _mm_add_epi32 (e0, msg0);
542 1.9 mrg e1 = abcd;
543 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
544 1.9 mrg
545 1.9 mrg /* 4..7 */
546 1.9 mrg msg1 = _mm_loadu_si128 (words + 1);
547 1.9 mrg msg1 = _mm_shuffle_epi8 (msg1, shuf_mask);
548 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1);
549 1.9 mrg e0 = abcd;
550 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
551 1.9 mrg msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
552 1.9 mrg
553 1.9 mrg /* 8..11 */
554 1.9 mrg msg2 = _mm_loadu_si128 (words + 2);
555 1.9 mrg msg2 = _mm_shuffle_epi8 (msg2, shuf_mask);
556 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2);
557 1.9 mrg e1 = abcd;
558 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
559 1.9 mrg msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
560 1.9 mrg msg0 = _mm_xor_si128 (msg0, msg2);
561 1.9 mrg
562 1.9 mrg /* 12..15 */
563 1.9 mrg msg3 = _mm_loadu_si128 (words + 3);
564 1.9 mrg msg3 = _mm_shuffle_epi8 (msg3, shuf_mask);
565 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3);
566 1.9 mrg e0 = abcd;
567 1.9 mrg msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
568 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 0);
569 1.9 mrg msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
570 1.9 mrg msg1 = _mm_xor_si128 (msg1, msg3);
571 1.9 mrg
572 1.9 mrg /* 16..19 */
573 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg0);
574 1.9 mrg e1 = abcd;
575 1.9 mrg msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
576 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 0);
577 1.9 mrg msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
578 1.9 mrg msg2 = _mm_xor_si128 (msg2, msg0);
579 1.9 mrg
580 1.9 mrg /* 20..23 */
581 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1);
582 1.9 mrg e0 = abcd;
583 1.9 mrg msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
584 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
585 1.9 mrg msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
586 1.9 mrg msg3 = _mm_xor_si128 (msg3, msg1);
587 1.9 mrg
588 1.9 mrg /* 24..27 */
589 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2);
590 1.9 mrg e1 = abcd;
591 1.9 mrg msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
592 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
593 1.9 mrg msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
594 1.9 mrg msg0 = _mm_xor_si128 (msg0, msg2);
595 1.9 mrg
596 1.9 mrg /* 28..31 */
597 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3);
598 1.9 mrg e0 = abcd;
599 1.9 mrg msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
600 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
601 1.9 mrg msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
602 1.9 mrg msg1 = _mm_xor_si128 (msg1, msg3);
603 1.9 mrg
604 1.9 mrg /* 32..35 */
605 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg0);
606 1.9 mrg e1 = abcd;
607 1.9 mrg msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
608 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 1);
609 1.9 mrg msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
610 1.9 mrg msg2 = _mm_xor_si128 (msg2, msg0);
611 1.9 mrg
612 1.9 mrg /* 36..39 */
613 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1);
614 1.9 mrg e0 = abcd;
615 1.9 mrg msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
616 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 1);
617 1.9 mrg msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
618 1.9 mrg msg3 = _mm_xor_si128 (msg3, msg1);
619 1.9 mrg
620 1.9 mrg /* 40..43 */
621 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2);
622 1.9 mrg e1 = abcd;
623 1.9 mrg msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
624 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
625 1.9 mrg msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
626 1.9 mrg msg0 = _mm_xor_si128 (msg0, msg2);
627 1.9 mrg
628 1.9 mrg /* 44..47 */
629 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3);
630 1.9 mrg e0 = abcd;
631 1.9 mrg msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
632 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
633 1.9 mrg msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
634 1.9 mrg msg1 = _mm_xor_si128 (msg1, msg3);
635 1.9 mrg
636 1.9 mrg /* 48..51 */
637 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg0);
638 1.9 mrg e1 = abcd;
639 1.9 mrg msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
640 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
641 1.9 mrg msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
642 1.9 mrg msg2 = _mm_xor_si128 (msg2, msg0);
643 1.9 mrg
644 1.9 mrg /* 52..55 */
645 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1);
646 1.9 mrg e0 = abcd;
647 1.9 mrg msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
648 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 2);
649 1.9 mrg msg0 = _mm_sha1msg1_epu32 (msg0, msg1);
650 1.9 mrg msg3 = _mm_xor_si128 (msg3, msg1);
651 1.9 mrg
652 1.9 mrg /* 56..59 */
653 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2);
654 1.9 mrg e1 = abcd;
655 1.9 mrg msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
656 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 2);
657 1.9 mrg msg1 = _mm_sha1msg1_epu32 (msg1, msg2);
658 1.9 mrg msg0 = _mm_xor_si128 (msg0, msg2);
659 1.9 mrg
660 1.9 mrg /* 60..63 */
661 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3);
662 1.9 mrg e0 = abcd;
663 1.9 mrg msg0 = _mm_sha1msg2_epu32 (msg0, msg3);
664 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
665 1.9 mrg msg2 = _mm_sha1msg1_epu32 (msg2, msg3);
666 1.9 mrg msg1 = _mm_xor_si128 (msg1, msg3);
667 1.9 mrg
668 1.9 mrg /* 64..67 */
669 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg0);
670 1.9 mrg e1 = abcd;
671 1.9 mrg msg1 = _mm_sha1msg2_epu32 (msg1, msg0);
672 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
673 1.9 mrg msg3 = _mm_sha1msg1_epu32 (msg3, msg0);
674 1.9 mrg msg2 = _mm_xor_si128 (msg2, msg0);
675 1.9 mrg
  /* From here on no further schedule words need to be started, so
     the sha1msg1 steps are dropped.  */
676 1.9 mrg /* 68..71 */
677 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg1);
678 1.9 mrg e0 = abcd;
679 1.9 mrg msg2 = _mm_sha1msg2_epu32 (msg2, msg1);
680 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
681 1.9 mrg msg3 = _mm_xor_si128 (msg3, msg1);
682 1.9 mrg
683 1.9 mrg /* 72..75 */
684 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, msg2);
685 1.9 mrg e1 = abcd;
686 1.9 mrg msg3 = _mm_sha1msg2_epu32 (msg3, msg2);
687 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e0, 3);
688 1.9 mrg
689 1.9 mrg /* 76..79 */
690 1.9 mrg e1 = _mm_sha1nexte_epu32 (e1, msg3);
691 1.9 mrg e0 = abcd;
692 1.9 mrg abcd = _mm_sha1rnds4_epu32 (abcd, e1, 3);
693 1.9 mrg
  /* Combine the result of the 80 rounds with the state saved at the
     top of the loop: sha1nexte does it for E, a lane-wise add for
     A..D.  */
694 1.9 mrg /* Finalize. */
695 1.9 mrg e0 = _mm_sha1nexte_epu32 (e0, e0_save);
696 1.9 mrg abcd = _mm_add_epi32 (abcd, abcd_save);
697 1.9 mrg
  /* Four 16-byte vectors, i.e. one 64-byte chunk, were consumed.  */
698 1.9 mrg words = words + 4;
699 1.9 mrg }
700 1.9 mrg
  /* Undo the lane reversal and write A..D back with one store; E is
     taken from the highest lane of E0.  */
701 1.9 mrg abcd = _mm_shuffle_epi32 (abcd, 0x1b); /* 0, 1, 2, 3 */
702 1.9 mrg _mm_storeu_si128 ((__m128i *) &ctx->A, abcd);
703 1.9 mrg ctx->E = _mm_extract_epi32 (e0, 3);
704 1.9 mrg #endif
705 1.9 mrg }
706 1.9 mrg #endif
707 1.9 mrg
708 1.9 mrg /* Return sha1_process_bytes or some hardware optimized version thereof
709 1.9 mrg depending on current CPU. */
710 1.9 mrg
711 1.9 mrg sha1_process_bytes_fn
712 1.9 mrg sha1_choose_process_bytes (void)
713 1.9 mrg {
714 1.9 mrg #ifdef HAVE_X86_SHA1_HW_SUPPORT
715 1.9 mrg unsigned int eax, ebx, ecx, edx;
716 1.9 mrg if (__get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx)
717 1.9 mrg && (ebx & bit_SHA) != 0
718 1.9 mrg && __get_cpuid (1, &eax, &ebx, &ecx, &edx)
719 1.9 mrg && (ecx & bit_SSE4_1) != 0)
720 1.9 mrg return sha1_hw_process_bytes;
721 1.9 mrg #endif
722 1.9 mrg return sha1_process_bytes;
723 1.9 mrg }
724