crc32.c revision 1.7 1 /* $NetBSD: crc32.c,v 1.7 2024/09/22 19:12:27 christos Exp $ */
2
3 /* crc32.c -- compute the CRC-32 of a data stream
4 * Copyright (C) 1995-2022 Mark Adler
5 * For conditions of distribution and use, see copyright notice in zlib.h
6 *
7 * This interleaved implementation of a CRC makes use of pipelined multiple
8 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
9 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
10 */
11
12 /* @(#) Id */
13
14 /*
15 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
16 protection on the static variables used to control the first-use generation
17 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
18 first call get_crc_table() to initialize the tables before allowing more than
19 one thread to use crc32().
20
21 MAKECRCH can be #defined to write out crc32.h. A main() routine is also
22 produced, so that this one source file can be compiled to an executable.
23 */
24
25 #ifdef MAKECRCH
26 # include <stdio.h>
27 # ifndef DYNAMIC_CRC_TABLE
28 # define DYNAMIC_CRC_TABLE
29 # endif /* !DYNAMIC_CRC_TABLE */
30 #endif /* MAKECRCH */
31
32 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
33
34 /*
35 A CRC of a message is computed on N braids of words in the message, where
36 each word consists of W bytes (4 or 8). If N is 3, for example, then three
37 running sparse CRCs are calculated respectively on each braid, at these
38 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
39 This is done starting at a word boundary, and continues until as many blocks
40 of N * W bytes as are available have been processed. The results are combined
41 into a single CRC at the end. For this code, N must be in the range 1..6 and
42 W must be 4 or 8. The upper limit on N can be increased if desired by adding
43 more #if blocks, extending the patterns apparent in the code. In addition,
44 crc32.h would need to be regenerated, if the maximum N value is increased.
45
46 N and W are chosen empirically by benchmarking the execution time on a given
47 processor. The choices for N and W below were based on testing on Intel Kaby
48 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
49 Octeon II processors. The Intel, AMD, and ARM processors were all fastest
50 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
51 They were all tested with either gcc or clang, all using the -O3 optimization
52 level. Your mileage may vary.
53 */
54
55 /* Define N */
56 #ifdef Z_TESTN
57 # define N Z_TESTN
58 #else
59 # define N 5
60 #endif
61 #if N < 1 || N > 6
62 # error N must be in 1..6
63 #endif
64
65 /*
66 z_crc_t must be at least 32 bits. z_word_t must be at least as long as
67 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
68 that bytes are eight bits.
69 */
70
71 /*
72 Define W and the associated z_word_t type. If W is not defined, then a
73 braided calculation is not used, and the associated tables and code are not
74 compiled.
75 */
76 #ifdef Z_TESTW
77 # if Z_TESTW-1 != -1
78 # define W Z_TESTW
79 # endif
80 #else
81 # ifdef MAKECRCH
82 # define W 8 /* required for MAKECRCH */
83 # else
84 # if defined(__x86_64__) || defined(__aarch64__)
85 # define W 8
86 # else
87 # define W 4
88 # endif
89 # endif
90 #endif
91 #ifdef W
92 # if W == 8 && defined(Z_U8)
93 typedef Z_U8 z_word_t;
94 # elif defined(Z_U4)
95 # undef W
96 # define W 4
97 typedef Z_U4 z_word_t;
98 # else
99 # undef W
100 # endif
101 #endif
102
103 /* If available, use the ARM processor CRC32 instruction. */
104 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
105 # define ARMCRC32
106 #endif
107
108 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
109 /*
110 Swap the bytes in a z_word_t to convert between little and big endian. Any
111 self-respecting compiler will optimize this to a single machine byte-swap
112 instruction, if one is available. This assumes that word_t is either 32 bits
113 or 64 bits.
114 */
115 local z_word_t byte_swap(z_word_t word) {
116 # if W == 8
117 return
118 (word & 0xff00000000000000) >> 56 |
119 (word & 0xff000000000000) >> 40 |
120 (word & 0xff0000000000) >> 24 |
121 (word & 0xff00000000) >> 8 |
122 (word & 0xff000000) << 8 |
123 (word & 0xff0000) << 24 |
124 (word & 0xff00) << 40 |
125 (word & 0xff) << 56;
126 # else /* W == 4 */
127 return
128 (word & 0xff000000) >> 24 |
129 (word & 0xff0000) >> 8 |
130 (word & 0xff00) << 8 |
131 (word & 0xff) << 24;
132 # endif
133 }
134 #endif
135
136 #ifdef DYNAMIC_CRC_TABLE
137 /* =========================================================================
138 * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
139 * below.
140 */
141 local z_crc_t FAR x2n_table[32];
142 #else
143 /* =========================================================================
144 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
145 * of x for combining CRC-32s, all made by make_crc_table().
146 */
147 # include "crc32.h"
148 #endif
149
150 /* CRC polynomial. */
151 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */
152
153 /*
154 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
155 reflected. For speed, this requires that a not be zero.
156 */
157 local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
158 z_crc_t m, p;
159
160 m = (z_crc_t)1 << 31;
161 p = 0;
162 for (;;) {
163 if (a & m) {
164 p ^= b;
165 if ((a & (m - 1)) == 0)
166 break;
167 }
168 m >>= 1;
169 b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
170 }
171 return p;
172 }
173
174 /*
175 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
176 initialized.
177 */
178 local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
179 z_crc_t p;
180
181 p = (z_crc_t)1 << 31; /* x^0 == 1 */
182 while (n) {
183 if (n & 1)
184 p = multmodp(x2n_table[k & 31], p);
185 n >>= 1;
186 k++;
187 }
188 return p;
189 }
190
191 #ifdef DYNAMIC_CRC_TABLE
192 /* =========================================================================
193 * Build the tables for byte-wise and braided CRC-32 calculations, and a table
194 * of powers of x for combining CRC-32s.
195 */
196 local z_crc_t FAR crc_table[256];
197 #ifdef W
198 local z_word_t FAR crc_big_table[256];
199 local z_crc_t FAR crc_braid_table[W][256];
200 local z_word_t FAR crc_braid_big_table[W][256];
201 local void braid(z_crc_t [][256], z_word_t [][256], int, int);
202 #endif
203 #ifdef MAKECRCH
204 local void write_table(FILE *, const z_crc_t FAR *, int);
205 local void write_table32hi(FILE *, const z_word_t FAR *, int);
206 local void write_table64(FILE *, const z_word_t FAR *, int);
207 #endif /* MAKECRCH */
208
209 /*
210 Define a once() function depending on the availability of atomics. If this is
211 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
212 multiple threads, and if atomics are not available, then get_crc_table() must
213 be called to initialize the tables and must return before any threads are
214 allowed to compute or combine CRCs.
215 */
216
217 /* Definition of once functionality. */
218 typedef struct once_s once_t;
219
220 /* Check for the availability of atomics. */
221 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
222 !defined(__STDC_NO_ATOMICS__)
223
224 #include <stdatomic.h>
225
226 /* Structure for once(), which must be initialized with ONCE_INIT. */
227 struct once_s {
228 atomic_flag begun;
229 atomic_int done;
230 };
231 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
232
233 /*
234 Run the provided init() function exactly once, even if multiple threads
235 invoke once() at the same time. The state must be a once_t initialized with
236 ONCE_INIT.
237 */
238 local void once(once_t *state, void (*init)(void)) {
239 if (!atomic_load(&state->done)) {
240 if (atomic_flag_test_and_set(&state->begun))
241 while (!atomic_load(&state->done))
242 ;
243 else {
244 init();
245 atomic_store(&state->done, 1);
246 }
247 }
248 }
249
250 #else /* no atomics */
251
252 /* Structure for once(), which must be initialized with ONCE_INIT. */
253 struct once_s {
254 volatile int begun;
255 volatile int done;
256 };
257 #define ONCE_INIT {0, 0}
258
259 /* Test and set. Alas, not atomic, but tries to minimize the period of
260 vulnerability. */
261 local int test_and_set(int volatile *flag) {
262 int was;
263
264 was = *flag;
265 *flag = 1;
266 return was;
267 }
268
269 /* Run the provided init() function once. This is not thread-safe. */
270 local void once(once_t *state, void (*init)(void)) {
271 if (!state->done) {
272 if (test_and_set(&state->begun))
273 while (!state->done)
274 ;
275 else {
276 init();
277 state->done = 1;
278 }
279 }
280 }
281
282 #endif
283
284 /* State for once(). */
285 local once_t made = ONCE_INIT;
286
287 /*
288 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
289 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
290
291 Polynomials over GF(2) are represented in binary, one bit per coefficient,
292 with the lowest powers in the most significant bit. Then adding polynomials
293 is just exclusive-or, and multiplying a polynomial by x is a right shift by
294 one. If we call the above polynomial p, and represent a byte as the
295 polynomial q, also with the lowest power in the most significant bit (so the
296 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
297 where a mod b means the remainder after dividing a by b.
298
299 This calculation is done using the shift-register method of multiplying and
300 taking the remainder. The register is initialized to zero, and for each
301 incoming bit, x^32 is added mod p to the register if the bit is a one (where
302 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
303 (which is shifting right by one and adding x^32 mod p if the bit shifted out
304 is a one). We start with the highest power (least significant bit) of q and
305 repeat for all eight bits of q.
306
307 The table is simply the CRC of all possible eight bit values. This is all the
308 information needed to generate CRCs on data a byte at a time for all
309 combinations of CRC register values and incoming bytes.
310 */
311
312 local void make_crc_table(void) {
313 unsigned i, j, n;
314 z_crc_t p;
315
316 /* initialize the CRC of bytes tables */
317 for (i = 0; i < 256; i++) {
318 p = i;
319 for (j = 0; j < 8; j++)
320 p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
321 crc_table[i] = p;
322 #ifdef W
323 crc_big_table[i] = byte_swap(p);
324 #endif
325 }
326
327 /* initialize the x^2^n mod p(x) table */
328 p = (z_crc_t)1 << 30; /* x^1 */
329 x2n_table[0] = p;
330 for (n = 1; n < 32; n++)
331 x2n_table[n] = p = multmodp(p, p);
332
333 #ifdef W
334 /* initialize the braiding tables -- needs x2n_table[] */
335 braid(crc_braid_table, crc_braid_big_table, N, W);
336 #endif
337
338 #ifdef MAKECRCH
339 {
340 /*
341 The crc32.h header file contains tables for both 32-bit and 64-bit
342 z_word_t's, and so requires a 64-bit type be available. In that case,
343 z_word_t must be defined to be 64-bits. This code then also generates
344 and writes out the tables for the case that z_word_t is 32 bits.
345 */
346 #if !defined(W) || W != 8
347 # error Need a 64-bit integer type in order to generate crc32.h.
348 #endif
349 FILE *out;
350 int k, n;
351 z_crc_t ltl[8][256];
352 z_word_t big[8][256];
353
354 out = fopen("crc32.h", "w");
355 if (out == NULL) return;
356
357 /* write out little-endian CRC table to crc32.h */
358 fprintf(out,
359 "/* crc32.h -- tables for rapid CRC calculation\n"
360 " * Generated automatically by crc32.c\n */\n"
361 "\n"
362 "local const z_crc_t FAR crc_table[] = {\n"
363 " ");
364 write_table(out, crc_table, 256);
365 fprintf(out,
366 "};\n");
367
368 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
369 fprintf(out,
370 "\n"
371 "#ifdef W\n"
372 "\n"
373 "#if W == 8\n"
374 "\n"
375 "local const z_word_t FAR crc_big_table[] = {\n"
376 " ");
377 write_table64(out, crc_big_table, 256);
378 fprintf(out,
379 "};\n");
380
381 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
382 fprintf(out,
383 "\n"
384 "#else /* W == 4 */\n"
385 "\n"
386 "local const z_word_t FAR crc_big_table[] = {\n"
387 " ");
388 write_table32hi(out, crc_big_table, 256);
389 fprintf(out,
390 "};\n"
391 "\n"
392 "#endif\n");
393
394 /* write out braid tables for each value of N */
395 for (n = 1; n <= 6; n++) {
396 fprintf(out,
397 "\n"
398 "#if N == %d\n", n);
399
400 /* compute braid tables for this N and 64-bit word_t */
401 braid(ltl, big, n, 8);
402
403 /* write out braid tables for 64-bit z_word_t to crc32.h */
404 fprintf(out,
405 "\n"
406 "#if W == 8\n"
407 "\n"
408 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
409 for (k = 0; k < 8; k++) {
410 fprintf(out, " {");
411 write_table(out, ltl[k], 256);
412 fprintf(out, "}%s", k < 7 ? ",\n" : "");
413 }
414 fprintf(out,
415 "};\n"
416 "\n"
417 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
418 for (k = 0; k < 8; k++) {
419 fprintf(out, " {");
420 write_table64(out, big[k], 256);
421 fprintf(out, "}%s", k < 7 ? ",\n" : "");
422 }
423 fprintf(out,
424 "};\n");
425
426 /* compute braid tables for this N and 32-bit word_t */
427 braid(ltl, big, n, 4);
428
429 /* write out braid tables for 32-bit z_word_t to crc32.h */
430 fprintf(out,
431 "\n"
432 "#else /* W == 4 */\n"
433 "\n"
434 "local const z_crc_t FAR crc_braid_table[][256] = {\n");
435 for (k = 0; k < 4; k++) {
436 fprintf(out, " {");
437 write_table(out, ltl[k], 256);
438 fprintf(out, "}%s", k < 3 ? ",\n" : "");
439 }
440 fprintf(out,
441 "};\n"
442 "\n"
443 "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
444 for (k = 0; k < 4; k++) {
445 fprintf(out, " {");
446 write_table32hi(out, big[k], 256);
447 fprintf(out, "}%s", k < 3 ? ",\n" : "");
448 }
449 fprintf(out,
450 "};\n"
451 "\n"
452 "#endif\n"
453 "\n"
454 "#endif\n");
455 }
456 fprintf(out,
457 "\n"
458 "#endif\n");
459
460 /* write out zeros operator table to crc32.h */
461 fprintf(out,
462 "\n"
463 "local const z_crc_t FAR x2n_table[] = {\n"
464 " ");
465 write_table(out, x2n_table, 32);
466 fprintf(out,
467 "};\n");
468 fclose(out);
469 }
470 #endif /* MAKECRCH */
471 }
472
473 #ifdef MAKECRCH
474
475 /*
476 Write the 32-bit values in table[0..k-1] to out, five per line in
477 hexadecimal separated by commas.
478 */
479 local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
480 int n;
481
482 for (n = 0; n < k; n++)
483 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
484 (unsigned long)(table[n]),
485 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
486 }
487
488 /*
489 Write the high 32-bits of each value in table[0..k-1] to out, five per line
490 in hexadecimal separated by commas.
491 */
492 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
493 int n;
494
495 for (n = 0; n < k; n++)
496 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ",
497 (unsigned long)(table[n] >> 32),
498 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
499 }
500
501 /*
502 Write the 64-bit values in table[0..k-1] to out, three per line in
503 hexadecimal separated by commas. This assumes that if there is a 64-bit
504 type, then there is also a long long integer type, and it is at least 64
505 bits. If not, then the type cast and format string can be adjusted
506 accordingly.
507 */
508 local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
509 int n;
510
511 for (n = 0; n < k; n++)
512 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ",
513 (unsigned long long)(table[n]),
514 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
515 }
516
517 /* Actually do the deed. */
518 int main(void) {
519 make_crc_table();
520 return 0;
521 }
522
523 #endif /* MAKECRCH */
524
525 #ifdef W
526 /*
527 Generate the little and big-endian braid tables for the given n and z_word_t
528 size w. Each array must have room for w blocks of 256 elements.
529 */
530 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
531 int k;
532 z_crc_t i, p, q;
533 for (k = 0; k < w; k++) {
534 p = x2nmodp((n * w + 3 - k) << 3, 0);
535 ltl[k][0] = 0;
536 big[w - 1 - k][0] = 0;
537 for (i = 1; i < 256; i++) {
538 ltl[k][i] = q = multmodp(i << 24, p);
539 big[w - 1 - k][i] = byte_swap(q);
540 }
541 }
542 }
543 #endif
544
545 #endif /* DYNAMIC_CRC_TABLE */
546
547 /* =========================================================================
548 * This function can be used by asm versions of crc32(), and to force the
549 * generation of the CRC tables in a threaded application.
550 */
551 const z_crc_t FAR * ZEXPORT get_crc_table(void) {
552 #ifdef DYNAMIC_CRC_TABLE
553 once(&made, make_crc_table);
554 #endif /* DYNAMIC_CRC_TABLE */
555 return (const z_crc_t FAR *)crc_table;
556 }
557
558 /* =========================================================================
559 * Use ARM machine instructions if available. This will compute the CRC about
560 * ten times faster than the braided calculation. This code does not check for
561 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
562 * only be defined if the compilation specifies an ARM processor architecture
563 * that has the instructions. For example, compiling with -march=armv8.1-a or
564 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
565 * instructions.
566 */
567 #ifdef ARMCRC32
568
569 /*
570 Constants empirically determined to maximize speed. These values are from
571 measurements on a Cortex-A57. Your mileage may vary.
572 */
573 #define Z_BATCH 3990 /* number of words in a batch */
574 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
575 #define Z_BATCH_MIN 800 /* fewest words in a final batch */
576
577 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
578 z_size_t len) {
579 z_crc_t val;
580 z_word_t crc1, crc2;
581 const z_word_t *word;
582 z_word_t val0, val1, val2;
583 z_size_t last, last2, i;
584 z_size_t num;
585
586 /* Return initial CRC, if requested. */
587 if (buf == Z_NULL) return 0;
588
589 #ifdef DYNAMIC_CRC_TABLE
590 once(&made, make_crc_table);
591 #endif /* DYNAMIC_CRC_TABLE */
592
593 /* Pre-condition the CRC */
594 crc = (~crc) & 0xffffffff;
595
596 /* Compute the CRC up to a word boundary. */
597 while (len && ((z_size_t)buf & 7) != 0) {
598 len--;
599 val = *buf++;
600 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
601 }
602
603 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
604 word = (z_word_t const *)buf;
605 num = len >> 3;
606 len &= 7;
607
608 /* Do three interleaved CRCs to realize the throughput of one crc32x
609 instruction per cycle. Each CRC is calculated on Z_BATCH words. The
610 three CRCs are combined into a single CRC after each set of batches. */
611 while (num >= 3 * Z_BATCH) {
612 crc1 = 0;
613 crc2 = 0;
614 for (i = 0; i < Z_BATCH; i++) {
615 val0 = word[i];
616 val1 = word[i + Z_BATCH];
617 val2 = word[i + 2 * Z_BATCH];
618 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
619 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
620 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
621 }
622 word += 3 * Z_BATCH;
623 num -= 3 * Z_BATCH;
624 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
625 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
626 }
627
628 /* Do one last smaller batch with the remaining words, if there are enough
629 to pay for the combination of CRCs. */
630 last = num / 3;
631 if (last >= Z_BATCH_MIN) {
632 last2 = last << 1;
633 crc1 = 0;
634 crc2 = 0;
635 for (i = 0; i < last; i++) {
636 val0 = word[i];
637 val1 = word[i + last];
638 val2 = word[i + last2];
639 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
640 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
641 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
642 }
643 word += 3 * last;
644 num -= 3 * last;
645 val = x2nmodp(last, 6);
646 crc = multmodp(val, crc) ^ crc1;
647 crc = multmodp(val, crc) ^ crc2;
648 }
649
650 /* Compute the CRC on any remaining words. */
651 for (i = 0; i < num; i++) {
652 val0 = word[i];
653 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
654 }
655 word += num;
656
657 /* Complete the CRC on any remaining bytes. */
658 buf = (const unsigned char FAR *)word;
659 while (len) {
660 len--;
661 val = *buf++;
662 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
663 }
664
665 /* Return the CRC, post-conditioned. */
666 return crc ^ 0xffffffff;
667 }
668
669 #else
670
671 #ifdef W
672
673 /*
674 Return the CRC of the W bytes in the word_t data, taking the
675 least-significant byte of the word as the first byte of data, without any pre
676 or post conditioning. This is used to combine the CRCs of each braid.
677 */
678 local z_crc_t crc_word(z_word_t data) {
679 int k;
680 for (k = 0; k < W; k++)
681 data = (data >> 8) ^ crc_table[data & 0xff];
682 return (z_crc_t)data;
683 }
684
685 local z_word_t crc_word_big(z_word_t data) {
686 int k;
687 for (k = 0; k < W; k++)
688 data = (data << 8) ^
689 crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
690 return data;
691 }
692
693 #endif
694
695 /* ========================================================================= */
696 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
697 z_size_t len) {
698 /* Return initial CRC, if requested. */
699 if (buf == Z_NULL) return 0;
700
701 #ifdef DYNAMIC_CRC_TABLE
702 once(&made, make_crc_table);
703 #endif /* DYNAMIC_CRC_TABLE */
704
705 /* Pre-condition the CRC */
706 crc = (~crc) & 0xffffffff;
707
708 #ifdef W
709
710 /* If provided enough bytes, do a braided CRC calculation. */
711 if (len >= N * W + W - 1) {
712 z_size_t blks;
713 z_word_t const *words;
714 unsigned endian;
715 int k;
716
717 /* Compute the CRC up to a z_word_t boundary. */
718 while (len && ((z_size_t)buf & (W - 1)) != 0) {
719 len--;
720 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
721 }
722
723 /* Compute the CRC on as many N z_word_t blocks as are available. */
724 blks = len / (N * W);
725 len -= blks * N * W;
726 words = (z_word_t const *)buf;
727
728 /* Do endian check at execution time instead of compile time, since ARM
729 processors can change the endianness at execution time. If the
730 compiler knows what the endianness will be, it can optimize out the
731 check and the unused branch. */
732 endian = 1;
733 if (*(unsigned char *)&endian) {
734 /* Little endian. */
735
736 z_crc_t crc0;
737 z_word_t word0;
738 #if N > 1
739 z_crc_t crc1;
740 z_word_t word1;
741 #if N > 2
742 z_crc_t crc2;
743 z_word_t word2;
744 #if N > 3
745 z_crc_t crc3;
746 z_word_t word3;
747 #if N > 4
748 z_crc_t crc4;
749 z_word_t word4;
750 #if N > 5
751 z_crc_t crc5;
752 z_word_t word5;
753 #endif
754 #endif
755 #endif
756 #endif
757 #endif
758
759 /* Initialize the CRC for each braid. */
760 crc0 = crc;
761 #if N > 1
762 crc1 = 0;
763 #if N > 2
764 crc2 = 0;
765 #if N > 3
766 crc3 = 0;
767 #if N > 4
768 crc4 = 0;
769 #if N > 5
770 crc5 = 0;
771 #endif
772 #endif
773 #endif
774 #endif
775 #endif
776
777 /*
778 Process the first blks-1 blocks, computing the CRCs on each braid
779 independently.
780 */
781 while (--blks) {
782 /* Load the word for each braid into registers. */
783 word0 = crc0 ^ words[0];
784 #if N > 1
785 word1 = crc1 ^ words[1];
786 #if N > 2
787 word2 = crc2 ^ words[2];
788 #if N > 3
789 word3 = crc3 ^ words[3];
790 #if N > 4
791 word4 = crc4 ^ words[4];
792 #if N > 5
793 word5 = crc5 ^ words[5];
794 #endif
795 #endif
796 #endif
797 #endif
798 #endif
799 words += N;
800
801 /* Compute and update the CRC for each word. The loop should
802 get unrolled. */
803 crc0 = crc_braid_table[0][word0 & 0xff];
804 #if N > 1
805 crc1 = crc_braid_table[0][word1 & 0xff];
806 #if N > 2
807 crc2 = crc_braid_table[0][word2 & 0xff];
808 #if N > 3
809 crc3 = crc_braid_table[0][word3 & 0xff];
810 #if N > 4
811 crc4 = crc_braid_table[0][word4 & 0xff];
812 #if N > 5
813 crc5 = crc_braid_table[0][word5 & 0xff];
814 #endif
815 #endif
816 #endif
817 #endif
818 #endif
819 for (k = 1; k < W; k++) {
820 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
821 #if N > 1
822 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
823 #if N > 2
824 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
825 #if N > 3
826 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
827 #if N > 4
828 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
829 #if N > 5
830 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
831 #endif
832 #endif
833 #endif
834 #endif
835 #endif
836 }
837 }
838
839 /*
840 Process the last block, combining the CRCs of the N braids at the
841 same time.
842 */
843 crc = crc_word(crc0 ^ words[0]);
844 #if N > 1
845 crc = crc_word(crc1 ^ words[1] ^ crc);
846 #if N > 2
847 crc = crc_word(crc2 ^ words[2] ^ crc);
848 #if N > 3
849 crc = crc_word(crc3 ^ words[3] ^ crc);
850 #if N > 4
851 crc = crc_word(crc4 ^ words[4] ^ crc);
852 #if N > 5
853 crc = crc_word(crc5 ^ words[5] ^ crc);
854 #endif
855 #endif
856 #endif
857 #endif
858 #endif
859 words += N;
860 }
861 else {
862 /* Big endian. */
863
864 z_word_t crc0, word0, comb;
865 #if N > 1
866 z_word_t crc1, word1;
867 #if N > 2
868 z_word_t crc2, word2;
869 #if N > 3
870 z_word_t crc3, word3;
871 #if N > 4
872 z_word_t crc4, word4;
873 #if N > 5
874 z_word_t crc5, word5;
875 #endif
876 #endif
877 #endif
878 #endif
879 #endif
880
881 /* Initialize the CRC for each braid. */
882 crc0 = byte_swap(crc);
883 #if N > 1
884 crc1 = 0;
885 #if N > 2
886 crc2 = 0;
887 #if N > 3
888 crc3 = 0;
889 #if N > 4
890 crc4 = 0;
891 #if N > 5
892 crc5 = 0;
893 #endif
894 #endif
895 #endif
896 #endif
897 #endif
898
899 /*
900 Process the first blks-1 blocks, computing the CRCs on each braid
901 independently.
902 */
903 while (--blks) {
904 /* Load the word for each braid into registers. */
905 word0 = crc0 ^ words[0];
906 #if N > 1
907 word1 = crc1 ^ words[1];
908 #if N > 2
909 word2 = crc2 ^ words[2];
910 #if N > 3
911 word3 = crc3 ^ words[3];
912 #if N > 4
913 word4 = crc4 ^ words[4];
914 #if N > 5
915 word5 = crc5 ^ words[5];
916 #endif
917 #endif
918 #endif
919 #endif
920 #endif
921 words += N;
922
923 /* Compute and update the CRC for each word. The loop should
924 get unrolled. */
925 crc0 = crc_braid_big_table[0][word0 & 0xff];
926 #if N > 1
927 crc1 = crc_braid_big_table[0][word1 & 0xff];
928 #if N > 2
929 crc2 = crc_braid_big_table[0][word2 & 0xff];
930 #if N > 3
931 crc3 = crc_braid_big_table[0][word3 & 0xff];
932 #if N > 4
933 crc4 = crc_braid_big_table[0][word4 & 0xff];
934 #if N > 5
935 crc5 = crc_braid_big_table[0][word5 & 0xff];
936 #endif
937 #endif
938 #endif
939 #endif
940 #endif
941 for (k = 1; k < W; k++) {
942 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
943 #if N > 1
944 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
945 #if N > 2
946 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
947 #if N > 3
948 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
949 #if N > 4
950 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
951 #if N > 5
952 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
953 #endif
954 #endif
955 #endif
956 #endif
957 #endif
958 }
959 }
960
961 /*
962 Process the last block, combining the CRCs of the N braids at the
963 same time.
964 */
965 comb = crc_word_big(crc0 ^ words[0]);
966 #if N > 1
967 comb = crc_word_big(crc1 ^ words[1] ^ comb);
968 #if N > 2
969 comb = crc_word_big(crc2 ^ words[2] ^ comb);
970 #if N > 3
971 comb = crc_word_big(crc3 ^ words[3] ^ comb);
972 #if N > 4
973 comb = crc_word_big(crc4 ^ words[4] ^ comb);
974 #if N > 5
975 comb = crc_word_big(crc5 ^ words[5] ^ comb);
976 #endif
977 #endif
978 #endif
979 #endif
980 #endif
981 words += N;
982 crc = byte_swap(comb);
983 }
984
985 /*
986 Update the pointer to the remaining bytes to process.
987 */
988 buf = (unsigned char const *)words;
989 }
990
991 #endif /* W */
992
993 /* Complete the computation of the CRC on any remaining bytes. */
994 while (len >= 8) {
995 len -= 8;
996 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
997 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
998 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
999 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1000 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1001 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1002 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1003 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1004 }
1005 while (len) {
1006 len--;
1007 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1008 }
1009
1010 /* Return the CRC, post-conditioned. */
1011 return crc ^ 0xffffffff;
1012 }
1013
1014 #endif
1015
1016 /* ========================================================================= */
1017 unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
1018 uInt len) {
1019 return crc32_z(crc, buf, len);
1020 }
1021
1022 /* ========================================================================= */
1023 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
1024 #ifdef DYNAMIC_CRC_TABLE
1025 once(&made, make_crc_table);
1026 #endif /* DYNAMIC_CRC_TABLE */
1027 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1028 }
1029
1030 /* ========================================================================= */
1031 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
1032 return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1033 }
1034
1035 /* ========================================================================= */
1036 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
1037 #ifdef DYNAMIC_CRC_TABLE
1038 once(&made, make_crc_table);
1039 #endif /* DYNAMIC_CRC_TABLE */
1040 return x2nmodp(len2, 3);
1041 }
1042
1043 /* ========================================================================= */
1044 uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
1045 return crc32_combine_gen64((z_off64_t)len2);
1046 }
1047
1048 /* ========================================================================= */
1049 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
1050 return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1051 }
1052