Home | History | Annotate | Line # | Download | only in zlib
      1  1.7  christos /*	$NetBSD: crc32.c,v 1.7 2024/09/22 19:12:27 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /* crc32.c -- compute the CRC-32 of a data stream
      4  1.6  christos  * Copyright (C) 1995-2022 Mark Adler
      5  1.1  christos  * For conditions of distribution and use, see copyright notice in zlib.h
      6  1.1  christos  *
      7  1.6  christos  * This interleaved implementation of a CRC makes use of pipelined multiple
      8  1.6  christos  * arithmetic-logic units, commonly found in modern CPU cores. It is due to
      9  1.6  christos  * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
     10  1.1  christos  */
     11  1.1  christos 
     12  1.6  christos /* @(#) Id */
     13  1.1  christos 
     14  1.1  christos /*
     15  1.1  christos   Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
     16  1.1  christos   protection on the static variables used to control the first-use generation
     17  1.6  christos   of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
     18  1.1  christos   first call get_crc_table() to initialize the tables before allowing more than
     19  1.1  christos   one thread to use crc32().
     20  1.5  christos 
     21  1.6  christos   MAKECRCH can be #defined to write out crc32.h. A main() routine is also
     22  1.6  christos   produced, so that this one source file can be compiled to an executable.
     23  1.1  christos  */
     24  1.1  christos 
     25  1.1  christos #ifdef MAKECRCH
     26  1.1  christos #  include <stdio.h>
     27  1.1  christos #  ifndef DYNAMIC_CRC_TABLE
     28  1.1  christos #    define DYNAMIC_CRC_TABLE
     29  1.1  christos #  endif /* !DYNAMIC_CRC_TABLE */
     30  1.1  christos #endif /* MAKECRCH */
     31  1.1  christos 
     32  1.6  christos #include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
     33  1.1  christos 
     34  1.6  christos  /*
     35  1.6  christos   A CRC of a message is computed on N braids of words in the message, where
     36  1.6  christos   each word consists of W bytes (4 or 8). If N is 3, for example, then three
     37  1.6  christos   running sparse CRCs are calculated respectively on each braid, at these
     38  1.6  christos   indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
     39  1.6  christos   This is done starting at a word boundary, and continues until as many blocks
     40  1.6  christos   of N * W bytes as are available have been processed. The results are combined
     41  1.6  christos   into a single CRC at the end. For this code, N must be in the range 1..6 and
     42  1.6  christos   W must be 4 or 8. The upper limit on N can be increased if desired by adding
     43  1.6  christos   more #if blocks, extending the patterns apparent in the code. In addition,
     44  1.6  christos   crc32.h would need to be regenerated, if the maximum N value is increased.
     45  1.6  christos 
     46  1.6  christos   N and W are chosen empirically by benchmarking the execution time on a given
     47  1.6  christos   processor. The choices for N and W below were based on testing on Intel Kaby
     48  1.6  christos   Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
     49  1.6  christos   Octeon II processors. The Intel, AMD, and ARM processors were all fastest
     50  1.6  christos   with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
     51  1.6  christos   They were all tested with either gcc or clang, all using the -O3 optimization
     52  1.6  christos   level. Your mileage may vary.
     53  1.6  christos  */
     54  1.6  christos 
     55  1.6  christos /* Define N */
     56  1.6  christos #ifdef Z_TESTN
     57  1.6  christos #  define N Z_TESTN
     58  1.6  christos #else
     59  1.6  christos #  define N 5
     60  1.6  christos #endif
     61  1.6  christos #if N < 1 || N > 6
     62  1.6  christos #  error N must be in 1..6
     63  1.6  christos #endif
     64  1.6  christos 
     65  1.6  christos /*
     66  1.6  christos   z_crc_t must be at least 32 bits. z_word_t must be at least as long as
     67  1.6  christos   z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
     68  1.6  christos   that bytes are eight bits.
     69  1.6  christos  */
     70  1.6  christos 
     71  1.6  christos /*
     72  1.6  christos   Define W and the associated z_word_t type. If W is not defined, then a
     73  1.6  christos   braided calculation is not used, and the associated tables and code are not
     74  1.6  christos   compiled.
     75  1.6  christos  */
     76  1.6  christos #ifdef Z_TESTW
     77  1.6  christos #  if Z_TESTW-1 != -1
     78  1.6  christos #    define W Z_TESTW
     79  1.6  christos #  endif
     80  1.6  christos #else
     81  1.6  christos #  ifdef MAKECRCH
     82  1.6  christos #    define W 8         /* required for MAKECRCH */
     83  1.6  christos #  else
     84  1.6  christos #    if defined(__x86_64__) || defined(__aarch64__)
     85  1.6  christos #      define W 8
     86  1.6  christos #    else
     87  1.6  christos #      define W 4
     88  1.6  christos #    endif
     89  1.6  christos #  endif
     90  1.6  christos #endif
     91  1.6  christos #ifdef W
     92  1.6  christos #  if W == 8 && defined(Z_U8)
     93  1.6  christos      typedef Z_U8 z_word_t;
     94  1.6  christos #  elif defined(Z_U4)
     95  1.6  christos #    undef W
     96  1.6  christos #    define W 4
     97  1.6  christos      typedef Z_U4 z_word_t;
     98  1.6  christos #  else
     99  1.6  christos #    undef W
    100  1.6  christos #  endif
    101  1.6  christos #endif
    102  1.1  christos 
    103  1.6  christos /* If available, use the ARM processor CRC32 instruction. */
    104  1.6  christos #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
    105  1.6  christos #  define ARMCRC32
    106  1.4       dsl #endif
    107  1.4       dsl 
    108  1.6  christos #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
    109  1.6  christos /*
    110  1.6  christos   Swap the bytes in a z_word_t to convert between little and big endian. Any
    111  1.6  christos   self-respecting compiler will optimize this to a single machine byte-swap
    112  1.6  christos   instruction, if one is available. This assumes that word_t is either 32 bits
    113  1.6  christos   or 64 bits.
    114  1.6  christos  */
    115  1.7  christos local z_word_t byte_swap(z_word_t word) {
    116  1.6  christos #  if W == 8
    117  1.6  christos     return
    118  1.6  christos         (word & 0xff00000000000000) >> 56 |
    119  1.6  christos         (word & 0xff000000000000) >> 40 |
    120  1.6  christos         (word & 0xff0000000000) >> 24 |
    121  1.6  christos         (word & 0xff00000000) >> 8 |
    122  1.6  christos         (word & 0xff000000) << 8 |
    123  1.6  christos         (word & 0xff0000) << 24 |
    124  1.6  christos         (word & 0xff00) << 40 |
    125  1.6  christos         (word & 0xff) << 56;
    126  1.6  christos #  else   /* W == 4 */
    127  1.6  christos     return
    128  1.6  christos         (word & 0xff000000) >> 24 |
    129  1.6  christos         (word & 0xff0000) >> 8 |
    130  1.6  christos         (word & 0xff00) << 8 |
    131  1.6  christos         (word & 0xff) << 24;
    132  1.6  christos #  endif
    133  1.6  christos }
    134  1.6  christos #endif
    135  1.5  christos 
    136  1.7  christos #ifdef DYNAMIC_CRC_TABLE
    137  1.7  christos /* =========================================================================
    138  1.7  christos  * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
    139  1.7  christos  * below.
    140  1.7  christos  */
    141  1.7  christos    local z_crc_t FAR x2n_table[32];
    142  1.7  christos #else
    143  1.7  christos /* =========================================================================
    144  1.7  christos  * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
    145  1.7  christos  * of x for combining CRC-32s, all made by make_crc_table().
    146  1.7  christos  */
    147  1.7  christos #  include "crc32.h"
    148  1.7  christos #endif
    149  1.7  christos 
    150  1.6  christos /* CRC polynomial. */
    151  1.6  christos #define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
    152  1.1  christos 
    153  1.7  christos /*
    154  1.7  christos   Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
    155  1.7  christos   reflected. For speed, this requires that a not be zero.
    156  1.7  christos  */
    157  1.7  christos local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
    158  1.7  christos     z_crc_t m, p;
    159  1.7  christos 
    160  1.7  christos     m = (z_crc_t)1 << 31;
    161  1.7  christos     p = 0;
    162  1.7  christos     for (;;) {
    163  1.7  christos         if (a & m) {
    164  1.7  christos             p ^= b;
    165  1.7  christos             if ((a & (m - 1)) == 0)
    166  1.7  christos                 break;
    167  1.7  christos         }
    168  1.7  christos         m >>= 1;
    169  1.7  christos         b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
    170  1.7  christos     }
    171  1.7  christos     return p;
    172  1.7  christos }
    173  1.7  christos 
    174  1.7  christos /*
    175  1.7  christos   Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
    176  1.7  christos   initialized.
    177  1.7  christos  */
    178  1.7  christos local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
    179  1.7  christos     z_crc_t p;
    180  1.7  christos 
    181  1.7  christos     p = (z_crc_t)1 << 31;           /* x^0 == 1 */
    182  1.7  christos     while (n) {
    183  1.7  christos         if (n & 1)
    184  1.7  christos             p = multmodp(x2n_table[k & 31], p);
    185  1.7  christos         n >>= 1;
    186  1.7  christos         k++;
    187  1.7  christos     }
    188  1.7  christos     return p;
    189  1.7  christos }
    190  1.7  christos 
    191  1.1  christos #ifdef DYNAMIC_CRC_TABLE
    192  1.7  christos /* =========================================================================
    193  1.7  christos  * Build the tables for byte-wise and braided CRC-32 calculations, and a table
    194  1.7  christos  * of powers of x for combining CRC-32s.
    195  1.7  christos  */
    196  1.6  christos local z_crc_t FAR crc_table[256];
    197  1.6  christos #ifdef W
    198  1.6  christos    local z_word_t FAR crc_big_table[256];
    199  1.6  christos    local z_crc_t FAR crc_braid_table[W][256];
    200  1.6  christos    local z_word_t FAR crc_braid_big_table[W][256];
    201  1.7  christos    local void braid(z_crc_t [][256], z_word_t [][256], int, int);
    202  1.6  christos #endif
    203  1.1  christos #ifdef MAKECRCH
    204  1.7  christos    local void write_table(FILE *, const z_crc_t FAR *, int);
    205  1.7  christos    local void write_table32hi(FILE *, const z_word_t FAR *, int);
    206  1.7  christos    local void write_table64(FILE *, const z_word_t FAR *, int);
    207  1.1  christos #endif /* MAKECRCH */
    208  1.6  christos 
    209  1.6  christos /*
    210  1.6  christos   Define a once() function depending on the availability of atomics. If this is
    211  1.6  christos   compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
    212  1.6  christos   multiple threads, and if atomics are not available, then get_crc_table() must
    213  1.6  christos   be called to initialize the tables and must return before any threads are
    214  1.6  christos   allowed to compute or combine CRCs.
    215  1.6  christos  */
    216  1.6  christos 
    217  1.6  christos /* Definition of once functionality. */
    218  1.6  christos typedef struct once_s once_t;
    219  1.6  christos 
    220  1.6  christos /* Check for the availability of atomics. */
    221  1.6  christos #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
    222  1.6  christos     !defined(__STDC_NO_ATOMICS__)
    223  1.6  christos 
    224  1.6  christos #include <stdatomic.h>
    225  1.6  christos 
    226  1.6  christos /* Structure for once(), which must be initialized with ONCE_INIT. */
    227  1.6  christos struct once_s {
    228  1.6  christos     atomic_flag begun;
    229  1.6  christos     atomic_int done;
    230  1.6  christos };
    231  1.6  christos #define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
    232  1.6  christos 
    233  1.6  christos /*
    234  1.6  christos   Run the provided init() function exactly once, even if multiple threads
    235  1.6  christos   invoke once() at the same time. The state must be a once_t initialized with
    236  1.6  christos   ONCE_INIT.
    237  1.6  christos  */
    238  1.7  christos local void once(once_t *state, void (*init)(void)) {
    239  1.6  christos     if (!atomic_load(&state->done)) {
    240  1.6  christos         if (atomic_flag_test_and_set(&state->begun))
    241  1.6  christos             while (!atomic_load(&state->done))
    242  1.6  christos                 ;
    243  1.6  christos         else {
    244  1.6  christos             init();
    245  1.6  christos             atomic_store(&state->done, 1);
    246  1.6  christos         }
    247  1.6  christos     }
    248  1.6  christos }
    249  1.6  christos 
    250  1.6  christos #else   /* no atomics */
    251  1.6  christos 
    252  1.6  christos /* Structure for once(), which must be initialized with ONCE_INIT. */
    253  1.6  christos struct once_s {
    254  1.6  christos     volatile int begun;
    255  1.6  christos     volatile int done;
    256  1.6  christos };
    257  1.6  christos #define ONCE_INIT {0, 0}
    258  1.6  christos 
    259  1.6  christos /* Test and set. Alas, not atomic, but tries to minimize the period of
    260  1.6  christos    vulnerability. */
    261  1.7  christos local int test_and_set(int volatile *flag) {
    262  1.6  christos     int was;
    263  1.6  christos 
    264  1.6  christos     was = *flag;
    265  1.6  christos     *flag = 1;
    266  1.6  christos     return was;
    267  1.6  christos }
    268  1.6  christos 
    269  1.6  christos /* Run the provided init() function once. This is not thread-safe. */
    270  1.7  christos local void once(once_t *state, void (*init)(void)) {
    271  1.6  christos     if (!state->done) {
    272  1.6  christos         if (test_and_set(&state->begun))
    273  1.6  christos             while (!state->done)
    274  1.6  christos                 ;
    275  1.6  christos         else {
    276  1.6  christos             init();
    277  1.6  christos             state->done = 1;
    278  1.6  christos         }
    279  1.6  christos     }
    280  1.6  christos }
    281  1.6  christos 
    282  1.6  christos #endif
    283  1.6  christos 
    284  1.6  christos /* State for once(). */
    285  1.6  christos local once_t made = ONCE_INIT;
    286  1.6  christos 
    287  1.1  christos /*
    288  1.1  christos   Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
    289  1.1  christos   x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
    290  1.1  christos 
    291  1.1  christos   Polynomials over GF(2) are represented in binary, one bit per coefficient,
    292  1.6  christos   with the lowest powers in the most significant bit. Then adding polynomials
    293  1.1  christos   is just exclusive-or, and multiplying a polynomial by x is a right shift by
    294  1.6  christos   one. If we call the above polynomial p, and represent a byte as the
    295  1.1  christos   polynomial q, also with the lowest power in the most significant bit (so the
    296  1.6  christos   byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
    297  1.1  christos   where a mod b means the remainder after dividing a by b.
    298  1.1  christos 
    299  1.1  christos   This calculation is done using the shift-register method of multiplying and
    300  1.6  christos   taking the remainder. The register is initialized to zero, and for each
    301  1.1  christos   incoming bit, x^32 is added mod p to the register if the bit is a one (where
    302  1.6  christos   x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
    303  1.6  christos   (which is shifting right by one and adding x^32 mod p if the bit shifted out
    304  1.6  christos   is a one). We start with the highest power (least significant bit) of q and
    305  1.6  christos   repeat for all eight bits of q.
    306  1.6  christos 
    307  1.6  christos   The table is simply the CRC of all possible eight bit values. This is all the
    308  1.6  christos   information needed to generate CRCs on data a byte at a time for all
    309  1.6  christos   combinations of CRC register values and incoming bytes.
    310  1.6  christos  */
    311  1.6  christos 
    312  1.7  christos local void make_crc_table(void) {
    313  1.6  christos     unsigned i, j, n;
    314  1.6  christos     z_crc_t p;
    315  1.1  christos 
    316  1.6  christos     /* initialize the CRC of bytes tables */
    317  1.6  christos     for (i = 0; i < 256; i++) {
    318  1.6  christos         p = i;
    319  1.6  christos         for (j = 0; j < 8; j++)
    320  1.6  christos             p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
    321  1.6  christos         crc_table[i] = p;
    322  1.6  christos #ifdef W
    323  1.6  christos         crc_big_table[i] = byte_swap(p);
    324  1.6  christos #endif
    325  1.6  christos     }
    326  1.1  christos 
    327  1.6  christos     /* initialize the x^2^n mod p(x) table */
    328  1.6  christos     p = (z_crc_t)1 << 30;         /* x^1 */
    329  1.6  christos     x2n_table[0] = p;
    330  1.6  christos     for (n = 1; n < 32; n++)
    331  1.6  christos         x2n_table[n] = p = multmodp(p, p);
    332  1.6  christos 
    333  1.6  christos #ifdef W
    334  1.6  christos     /* initialize the braiding tables -- needs x2n_table[] */
    335  1.6  christos     braid(crc_braid_table, crc_braid_big_table, N, W);
    336  1.6  christos #endif
    337  1.1  christos 
    338  1.1  christos #ifdef MAKECRCH
    339  1.1  christos     {
    340  1.6  christos         /*
    341  1.6  christos           The crc32.h header file contains tables for both 32-bit and 64-bit
    342  1.6  christos           z_word_t's, and so requires a 64-bit type be available. In that case,
    343  1.6  christos           z_word_t must be defined to be 64-bits. This code then also generates
    344  1.6  christos           and writes out the tables for the case that z_word_t is 32 bits.
    345  1.6  christos          */
    346  1.6  christos #if !defined(W) || W != 8
    347  1.6  christos #  error Need a 64-bit integer type in order to generate crc32.h.
    348  1.6  christos #endif
    349  1.1  christos         FILE *out;
    350  1.6  christos         int k, n;
    351  1.6  christos         z_crc_t ltl[8][256];
    352  1.6  christos         z_word_t big[8][256];
    353  1.1  christos 
    354  1.1  christos         out = fopen("crc32.h", "w");
    355  1.1  christos         if (out == NULL) return;
    356  1.6  christos 
    357  1.6  christos         /* write out little-endian CRC table to crc32.h */
    358  1.6  christos         fprintf(out,
    359  1.6  christos             "/* crc32.h -- tables for rapid CRC calculation\n"
    360  1.6  christos             " * Generated automatically by crc32.c\n */\n"
    361  1.6  christos             "\n"
    362  1.6  christos             "local const z_crc_t FAR crc_table[] = {\n"
    363  1.6  christos             "    ");
    364  1.6  christos         write_table(out, crc_table, 256);
    365  1.6  christos         fprintf(out,
    366  1.6  christos             "};\n");
    367  1.6  christos 
    368  1.6  christos         /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
    369  1.6  christos         fprintf(out,
    370  1.6  christos             "\n"
    371  1.6  christos             "#ifdef W\n"
    372  1.6  christos             "\n"
    373  1.6  christos             "#if W == 8\n"
    374  1.6  christos             "\n"
    375  1.6  christos             "local const z_word_t FAR crc_big_table[] = {\n"
    376  1.6  christos             "    ");
    377  1.6  christos         write_table64(out, crc_big_table, 256);
    378  1.6  christos         fprintf(out,
    379  1.6  christos             "};\n");
    380  1.6  christos 
    381  1.6  christos         /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
    382  1.6  christos         fprintf(out,
    383  1.6  christos             "\n"
    384  1.6  christos             "#else /* W == 4 */\n"
    385  1.6  christos             "\n"
    386  1.6  christos             "local const z_word_t FAR crc_big_table[] = {\n"
    387  1.6  christos             "    ");
    388  1.6  christos         write_table32hi(out, crc_big_table, 256);
    389  1.6  christos         fprintf(out,
    390  1.6  christos             "};\n"
    391  1.6  christos             "\n"
    392  1.6  christos             "#endif\n");
    393  1.6  christos 
    394  1.6  christos         /* write out braid tables for each value of N */
    395  1.6  christos         for (n = 1; n <= 6; n++) {
    396  1.6  christos             fprintf(out,
    397  1.6  christos             "\n"
    398  1.6  christos             "#if N == %d\n", n);
    399  1.6  christos 
    400  1.6  christos             /* compute braid tables for this N and 64-bit word_t */
    401  1.6  christos             braid(ltl, big, n, 8);
    402  1.6  christos 
    403  1.6  christos             /* write out braid tables for 64-bit z_word_t to crc32.h */
    404  1.6  christos             fprintf(out,
    405  1.6  christos             "\n"
    406  1.6  christos             "#if W == 8\n"
    407  1.6  christos             "\n"
    408  1.6  christos             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
    409  1.6  christos             for (k = 0; k < 8; k++) {
    410  1.6  christos                 fprintf(out, "   {");
    411  1.6  christos                 write_table(out, ltl[k], 256);
    412  1.6  christos                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
    413  1.6  christos             }
    414  1.6  christos             fprintf(out,
    415  1.6  christos             "};\n"
    416  1.6  christos             "\n"
    417  1.6  christos             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
    418  1.6  christos             for (k = 0; k < 8; k++) {
    419  1.6  christos                 fprintf(out, "   {");
    420  1.6  christos                 write_table64(out, big[k], 256);
    421  1.6  christos                 fprintf(out, "}%s", k < 7 ? ",\n" : "");
    422  1.6  christos             }
    423  1.6  christos             fprintf(out,
    424  1.6  christos             "};\n");
    425  1.6  christos 
    426  1.6  christos             /* compute braid tables for this N and 32-bit word_t */
    427  1.6  christos             braid(ltl, big, n, 4);
    428  1.6  christos 
    429  1.6  christos             /* write out braid tables for 32-bit z_word_t to crc32.h */
    430  1.6  christos             fprintf(out,
    431  1.6  christos             "\n"
    432  1.6  christos             "#else /* W == 4 */\n"
    433  1.6  christos             "\n"
    434  1.6  christos             "local const z_crc_t FAR crc_braid_table[][256] = {\n");
    435  1.6  christos             for (k = 0; k < 4; k++) {
    436  1.6  christos                 fprintf(out, "   {");
    437  1.6  christos                 write_table(out, ltl[k], 256);
    438  1.6  christos                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
    439  1.6  christos             }
    440  1.6  christos             fprintf(out,
    441  1.6  christos             "};\n"
    442  1.6  christos             "\n"
    443  1.6  christos             "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
    444  1.6  christos             for (k = 0; k < 4; k++) {
    445  1.6  christos                 fprintf(out, "   {");
    446  1.6  christos                 write_table32hi(out, big[k], 256);
    447  1.6  christos                 fprintf(out, "}%s", k < 3 ? ",\n" : "");
    448  1.6  christos             }
    449  1.6  christos             fprintf(out,
    450  1.6  christos             "};\n"
    451  1.6  christos             "\n"
    452  1.6  christos             "#endif\n"
    453  1.6  christos             "\n"
    454  1.6  christos             "#endif\n");
    455  1.1  christos         }
    456  1.6  christos         fprintf(out,
    457  1.6  christos             "\n"
    458  1.6  christos             "#endif\n");
    459  1.6  christos 
    460  1.6  christos         /* write out zeros operator table to crc32.h */
    461  1.6  christos         fprintf(out,
    462  1.6  christos             "\n"
    463  1.6  christos             "local const z_crc_t FAR x2n_table[] = {\n"
    464  1.6  christos             "    ");
    465  1.6  christos         write_table(out, x2n_table, 32);
    466  1.6  christos         fprintf(out,
    467  1.6  christos             "};\n");
    468  1.1  christos         fclose(out);
    469  1.1  christos     }
    470  1.1  christos #endif /* MAKECRCH */
    471  1.1  christos }
    472  1.1  christos 
    473  1.1  christos #ifdef MAKECRCH
    474  1.6  christos 
    475  1.6  christos /*
    476  1.6  christos    Write the 32-bit values in table[0..k-1] to out, five per line in
    477  1.6  christos    hexadecimal separated by commas.
    478  1.6  christos  */
    479  1.7  christos local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
    480  1.1  christos     int n;
    481  1.1  christos 
    482  1.6  christos     for (n = 0; n < k; n++)
    483  1.6  christos         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
    484  1.5  christos                 (unsigned long)(table[n]),
    485  1.6  christos                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
    486  1.1  christos }
    487  1.6  christos 
    488  1.6  christos /*
    489  1.6  christos    Write the high 32-bits of each value in table[0..k-1] to out, five per line
    490  1.6  christos    in hexadecimal separated by commas.
    491  1.6  christos  */
    492  1.7  christos local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
    493  1.6  christos     int n;
    494  1.6  christos 
    495  1.6  christos     for (n = 0; n < k; n++)
    496  1.6  christos         fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
    497  1.6  christos                 (unsigned long)(table[n] >> 32),
    498  1.6  christos                 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
    499  1.6  christos }
    500  1.6  christos 
    501  1.6  christos /*
    502  1.6  christos   Write the 64-bit values in table[0..k-1] to out, three per line in
    503  1.6  christos   hexadecimal separated by commas. This assumes that if there is a 64-bit
    504  1.6  christos   type, then there is also a long long integer type, and it is at least 64
    505  1.6  christos   bits. If not, then the type cast and format string can be adjusted
    506  1.6  christos   accordingly.
    507  1.6  christos  */
    508  1.7  christos local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
    509  1.6  christos     int n;
    510  1.6  christos 
    511  1.6  christos     for (n = 0; n < k; n++)
    512  1.6  christos         fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
    513  1.6  christos                 (unsigned long long)(table[n]),
    514  1.6  christos                 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
    515  1.6  christos }
    516  1.6  christos 
    517  1.6  christos /* Actually do the deed. */
    518  1.7  christos int main(void) {
    519  1.6  christos     make_crc_table();
    520  1.6  christos     return 0;
    521  1.6  christos }
    522  1.6  christos 
    523  1.1  christos #endif /* MAKECRCH */
    524  1.1  christos 
    525  1.6  christos #ifdef W
    526  1.6  christos /*
    527  1.6  christos   Generate the little and big-endian braid tables for the given n and z_word_t
    528  1.6  christos   size w. Each array must have room for w blocks of 256 elements.
    529  1.6  christos  */
    530  1.7  christos local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
    531  1.6  christos     int k;
    532  1.6  christos     z_crc_t i, p, q;
    533  1.6  christos     for (k = 0; k < w; k++) {
    534  1.6  christos         p = x2nmodp((n * w + 3 - k) << 3, 0);
    535  1.6  christos         ltl[k][0] = 0;
    536  1.6  christos         big[w - 1 - k][0] = 0;
    537  1.6  christos         for (i = 1; i < 256; i++) {
    538  1.6  christos             ltl[k][i] = q = multmodp(i << 24, p);
    539  1.6  christos             big[w - 1 - k][i] = byte_swap(q);
    540  1.6  christos         }
    541  1.6  christos     }
    542  1.6  christos }
    543  1.6  christos #endif
    544  1.6  christos 
    545  1.1  christos #endif /* DYNAMIC_CRC_TABLE */
    546  1.1  christos 
    547  1.1  christos /* =========================================================================
    548  1.6  christos  * This function can be used by asm versions of crc32(), and to force the
    549  1.6  christos  * generation of the CRC tables in a threaded application.
    550  1.1  christos  */
    551  1.7  christos const z_crc_t FAR * ZEXPORT get_crc_table(void) {
    552  1.1  christos #ifdef DYNAMIC_CRC_TABLE
    553  1.6  christos     once(&made, make_crc_table);
    554  1.1  christos #endif /* DYNAMIC_CRC_TABLE */
    555  1.5  christos     return (const z_crc_t FAR *)crc_table;
    556  1.1  christos }
    557  1.1  christos 
    558  1.6  christos /* =========================================================================
    559  1.6  christos  * Use ARM machine instructions if available. This will compute the CRC about
    560  1.6  christos  * ten times faster than the braided calculation. This code does not check for
    561  1.6  christos  * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
    562  1.6  christos  * only be defined if the compilation specifies an ARM processor architecture
    563  1.6  christos  * that has the instructions. For example, compiling with -march=armv8.1-a or
    564  1.6  christos  * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
    565  1.6  christos  * instructions.
    566  1.6  christos  */
    567  1.6  christos #ifdef ARMCRC32
    568  1.6  christos 
    569  1.6  christos /*
    570  1.6  christos    Constants empirically determined to maximize speed. These values are from
    571  1.6  christos    measurements on a Cortex-A57. Your mileage may vary.
    572  1.6  christos  */
    573  1.6  christos #define Z_BATCH 3990                /* number of words in a batch */
    574  1.6  christos #define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
    575  1.6  christos #define Z_BATCH_MIN 800             /* fewest words in a final batch */
    576  1.1  christos 
    577  1.7  christos unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
    578  1.7  christos                               z_size_t len) {
    579  1.6  christos     z_crc_t val;
    580  1.6  christos     z_word_t crc1, crc2;
    581  1.6  christos     const z_word_t *word;
    582  1.6  christos     z_word_t val0, val1, val2;
    583  1.6  christos     z_size_t last, last2, i;
    584  1.6  christos     z_size_t num;
    585  1.6  christos 
    586  1.6  christos     /* Return initial CRC, if requested. */
    587  1.6  christos     if (buf == Z_NULL) return 0;
    588  1.1  christos 
    589  1.1  christos #ifdef DYNAMIC_CRC_TABLE
    590  1.6  christos     once(&made, make_crc_table);
    591  1.1  christos #endif /* DYNAMIC_CRC_TABLE */
    592  1.1  christos 
    593  1.6  christos     /* Pre-condition the CRC */
    594  1.6  christos     crc = (~crc) & 0xffffffff;
    595  1.1  christos 
    596  1.6  christos     /* Compute the CRC up to a word boundary. */
    597  1.6  christos     while (len && ((z_size_t)buf & 7) != 0) {
    598  1.6  christos         len--;
    599  1.6  christos         val = *buf++;
    600  1.6  christos         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
    601  1.6  christos     }
    602  1.6  christos 
    603  1.6  christos     /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
    604  1.6  christos     word = (z_word_t const *)buf;
    605  1.6  christos     num = len >> 3;
    606  1.6  christos     len &= 7;
    607  1.6  christos 
    608  1.6  christos     /* Do three interleaved CRCs to realize the throughput of one crc32x
    609  1.6  christos        instruction per cycle. Each CRC is calculated on Z_BATCH words. The
    610  1.6  christos        three CRCs are combined into a single CRC after each set of batches. */
    611  1.6  christos     while (num >= 3 * Z_BATCH) {
    612  1.6  christos         crc1 = 0;
    613  1.6  christos         crc2 = 0;
    614  1.6  christos         for (i = 0; i < Z_BATCH; i++) {
    615  1.6  christos             val0 = word[i];
    616  1.6  christos             val1 = word[i + Z_BATCH];
    617  1.6  christos             val2 = word[i + 2 * Z_BATCH];
    618  1.6  christos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
    619  1.6  christos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
    620  1.6  christos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
    621  1.6  christos         }
    622  1.6  christos         word += 3 * Z_BATCH;
    623  1.6  christos         num -= 3 * Z_BATCH;
    624  1.6  christos         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
    625  1.6  christos         crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
    626  1.6  christos     }
    627  1.6  christos 
    628  1.6  christos     /* Do one last smaller batch with the remaining words, if there are enough
    629  1.6  christos        to pay for the combination of CRCs. */
    630  1.6  christos     last = num / 3;
    631  1.6  christos     if (last >= Z_BATCH_MIN) {
    632  1.6  christos         last2 = last << 1;
    633  1.6  christos         crc1 = 0;
    634  1.6  christos         crc2 = 0;
    635  1.6  christos         for (i = 0; i < last; i++) {
    636  1.6  christos             val0 = word[i];
    637  1.6  christos             val1 = word[i + last];
    638  1.6  christos             val2 = word[i + last2];
    639  1.6  christos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
    640  1.6  christos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
    641  1.6  christos             __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
    642  1.6  christos         }
    643  1.6  christos         word += 3 * last;
    644  1.6  christos         num -= 3 * last;
    645  1.6  christos         val = x2nmodp(last, 6);
    646  1.6  christos         crc = multmodp(val, crc) ^ crc1;
    647  1.6  christos         crc = multmodp(val, crc) ^ crc2;
    648  1.6  christos     }
    649  1.6  christos 
    650  1.6  christos     /* Compute the CRC on any remaining words. */
    651  1.6  christos     for (i = 0; i < num; i++) {
    652  1.6  christos         val0 = word[i];
    653  1.6  christos         __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
    654  1.1  christos     }
    655  1.6  christos     word += num;
    656  1.6  christos 
    657  1.6  christos     /* Complete the CRC on any remaining bytes. */
    658  1.6  christos     buf = (const unsigned char FAR *)word;
    659  1.6  christos     while (len) {
    660  1.6  christos         len--;
    661  1.6  christos         val = *buf++;
    662  1.6  christos         __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
    663  1.1  christos     }
    664  1.6  christos 
    665  1.6  christos     /* Return the CRC, post-conditioned. */
    666  1.6  christos     return crc ^ 0xffffffff;
    667  1.1  christos }
    668  1.1  christos 
    669  1.6  christos #else
    670  1.5  christos 
    671  1.6  christos #ifdef W
    672  1.1  christos 
    673  1.5  christos /*
    674  1.6  christos   Return the CRC of the W bytes in the word_t data, taking the
    675  1.6  christos   least-significant byte of the word as the first byte of data, without any pre
    676  1.6  christos   or post conditioning. This is used to combine the CRCs of each braid.
    677  1.5  christos  */
    678  1.7  christos local z_crc_t crc_word(z_word_t data) {
    679  1.6  christos     int k;
    680  1.6  christos     for (k = 0; k < W; k++)
    681  1.6  christos         data = (data >> 8) ^ crc_table[data & 0xff];
    682  1.6  christos     return (z_crc_t)data;
    683  1.6  christos }
    684  1.5  christos 
    685  1.7  christos local z_word_t crc_word_big(z_word_t data) {
    686  1.6  christos     int k;
    687  1.6  christos     for (k = 0; k < W; k++)
    688  1.6  christos         data = (data << 8) ^
    689  1.6  christos             crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
    690  1.6  christos     return data;
    691  1.6  christos }
    692  1.6  christos 
    693  1.6  christos #endif
    694  1.1  christos 
    695  1.1  christos /* ========================================================================= */
    696  1.7  christos unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
    697  1.7  christos                               z_size_t len) {
    698  1.6  christos     /* Return initial CRC, if requested. */
    699  1.6  christos     if (buf == Z_NULL) return 0;
    700  1.6  christos 
    701  1.6  christos #ifdef DYNAMIC_CRC_TABLE
    702  1.6  christos     once(&made, make_crc_table);
    703  1.6  christos #endif /* DYNAMIC_CRC_TABLE */
    704  1.6  christos 
    705  1.6  christos     /* Pre-condition the CRC */
    706  1.6  christos     crc = (~crc) & 0xffffffff;
    707  1.6  christos 
    708  1.6  christos #ifdef W
    709  1.6  christos 
    710  1.6  christos     /* If provided enough bytes, do a braided CRC calculation. */
    711  1.6  christos     if (len >= N * W + W - 1) {
    712  1.6  christos         z_size_t blks;
    713  1.6  christos         z_word_t const *words;
    714  1.6  christos         unsigned endian;
    715  1.6  christos         int k;
    716  1.6  christos 
    717  1.6  christos         /* Compute the CRC up to a z_word_t boundary. */
    718  1.6  christos         while (len && ((z_size_t)buf & (W - 1)) != 0) {
    719  1.6  christos             len--;
    720  1.6  christos             crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
    721  1.6  christos         }
    722  1.6  christos 
    723  1.6  christos         /* Compute the CRC on as many N z_word_t blocks as are available. */
    724  1.6  christos         blks = len / (N * W);
    725  1.6  christos         len -= blks * N * W;
    726  1.6  christos         words = (z_word_t const *)buf;
    727  1.6  christos 
    728  1.6  christos         /* Do endian check at execution time instead of compile time, since ARM
    729  1.7  christos            processors can change the endianness at execution time. If the
    730  1.7  christos            compiler knows what the endianness will be, it can optimize out the
    731  1.6  christos            check and the unused branch. */
    732  1.6  christos         endian = 1;
    733  1.6  christos         if (*(unsigned char *)&endian) {
    734  1.6  christos             /* Little endian. */
    735  1.6  christos 
    736  1.6  christos             z_crc_t crc0;
    737  1.6  christos             z_word_t word0;
    738  1.6  christos #if N > 1
    739  1.6  christos             z_crc_t crc1;
    740  1.6  christos             z_word_t word1;
    741  1.6  christos #if N > 2
    742  1.6  christos             z_crc_t crc2;
    743  1.6  christos             z_word_t word2;
    744  1.6  christos #if N > 3
    745  1.6  christos             z_crc_t crc3;
    746  1.6  christos             z_word_t word3;
    747  1.6  christos #if N > 4
    748  1.6  christos             z_crc_t crc4;
    749  1.6  christos             z_word_t word4;
    750  1.6  christos #if N > 5
    751  1.6  christos             z_crc_t crc5;
    752  1.6  christos             z_word_t word5;
    753  1.6  christos #endif
    754  1.6  christos #endif
    755  1.6  christos #endif
    756  1.6  christos #endif
    757  1.6  christos #endif
    758  1.6  christos 
    759  1.6  christos             /* Initialize the CRC for each braid. */
    760  1.6  christos             crc0 = crc;
    761  1.6  christos #if N > 1
    762  1.6  christos             crc1 = 0;
    763  1.6  christos #if N > 2
    764  1.6  christos             crc2 = 0;
    765  1.6  christos #if N > 3
    766  1.6  christos             crc3 = 0;
    767  1.6  christos #if N > 4
    768  1.6  christos             crc4 = 0;
    769  1.6  christos #if N > 5
    770  1.6  christos             crc5 = 0;
    771  1.6  christos #endif
    772  1.6  christos #endif
    773  1.6  christos #endif
    774  1.6  christos #endif
    775  1.6  christos #endif
    776  1.1  christos 
    777  1.6  christos             /*
    778  1.6  christos               Process the first blks-1 blocks, computing the CRCs on each braid
    779  1.6  christos               independently.
    780  1.6  christos              */
    781  1.6  christos             while (--blks) {
    782  1.6  christos                 /* Load the word for each braid into registers. */
    783  1.6  christos                 word0 = crc0 ^ words[0];
    784  1.6  christos #if N > 1
    785  1.6  christos                 word1 = crc1 ^ words[1];
    786  1.6  christos #if N > 2
    787  1.6  christos                 word2 = crc2 ^ words[2];
    788  1.6  christos #if N > 3
    789  1.6  christos                 word3 = crc3 ^ words[3];
    790  1.6  christos #if N > 4
    791  1.6  christos                 word4 = crc4 ^ words[4];
    792  1.6  christos #if N > 5
    793  1.6  christos                 word5 = crc5 ^ words[5];
    794  1.6  christos #endif
    795  1.6  christos #endif
    796  1.6  christos #endif
    797  1.6  christos #endif
    798  1.6  christos #endif
    799  1.6  christos                 words += N;
    800  1.6  christos 
    801  1.6  christos                 /* Compute and update the CRC for each word. The loop should
    802  1.6  christos                    get unrolled. */
    803  1.6  christos                 crc0 = crc_braid_table[0][word0 & 0xff];
    804  1.6  christos #if N > 1
    805  1.6  christos                 crc1 = crc_braid_table[0][word1 & 0xff];
    806  1.6  christos #if N > 2
    807  1.6  christos                 crc2 = crc_braid_table[0][word2 & 0xff];
    808  1.6  christos #if N > 3
    809  1.6  christos                 crc3 = crc_braid_table[0][word3 & 0xff];
    810  1.6  christos #if N > 4
    811  1.6  christos                 crc4 = crc_braid_table[0][word4 & 0xff];
    812  1.6  christos #if N > 5
    813  1.6  christos                 crc5 = crc_braid_table[0][word5 & 0xff];
    814  1.6  christos #endif
    815  1.6  christos #endif
    816  1.6  christos #endif
    817  1.6  christos #endif
    818  1.6  christos #endif
    819  1.6  christos                 for (k = 1; k < W; k++) {
    820  1.6  christos                     crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
    821  1.6  christos #if N > 1
    822  1.6  christos                     crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
    823  1.6  christos #if N > 2
    824  1.6  christos                     crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
    825  1.6  christos #if N > 3
    826  1.6  christos                     crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
    827  1.6  christos #if N > 4
    828  1.6  christos                     crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
    829  1.6  christos #if N > 5
    830  1.6  christos                     crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
    831  1.6  christos #endif
    832  1.6  christos #endif
    833  1.6  christos #endif
    834  1.6  christos #endif
    835  1.6  christos #endif
    836  1.6  christos                 }
    837  1.6  christos             }
    838  1.6  christos 
    839  1.6  christos             /*
    840  1.6  christos               Process the last block, combining the CRCs of the N braids at the
    841  1.6  christos               same time.
    842  1.6  christos              */
    843  1.6  christos             crc = crc_word(crc0 ^ words[0]);
    844  1.6  christos #if N > 1
    845  1.6  christos             crc = crc_word(crc1 ^ words[1] ^ crc);
    846  1.6  christos #if N > 2
    847  1.6  christos             crc = crc_word(crc2 ^ words[2] ^ crc);
    848  1.6  christos #if N > 3
    849  1.6  christos             crc = crc_word(crc3 ^ words[3] ^ crc);
    850  1.6  christos #if N > 4
    851  1.6  christos             crc = crc_word(crc4 ^ words[4] ^ crc);
    852  1.6  christos #if N > 5
    853  1.6  christos             crc = crc_word(crc5 ^ words[5] ^ crc);
    854  1.6  christos #endif
    855  1.6  christos #endif
    856  1.6  christos #endif
    857  1.6  christos #endif
    858  1.6  christos #endif
    859  1.6  christos             words += N;
    860  1.6  christos         }
    861  1.6  christos         else {
    862  1.6  christos             /* Big endian. */
    863  1.6  christos 
    864  1.6  christos             z_word_t crc0, word0, comb;
    865  1.6  christos #if N > 1
    866  1.6  christos             z_word_t crc1, word1;
    867  1.6  christos #if N > 2
    868  1.6  christos             z_word_t crc2, word2;
    869  1.6  christos #if N > 3
    870  1.6  christos             z_word_t crc3, word3;
    871  1.6  christos #if N > 4
    872  1.6  christos             z_word_t crc4, word4;
    873  1.6  christos #if N > 5
    874  1.6  christos             z_word_t crc5, word5;
    875  1.6  christos #endif
    876  1.6  christos #endif
    877  1.6  christos #endif
    878  1.6  christos #endif
    879  1.6  christos #endif
    880  1.6  christos 
    881  1.6  christos             /* Initialize the CRC for each braid. */
    882  1.6  christos             crc0 = byte_swap(crc);
    883  1.6  christos #if N > 1
    884  1.6  christos             crc1 = 0;
    885  1.6  christos #if N > 2
    886  1.6  christos             crc2 = 0;
    887  1.6  christos #if N > 3
    888  1.6  christos             crc3 = 0;
    889  1.6  christos #if N > 4
    890  1.6  christos             crc4 = 0;
    891  1.6  christos #if N > 5
    892  1.6  christos             crc5 = 0;
    893  1.6  christos #endif
    894  1.6  christos #endif
    895  1.6  christos #endif
    896  1.6  christos #endif
    897  1.6  christos #endif
    898  1.1  christos 
    899  1.6  christos             /*
    900  1.6  christos               Process the first blks-1 blocks, computing the CRCs on each braid
    901  1.6  christos               independently.
    902  1.6  christos              */
    903  1.6  christos             while (--blks) {
    904  1.6  christos                 /* Load the word for each braid into registers. */
    905  1.6  christos                 word0 = crc0 ^ words[0];
    906  1.6  christos #if N > 1
    907  1.6  christos                 word1 = crc1 ^ words[1];
    908  1.6  christos #if N > 2
    909  1.6  christos                 word2 = crc2 ^ words[2];
    910  1.6  christos #if N > 3
    911  1.6  christos                 word3 = crc3 ^ words[3];
    912  1.6  christos #if N > 4
    913  1.6  christos                 word4 = crc4 ^ words[4];
    914  1.6  christos #if N > 5
    915  1.6  christos                 word5 = crc5 ^ words[5];
    916  1.6  christos #endif
    917  1.6  christos #endif
    918  1.6  christos #endif
    919  1.6  christos #endif
    920  1.6  christos #endif
    921  1.6  christos                 words += N;
    922  1.1  christos 
    923  1.6  christos                 /* Compute and update the CRC for each word. The loop should
    924  1.6  christos                    get unrolled. */
    925  1.6  christos                 crc0 = crc_braid_big_table[0][word0 & 0xff];
    926  1.6  christos #if N > 1
    927  1.6  christos                 crc1 = crc_braid_big_table[0][word1 & 0xff];
    928  1.6  christos #if N > 2
    929  1.6  christos                 crc2 = crc_braid_big_table[0][word2 & 0xff];
    930  1.6  christos #if N > 3
    931  1.6  christos                 crc3 = crc_braid_big_table[0][word3 & 0xff];
    932  1.6  christos #if N > 4
    933  1.6  christos                 crc4 = crc_braid_big_table[0][word4 & 0xff];
    934  1.6  christos #if N > 5
    935  1.6  christos                 crc5 = crc_braid_big_table[0][word5 & 0xff];
    936  1.6  christos #endif
    937  1.6  christos #endif
    938  1.6  christos #endif
    939  1.6  christos #endif
    940  1.6  christos #endif
    941  1.6  christos                 for (k = 1; k < W; k++) {
    942  1.6  christos                     crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
    943  1.6  christos #if N > 1
    944  1.6  christos                     crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
    945  1.6  christos #if N > 2
    946  1.6  christos                     crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
    947  1.6  christos #if N > 3
    948  1.6  christos                     crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
    949  1.6  christos #if N > 4
    950  1.6  christos                     crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
    951  1.6  christos #if N > 5
    952  1.6  christos                     crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
    953  1.6  christos #endif
    954  1.6  christos #endif
    955  1.6  christos #endif
    956  1.6  christos #endif
    957  1.6  christos #endif
    958  1.6  christos                 }
    959  1.6  christos             }
    960  1.1  christos 
    961  1.6  christos             /*
    962  1.6  christos               Process the last block, combining the CRCs of the N braids at the
    963  1.6  christos               same time.
    964  1.6  christos              */
    965  1.6  christos             comb = crc_word_big(crc0 ^ words[0]);
    966  1.6  christos #if N > 1
    967  1.6  christos             comb = crc_word_big(crc1 ^ words[1] ^ comb);
    968  1.6  christos #if N > 2
    969  1.6  christos             comb = crc_word_big(crc2 ^ words[2] ^ comb);
    970  1.6  christos #if N > 3
    971  1.6  christos             comb = crc_word_big(crc3 ^ words[3] ^ comb);
    972  1.6  christos #if N > 4
    973  1.6  christos             comb = crc_word_big(crc4 ^ words[4] ^ comb);
    974  1.6  christos #if N > 5
    975  1.6  christos             comb = crc_word_big(crc5 ^ words[5] ^ comb);
    976  1.6  christos #endif
    977  1.6  christos #endif
    978  1.6  christos #endif
    979  1.6  christos #endif
    980  1.6  christos #endif
    981  1.6  christos             words += N;
    982  1.6  christos             crc = byte_swap(comb);
    983  1.6  christos         }
    984  1.1  christos 
    985  1.6  christos         /*
    986  1.6  christos           Update the pointer to the remaining bytes to process.
    987  1.6  christos          */
    988  1.6  christos         buf = (unsigned char const *)words;
    989  1.1  christos     }
    990  1.1  christos 
    991  1.6  christos #endif /* W */
    992  1.6  christos 
    993  1.6  christos     /* Complete the computation of the CRC on any remaining bytes. */
    994  1.6  christos     while (len >= 8) {
    995  1.6  christos         len -= 8;
    996  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
    997  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
    998  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
    999  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
   1000  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
   1001  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
   1002  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
   1003  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
   1004  1.1  christos     }
   1005  1.6  christos     while (len) {
   1006  1.6  christos         len--;
   1007  1.6  christos         crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
   1008  1.1  christos     }
   1009  1.1  christos 
   1010  1.6  christos     /* Return the CRC, post-conditioned. */
   1011  1.6  christos     return crc ^ 0xffffffff;
   1012  1.1  christos }
   1013  1.1  christos 
   1014  1.6  christos #endif
   1015  1.1  christos 
   1016  1.1  christos /* ========================================================================= */
   1017  1.7  christos unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
   1018  1.7  christos                             uInt len) {
   1019  1.6  christos     return crc32_z(crc, buf, len);
   1020  1.1  christos }
   1021  1.1  christos 
   1022  1.1  christos /* ========================================================================= */
   1023  1.7  christos uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
   1024  1.6  christos #ifdef DYNAMIC_CRC_TABLE
   1025  1.6  christos     once(&made, make_crc_table);
   1026  1.6  christos #endif /* DYNAMIC_CRC_TABLE */
   1027  1.6  christos     return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
   1028  1.1  christos }
   1029  1.1  christos 
   1030  1.1  christos /* ========================================================================= */
   1031  1.7  christos uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
   1032  1.6  christos     return crc32_combine64(crc1, crc2, (z_off64_t)len2);
   1033  1.6  christos }
   1034  1.6  christos 
   1035  1.6  christos /* ========================================================================= */
   1036  1.7  christos uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
   1037  1.6  christos #ifdef DYNAMIC_CRC_TABLE
   1038  1.6  christos     once(&made, make_crc_table);
   1039  1.6  christos #endif /* DYNAMIC_CRC_TABLE */
   1040  1.6  christos     return x2nmodp(len2, 3);
   1041  1.1  christos }
   1042  1.5  christos 
   1043  1.5  christos /* ========================================================================= */
   1044  1.7  christos uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
   1045  1.6  christos     return crc32_combine_gen64((z_off64_t)len2);
   1046  1.5  christos }
   1047  1.5  christos 
   1048  1.6  christos /* ========================================================================= */
   1049  1.7  christos uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
   1050  1.6  christos     return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
   1051  1.5  christos }
   1052