Home | History | Annotate | Line # | Download | only in zfs
zfs_fletcher.c revision 1.1.1.1.44.1
      1           1.1      haad /*
      2           1.1      haad  * CDDL HEADER START
      3           1.1      haad  *
      4           1.1      haad  * The contents of this file are subject to the terms of the
      5           1.1      haad  * Common Development and Distribution License (the "License").
      6           1.1      haad  * You may not use this file except in compliance with the License.
      7           1.1      haad  *
      8           1.1      haad  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9           1.1      haad  * or http://www.opensolaris.org/os/licensing.
     10           1.1      haad  * See the License for the specific language governing permissions
     11           1.1      haad  * and limitations under the License.
     12           1.1      haad  *
     13           1.1      haad  * When distributing Covered Code, include this CDDL HEADER in each
     14           1.1      haad  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15           1.1      haad  * If applicable, add the following below this CDDL HEADER, with the
     16           1.1      haad  * fields enclosed by brackets "[]" replaced with your own identifying
     17           1.1      haad  * information: Portions Copyright [yyyy] [name of copyright owner]
     18           1.1      haad  *
     19           1.1      haad  * CDDL HEADER END
     20           1.1      haad  */
     21           1.1      haad /*
     22           1.1      haad  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23           1.1      haad  * Use is subject to license terms.
     24           1.1      haad  */
     25  1.1.1.1.44.1  pgoyette /*
     26  1.1.1.1.44.1  pgoyette  * Copyright 2013 Saso Kiselkov. All rights reserved.
     27  1.1.1.1.44.1  pgoyette  */
     28           1.1      haad 
     29           1.1      haad /*
     30           1.1      haad  * Fletcher Checksums
     31           1.1      haad  * ------------------
     32           1.1      haad  *
     33           1.1      haad  * ZFS's 2nd and 4th order Fletcher checksums are defined by the following
     34           1.1      haad  * recurrence relations:
     35           1.1      haad  *
     36           1.1      haad  *	a  = a    + f
     37           1.1      haad  *	 i    i-1    i-1
     38           1.1      haad  *
     39           1.1      haad  *	b  = b    + a
     40           1.1      haad  *	 i    i-1    i
     41           1.1      haad  *
     42           1.1      haad  *	c  = c    + b		(fletcher-4 only)
     43           1.1      haad  *	 i    i-1    i
     44           1.1      haad  *
     45           1.1      haad  *	d  = d    + c		(fletcher-4 only)
     46           1.1      haad  *	 i    i-1    i
     47           1.1      haad  *
     48           1.1      haad  * Where
     49           1.1      haad  *	a_0 = b_0 = c_0 = d_0 = 0
     50           1.1      haad  * and
     51           1.1      haad  *	f_0 .. f_(n-1) are the input data.
     52           1.1      haad  *
     53           1.1      haad  * Using standard techniques, these translate into the following series:
     54           1.1      haad  *
     55           1.1      haad  *	     __n_			     __n_
     56           1.1      haad  *	     \   |			     \   |
     57           1.1      haad  *	a  =  >     f			b  =  >     i * f
     58           1.1      haad  *	 n   /___|   n - i		 n   /___|	 n - i
     59           1.1      haad  *	     i = 1			     i = 1
     60           1.1      haad  *
     61           1.1      haad  *
     62           1.1      haad  *	     __n_			     __n_
     63           1.1      haad  *	     \   |  i*(i+1)		     \   |  i*(i+1)*(i+2)
     64           1.1      haad  *	c  =  >     ------- f		d  =  >     ------------- f
     65           1.1      haad  *	 n   /___|     2     n - i	 n   /___|	  6	   n - i
     66           1.1      haad  *	     i = 1			     i = 1
     67           1.1      haad  *
     68           1.1      haad  * For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators.
     69           1.1      haad  * Since the additions are done mod (2^64), errors in the high bits may not
     70           1.1      haad  * be noticed.  For this reason, fletcher-2 is deprecated.
     71           1.1      haad  *
     72           1.1      haad  * For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators.
     73           1.1      haad  * A conservative estimate of how big the buffer can get before we overflow
     74           1.1      haad  * can be estimated using f_i = 0xffffffff for all i:
     75           1.1      haad  *
     76           1.1      haad  * % bc
     77           1.1      haad  *  f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4
     78           1.1      haad  * 2264
     79           1.1      haad  *  quit
     80           1.1      haad  * %
     81           1.1      haad  *
     82           1.1      haad  * So blocks of up to 2k will not overflow.  Our largest block size is
     83           1.1      haad  * 128k, which has 32k 4-byte words, so we can compute the largest possible
     84           1.1      haad  * accumulators, then divide by 2^64 to figure the max amount of overflow:
     85           1.1      haad  *
     86           1.1      haad  * % bc
     87           1.1      haad  *  a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c }
     88           1.1      haad  *  a/2^64;b/2^64;c/2^64;d/2^64
     89           1.1      haad  * 0
     90           1.1      haad  * 0
     91           1.1      haad  * 1365
     92           1.1      haad  * 11186858
     93           1.1      haad  *  quit
     94           1.1      haad  * %
     95           1.1      haad  *
     96           1.1      haad  * So a and b cannot overflow.  To make sure each bit of input has some
     97           1.1      haad  * effect on the contents of c and d, we can look at what the factors of
     98           1.1      haad  * the coefficients in the equations for c_n and d_n are.  The number of 2s
     99           1.1      haad  * in the factors determines the lowest set bit in the multiplier.  Running
    100           1.1      haad  * through the cases for n*(n+1)/2 reveals that the highest power of 2 is
    101           1.1      haad  * 2^14, and for n*(n+1)*(n+2)/6 it is 2^15.  So while some data may overflow
    102           1.1      haad  * the 64-bit accumulators, every bit of every f_i effects every accumulator,
    103           1.1      haad  * even for 128k blocks.
    104           1.1      haad  *
    105           1.1      haad  * If we wanted to make a stronger version of fletcher4 (fletcher4c?),
    106           1.1      haad  * we could do our calculations mod (2^32 - 1) by adding in the carries
    107           1.1      haad  * periodically, and store the number of carries in the top 32-bits.
    108           1.1      haad  *
    109           1.1      haad  * --------------------
    110           1.1      haad  * Checksum Performance
    111           1.1      haad  * --------------------
    112           1.1      haad  *
    113           1.1      haad  * There are two interesting components to checksum performance: cached and
    114           1.1      haad  * uncached performance.  With cached data, fletcher-2 is about four times
    115           1.1      haad  * faster than fletcher-4.  With uncached data, the performance difference is
    116           1.1      haad  * negligible, since the cost of a cache fill dominates the processing time.
    117           1.1      haad  * Even though fletcher-4 is slower than fletcher-2, it is still a pretty
    118           1.1      haad  * efficient pass over the data.
    119           1.1      haad  *
    120           1.1      haad  * In normal operation, the data which is being checksummed is in a buffer
    121           1.1      haad  * which has been filled either by:
    122           1.1      haad  *
    123           1.1      haad  *	1. a compression step, which will be mostly cached, or
    124           1.1      haad  *	2. a bcopy() or copyin(), which will be uncached (because the
    125           1.1      haad  *	   copy is cache-bypassing).
    126           1.1      haad  *
    127           1.1      haad  * For both cached and uncached data, both fletcher checksums are much faster
    128           1.1      haad  * than sha-256, and slower than 'off', which doesn't touch the data at all.
    129           1.1      haad  */
    130           1.1      haad 
    131           1.1      haad #include <sys/types.h>
    132           1.1      haad #include <sys/sysmacros.h>
    133           1.1      haad #include <sys/byteorder.h>
    134           1.1      haad #include <sys/zio.h>
    135           1.1      haad #include <sys/spa.h>
    136           1.1      haad 
    137  1.1.1.1.44.1  pgoyette /*ARGSUSED*/
    138           1.1      haad void
    139  1.1.1.1.44.1  pgoyette fletcher_2_native(const void *buf, uint64_t size,
    140  1.1.1.1.44.1  pgoyette     const void *ctx_template, zio_cksum_t *zcp)
    141           1.1      haad {
    142           1.1      haad 	const uint64_t *ip = buf;
    143           1.1      haad 	const uint64_t *ipend = ip + (size / sizeof (uint64_t));
    144           1.1      haad 	uint64_t a0, b0, a1, b1;
    145           1.1      haad 
    146           1.1      haad 	for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
    147           1.1      haad 		a0 += ip[0];
    148           1.1      haad 		a1 += ip[1];
    149           1.1      haad 		b0 += a0;
    150           1.1      haad 		b1 += a1;
    151           1.1      haad 	}
    152           1.1      haad 
    153           1.1      haad 	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
    154           1.1      haad }
    155           1.1      haad 
    156  1.1.1.1.44.1  pgoyette /*ARGSUSED*/
    157           1.1      haad void
    158  1.1.1.1.44.1  pgoyette fletcher_2_byteswap(const void *buf, uint64_t size,
    159  1.1.1.1.44.1  pgoyette     const void *ctx_template, zio_cksum_t *zcp)
    160           1.1      haad {
    161           1.1      haad 	const uint64_t *ip = buf;
    162           1.1      haad 	const uint64_t *ipend = ip + (size / sizeof (uint64_t));
    163           1.1      haad 	uint64_t a0, b0, a1, b1;
    164           1.1      haad 
    165           1.1      haad 	for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
    166           1.1      haad 		a0 += BSWAP_64(ip[0]);
    167           1.1      haad 		a1 += BSWAP_64(ip[1]);
    168           1.1      haad 		b0 += a0;
    169           1.1      haad 		b1 += a1;
    170           1.1      haad 	}
    171           1.1      haad 
    172           1.1      haad 	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
    173           1.1      haad }
    174           1.1      haad 
    175  1.1.1.1.44.1  pgoyette /*ARGSUSED*/
    176           1.1      haad void
    177  1.1.1.1.44.1  pgoyette fletcher_4_native(const void *buf, uint64_t size,
    178  1.1.1.1.44.1  pgoyette     const void *ctx_template, zio_cksum_t *zcp)
    179           1.1      haad {
    180           1.1      haad 	const uint32_t *ip = buf;
    181           1.1      haad 	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
    182           1.1      haad 	uint64_t a, b, c, d;
    183           1.1      haad 
    184           1.1      haad 	for (a = b = c = d = 0; ip < ipend; ip++) {
    185           1.1      haad 		a += ip[0];
    186           1.1      haad 		b += a;
    187           1.1      haad 		c += b;
    188           1.1      haad 		d += c;
    189           1.1      haad 	}
    190           1.1      haad 
    191           1.1      haad 	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
    192           1.1      haad }
    193           1.1      haad 
    194  1.1.1.1.44.1  pgoyette /*ARGSUSED*/
    195           1.1      haad void
    196  1.1.1.1.44.1  pgoyette fletcher_4_byteswap(const void *buf, uint64_t size,
    197  1.1.1.1.44.1  pgoyette     const void *ctx_template, zio_cksum_t *zcp)
    198           1.1      haad {
    199           1.1      haad 	const uint32_t *ip = buf;
    200           1.1      haad 	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
    201           1.1      haad 	uint64_t a, b, c, d;
    202           1.1      haad 
    203           1.1      haad 	for (a = b = c = d = 0; ip < ipend; ip++) {
    204           1.1      haad 		a += BSWAP_32(ip[0]);
    205           1.1      haad 		b += a;
    206           1.1      haad 		c += b;
    207           1.1      haad 		d += c;
    208           1.1      haad 	}
    209           1.1      haad 
    210           1.1      haad 	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
    211           1.1      haad }
    212           1.1      haad 
    213           1.1      haad void
    214           1.1      haad fletcher_4_incremental_native(const void *buf, uint64_t size,
    215           1.1      haad     zio_cksum_t *zcp)
    216           1.1      haad {
    217           1.1      haad 	const uint32_t *ip = buf;
    218           1.1      haad 	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
    219           1.1      haad 	uint64_t a, b, c, d;
    220           1.1      haad 
    221           1.1      haad 	a = zcp->zc_word[0];
    222           1.1      haad 	b = zcp->zc_word[1];
    223           1.1      haad 	c = zcp->zc_word[2];
    224           1.1      haad 	d = zcp->zc_word[3];
    225           1.1      haad 
    226           1.1      haad 	for (; ip < ipend; ip++) {
    227           1.1      haad 		a += ip[0];
    228           1.1      haad 		b += a;
    229           1.1      haad 		c += b;
    230           1.1      haad 		d += c;
    231           1.1      haad 	}
    232           1.1      haad 
    233           1.1      haad 	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
    234           1.1      haad }
    235           1.1      haad 
    236           1.1      haad void
    237           1.1      haad fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
    238           1.1      haad     zio_cksum_t *zcp)
    239           1.1      haad {
    240           1.1      haad 	const uint32_t *ip = buf;
    241           1.1      haad 	const uint32_t *ipend = ip + (size / sizeof (uint32_t));
    242           1.1      haad 	uint64_t a, b, c, d;
    243           1.1      haad 
    244           1.1      haad 	a = zcp->zc_word[0];
    245           1.1      haad 	b = zcp->zc_word[1];
    246           1.1      haad 	c = zcp->zc_word[2];
    247           1.1      haad 	d = zcp->zc_word[3];
    248           1.1      haad 
    249           1.1      haad 	for (; ip < ipend; ip++) {
    250           1.1      haad 		a += BSWAP_32(ip[0]);
    251           1.1      haad 		b += a;
    252           1.1      haad 		c += b;
    253           1.1      haad 		d += c;
    254           1.1      haad 	}
    255           1.1      haad 
    256           1.1      haad 	ZIO_SET_CHECKSUM(zcp, a, b, c, d);
    257           1.1      haad }
    258