Home | History | Annotate | Line # | Download | only in skipjack
skipjack.c revision 1.1.4.4
      1  1.1.4.4  skrll /*	$NetBSD: skipjack.c,v 1.1.4.4 2004/09/21 13:26:23 skrll Exp $ */
      2  1.1.4.2  skrll /*	$OpenBSD: skipjack.c,v 1.3 2001/05/05 00:31:34 angelos Exp $	*/
      3  1.1.4.2  skrll 
      4  1.1.4.2  skrll /*
      5  1.1.4.2  skrll  * Further optimized test implementation of SKIPJACK algorithm
      6  1.1.4.2  skrll  * Mark Tillotson <markt (at) chaos.org.uk>, 25 June 98
      7  1.1.4.2  skrll  * Optimizations suit RISC (lots of registers) machine best.
      8  1.1.4.2  skrll  *
      9  1.1.4.2  skrll  * based on unoptimized implementation of
     10  1.1.4.2  skrll  * Panu Rissanen <bande (at) lut.fi> 960624
     11  1.1.4.2  skrll  *
     12  1.1.4.2  skrll  * SKIPJACK and KEA Algorithm Specifications
     13  1.1.4.2  skrll  * Version 2.0
     14  1.1.4.2  skrll  * 29 May 1998
     15  1.1.4.2  skrll */
     16  1.1.4.2  skrll 
     17  1.1.4.2  skrll #include <sys/cdefs.h>
     18  1.1.4.4  skrll __KERNEL_RCSID(0, "$NetBSD: skipjack.c,v 1.1.4.4 2004/09/21 13:26:23 skrll Exp $");
     19  1.1.4.2  skrll 
     20  1.1.4.2  skrll #include <sys/param.h>
     21  1.1.4.2  skrll #include <crypto/skipjack/skipjack.h>
     22  1.1.4.2  skrll #include <sys/malloc.h>
     23  1.1.4.2  skrll #include <opencrypto/cryptodev.h>
     24  1.1.4.2  skrll 
/*
 * Fixed 256-entry byte substitution table.  subkey_table_gen() below
 * reads it as ftable[in ^ keybyte] to build the per-key-byte tables.
 * Laid out 16 entries per row; the trailing comment on each row is the
 * index of its first entry.
 */
static const u_int8_t ftable[256] = {
	0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4,
	0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9,	/* 0x00 */
	0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e,
	0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28,	/* 0x10 */
	0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68,
	0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53,	/* 0x20 */
	0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19,
	0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2,	/* 0x30 */
	0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b,
	0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8,	/* 0x40 */
	0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0,
	0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90,	/* 0x50 */
	0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69,
	0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76,	/* 0x60 */
	0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20,
	0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d,	/* 0x70 */
	0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43,
	0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18,	/* 0x80 */
	0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa,
	0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4,	/* 0x90 */
	0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87,
	0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40,	/* 0xa0 */
	0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b,
	0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5,	/* 0xb0 */
	0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0,
	0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2,	/* 0xc0 */
	0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1,
	0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8,	/* 0xd0 */
	0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5,
	0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac,	/* 0xe0 */
	0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3,
	0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46	/* 0xf0 */
};
     60  1.1.4.2  skrll 
     61  1.1.4.2  skrll /*
     62  1.1.4.2  skrll  * For each key byte generate a table to represent the function
     63  1.1.4.2  skrll  *    ftable [in ^ keybyte]
     64  1.1.4.2  skrll  *
     65  1.1.4.2  skrll  * These tables used to save an XOR in each stage of the G-function
     66  1.1.4.2  skrll  * the tables are hopefully pointed to by register allocated variables
     67  1.1.4.2  skrll  * k0, k1..k9
     68  1.1.4.2  skrll  */
     69  1.1.4.2  skrll void
     70  1.1.4.2  skrll subkey_table_gen (const u_int8_t *key, u_int8_t **key_tables)
     71  1.1.4.2  skrll {
     72  1.1.4.2  skrll 	int i, k;
     73  1.1.4.2  skrll 
     74  1.1.4.2  skrll 	for (k = 0; k < 10; k++) {
     75  1.1.4.2  skrll 		u_int8_t   key_byte = key [k];
     76  1.1.4.2  skrll 		u_int8_t * table = key_tables[k];
     77  1.1.4.2  skrll 		for (i = 0; i < 0x100; i++)
     78  1.1.4.2  skrll 			table [i] = ftable [i ^ key_byte];
     79  1.1.4.2  skrll 	}
     80  1.1.4.2  skrll }
     81  1.1.4.2  skrll 
     82  1.1.4.2  skrll 
/*
 * Four-stage G-function on a byte pair (ih, il), result in (oh, ol).
 *
 * a..d are the digits 0-9 selecting which of the caller's local table
 * pointers k0..k9 is used at each stage (the name is formed by token
 * pasting, so those locals must exist under exactly these names).
 *
 * oh/ol may be the same lvalues as ih/il; every caller in this file uses
 * it that way.  Arguments are evaluated more than once, so they must be
 * free of side effects.
 *
 * Wrapped in do { } while (0) so that an invocation followed by ';' is a
 * single statement and is safe in an unbraced if/else.
 */
#define g(a, b, c, d, ih, il, oh, ol) \
do { \
	(oh) = k##a [(il)] ^ (ih); \
	(ol) = k##b [(oh)] ^ (il); \
	(oh) = k##c [(ol)] ^ (oh); \
	(ol) = k##d [(oh)] ^ (ol); \
} while (/* CONSTCOND */ 0)

/* gN: G-function using four consecutive tables starting at kN (mod 10) */
#define g0(ih, il, oh, ol) g(0, 1, 2, 3, ih, il, oh, ol)
#define g4(ih, il, oh, ol) g(4, 5, 6, 7, ih, il, oh, ol)
#define g8(ih, il, oh, ol) g(8, 9, 0, 1, ih, il, oh, ol)
#define g2(ih, il, oh, ol) g(2, 3, 4, 5, ih, il, oh, ol)
#define g6(ih, il, oh, ol) g(6, 7, 8, 9, ih, il, oh, ol)
     96  1.1.4.2  skrll 
     97  1.1.4.2  skrll 
/*
 * Inverse of the four-stage G-function: the same four tables are applied
 * in the opposite order (d first, a last), so g_inv(a, b, c, d, ...)
 * undoes g(a, b, c, d, ...) for the same tables.
 *
 * a..d are the digits 0-9 selecting which of the caller's local table
 * pointers k0..k9 is used (the name is formed by token pasting).
 *
 * oh/ol may be the same lvalues as ih/il; every caller in this file uses
 * it that way.  Arguments are evaluated more than once, so they must be
 * free of side effects.
 *
 * Wrapped in do { } while (0) so that an invocation followed by ';' is a
 * single statement and is safe in an unbraced if/else.
 */
#define g_inv(a, b, c, d, ih, il, oh, ol) \
do { \
	(ol) = k##d [(ih)] ^ (il); \
	(oh) = k##c [(ol)] ^ (ih); \
	(ol) = k##b [(oh)] ^ (ol); \
	(oh) = k##a [(ol)] ^ (oh); \
} while (/* CONSTCOND */ 0)

/* gN_inv: inverse G-function for the tables starting at kN (mod 10) */
#define g0_inv(ih, il, oh, ol) g_inv(0, 1, 2, 3, ih, il, oh, ol)
#define g4_inv(ih, il, oh, ol) g_inv(4, 5, 6, 7, ih, il, oh, ol)
#define g8_inv(ih, il, oh, ol) g_inv(8, 9, 0, 1, ih, il, oh, ol)
#define g2_inv(ih, il, oh, ol) g_inv(2, 3, 4, 5, ih, il, oh, ol)
#define g6_inv(ih, il, oh, ol) g_inv(6, 7, 8, 9, ih, il, oh, ol)
    112  1.1.4.2  skrll 
/*
 * Optimized version of the Skipjack algorithm.
 *
 * The appropriate g-function is inlined for each round.
 *
 * Data movement is minimized by rotating the names of the
 * variables w1..w4, not their contents (saves 3 moves per round).
 *
 * The loops are completely unrolled (needed so that the choice of g
 * is fixed at compile time).
 *
 * Compiles to about 470 instructions on a Sparc (gcc -O),
 * which is about 58 instructions per byte, 14 per round.
 * gcc seems to leave in some unnecessary "and with 0xFF" operations,
 * but only in the latter part of the functions.  Perhaps it
 * runs out of resources to properly optimize a long inlined function?
 * In theory it should take about 11 instructions per round, not 14.
 */
    129  1.1.4.2  skrll 
/*
 * Round shapes used by skipjack_forwards().  "gw" is the word passed
 * through the g-function G, "xw" is the word that has gw and the round
 * counter n XORed into it.  Words are named wN and are held as a
 * high/low byte pair wNh/wNl.
 */

/* G first, then mix the result into xw */
#define SJ_ENC_G_MIX(G, gw, xw, n) \
do { \
	G (gw##h, gw##l, gw##h, gw##l); \
	xw##l ^= gw##l ^ (n); \
	xw##h ^= gw##h; \
} while (/* CONSTCOND */ 0)

/* mix gw into xw first, then G */
#define SJ_ENC_MIX_G(G, gw, xw, n) \
do { \
	xw##h ^= gw##h; \
	xw##l ^= gw##l ^ (n); \
	G (gw##h, gw##l, gw##h, gw##l); \
} while (/* CONSTCOND */ 0)

/*
 * Run the 32 forward rounds over one 8-byte block.
 *
 * plain:      8 input bytes, read as four high/low byte pairs.
 * cipher:     8 output bytes.  All input bytes are loaded before any
 *             output byte is stored, so cipher may be the same buffer
 *             as plain.
 * key_tables: key_tables[0]..key_tables[9] are used as the lookup tables
 *             of the g-functions (presumably filled in beforehand by
 *             subkey_table_gen()).
 */
void
skipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key_tables)
{
	/* unpack the block into four words of two bytes each */
	u_int8_t w1h = plain[0], w1l = plain[1];
	u_int8_t w2h = plain[2], w2l = plain[3];
	u_int8_t w3h = plain[4], w3l = plain[5];
	u_int8_t w4h = plain[6], w4l = plain[7];

	/* the g macros reach these tables by the literal names k0..k9 */
	u_int8_t *k0 = key_tables[0], *k1 = key_tables[1];
	u_int8_t *k2 = key_tables[2], *k3 = key_tables[3];
	u_int8_t *k4 = key_tables[4], *k5 = key_tables[5];
	u_int8_t *k6 = key_tables[6], *k7 = key_tables[7];
	u_int8_t *k8 = key_tables[8], *k9 = key_tables[9];

	/* rounds 1-8 */
	SJ_ENC_G_MIX(g0, w1, w4, 1);
	SJ_ENC_G_MIX(g4, w4, w3, 2);
	SJ_ENC_G_MIX(g8, w3, w2, 3);
	SJ_ENC_G_MIX(g2, w2, w1, 4);
	SJ_ENC_G_MIX(g6, w1, w4, 5);
	SJ_ENC_G_MIX(g0, w4, w3, 6);
	SJ_ENC_G_MIX(g4, w3, w2, 7);
	SJ_ENC_G_MIX(g8, w2, w1, 8);

	/* rounds 9-16 */
	SJ_ENC_MIX_G(g2, w1, w2, 9);
	SJ_ENC_MIX_G(g6, w4, w1, 10);
	SJ_ENC_MIX_G(g0, w3, w4, 11);
	SJ_ENC_MIX_G(g4, w2, w3, 12);
	SJ_ENC_MIX_G(g8, w1, w2, 13);
	SJ_ENC_MIX_G(g2, w4, w1, 14);
	SJ_ENC_MIX_G(g6, w3, w4, 15);
	SJ_ENC_MIX_G(g0, w2, w3, 16);

	/* rounds 17-24 */
	SJ_ENC_G_MIX(g4, w1, w4, 17);
	SJ_ENC_G_MIX(g8, w4, w3, 18);
	SJ_ENC_G_MIX(g2, w3, w2, 19);
	SJ_ENC_G_MIX(g6, w2, w1, 20);
	SJ_ENC_G_MIX(g0, w1, w4, 21);
	SJ_ENC_G_MIX(g4, w4, w3, 22);
	SJ_ENC_G_MIX(g8, w3, w2, 23);
	SJ_ENC_G_MIX(g2, w2, w1, 24);

	/* rounds 25-32 */
	SJ_ENC_MIX_G(g6, w1, w2, 25);
	SJ_ENC_MIX_G(g0, w4, w1, 26);
	SJ_ENC_MIX_G(g4, w3, w4, 27);
	SJ_ENC_MIX_G(g8, w2, w3, 28);
	SJ_ENC_MIX_G(g2, w1, w2, 29);
	SJ_ENC_MIX_G(g6, w4, w1, 30);
	SJ_ENC_MIX_G(g0, w3, w4, 31);
	SJ_ENC_MIX_G(g4, w2, w3, 32);

	/* store the four words back as a byte vector */
	cipher[0] = w1h;
	cipher[1] = w1l;
	cipher[2] = w2h;
	cipher[3] = w2l;
	cipher[4] = w3h;
	cipher[5] = w3l;
	cipher[6] = w4h;
	cipher[7] = w4l;
}

#undef SJ_ENC_G_MIX
#undef SJ_ENC_MIX_G
    195  1.1.4.2  skrll 
    196  1.1.4.2  skrll 
/*
 * Round shapes used by skipjack_backwards().  "gw" is the word passed
 * through the inverse g-function G, "xw" is the word that has gw and the
 * round counter n XORed into it.  Words are named wN and are held as a
 * high/low byte pair wNh/wNl.
 */

/* inverse G first, then mix the result into xw */
#define SJ_DEC_G_MIX(G, gw, xw, n) \
do { \
	G (gw##h, gw##l, gw##h, gw##l); \
	xw##l ^= gw##l ^ (n); \
	xw##h ^= gw##h; \
} while (/* CONSTCOND */ 0)

/* mix gw into xw first, then inverse G */
#define SJ_DEC_MIX_G(G, gw, xw, n) \
do { \
	xw##h ^= gw##h; \
	xw##l ^= gw##l ^ (n); \
	G (gw##h, gw##l, gw##h, gw##l); \
} while (/* CONSTCOND */ 0)

/*
 * Run the 32 rounds in reverse over one 8-byte block: the round counter
 * runs from 32 down to 1 and each step uses the inverse g-function that
 * matches the corresponding step of skipjack_forwards().
 *
 * cipher:     8 input bytes, read as four high/low byte pairs.
 * plain:      8 output bytes.  All input bytes are loaded before any
 *             output byte is stored, so plain may be the same buffer
 *             as cipher.
 * key_tables: key_tables[0]..key_tables[9] are used as the lookup tables
 *             of the inverse g-functions (presumably filled in beforehand
 *             by subkey_table_gen()).
 */
void
skipjack_backwards (u_int8_t *cipher, u_int8_t *plain, u_int8_t **key_tables)
{
	/* unpack the block into four words of two bytes each */
	u_int8_t w1h = cipher[0], w1l = cipher[1];
	u_int8_t w2h = cipher[2], w2l = cipher[3];
	u_int8_t w3h = cipher[4], w3l = cipher[5];
	u_int8_t w4h = cipher[6], w4l = cipher[7];

	/* the g_inv macros reach these tables by the literal names k0..k9 */
	u_int8_t *k0 = key_tables[0], *k1 = key_tables[1];
	u_int8_t *k2 = key_tables[2], *k3 = key_tables[3];
	u_int8_t *k4 = key_tables[4], *k5 = key_tables[5];
	u_int8_t *k6 = key_tables[6], *k7 = key_tables[7];
	u_int8_t *k8 = key_tables[8], *k9 = key_tables[9];

	/* undo rounds 32-25 */
	SJ_DEC_G_MIX(g4_inv, w2, w3, 32);
	SJ_DEC_G_MIX(g0_inv, w3, w4, 31);
	SJ_DEC_G_MIX(g6_inv, w4, w1, 30);
	SJ_DEC_G_MIX(g2_inv, w1, w2, 29);
	SJ_DEC_G_MIX(g8_inv, w2, w3, 28);
	SJ_DEC_G_MIX(g4_inv, w3, w4, 27);
	SJ_DEC_G_MIX(g0_inv, w4, w1, 26);
	SJ_DEC_G_MIX(g6_inv, w1, w2, 25);

	/* undo rounds 24-17 */
	SJ_DEC_MIX_G(g2_inv, w2, w1, 24);
	SJ_DEC_MIX_G(g8_inv, w3, w2, 23);
	SJ_DEC_MIX_G(g4_inv, w4, w3, 22);
	SJ_DEC_MIX_G(g0_inv, w1, w4, 21);
	SJ_DEC_MIX_G(g6_inv, w2, w1, 20);
	SJ_DEC_MIX_G(g2_inv, w3, w2, 19);
	SJ_DEC_MIX_G(g8_inv, w4, w3, 18);
	SJ_DEC_MIX_G(g4_inv, w1, w4, 17);

	/* undo rounds 16-9 */
	SJ_DEC_G_MIX(g0_inv, w2, w3, 16);
	SJ_DEC_G_MIX(g6_inv, w3, w4, 15);
	SJ_DEC_G_MIX(g2_inv, w4, w1, 14);
	SJ_DEC_G_MIX(g8_inv, w1, w2, 13);
	SJ_DEC_G_MIX(g4_inv, w2, w3, 12);
	SJ_DEC_G_MIX(g0_inv, w3, w4, 11);
	SJ_DEC_G_MIX(g6_inv, w4, w1, 10);
	SJ_DEC_G_MIX(g2_inv, w1, w2, 9);

	/* undo rounds 8-1 */
	SJ_DEC_MIX_G(g8_inv, w2, w1, 8);
	SJ_DEC_MIX_G(g4_inv, w3, w2, 7);
	SJ_DEC_MIX_G(g0_inv, w4, w3, 6);
	SJ_DEC_MIX_G(g6_inv, w1, w4, 5);
	SJ_DEC_MIX_G(g2_inv, w2, w1, 4);
	SJ_DEC_MIX_G(g8_inv, w3, w2, 3);
	SJ_DEC_MIX_G(g4_inv, w4, w3, 2);
	SJ_DEC_MIX_G(g0_inv, w1, w4, 1);

	/* store the four words back as a byte vector */
	plain[0] = w1h;
	plain[1] = w1l;
	plain[2] = w2h;
	plain[3] = w2l;
	plain[4] = w3h;
	plain[5] = w3l;
	plain[6] = w4h;
	plain[7] = w4l;
}

#undef SJ_DEC_G_MIX
#undef SJ_DEC_MIX_G
    263