/*	$NetBSD: skipjack.c,v 1.1.4.4 2004/09/21 13:26:23 skrll Exp $ */
/*	$OpenBSD: skipjack.c,v 1.3 2001/05/05 00:31:34 angelos Exp $	*/

/*
 * Further optimized test implementation of SKIPJACK algorithm
 * Mark Tillotson <markt@chaos.org.uk>, 25 June 98
 * Optimizations suit RISC (lots of registers) machine best.
 *
 * based on unoptimized implementation of
 * Panu Rissanen <bande@lut.fi> 960624
 *
 * SKIPJACK and KEA Algorithm Specifications
 * Version 2.0
 * 29 May 1998
 */
16 1.1.4.2 skrll
17 1.1.4.2 skrll #include <sys/cdefs.h>
18 1.1.4.4 skrll __KERNEL_RCSID(0, "$NetBSD: skipjack.c,v 1.1.4.4 2004/09/21 13:26:23 skrll Exp $");
19 1.1.4.2 skrll
20 1.1.4.2 skrll #include <sys/param.h>
21 1.1.4.2 skrll #include <crypto/skipjack/skipjack.h>
22 1.1.4.2 skrll #include <sys/malloc.h>
23 1.1.4.2 skrll #include <opencrypto/cryptodev.h>
24 1.1.4.2 skrll
/*
 * Fixed 256-entry byte substitution table used by the G-function.
 * It is never indexed directly by the cipher routines; subkey_table_gen()
 * folds each key byte into a private copy (ftable[in ^ keybyte]).
 * Every byte value 0x00..0xff occurs exactly once.
 */
static const u_int8_t ftable[0x100] =
{
	0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4,
	0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9,
	0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e,
	0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28,
	0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68,
	0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53,
	0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19,
	0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2,
	0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b,
	0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8,
	0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0,
	0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90,
	0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69,
	0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76,
	0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20,
	0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d,
	0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43,
	0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18,
	0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa,
	0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4,
	0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87,
	0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40,
	0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b,
	0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5,
	0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0,
	0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2,
	0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1,
	0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8,
	0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5,
	0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac,
	0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3,
	0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46
};
60 1.1.4.2 skrll
61 1.1.4.2 skrll /*
62 1.1.4.2 skrll * For each key byte generate a table to represent the function
63 1.1.4.2 skrll * ftable [in ^ keybyte]
64 1.1.4.2 skrll *
65 1.1.4.2 skrll * These tables used to save an XOR in each stage of the G-function
66 1.1.4.2 skrll * the tables are hopefully pointed to by register allocated variables
67 1.1.4.2 skrll * k0, k1..k9
68 1.1.4.2 skrll */
69 1.1.4.2 skrll void
70 1.1.4.2 skrll subkey_table_gen (const u_int8_t *key, u_int8_t **key_tables)
71 1.1.4.2 skrll {
72 1.1.4.2 skrll int i, k;
73 1.1.4.2 skrll
74 1.1.4.2 skrll for (k = 0; k < 10; k++) {
75 1.1.4.2 skrll u_int8_t key_byte = key [k];
76 1.1.4.2 skrll u_int8_t * table = key_tables[k];
77 1.1.4.2 skrll for (i = 0; i < 0x100; i++)
78 1.1.4.2 skrll table [i] = ftable [i ^ key_byte];
79 1.1.4.2 skrll }
80 1.1.4.2 skrll }
81 1.1.4.2 skrll
82 1.1.4.2 skrll
/*
 * G-function: four chained table lookups on a 16-bit word held as two
 * bytes (ih = high, il = low); the result is left in (oh, ol).
 *
 * The first four arguments are the digits of the key tables to use;
 * they are token-pasted onto "k", so variables named k0 .. k9 (each
 * indexable by a byte value) must be in scope where the macro is used.
 *
 * The callers in this file pass the same variables for input and output
 * (oh == ih, ol == il); the statement order below is what makes that
 * aliasing safe, so do not reorder it.
 *
 * Wrapped in do { } while (0) so that "g0(...);" is exactly one
 * statement, including as the body of an if/else.
 */
#define g(k0, k1, k2, k3, ih, il, oh, ol) \
do { \
	(oh) = k##k0[(il)] ^ (ih); \
	(ol) = k##k1[(oh)] ^ (il); \
	(oh) = k##k2[(ol)] ^ (oh); \
	(ol) = k##k3[(oh)] ^ (ol); \
} while (/*CONSTCOND*/0)

/* gN uses key tables N, N+1, N+2, N+3 (indices taken modulo 10). */
#define g0(ih, il, oh, ol) g(0, 1, 2, 3, ih, il, oh, ol)
#define g4(ih, il, oh, ol) g(4, 5, 6, 7, ih, il, oh, ol)
#define g8(ih, il, oh, ol) g(8, 9, 0, 1, ih, il, oh, ol)
#define g2(ih, il, oh, ol) g(2, 3, 4, 5, ih, il, oh, ol)
#define g6(ih, il, oh, ol) g(6, 7, 8, 9, ih, il, oh, ol)
96 1.1.4.2 skrll
97 1.1.4.2 skrll
/*
 * Inverse of the G-function: the same four table lookups applied in
 * the opposite order (table k3 first, k0 last).  Input is (ih, il),
 * the result is left in (oh, ol).
 *
 * The first four arguments are the digits of the key tables to use;
 * they are token-pasted onto "k", so variables named k0 .. k9 (each
 * indexable by a byte value) must be in scope where the macro is used.
 *
 * The callers in this file pass the same variables for input and output
 * (oh == ih, ol == il); the statement order below is what makes that
 * aliasing safe, so do not reorder it.
 *
 * Wrapped in do { } while (0) so that "g0_inv(...);" is exactly one
 * statement, including as the body of an if/else.
 */
#define g_inv(k0, k1, k2, k3, ih, il, oh, ol) \
do { \
	(ol) = k##k3[(ih)] ^ (il); \
	(oh) = k##k2[(ol)] ^ (ih); \
	(ol) = k##k1[(oh)] ^ (ol); \
	(oh) = k##k0[(ol)] ^ (oh); \
} while (/*CONSTCOND*/0)

/* gN_inv uses key tables N, N+1, N+2, N+3 (indices taken modulo 10). */
#define g0_inv(ih, il, oh, ol) g_inv(0, 1, 2, 3, ih, il, oh, ol)
#define g4_inv(ih, il, oh, ol) g_inv(4, 5, 6, 7, ih, il, oh, ol)
#define g8_inv(ih, il, oh, ol) g_inv(8, 9, 0, 1, ih, il, oh, ol)
#define g2_inv(ih, il, oh, ol) g_inv(2, 3, 4, 5, ih, il, oh, ol)
#define g6_inv(ih, il, oh, ol) g_inv(6, 7, 8, 9, ih, il, oh, ol)
112 1.1.4.2 skrll
/*
 * Optimized version of the Skipjack algorithm.
 *
 * The appropriate g-function is inlined for each round.
 *
 * The data movement is minimized by rotating the names of the
 * variables w1..w4, not their contents (saves 3 moves per round).
 *
 * The loops are completely unrolled (needed to staticize the choice of g).
 *
 * Compiles to about 470 instructions on a Sparc (gcc -O),
 * which is about 58 instructions per byte, 14 per round.
 * gcc seems to leave in some unnecessary "and with 0xFF" operations,
 * but only in the latter part of the functions.  Perhaps it
 * runs out of resources to properly optimize a long inlined function?
 * In theory it should get about 11 instructions per round, not 14.
 */
129 1.1.4.2 skrll
/*
 * Encrypt one 8-byte block (32 fully unrolled rounds).
 *
 * plain:      8 input bytes, read as four (high, low) byte pairs.
 * cipher:     8 output bytes.  Nothing is stored until every input byte
 *             has been loaded into a local, so cipher may be the same
 *             buffer as plain.
 * key_tables: 10 byte-indexed tables, as filled in by subkey_table_gen();
 *             they are only read here.
 *
 * The number XORed into the low byte in each round is the round
 * counter, 1 .. 32.
 */
void
skipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key_tables)
{
	/* setup 4 16-bit portions */
	u_int8_t wh1 = plain[0];  u_int8_t wl1 = plain[1];
	u_int8_t wh2 = plain[2];  u_int8_t wl2 = plain[3];
	u_int8_t wh3 = plain[4];  u_int8_t wl3 = plain[5];
	u_int8_t wh4 = plain[6];  u_int8_t wl4 = plain[7];

	/* the g*() macros refer to these locals by name (k0 .. k9) */
	const u_int8_t *k0 = key_tables[0];
	const u_int8_t *k1 = key_tables[1];
	const u_int8_t *k2 = key_tables[2];
	const u_int8_t *k3 = key_tables[3];
	const u_int8_t *k4 = key_tables[4];
	const u_int8_t *k5 = key_tables[5];
	const u_int8_t *k6 = key_tables[6];
	const u_int8_t *k7 = key_tables[7];
	const u_int8_t *k8 = key_tables[8];
	const u_int8_t *k9 = key_tables[9];

	/* first 8 rounds: apply G, then fold the result into the next word */
	g0 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 1; wh4 ^= wh1;
	g4 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 2; wh3 ^= wh4;
	g8 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 3; wh2 ^= wh3;
	g2 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 4; wh1 ^= wh2;
	g6 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 5; wh4 ^= wh1;
	g0 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 6; wh3 ^= wh4;
	g4 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 7; wh2 ^= wh3;
	g8 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 8; wh1 ^= wh2;

	/* second 8 rounds: fold the word into its neighbour first, then apply G */
	wh2 ^= wh1; wl2 ^= wl1 ^ 9 ; g2 (wh1,wl1, wh1,wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 10; g6 (wh4,wl4, wh4,wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 11; g0 (wh3,wl3, wh3,wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 12; g4 (wh2,wl2, wh2,wl2);
	wh2 ^= wh1; wl2 ^= wl1 ^ 13; g8 (wh1,wl1, wh1,wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 14; g2 (wh4,wl4, wh4,wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 15; g6 (wh3,wl3, wh3,wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 16; g0 (wh2,wl2, wh2,wl2);

	/* third 8 rounds: same shape as the first 8 */
	g4 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 17; wh4 ^= wh1;
	g8 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 18; wh3 ^= wh4;
	g2 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 19; wh2 ^= wh3;
	g6 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 20; wh1 ^= wh2;
	g0 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 21; wh4 ^= wh1;
	g4 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 22; wh3 ^= wh4;
	g8 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 23; wh2 ^= wh3;
	g2 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 24; wh1 ^= wh2;

	/* last 8 rounds: same shape as the second 8 */
	wh2 ^= wh1; wl2 ^= wl1 ^ 25; g6 (wh1,wl1, wh1,wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 26; g0 (wh4,wl4, wh4,wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 27; g4 (wh3,wl3, wh3,wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 28; g8 (wh2,wl2, wh2,wl2);
	wh2 ^= wh1; wl2 ^= wl1 ^ 29; g2 (wh1,wl1, wh1,wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 30; g6 (wh4,wl4, wh4,wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 31; g0 (wh3,wl3, wh3,wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 32; g4 (wh2,wl2, wh2,wl2);

	/* pack into byte vector */
	cipher[0] = wh1;  cipher[1] = wl1;
	cipher[2] = wh2;  cipher[3] = wl2;
	cipher[4] = wh3;  cipher[5] = wl3;
	cipher[6] = wh4;  cipher[7] = wl4;
}
195 1.1.4.2 skrll
196 1.1.4.2 skrll
/*
 * Decrypt one 8-byte block: undoes skipjack_forwards() by replaying its
 * 32 rounds in reverse order (round counter 32 down to 1) with the
 * inverse G-function.
 *
 * cipher:     8 input bytes, read as four (high, low) byte pairs.
 * plain:      8 output bytes.  Nothing is stored until every input byte
 *             has been loaded into a local, so plain may be the same
 *             buffer as cipher.
 * key_tables: 10 byte-indexed tables, as filled in by subkey_table_gen();
 *             they are only read here.
 */
void
skipjack_backwards(u_int8_t *cipher, u_int8_t *plain, u_int8_t **key_tables)
{
	/* setup 4 16-bit portions */
	u_int8_t wh1 = cipher[0];  u_int8_t wl1 = cipher[1];
	u_int8_t wh2 = cipher[2];  u_int8_t wl2 = cipher[3];
	u_int8_t wh3 = cipher[4];  u_int8_t wl3 = cipher[5];
	u_int8_t wh4 = cipher[6];  u_int8_t wl4 = cipher[7];

	/* the g*_inv() macros refer to these locals by name (k0 .. k9) */
	const u_int8_t *k0 = key_tables[0];
	const u_int8_t *k1 = key_tables[1];
	const u_int8_t *k2 = key_tables[2];
	const u_int8_t *k3 = key_tables[3];
	const u_int8_t *k4 = key_tables[4];
	const u_int8_t *k5 = key_tables[5];
	const u_int8_t *k6 = key_tables[6];
	const u_int8_t *k7 = key_tables[7];
	const u_int8_t *k8 = key_tables[8];
	const u_int8_t *k9 = key_tables[9];

	/* first 8 rounds (undo encryption rounds 32 .. 25) */
	g4_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 32; wh3 ^= wh2;
	g0_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 31; wh4 ^= wh3;
	g6_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 30; wh1 ^= wh4;
	g2_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 29; wh2 ^= wh1;
	g8_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 28; wh3 ^= wh2;
	g4_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 27; wh4 ^= wh3;
	g0_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 26; wh1 ^= wh4;
	g6_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 25; wh2 ^= wh1;

	/* second 8 rounds (undo encryption rounds 24 .. 17) */
	wh1 ^= wh2; wl1 ^= wl2 ^ 24; g2_inv (wh2,wl2, wh2,wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 23; g8_inv (wh3,wl3, wh3,wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 22; g4_inv (wh4,wl4, wh4,wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 21; g0_inv (wh1,wl1, wh1,wl1);
	wh1 ^= wh2; wl1 ^= wl2 ^ 20; g6_inv (wh2,wl2, wh2,wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 19; g2_inv (wh3,wl3, wh3,wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 18; g8_inv (wh4,wl4, wh4,wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 17; g4_inv (wh1,wl1, wh1,wl1);

	/* third 8 rounds (undo encryption rounds 16 .. 9) */
	g0_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 16; wh3 ^= wh2;
	g6_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 15; wh4 ^= wh3;
	g2_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 14; wh1 ^= wh4;
	g8_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 13; wh2 ^= wh1;
	g4_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 12; wh3 ^= wh2;
	g0_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 11; wh4 ^= wh3;
	g6_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 10; wh1 ^= wh4;
	g2_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 9;  wh2 ^= wh1;

	/* last 8 rounds (undo encryption rounds 8 .. 1) */
	wh1 ^= wh2; wl1 ^= wl2 ^ 8; g8_inv (wh2,wl2, wh2,wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 7; g4_inv (wh3,wl3, wh3,wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 6; g0_inv (wh4,wl4, wh4,wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 5; g6_inv (wh1,wl1, wh1,wl1);
	wh1 ^= wh2; wl1 ^= wl2 ^ 4; g2_inv (wh2,wl2, wh2,wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 3; g8_inv (wh3,wl3, wh3,wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 2; g4_inv (wh4,wl4, wh4,wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 1; g0_inv (wh1,wl1, wh1,wl1);

	/* pack into byte vector */
	plain[0] = wh1;  plain[1] = wl1;
	plain[2] = wh2;  plain[3] = wl2;
	plain[4] = wh3;  plain[5] = wl3;
	plain[6] = wh4;  plain[7] = wl4;
}
263