1 1.4 pgoyette /* $NetBSD: skipjack.c,v 1.4 2014/01/01 15:18:57 pgoyette Exp $ */ 2 1.1 tls /* $OpenBSD: skipjack.c,v 1.3 2001/05/05 00:31:34 angelos Exp $ */ 3 1.1 tls 4 1.2 perry /* 5 1.2 perry * Further optimized test implementation of SKIPJACK algorithm 6 1.1 tls * Mark Tillotson <markt (at) chaos.org.uk>, 25 June 98 7 1.1 tls * Optimizations suit RISC (lots of registers) machine best. 8 1.1 tls * 9 1.1 tls * based on unoptimized implementation of 10 1.1 tls * Panu Rissanen <bande (at) lut.fi> 960624 11 1.1 tls * 12 1.2 perry * SKIPJACK and KEA Algorithm Specifications 13 1.2 perry * Version 2.0 14 1.1 tls * 29 May 1998 15 1.1 tls */ 16 1.1 tls 17 1.1 tls #include <sys/cdefs.h> 18 1.4 pgoyette __KERNEL_RCSID(0, "$NetBSD: skipjack.c,v 1.4 2014/01/01 15:18:57 pgoyette Exp $"); 19 1.1 tls 20 1.4 pgoyette #include <sys/errno.h> 21 1.4 pgoyette #include <sys/malloc.h> 22 1.4 pgoyette #include <sys/module.h> 23 1.1 tls #include <sys/param.h> 24 1.4 pgoyette 25 1.1 tls #include <crypto/skipjack/skipjack.h> 26 1.1 tls #include <opencrypto/cryptodev.h> 27 1.1 tls 28 1.1 tls static const u_int8_t ftable[0x100] = 29 1.2 perry { 30 1.2 perry 0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4, 31 1.2 perry 0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9, 32 1.2 perry 0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e, 33 1.2 perry 0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28, 34 1.2 perry 0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68, 35 1.2 perry 0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53, 36 1.2 perry 0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19, 37 1.2 perry 0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2, 38 1.2 perry 0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b, 39 1.2 perry 0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8, 40 1.2 perry 0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0, 41 1.2 perry 0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90, 42 1.2 perry 0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69, 43 1.2 perry 0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76, 44 1.2 perry 0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20, 45 1.2 perry 0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d, 46 1.2 perry 0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43, 47 1.2 perry 0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18, 48 1.2 perry 0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa, 49 1.2 perry 0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4, 50 1.2 perry 0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87, 51 1.2 perry 0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40, 52 1.2 perry 0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b, 53 1.2 perry 0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5, 54 1.2 perry 0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0, 55 1.2 perry 0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2, 56 1.2 perry 0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1, 57 1.2 perry 0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8, 58 1.2 perry 0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5, 59 1.2 perry 0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac, 60 1.2 perry 0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3, 61 1.1 tls 0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46 62 1.1 tls }; 63 1.1 tls 64 1.1 tls /* 65 1.2 perry * For each key byte generate a table to represent the function 66 1.1 tls * ftable [in ^ keybyte] 67 1.1 tls * 68 1.1 tls * These tables used to save an XOR in each stage of the G-function 69 1.1 tls * the tables are hopefully pointed to by register allocated variables 70 1.1 tls * k0, k1..k9 71 1.1 tls */ 72 1.1 tls void 73 1.1 tls subkey_table_gen (const u_int8_t *key, u_int8_t **key_tables) 74 1.1 tls { 75 1.1 tls int i, k; 76 1.1 tls 77 1.1 tls for (k = 0; k < 10; k++) { 78 1.1 tls u_int8_t key_byte = key [k]; 79 1.1 tls u_int8_t * table = key_tables[k]; 80 1.1 tls for (i = 0; i < 0x100; i++) 81 1.1 tls table [i] = ftable [i ^ key_byte]; 82 1.1 tls } 83 1.1 tls } 84 1.1 tls 85 1.1 tls 86 1.1 tls #define g(k0, k1, k2, k3, ih, il, oh, ol) \ 87 1.1 tls { \ 88 1.1 tls oh = k##k0 [il] ^ ih; \ 89 1.1 tls ol = k##k1 [oh] ^ il; \ 90 1.1 tls oh = k##k2 [ol] ^ oh; \ 91 1.1 tls ol = k##k3 [oh] ^ ol; \ 92 1.1 tls } 93 1.1 tls 94 1.1 tls #define g0(ih, il, oh, ol) g(0, 1, 2, 3, ih, il, oh, ol) 95 1.1 tls #define g4(ih, il, oh, ol) g(4, 5, 6, 7, ih, il, oh, ol) 96 1.1 tls #define g8(ih, il, oh, ol) g(8, 9, 0, 1, ih, il, oh, ol) 97 1.1 tls #define g2(ih, il, oh, ol) g(2, 3, 4, 5, ih, il, oh, ol) 98 1.1 tls #define g6(ih, il, oh, ol) g(6, 7, 8, 9, ih, il, oh, ol) 99 1.1 tls 100 1.2 perry 101 1.1 tls #define g_inv(k0, k1, k2, k3, ih, il, oh, ol) \ 102 1.1 tls { \ 103 1.1 tls ol = k##k3 [ih] ^ il; \ 104 1.1 tls oh = k##k2 [ol] ^ ih; \ 105 1.1 tls ol = k##k1 [oh] ^ ol; \ 106 1.1 tls oh = k##k0 [ol] ^ oh; \ 107 1.1 tls } 108 1.1 tls 109 1.1 tls 110 1.1 tls #define g0_inv(ih, il, oh, ol) g_inv(0, 1, 2, 3, ih, il, oh, ol) 111 1.1 tls #define g4_inv(ih, il, oh, ol) g_inv(4, 5, 6, 7, ih, il, oh, ol) 112 1.1 tls #define g8_inv(ih, il, oh, ol) g_inv(8, 9, 0, 1, ih, il, oh, ol) 113 1.1 tls #define g2_inv(ih, il, oh, ol) g_inv(2, 3, 4, 5, ih, il, oh, ol) 114 1.1 tls #define g6_inv(ih, il, oh, ol) g_inv(6, 7, 8, 9, ih, il, oh, ol) 115 1.1 tls 116 1.1 tls /* optimized version of Skipjack algorithm 117 1.1 tls * 118 1.1 tls * the appropriate g-function is inlined for each round 119 1.1 tls * 120 1.2 perry * the data movement is minimized by rotating the names of the 121 1.1 tls * variables w1..w4, not their contents (saves 3 moves per round) 122 1.1 tls * 123 1.1 tls * the loops are completely unrolled (needed to staticize choice of g) 124 1.1 tls * 125 1.1 tls * compiles to about 470 instructions on a Sparc (gcc -O) 126 1.1 tls * which is about 58 instructions per byte, 14 per round. 127 1.1 tls * gcc seems to leave in some unnecessary and with 0xFF operations 128 1.1 tls * but only in the latter part of the functions. Perhaps it 129 1.1 tls * runs out of resources to properly optimize long inlined function? 130 1.1 tls * in theory should get about 11 instructions per round, not 14 131 1.1 tls */ 132 1.1 tls 133 1.1 tls void 134 1.1 tls skipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key_tables) 135 1.1 tls { 136 1.1 tls u_int8_t wh1 = plain[0]; u_int8_t wl1 = plain[1]; 137 1.1 tls u_int8_t wh2 = plain[2]; u_int8_t wl2 = plain[3]; 138 1.1 tls u_int8_t wh3 = plain[4]; u_int8_t wl3 = plain[5]; 139 1.1 tls u_int8_t wh4 = plain[6]; u_int8_t wl4 = plain[7]; 140 1.1 tls 141 1.1 tls u_int8_t * k0 = key_tables [0]; 142 1.1 tls u_int8_t * k1 = key_tables [1]; 143 1.1 tls u_int8_t * k2 = key_tables [2]; 144 1.1 tls u_int8_t * k3 = key_tables [3]; 145 1.1 tls u_int8_t * k4 = key_tables [4]; 146 1.1 tls u_int8_t * k5 = key_tables [5]; 147 1.1 tls u_int8_t * k6 = key_tables [6]; 148 1.1 tls u_int8_t * k7 = key_tables [7]; 149 1.1 tls u_int8_t * k8 = key_tables [8]; 150 1.1 tls u_int8_t * k9 = key_tables [9]; 151 1.1 tls 152 1.1 tls /* first 8 rounds */ 153 1.1 tls g0 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 1; wh4 ^= wh1; 154 1.1 tls g4 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 2; wh3 ^= wh4; 155 1.1 tls g8 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 3; wh2 ^= wh3; 156 1.1 tls g2 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 4; wh1 ^= wh2; 157 1.1 tls g6 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 5; wh4 ^= wh1; 158 1.1 tls g0 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 6; wh3 ^= wh4; 159 1.1 tls g4 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 7; wh2 ^= wh3; 160 1.1 tls g8 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 8; wh1 ^= wh2; 161 1.1 tls 162 1.1 tls /* second 8 rounds */ 163 1.1 tls wh2 ^= wh1; wl2 ^= wl1 ^ 9 ; g2 (wh1,wl1, wh1,wl1); 164 1.1 tls wh1 ^= wh4; wl1 ^= wl4 ^ 10; g6 (wh4,wl4, wh4,wl4); 165 1.1 tls wh4 ^= wh3; wl4 ^= wl3 ^ 11; g0 (wh3,wl3, wh3,wl3); 166 1.1 tls wh3 ^= wh2; wl3 ^= wl2 ^ 12; g4 (wh2,wl2, wh2,wl2); 167 1.1 tls wh2 ^= wh1; wl2 ^= wl1 ^ 13; g8 (wh1,wl1, wh1,wl1); 168 1.1 tls wh1 ^= wh4; wl1 ^= wl4 ^ 14; g2 (wh4,wl4, wh4,wl4); 169 1.1 tls wh4 ^= wh3; wl4 ^= wl3 ^ 15; g6 (wh3,wl3, wh3,wl3); 170 1.1 tls wh3 ^= wh2; wl3 ^= wl2 ^ 16; g0 (wh2,wl2, wh2,wl2); 171 1.1 tls 172 1.1 tls /* third 8 rounds */ 173 1.1 tls g4 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 17; wh4 ^= wh1; 174 1.1 tls g8 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 18; wh3 ^= wh4; 175 1.1 tls g2 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 19; wh2 ^= wh3; 176 1.1 tls g6 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 20; wh1 ^= wh2; 177 1.1 tls g0 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 21; wh4 ^= wh1; 178 1.1 tls g4 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 22; wh3 ^= wh4; 179 1.1 tls g8 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 23; wh2 ^= wh3; 180 1.1 tls g2 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 24; wh1 ^= wh2; 181 1.1 tls 182 1.1 tls /* last 8 rounds */ 183 1.1 tls wh2 ^= wh1; wl2 ^= wl1 ^ 25; g6 (wh1,wl1, wh1,wl1); 184 1.1 tls wh1 ^= wh4; wl1 ^= wl4 ^ 26; g0 (wh4,wl4, wh4,wl4); 185 1.1 tls wh4 ^= wh3; wl4 ^= wl3 ^ 27; g4 (wh3,wl3, wh3,wl3); 186 1.1 tls wh3 ^= wh2; wl3 ^= wl2 ^ 28; g8 (wh2,wl2, wh2,wl2); 187 1.1 tls wh2 ^= wh1; wl2 ^= wl1 ^ 29; g2 (wh1,wl1, wh1,wl1); 188 1.1 tls wh1 ^= wh4; wl1 ^= wl4 ^ 30; g6 (wh4,wl4, wh4,wl4); 189 1.1 tls wh4 ^= wh3; wl4 ^= wl3 ^ 31; g0 (wh3,wl3, wh3,wl3); 190 1.1 tls wh3 ^= wh2; wl3 ^= wl2 ^ 32; g4 (wh2,wl2, wh2,wl2); 191 1.1 tls 192 1.1 tls /* pack into byte vector */ 193 1.1 tls cipher [0] = wh1; cipher [1] = wl1; 194 1.1 tls cipher [2] = wh2; cipher [3] = wl2; 195 1.1 tls cipher [4] = wh3; cipher [5] = wl3; 196 1.1 tls cipher [6] = wh4; cipher [7] = wl4; 197 1.1 tls } 198 1.1 tls 199 1.1 tls 200 1.1 tls void 201 1.1 tls skipjack_backwards (u_int8_t *cipher, u_int8_t *plain, u_int8_t **key_tables) 202 1.1 tls { 203 1.1 tls /* setup 4 16-bit portions */ 204 1.1 tls u_int8_t wh1 = cipher[0]; u_int8_t wl1 = cipher[1]; 205 1.1 tls u_int8_t wh2 = cipher[2]; u_int8_t wl2 = cipher[3]; 206 1.1 tls u_int8_t wh3 = cipher[4]; u_int8_t wl3 = cipher[5]; 207 1.1 tls u_int8_t wh4 = cipher[6]; u_int8_t wl4 = cipher[7]; 208 1.1 tls 209 1.1 tls u_int8_t * k0 = key_tables [0]; 210 1.1 tls u_int8_t * k1 = key_tables [1]; 211 1.1 tls u_int8_t * k2 = key_tables [2]; 212 1.1 tls u_int8_t * k3 = key_tables [3]; 213 1.1 tls u_int8_t * k4 = key_tables [4]; 214 1.1 tls u_int8_t * k5 = key_tables [5]; 215 1.1 tls u_int8_t * k6 = key_tables [6]; 216 1.1 tls u_int8_t * k7 = key_tables [7]; 217 1.1 tls u_int8_t * k8 = key_tables [8]; 218 1.1 tls u_int8_t * k9 = key_tables [9]; 219 1.1 tls 220 1.1 tls /* first 8 rounds */ 221 1.1 tls g4_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 32; wh3 ^= wh2; 222 1.1 tls g0_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 31; wh4 ^= wh3; 223 1.1 tls g6_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 30; wh1 ^= wh4; 224 1.1 tls g2_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 29; wh2 ^= wh1; 225 1.1 tls g8_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 28; wh3 ^= wh2; 226 1.1 tls g4_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 27; wh4 ^= wh3; 227 1.1 tls g0_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 26; wh1 ^= wh4; 228 1.1 tls g6_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 25; wh2 ^= wh1; 229 1.1 tls 230 1.1 tls /* second 8 rounds */ 231 1.1 tls wh1 ^= wh2; wl1 ^= wl2 ^ 24; g2_inv (wh2,wl2, wh2,wl2); 232 1.1 tls wh2 ^= wh3; wl2 ^= wl3 ^ 23; g8_inv (wh3,wl3, wh3,wl3); 233 1.1 tls wh3 ^= wh4; wl3 ^= wl4 ^ 22; g4_inv (wh4,wl4, wh4,wl4); 234 1.1 tls wh4 ^= wh1; wl4 ^= wl1 ^ 21; g0_inv (wh1,wl1, wh1,wl1); 235 1.1 tls wh1 ^= wh2; wl1 ^= wl2 ^ 20; g6_inv (wh2,wl2, wh2,wl2); 236 1.1 tls wh2 ^= wh3; wl2 ^= wl3 ^ 19; g2_inv (wh3,wl3, wh3,wl3); 237 1.1 tls wh3 ^= wh4; wl3 ^= wl4 ^ 18; g8_inv (wh4,wl4, wh4,wl4); 238 1.1 tls wh4 ^= wh1; wl4 ^= wl1 ^ 17; g4_inv (wh1,wl1, wh1,wl1); 239 1.1 tls 240 1.1 tls /* third 8 rounds */ 241 1.1 tls g0_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 16; wh3 ^= wh2; 242 1.1 tls g6_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 15; wh4 ^= wh3; 243 1.1 tls g2_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 14; wh1 ^= wh4; 244 1.1 tls g8_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 13; wh2 ^= wh1; 245 1.1 tls g4_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 12; wh3 ^= wh2; 246 1.1 tls g0_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 11; wh4 ^= wh3; 247 1.1 tls g6_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 10; wh1 ^= wh4; 248 1.1 tls g2_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 9; wh2 ^= wh1; 249 1.1 tls 250 1.1 tls /* last 8 rounds */ 251 1.1 tls wh1 ^= wh2; wl1 ^= wl2 ^ 8; g8_inv (wh2,wl2, wh2,wl2); 252 1.1 tls wh2 ^= wh3; wl2 ^= wl3 ^ 7; g4_inv (wh3,wl3, wh3,wl3); 253 1.1 tls wh3 ^= wh4; wl3 ^= wl4 ^ 6; g0_inv (wh4,wl4, wh4,wl4); 254 1.1 tls wh4 ^= wh1; wl4 ^= wl1 ^ 5; g6_inv (wh1,wl1, wh1,wl1); 255 1.1 tls wh1 ^= wh2; wl1 ^= wl2 ^ 4; g2_inv (wh2,wl2, wh2,wl2); 256 1.1 tls wh2 ^= wh3; wl2 ^= wl3 ^ 3; g8_inv (wh3,wl3, wh3,wl3); 257 1.1 tls wh3 ^= wh4; wl3 ^= wl4 ^ 2; g4_inv (wh4,wl4, wh4,wl4); 258 1.1 tls wh4 ^= wh1; wl4 ^= wl1 ^ 1; g0_inv (wh1,wl1, wh1,wl1); 259 1.1 tls 260 1.1 tls /* pack into byte vector */ 261 1.1 tls plain [0] = wh1; plain [1] = wl1; 262 1.1 tls plain [2] = wh2; plain [3] = wl2; 263 1.1 tls plain [4] = wh3; plain [5] = wl3; 264 1.1 tls plain [6] = wh4; plain [7] = wl4; 265 1.1 tls } 266 1.4 pgoyette 267 1.4 pgoyette MODULE(MODULE_CLASS_MISC, skipjack, NULL); 268 1.4 pgoyette 269 1.4 pgoyette static int 270 1.4 pgoyette skipjack_modcmd(modcmd_t cmd, void *opaque) 271 1.4 pgoyette { 272 1.4 pgoyette 273 1.4 pgoyette switch (cmd) { 274 1.4 pgoyette case MODULE_CMD_INIT: 275 1.4 pgoyette return 0; 276 1.4 pgoyette case MODULE_CMD_FINI: 277 1.4 pgoyette return 0; 278 1.4 pgoyette default: 279 1.4 pgoyette return ENOTTY; 280 1.4 pgoyette } 281 1.4 pgoyette } 282