umac.c revision 1.17 1 /* $NetBSD: umac.c,v 1.17 2019/01/27 02:08:33 pgoyette Exp $ */
2 /* $OpenBSD: umac.c,v 1.17 2018/04/10 00:10:49 djm Exp $ */
3
4 /* -----------------------------------------------------------------------
5 *
6 * umac.c -- C Implementation UMAC Message Authentication
7 *
8 * Version 0.93b of rfc4418.txt -- 2006 July 18
9 *
10 * For a full description of UMAC message authentication see the UMAC
11 * world-wide-web page at http://www.cs.ucdavis.edu/~rogaway/umac
12 * Please report bugs and suggestions to the UMAC webpage.
13 *
14 * Copyright (c) 1999-2006 Ted Krovetz
15 *
16 * Permission to use, copy, modify, and distribute this software and
17 * its documentation for any purpose and with or without fee, is hereby
18 * granted provided that the above copyright notice appears in all copies
19 * and in supporting documentation, and that the name of the copyright
20 * holder not be used in advertising or publicity pertaining to
21 * distribution of the software without specific, written prior permission.
22 *
23 * Comments should be directed to Ted Krovetz (tdk (at) acm.org)
24 *
25 * ---------------------------------------------------------------------- */
26
27 /* ////////////////////// IMPORTANT NOTES /////////////////////////////////
28 *
29 * 1) This version does not work properly on messages larger than 16MB
30 *
31 * 2) If you set the switch to use SSE2, then all data must be 16-byte
32 * aligned
33 *
34 * 3) When calling the function umac(), it is assumed that msg is in
35 * a writable buffer of length divisible by 32 bytes. The message itself
36 * does not have to fill the entire buffer, but bytes beyond msg may be
37 * zeroed.
38 *
39 * 4) Three free AES implementations are supported by this implementation of
40 * UMAC. Paulo Barreto's version is in the public domain and can be found
41 * at http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ (search for
42 * "Barreto"). The only two files needed are rijndael-alg-fst.c and
43 * rijndael-alg-fst.h. Brian Gladman's version is distributed with the GNU
44 * Public license at http://fp.gladman.plus.com/AES/index.htm. It
45 * includes a fast IA-32 assembly version. The OpenSSL crypto library is
46 * the third.
47 *
48 * 5) With FORCE_C_ONLY flags set to 0, incorrect results are sometimes
49 * produced under gcc with optimizations set -O3 or higher. Dunno why.
50 *
51 /////////////////////////////////////////////////////////////////////// */
52
53 /* ---------------------------------------------------------------------- */
54 /* --- User Switches ---------------------------------------------------- */
55 /* ---------------------------------------------------------------------- */
56
/* UMAC output length in bytes. May be predefined by the build; otherwise
 * defaults to 8. STREAMS (defined later) is derived as UMAC_OUTPUT_LEN / 4. */
57 #ifndef UMAC_OUTPUT_LEN
58 #define UMAC_OUTPUT_LEN 8 /* Allowable: 4, 8, 12, 16 */
59 #endif
60 /* #define FORCE_C_ONLY 1 ANSI C and 64-bit integers req'd */
61 /* #define AES_IMPLEMENTATION 1 1 = OpenSSL, 2 = Barreto, 3 = Gladman */
62 /* #define SSE2 0 Is SSE2 available? */
63 /* #define RUN_TESTS 0 Run basic correctness/speed tests */
64 /* #define UMAC_AE_SUPPORT 0 Enable authenticated encryption */
65
66 /* ---------------------------------------------------------------------- */
67 /* -- Global Includes --------------------------------------------------- */
68 /* ---------------------------------------------------------------------- */
69
70 #include "includes.h"
71 __RCSID("$NetBSD: umac.c,v 1.17 2019/01/27 02:08:33 pgoyette Exp $");
72 #include <sys/types.h>
73 #include <sys/endian.h>
74 #include <string.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <stddef.h>
78 #include <time.h>
79
80 #include "xmalloc.h"
81 #include "umac.h"
82 #include "misc.h"
83
84 /* ---------------------------------------------------------------------- */
85 /* --- Primitive Data Types --- */
86 /* ---------------------------------------------------------------------- */
87
88 /* The following assumptions may need change on your system */
/* Fixed-width unsigned integer aliases used throughout this file, plus
 * UWORD, a plain unsigned int used for loop counters and word-sized work. */
89 typedef u_int8_t UINT8; /* 1 byte */
90 typedef u_int16_t UINT16; /* 2 bytes */
91 typedef u_int32_t UINT32; /* 4 bytes */
92 typedef u_int64_t UINT64; /* 8 bytes */
93 typedef unsigned int UWORD; /* Register */
94
95 /* ---------------------------------------------------------------------- */
96 /* --- Constants -------------------------------------------------------- */
97 /* ---------------------------------------------------------------------- */
98
/* Also used as the AES key size: aes_key_setup() passes UMAC_KEY_LEN*8 bits. */
99 #define UMAC_KEY_LEN 16 /* UMAC takes 16 bytes of external key */
100
101 /* Message "words" are read from memory in an endian-specific manner. */
102 /* For this implementation to behave correctly, __LITTLE_ENDIAN__ must */
103 /* be set true if the host computer is little-endian. */
104
/* Turn the BYTE_ORDER comparison into a 0/1 flag. The flag is tested with
 * `#if (__LITTLE_ENDIAN__)` to decide whether endian_convert() is compiled. */
105 #if BYTE_ORDER == LITTLE_ENDIAN
106 #define __LITTLE_ENDIAN__ 1
107 #else
108 #define __LITTLE_ENDIAN__ 0
109 #endif
110
111 /* ---------------------------------------------------------------------- */
112 /* ---------------------------------------------------------------------- */
113 /* ----- Architecture Specific ------------------------------------------ */
114 /* ---------------------------------------------------------------------- */
115 /* ---------------------------------------------------------------------- */
116
117
118 /* ---------------------------------------------------------------------- */
119 /* ---------------------------------------------------------------------- */
120 /* ----- Primitive Routines --------------------------------------------- */
121 /* ---------------------------------------------------------------------- */
122 /* ---------------------------------------------------------------------- */
123
124
125 /* ---------------------------------------------------------------------- */
126 /* --- 32-bit by 32-bit to 64-bit Multiplication ------------------------ */
127 /* ---------------------------------------------------------------------- */
128
/* MUL64(a,b): truncate each operand to 32 bits, widen both to 64 bits and
 * multiply, so the complete 64-bit product is produced. */
129 #define MUL64(a,b) ((UINT64)((UINT64)(UINT32)(a) * (UINT64)(UINT32)(b)))
130
131 /* ---------------------------------------------------------------------- */
132 /* --- Endian Conversion --- Forcing assembly on some platforms */
133 /* ---------------------------------------------------------------------- */
134
135 /* The following definitions do the right thing on endian-specific loads
136 * and stores, using the get_u32()/put_u32() family of helpers.
137 */
138
/* 32-bit load/store helpers.
 * NOTE(review): get_u32/put_u32 and get_u32_le/put_u32_le are not defined in
 * this file (presumably misc.h); judging by the macro names they are the
 * big-endian and little-endian accessors respectively -- confirm there.
 *
 * The *_REVERSED pair selects whichever accessor is opposite to the host
 * byte order as reported by BYTE_ORDER. */
139 #if BYTE_ORDER == LITTLE_ENDIAN
140 #define LOAD_UINT32_REVERSED(p) get_u32(p)
141 #define STORE_UINT32_REVERSED(p,v) put_u32(p,v)
142 #else
143 #define LOAD_UINT32_REVERSED(p) get_u32_le(p)
144 #define STORE_UINT32_REVERSED(p,v) put_u32_le(p,v)
145 #endif
146
/* Host-independent accessors: the same helper is used on every platform. */
147 #define LOAD_UINT32_LITTLE(p) (get_u32_le(p))
148 #define STORE_UINT32_BIG(p,v) put_u32(p, v)
149
150
151
152 /* ---------------------------------------------------------------------- */
153 /* ---------------------------------------------------------------------- */
154 /* ----- Begin KDF & PDF Section ---------------------------------------- */
155 /* ---------------------------------------------------------------------- */
156 /* ---------------------------------------------------------------------- */
157
158 /* UMAC uses AES with 16 byte block and key lengths */
/* Size in bytes of the AES input/output buffers used by kdf() and the PDF. */
159 #define AES_BLOCK_LEN 16
160
/* AES backend selection. Both branches define the same three names so the
 * rest of the file is backend-agnostic:
 *   aes_int_key               - storage type for an expanded AES key
 *   aes_encryption(in,out,k)  - run the backend's block encryption on 'in',
 *                               writing the result to 'out'
 *   aes_key_setup(key,k)      - expand a UMAC_KEY_LEN-byte (UMAC_KEY_LEN*8
 *                               bit) user key into 'k'
 */
161 #ifdef WITH_OPENSSL
162 #include <openssl/aes.h>
163 typedef AES_KEY aes_int_key[1];
164 #define aes_encryption(in,out,int_key) \
165 AES_encrypt((u_char *)(in),(u_char *)(out),(AES_KEY *)int_key)
166 #define aes_key_setup(key,int_key) \
167 AES_set_encrypt_key((const u_char *)(key),UMAC_KEY_LEN*8,int_key)
168 #else
/* Bundled Rijndael implementation; the expanded key is stored as raw bytes
 * and cast to u32 * when handed to the rijndael routines. */
169 #include "rijndael.h"
170 #define AES_ROUNDS ((UMAC_KEY_LEN / 4) + 6)
171 typedef UINT8 aes_int_key[AES_ROUNDS+1][4][4]; /* AES internal */
172 #define aes_encryption(in,out,int_key) \
173 rijndaelEncrypt((u32 *)(int_key), AES_ROUNDS, (u8 *)(in), (u8 *)(out))
174 #define aes_key_setup(key,int_key) \
175 rijndaelKeySetupEnc((u32 *)(int_key), (const unsigned char *)(key), \
176 UMAC_KEY_LEN*8)
177 #endif
178
179 /* The user-supplied UMAC key is stretched using AES in a counter
180 * mode to supply all random bits needed by UMAC. The kdf function takes
181 * an AES internal key representation 'key' and writes a stream of
182 * 'nbytes' bytes to the memory pointed at by 'buffer_ptr'. Each distinct
183 * 'ndx' causes a distinct byte stream.
184 */
185 static void kdf(void *buffer_ptr, aes_int_key key, UINT8 ndx, int nbytes)
186 {
187 UINT8 in_buf[AES_BLOCK_LEN] = {0};
188 UINT8 out_buf[AES_BLOCK_LEN];
189 UINT8 *dst_buf = (UINT8 *)buffer_ptr;
190 int i;
191
192 /* Setup the initial value */
193 in_buf[AES_BLOCK_LEN-9] = ndx;
194 in_buf[AES_BLOCK_LEN-1] = i = 1;
195
196 while (nbytes >= AES_BLOCK_LEN) {
197 aes_encryption(in_buf, out_buf, key);
198 memcpy(dst_buf,out_buf,AES_BLOCK_LEN);
199 in_buf[AES_BLOCK_LEN-1] = ++i;
200 nbytes -= AES_BLOCK_LEN;
201 dst_buf += AES_BLOCK_LEN;
202 }
203 if (nbytes) {
204 aes_encryption(in_buf, out_buf, key);
205 memcpy(dst_buf,out_buf,nbytes);
206 }
207 explicit_bzero(in_buf, sizeof(in_buf));
208 explicit_bzero(out_buf, sizeof(out_buf));
209 }
210
211 /* The final UHASH result is XOR'd with the output of a pseudorandom
212 * function. Here, we use AES to generate random output and
213 * xor the appropriate bytes depending on the last bits of nonce.
214 * This scheme is optimized for sequential, increasing big-endian nonces.
215 */
216
/* State for the pad-derivation function (PDF).  pdf_init() zeroes 'nonce'
 * and fills 'cache' with the AES encryption of that zero block;
 * pdf_gen_xor() rewrites only the first 8 bytes of 'nonce' and refreshes
 * 'cache' when the (masked) nonce changes. */
217 typedef struct {
218 UINT8 cache[AES_BLOCK_LEN]; /* Previous AES output is saved */
219 UINT8 nonce[AES_BLOCK_LEN]; /* The AES input making above cache */
220 aes_int_key prf_key; /* Expanded AES key for PDF */
221 } pdf_ctx;
222
223 static void pdf_init(pdf_ctx *pc, aes_int_key prf_key)
224 {
225 UINT8 buf[UMAC_KEY_LEN];
226
227 kdf(buf, prf_key, 0, UMAC_KEY_LEN);
228 aes_key_setup(buf, pc->prf_key);
229
230 /* Initialize pdf and cache */
231 memset(pc->nonce, 0, sizeof(pc->nonce));
232 aes_encryption(pc->nonce, pc->cache, pc->prf_key);
233 explicit_bzero(buf, sizeof(buf));
234 }
235
/* XOR the si-th 64-bit slot of 'sp' into the di-th 64-bit slot of 'dp'.
 * Slots are addressed in units of 8 bytes from each base pointer.  All
 * accesses go through memcpy(), so neither buffer needs to be aligned. */
static inline void
xor64(uint8_t *dp, int di, uint8_t *sp, int si)
{
    uint8_t *target = dp + sizeof(uint64_t) * di;
    const uint8_t *source = sp + sizeof(uint64_t) * si;
    uint64_t acc, mask;

    memcpy(&acc, target, sizeof(acc));
    memcpy(&mask, source, sizeof(mask));
    acc ^= mask;
    memcpy(target, &acc, sizeof(acc));
}
245
246 __unused static inline void
247 xor32(uint8_t *dp, int di, uint8_t *sp, int si)
248 {
249 uint32_t dst, src;
250 memcpy(&dst, dp + sizeof(dst) * di, sizeof(dst));
251 memcpy(&src, sp + sizeof(src) * si, sizeof(src));
252 dst ^= src;
253 memcpy(dp + sizeof(dst) * di, &dst, sizeof(dst));
254 }
255
256 static void pdf_gen_xor(pdf_ctx *pc, const UINT8 nonce[8], UINT8 buf[8])
257 {
258 /* 'ndx' indicates that we'll be using the 0th or 1st eight bytes
259 * of the AES output. If last time around we returned the ndx-1st
260 * element, then we may have the result in the cache already.
261 */
262
263 #if (UMAC_OUTPUT_LEN == 4)
264 #define LOW_BIT_MASK 3
265 #elif (UMAC_OUTPUT_LEN == 8)
266 #define LOW_BIT_MASK 1
267 #elif (UMAC_OUTPUT_LEN > 8)
268 #define LOW_BIT_MASK 0
269 #endif
270 union {
271 UINT8 tmp_nonce_lo[4];
272 UINT32 align;
273 } t;
274 #if LOW_BIT_MASK != 0
275 int ndx = nonce[7] & LOW_BIT_MASK;
276 #endif
277 memcpy(t.tmp_nonce_lo, nonce + 4, sizeof(t.tmp_nonce_lo));
278 t.tmp_nonce_lo[3] &= ~LOW_BIT_MASK; /* zero last bit */
279
280 if (memcmp(t.tmp_nonce_lo, pc->nonce + 1, sizeof(t.tmp_nonce_lo)) != 0 ||
281 memcmp(nonce, pc->nonce, sizeof(t.tmp_nonce_lo)) != 0)
282 {
283 memcpy(pc->nonce, nonce, sizeof(t.tmp_nonce_lo));
284 memcpy(pc->nonce + 4, t.tmp_nonce_lo, sizeof(t.tmp_nonce_lo));
285 aes_encryption(pc->nonce, pc->cache, pc->prf_key);
286 }
287
288 #if (UMAC_OUTPUT_LEN == 4)
289 xor32(buf, 0, pc->cache, ndx);
290 #elif (UMAC_OUTPUT_LEN == 8)
291 xor64(buf, 0, pc->cache, ndx);
292 #elif (UMAC_OUTPUT_LEN == 12)
293 xor64(buf, 0, pc->cache, 0);
294 xor32(buf, 2, pc->cache, 2);
295 #elif (UMAC_OUTPUT_LEN == 16)
296 xor64(buf, 0, pc->cache, 0);
297 xor64(buf, 1, pc->cache, 1);
298 #endif
299 }
300
301 /* ---------------------------------------------------------------------- */
302 /* ---------------------------------------------------------------------- */
303 /* ----- Begin NH Hash Section ------------------------------------------ */
304 /* ---------------------------------------------------------------------- */
305 /* ---------------------------------------------------------------------- */
306
307 /* The NH-based hash functions used in UMAC are described in the UMAC paper
308 * and specification, both of which can be found at the UMAC website.
309 * The interface to this implementation has two
310 * versions, one expects the entire message being hashed to be passed
311 * in a single buffer and returns the hash result immediately. The second
312 * allows the message to be passed in a sequence of buffers. In the
313 * multiple-buffer interface, the client calls the routine nh_update() as
314 * many times as necessary. When there is no more data to be fed to the
315 * hash, the client calls nh_final() which calculates the hash output.
316 * Before beginning another hash calculation the nh_reset() routine
317 * must be called. The single-buffer routine, nh(), is equivalent to
318 * the sequence of calls nh_update() and nh_final(); however it is
319 * optimized and should be preferred whenever the multiple-buffer interface
320 * is not necessary. When using either interface, it is the client's
321 * responsibility to pass no more than L1_KEY_LEN bytes per hash result.
322 *
323 * The routine nh_init() initializes the nh_ctx data structure and
324 * must be called once, before any other NH routine.
325 */
326
327 /* The "nh_aux" routines do the actual NH hashing work. They
328 * expect buffers to be multiples of L1_PAD_BOUNDARY. These routines
329 * produce output for all STREAMS NH iterations in one call,
330 * allowing the parallel implementation of the streams.
331 */
332
/* NH layer parameters.  STREAMS sizes the per-stream arrays in nh_ctx and
 * uhash_ctx; the NH key is L1_KEY_LEN bytes plus L1_KEY_SHIFT extra bytes
 * for every stream after the first (see nh_ctx.nh_key). */
333 #define STREAMS (UMAC_OUTPUT_LEN / 4) /* Number of times hash is applied */
334 #define L1_KEY_LEN 1024 /* Internal key bytes */
335 #define L1_KEY_SHIFT 16 /* Toeplitz key shift between streams */
336 #define L1_PAD_BOUNDARY 32 /* pad message to boundary multiple */
337 #define ALLOC_BOUNDARY 16 /* Keep buffers aligned to this */
338 #define HASH_BUF_BYTES 64 /* nh_aux_hb buffer multiple */
339
/* Context for the NH hash.  nh_init() fills nh_key; nh_update() stages
 * input in 'data' and advances bytes_hashed as full buffers are consumed;
 * nh_reset() clears the two counters and 'state'. */
340 typedef struct {
341 UINT8 nh_key [L1_KEY_LEN + L1_KEY_SHIFT * (STREAMS - 1)]; /* NH Key */
342 UINT8 data [HASH_BUF_BYTES]; /* Incoming data buffer */
343 int next_data_empty; /* Bookkeeping variable for data buffer: index of the first unused byte in 'data'. */
344 int bytes_hashed; /* Bytes (out of L1_KEY_LEN) incorporated; also the key offset used by nh_transform(). */
345 UINT64 state[STREAMS]; /* on-line state */
346 } nh_ctx;
347
348
349 #if (UMAC_OUTPUT_LEN == 4)
350
351 static void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen)
352 /* NH hashing primitive. Previous (partial) hash result is loaded and
353 * then stored via hp pointer. The length of the data pointed at by "dp",
354 * "dlen", is guaranteed to be divisible by L1_PAD_BOUNDARY (32). Key
355 * is expected to be endian compensated in memory at key setup.
356 */
357 {
358 UINT64 h;
359 UWORD c = dlen / 32;
360 UINT32 *k = (UINT32 *)kp;
361 const UINT32 *d = (const UINT32 *)dp;
362 UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
363 UINT32 k0,k1,k2,k3,k4,k5,k6,k7;
364
365 h = *((UINT64 *)hp);
366 do {
367 d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
368 d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
369 d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
370 d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
371 k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
372 k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
373 h += MUL64((k0 + d0), (k4 + d4));
374 h += MUL64((k1 + d1), (k5 + d5));
375 h += MUL64((k2 + d2), (k6 + d6));
376 h += MUL64((k3 + d3), (k7 + d7));
377
378 d += 8;
379 k += 8;
380 } while (--c);
381 *((UINT64 *)hp) = h;
382 }
383
384 #elif (UMAC_OUTPUT_LEN == 8)
385
386 static void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen)
387 /* Same as previous nh_aux, but two streams are handled in one pass,
388 * reading and writing 16 bytes of hash-state per call.
389 */
390 {
391 UINT64 h1,h2;
392 UWORD c = dlen / 32;
393 UINT32 *k = (UINT32 *)kp;
394 const UINT32 *d = (const UINT32 *)dp;
395 UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
396 UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
397 k8,k9,k10,k11;
398
399 h1 = *((UINT64 *)hp);
400 h2 = *((UINT64 *)hp + 1);
401 k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
402 do {
403 d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
404 d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
405 d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
406 d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
407 k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
408 k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
409
410 h1 += MUL64((k0 + d0), (k4 + d4));
411 h2 += MUL64((k4 + d0), (k8 + d4));
412
413 h1 += MUL64((k1 + d1), (k5 + d5));
414 h2 += MUL64((k5 + d1), (k9 + d5));
415
416 h1 += MUL64((k2 + d2), (k6 + d6));
417 h2 += MUL64((k6 + d2), (k10 + d6));
418
419 h1 += MUL64((k3 + d3), (k7 + d7));
420 h2 += MUL64((k7 + d3), (k11 + d7));
421
422 k0 = k8; k1 = k9; k2 = k10; k3 = k11;
423
424 d += 8;
425 k += 8;
426 } while (--c);
427 ((UINT64 *)hp)[0] = h1;
428 ((UINT64 *)hp)[1] = h2;
429 }
430
431 #elif (UMAC_OUTPUT_LEN == 12)
432
433 static void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen)
434 /* Same as previous nh_aux, but two streams are handled in one pass,
435 * reading and writing 24 bytes of hash-state per call.
436 */
437 {
438 UINT64 h1,h2,h3;
439 UWORD c = dlen / 32;
440 UINT32 *k = (UINT32 *)kp;
441 const UINT32 *d = (const UINT32 *)dp;
442 UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
443 UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
444 k8,k9,k10,k11,k12,k13,k14,k15;
445
446 h1 = *((UINT64 *)hp);
447 h2 = *((UINT64 *)hp + 1);
448 h3 = *((UINT64 *)hp + 2);
449 k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
450 k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
451 do {
452 d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
453 d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
454 d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
455 d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
456 k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
457 k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15);
458
459 h1 += MUL64((k0 + d0), (k4 + d4));
460 h2 += MUL64((k4 + d0), (k8 + d4));
461 h3 += MUL64((k8 + d0), (k12 + d4));
462
463 h1 += MUL64((k1 + d1), (k5 + d5));
464 h2 += MUL64((k5 + d1), (k9 + d5));
465 h3 += MUL64((k9 + d1), (k13 + d5));
466
467 h1 += MUL64((k2 + d2), (k6 + d6));
468 h2 += MUL64((k6 + d2), (k10 + d6));
469 h3 += MUL64((k10 + d2), (k14 + d6));
470
471 h1 += MUL64((k3 + d3), (k7 + d7));
472 h2 += MUL64((k7 + d3), (k11 + d7));
473 h3 += MUL64((k11 + d3), (k15 + d7));
474
475 k0 = k8; k1 = k9; k2 = k10; k3 = k11;
476 k4 = k12; k5 = k13; k6 = k14; k7 = k15;
477
478 d += 8;
479 k += 8;
480 } while (--c);
481 ((UINT64 *)hp)[0] = h1;
482 ((UINT64 *)hp)[1] = h2;
483 ((UINT64 *)hp)[2] = h3;
484 }
485
486 #elif (UMAC_OUTPUT_LEN == 16)
487
488 static void nh_aux(void *kp, const void *dp, void *hp, UINT32 dlen)
489 /* Same as previous nh_aux, but two streams are handled in one pass,
490 * reading and writing 24 bytes of hash-state per call.
491 */
492 {
493 UINT64 h1,h2,h3,h4;
494 UWORD c = dlen / 32;
495 UINT32 *k = (UINT32 *)kp;
496 const UINT32 *d = (const UINT32 *)dp;
497 UINT32 d0,d1,d2,d3,d4,d5,d6,d7;
498 UINT32 k0,k1,k2,k3,k4,k5,k6,k7,
499 k8,k9,k10,k11,k12,k13,k14,k15,
500 k16,k17,k18,k19;
501
502 h1 = *((UINT64 *)hp);
503 h2 = *((UINT64 *)hp + 1);
504 h3 = *((UINT64 *)hp + 2);
505 h4 = *((UINT64 *)hp + 3);
506 k0 = *(k+0); k1 = *(k+1); k2 = *(k+2); k3 = *(k+3);
507 k4 = *(k+4); k5 = *(k+5); k6 = *(k+6); k7 = *(k+7);
508 do {
509 d0 = LOAD_UINT32_LITTLE(d+0); d1 = LOAD_UINT32_LITTLE(d+1);
510 d2 = LOAD_UINT32_LITTLE(d+2); d3 = LOAD_UINT32_LITTLE(d+3);
511 d4 = LOAD_UINT32_LITTLE(d+4); d5 = LOAD_UINT32_LITTLE(d+5);
512 d6 = LOAD_UINT32_LITTLE(d+6); d7 = LOAD_UINT32_LITTLE(d+7);
513 k8 = *(k+8); k9 = *(k+9); k10 = *(k+10); k11 = *(k+11);
514 k12 = *(k+12); k13 = *(k+13); k14 = *(k+14); k15 = *(k+15);
515 k16 = *(k+16); k17 = *(k+17); k18 = *(k+18); k19 = *(k+19);
516
517 h1 += MUL64((k0 + d0), (k4 + d4));
518 h2 += MUL64((k4 + d0), (k8 + d4));
519 h3 += MUL64((k8 + d0), (k12 + d4));
520 h4 += MUL64((k12 + d0), (k16 + d4));
521
522 h1 += MUL64((k1 + d1), (k5 + d5));
523 h2 += MUL64((k5 + d1), (k9 + d5));
524 h3 += MUL64((k9 + d1), (k13 + d5));
525 h4 += MUL64((k13 + d1), (k17 + d5));
526
527 h1 += MUL64((k2 + d2), (k6 + d6));
528 h2 += MUL64((k6 + d2), (k10 + d6));
529 h3 += MUL64((k10 + d2), (k14 + d6));
530 h4 += MUL64((k14 + d2), (k18 + d6));
531
532 h1 += MUL64((k3 + d3), (k7 + d7));
533 h2 += MUL64((k7 + d3), (k11 + d7));
534 h3 += MUL64((k11 + d3), (k15 + d7));
535 h4 += MUL64((k15 + d3), (k19 + d7));
536
537 k0 = k8; k1 = k9; k2 = k10; k3 = k11;
538 k4 = k12; k5 = k13; k6 = k14; k7 = k15;
539 k8 = k16; k9 = k17; k10 = k18; k11 = k19;
540
541 d += 8;
542 k += 8;
543 } while (--c);
544 ((UINT64 *)hp)[0] = h1;
545 ((UINT64 *)hp)[1] = h2;
546 ((UINT64 *)hp)[2] = h3;
547 ((UINT64 *)hp)[3] = h4;
548 }
549
550 /* ---------------------------------------------------------------------- */
551 #endif /* UMAC_OUTPUT_LENGTH */
552 /* ---------------------------------------------------------------------- */
553
554
555 /* ---------------------------------------------------------------------- */
556
557 static void nh_transform(nh_ctx *hc, const UINT8 *buf, UINT32 nbytes)
558 /* This function is a wrapper for the primitive NH hash functions. It takes
559 * as argument "hc" the current hash context and a buffer which must be a
560 * multiple of L1_PAD_BOUNDARY. The key passed to nh_aux is offset
561 * appropriately according to how much message has been hashed already.
562 */
563 {
564 UINT8 *key;
565
566 key = hc->nh_key + hc->bytes_hashed;
567 nh_aux(key, buf, hc->state, nbytes);
568 }
569
570 /* ---------------------------------------------------------------------- */
571
#if (__LITTLE_ENDIAN__)
/* Endian convert, in place, the 'num_bytes' bytes at 'buf', treating them
 * as words of 'bpw' bytes each.  Only word sizes 4 and 8 are handled; any
 * other size leaves the buffer untouched.  Keys are converted this way on
 * little-endian computers to compensate for the lack of big-endian memory
 * reads during hashing.
 *
 * num_bytes / bpw must be at least 1: each loop runs its body before
 * testing the remaining count.
 */
static void endian_convert(void *buf, UWORD bpw, UINT32 num_bytes)
{
    UWORD remaining = num_bytes / bpw;

    switch (bpw) {
    case 4: {
        UINT32 *word = (UINT32 *)buf;

        do {
            *word = LOAD_UINT32_REVERSED(word);
            word++;
        } while (--remaining);
        break;
    }
    case 8: {
        UINT64 *word = (UINT64 *)buf;

        do {
            /* The two halves are each reversed and also swap places */
            UINT64 lo = LOAD_UINT32_REVERSED((UINT32 *)word + 1);
            UINT64 hi = LOAD_UINT32_REVERSED((UINT32 *)word);

            *word++ = lo | (hi << 32);
        } while (--remaining);
        break;
    }
    default:
        break;
    }
}
#define endian_convert_if_le(x,y,z) endian_convert((x),(y),(z))
#else
#define endian_convert_if_le(x,y,z) do{}while(0)  /* Do nothing */
#endif
599
600 /* ---------------------------------------------------------------------- */
601
602 static void nh_reset(nh_ctx *hc)
603 /* Reset nh_ctx to ready for hashing of new data */
604 {
605 hc->bytes_hashed = 0;
606 hc->next_data_empty = 0;
607 hc->state[0] = 0;
608 #if (UMAC_OUTPUT_LEN >= 8)
609 hc->state[1] = 0;
610 #endif
611 #if (UMAC_OUTPUT_LEN >= 12)
612 hc->state[2] = 0;
613 #endif
614 #if (UMAC_OUTPUT_LEN == 16)
615 hc->state[3] = 0;
616 #endif
617
618 }
619
620 /* ---------------------------------------------------------------------- */
621
622 static void nh_init(nh_ctx *hc, aes_int_key prf_key)
623 /* Generate nh_key, endian convert and reset to be ready for hashing. */
624 {
625 kdf(hc->nh_key, prf_key, 1, sizeof(hc->nh_key));
626 endian_convert_if_le(hc->nh_key, 4, sizeof(hc->nh_key));
627 nh_reset(hc);
628 }
629
630 /* ---------------------------------------------------------------------- */
631
632 static void nh_update(nh_ctx *hc, const UINT8 *buf, UINT32 nbytes)
633 /* Incorporate nbytes of data into a nh_ctx, buffer whatever is not an */
634 /* even multiple of HASH_BUF_BYTES. */
635 {
636 UINT32 i,j;
637
638 j = hc->next_data_empty;
639 if ((j + nbytes) >= HASH_BUF_BYTES) {
640 if (j) {
641 i = HASH_BUF_BYTES - j;
642 memcpy(hc->data+j, buf, i);
643 nh_transform(hc,hc->data,HASH_BUF_BYTES);
644 nbytes -= i;
645 buf += i;
646 hc->bytes_hashed += HASH_BUF_BYTES;
647 }
648 if (nbytes >= HASH_BUF_BYTES) {
649 i = nbytes & ~(HASH_BUF_BYTES - 1);
650 nh_transform(hc, buf, i);
651 nbytes -= i;
652 buf += i;
653 hc->bytes_hashed += i;
654 }
655 j = 0;
656 }
657 memcpy(hc->data + j, buf, nbytes);
658 hc->next_data_empty = j + nbytes;
659 }
660
661 /* ---------------------------------------------------------------------- */
662
663 static void zero_pad(UINT8 *p, int nbytes)
664 {
665 /* Write "nbytes" of zeroes, beginning at "p" */
666 if (nbytes >= (int)sizeof(UWORD)) {
667 while ((ptrdiff_t)p % sizeof(UWORD)) {
668 *p = 0;
669 nbytes--;
670 p++;
671 }
672 while (nbytes >= (int)sizeof(UWORD)) {
673 *(UWORD *)p = 0;
674 nbytes -= sizeof(UWORD);
675 p += sizeof(UWORD);
676 }
677 }
678 while (nbytes) {
679 *p = 0;
680 nbytes--;
681 p++;
682 }
683 }
684
685 /* ---------------------------------------------------------------------- */
686
687 static void nh_final(nh_ctx *hc, UINT8 *result)
688 /* After passing some number of data buffers to nh_update() for integration
689 * into an NH context, nh_final is called to produce a hash result. If any
690 * bytes are in the buffer hc->data, incorporate them into the
691 * NH context. Finally, add into the NH accumulation "state" the total number
692 * of bits hashed. The resulting numbers are written to the buffer "result".
693 * If nh_update was never called, L1_PAD_BOUNDARY zeroes are incorporated.
694 */
695 {
696 int nh_len, nbits;
697
698 if (hc->next_data_empty != 0) {
699 nh_len = ((hc->next_data_empty + (L1_PAD_BOUNDARY - 1)) &
700 ~(L1_PAD_BOUNDARY - 1));
701 zero_pad(hc->data + hc->next_data_empty,
702 nh_len - hc->next_data_empty);
703 nh_transform(hc, hc->data, nh_len);
704 hc->bytes_hashed += hc->next_data_empty;
705 } else if (hc->bytes_hashed == 0) {
706 nh_len = L1_PAD_BOUNDARY;
707 zero_pad(hc->data, L1_PAD_BOUNDARY);
708 nh_transform(hc, hc->data, nh_len);
709 }
710
711 nbits = (hc->bytes_hashed << 3);
712 ((UINT64 *)result)[0] = ((UINT64 *)hc->state)[0] + nbits;
713 #if (UMAC_OUTPUT_LEN >= 8)
714 ((UINT64 *)result)[1] = ((UINT64 *)hc->state)[1] + nbits;
715 #endif
716 #if (UMAC_OUTPUT_LEN >= 12)
717 ((UINT64 *)result)[2] = ((UINT64 *)hc->state)[2] + nbits;
718 #endif
719 #if (UMAC_OUTPUT_LEN == 16)
720 ((UINT64 *)result)[3] = ((UINT64 *)hc->state)[3] + nbits;
721 #endif
722 nh_reset(hc);
723 }
724
725 /* ---------------------------------------------------------------------- */
726
727 static void nh(nh_ctx *hc, const UINT8 *buf, UINT32 padded_len,
728 UINT32 unpadded_len, UINT8 *result)
729 /* All-in-one nh_update() and nh_final() equivalent.
730 * Assumes that padded_len is divisible by L1_PAD_BOUNDARY and result is
731 * well aligned
732 */
733 {
734 UINT32 nbits;
735
736 /* Initialize the hash state */
737 nbits = (unpadded_len << 3);
738
739 ((UINT64 *)result)[0] = nbits;
740 #if (UMAC_OUTPUT_LEN >= 8)
741 ((UINT64 *)result)[1] = nbits;
742 #endif
743 #if (UMAC_OUTPUT_LEN >= 12)
744 ((UINT64 *)result)[2] = nbits;
745 #endif
746 #if (UMAC_OUTPUT_LEN == 16)
747 ((UINT64 *)result)[3] = nbits;
748 #endif
749
750 nh_aux(hc->nh_key, buf, result, padded_len);
751 }
752
753 /* ---------------------------------------------------------------------- */
754 /* ---------------------------------------------------------------------- */
755 /* ----- Begin UHASH Section -------------------------------------------- */
756 /* ---------------------------------------------------------------------- */
757 /* ---------------------------------------------------------------------- */
758
759 /* UHASH is a multi-layered algorithm. Data presented to UHASH is first
760 * hashed by NH. The NH output is then hashed by a polynomial-hash layer
761 * unless the initial data to be hashed is short. After the polynomial-
762 * layer, an inner-product hash is used to produce the final UHASH output.
763 *
764 * UHASH provides two interfaces, one all-at-once and another where data
765 * buffers are presented sequentially. In the sequential interface, the
766 * UHASH client calls the routine uhash_update() as many times as necessary.
767 * When there is no more data to be fed to UHASH, the client calls
768 * uhash_final() which
769 * calculates the UHASH output. Before beginning another UHASH calculation
770 * the uhash_reset() routine must be called. The all-at-once UHASH routine,
771 * uhash(), is equivalent to the sequence of calls uhash_update() and
772 * uhash_final(); however it is optimized and should be
773 * used whenever the sequential interface is not necessary.
774 *
775 * The routine uhash_init() initializes the uhash_ctx data structure and
776 * must be called once, before any other UHASH routine.
777 */
778
779 /* ---------------------------------------------------------------------- */
780 /* ----- Constants and uhash_ctx ---------------------------------------- */
781 /* ---------------------------------------------------------------------- */
782
783 /* ---------------------------------------------------------------------- */
784 /* ----- Poly hash and Inner-Product hash Constants --------------------- */
785 /* ---------------------------------------------------------------------- */
786
787 /* Primes and masks */
/* p36 and m36 are used by ip_reduce_p36(); p64 is used by poly_hash(),
 * which feeds p64 - 1 to poly64() ahead of any word whose top 32 bits are
 * all ones. */
788 #define p36 ((UINT64)0x0000000FFFFFFFFBull) /* 2^36 - 5 */
789 #define p64 ((UINT64)0xFFFFFFFFFFFFFFC5ull) /* 2^64 - 59 */
790 #define m36 ((UINT64)0x0000000FFFFFFFFFull) /* The low 36 of 64 bits */
791
792
793 /* ---------------------------------------------------------------------- */
794
/* Complete UHASH state.  Every array has one entry per output stream
 * (STREAMS), except ip_keys, which holds four inner-product keys per
 * stream (ip_aux() consumes them four at a time). */
795 typedef struct uhash_ctx {
796 nh_ctx hash; /* Hash context for L1 NH hash */
797 UINT64 poly_key_8[STREAMS]; /* p64 poly keys */
798 UINT64 poly_accum[STREAMS]; /* poly hash result */
799 UINT64 ip_keys[STREAMS*4]; /* Inner-product keys */
800 UINT32 ip_trans[STREAMS]; /* Inner-product translation */
801 UINT32 msg_len; /* Total length of data passed */
802 /* to uhash */
803 } uhash_ctx;
/* Pointer alias used as the parameter type of the UHASH routines. */
804 typedef struct uhash_ctx *uhash_ctx_t;
805
806 /* ---------------------------------------------------------------------- */
807
808
809 /* The polynomial hashes use Horner's rule to evaluate a polynomial one
810 * word at a time. As described in the specification, poly32 and poly64
811 * require keys from special domains. The following implementations exploit
812 * the special domains to avoid overflow. The results are not guaranteed to
813 * be within Z_p32 and Z_p64, but the Inner-Product hash implementation
814 * patches any errant values.
815 */
816
817 static UINT64 poly64(UINT64 cur, UINT64 key, UINT64 data)
818 {
819 UINT32 key_hi = (UINT32)(key >> 32),
820 key_lo = (UINT32)key,
821 cur_hi = (UINT32)(cur >> 32),
822 cur_lo = (UINT32)cur,
823 x_lo,
824 x_hi;
825 UINT64 X,T,res;
826
827 X = MUL64(key_hi, cur_lo) + MUL64(cur_hi, key_lo);
828 x_lo = (UINT32)X;
829 x_hi = (UINT32)(X >> 32);
830
831 res = (MUL64(key_hi, cur_hi) + x_hi) * 59 + MUL64(key_lo, cur_lo);
832
833 T = ((UINT64)x_lo << 32);
834 res += T;
835 if (res < T)
836 res += 59;
837
838 res += data;
839 if (res < data)
840 res += 59;
841
842 return res;
843 }
844
845
846 /* Although UMAC is specified to use a ramped polynomial hash scheme, this
847 * implementation does not handle all ramp levels. Because we don't handle
848 * the ramp up to p128 modulus in this implementation, we are limited to
849 * 2^14 poly_hash() invocations per stream (for a total capacity of 2^24
850 * bytes input to UMAC per tag, ie. 16MB).
851 */
852 static void poly_hash(uhash_ctx_t hc, UINT32 data_in[])
853 {
854 int i;
855 UINT64 *data=(UINT64*)data_in;
856
857 for (i = 0; i < STREAMS; i++) {
858 if ((UINT32)(data[i] >> 32) == 0xfffffffful) {
859 hc->poly_accum[i] = poly64(hc->poly_accum[i],
860 hc->poly_key_8[i], p64 - 1);
861 hc->poly_accum[i] = poly64(hc->poly_accum[i],
862 hc->poly_key_8[i], (data[i] - 59));
863 } else {
864 hc->poly_accum[i] = poly64(hc->poly_accum[i],
865 hc->poly_key_8[i], data[i]);
866 }
867 }
868 }
869
870
871 /* ---------------------------------------------------------------------- */
872
873
874 /* The final step in UHASH is an inner-product hash. The poly hash
875 * produces a result not necessarily WORD_LEN bytes long. The inner-
876 * product hash breaks the polyhash output into 16-bit chunks and
877 * multiplies each with a 36 bit key.
878 */
879
880 static UINT64 ip_aux(UINT64 t, UINT64 *ipkp, UINT64 data)
881 {
882 t = t + ipkp[0] * (UINT64)(UINT16)(data >> 48);
883 t = t + ipkp[1] * (UINT64)(UINT16)(data >> 32);
884 t = t + ipkp[2] * (UINT64)(UINT16)(data >> 16);
885 t = t + ipkp[3] * (UINT64)(UINT16)(data);
886
887 return t;
888 }
889
890 static UINT32 ip_reduce_p36(UINT64 t)
891 {
892 /* Divisionless modular reduction */
893 UINT64 ret;
894
895 ret = (t & m36) + 5 * (t >> 36);
896 if (ret >= p36)
897 ret -= p36;
898
899 /* return least significant 32 bits */
900 return (UINT32)(ret);
901 }
902
903
904 /* If the data being hashed by UHASH is no longer than L1_KEY_LEN, then
905 * the polyhash stage is skipped and ip_short is applied directly to the
906 * NH output.
907 */
908 static void ip_short(uhash_ctx_t ahc, UINT8 *nh_res, u_char *res)
909 {
910 UINT64 t;
911 UINT64 *nhp = (UINT64 *)nh_res;
912
913 t = ip_aux(0,ahc->ip_keys, nhp[0]);
914 STORE_UINT32_BIG((UINT32 *)res+0, ip_reduce_p36(t) ^ ahc->ip_trans[0]);
915 #if (UMAC_OUTPUT_LEN >= 8)
916 t = ip_aux(0,ahc->ip_keys+4, nhp[1]);
917 STORE_UINT32_BIG((UINT32 *)res+1, ip_reduce_p36(t) ^ ahc->ip_trans[1]);
918 #endif
919 #if (UMAC_OUTPUT_LEN >= 12)
920 t = ip_aux(0,ahc->ip_keys+8, nhp[2]);
921 STORE_UINT32_BIG((UINT32 *)res+2, ip_reduce_p36(t) ^ ahc->ip_trans[2]);
922 #endif
923 #if (UMAC_OUTPUT_LEN == 16)
924 t = ip_aux(0,ahc->ip_keys+12, nhp[3]);
925 STORE_UINT32_BIG((UINT32 *)res+3, ip_reduce_p36(t) ^ ahc->ip_trans[3]);
926 #endif
927 }
928
929 /* If the data being hashed by UHASH is longer than L1_KEY_LEN, then
930 * the polyhash stage is not skipped and ip_long is applied to the
931 * polyhash output.
932 */
933 static void ip_long(uhash_ctx_t ahc, u_char *res)
934 {
935 int i;
936 UINT64 t;
937
938 for (i = 0; i < STREAMS; i++) {
939 /* fix polyhash output not in Z_p64 */
940 if (ahc->poly_accum[i] >= p64)
941 ahc->poly_accum[i] -= p64;
942 t = ip_aux(0,ahc->ip_keys+(i*4), ahc->poly_accum[i]);
943 STORE_UINT32_BIG((UINT32 *)res+i,
944 ip_reduce_p36(t) ^ ahc->ip_trans[i]);
945 }
946 }
947
948
949 /* ---------------------------------------------------------------------- */
950
951 /* ---------------------------------------------------------------------- */
952
953 /* Reset uhash context for next hash session */
954 static int uhash_reset(uhash_ctx_t pc)
955 {
956 nh_reset(&pc->hash);
957 pc->msg_len = 0;
958 pc->poly_accum[0] = 1;
959 #if (UMAC_OUTPUT_LEN >= 8)
960 pc->poly_accum[1] = 1;
961 #endif
962 #if (UMAC_OUTPUT_LEN >= 12)
963 pc->poly_accum[2] = 1;
964 #endif
965 #if (UMAC_OUTPUT_LEN == 16)
966 pc->poly_accum[3] = 1;
967 #endif
968 return 1;
969 }
970
971 /* ---------------------------------------------------------------------- */
972
973 /* Given a pointer to the internal key needed by kdf() and a uhash context,
974 * initialize the NH context and generate keys needed for poly and inner-
975 * product hashing. All keys are endian adjusted in memory so that native
976 * loads cause correct keys to be in registers during calculation.
977 */
978 static void uhash_init(uhash_ctx_t ahc, aes_int_key prf_key)
979 {
980 int i;
981 UINT8 buf[(8*STREAMS+4)*sizeof(UINT64)];
982
983 /* Zero the entire uhash context */
984 memset(ahc, 0, sizeof(uhash_ctx));
985
986 /* Initialize the L1 hash */
987 nh_init(&ahc->hash, prf_key);
988
989 /* Setup L2 hash variables */
990 kdf(buf, prf_key, 2, sizeof(buf)); /* Fill buffer with index 1 key */
991 for (i = 0; i < STREAMS; i++) {
992 /* Fill keys from the buffer, skipping bytes in the buffer not
993 * used by this implementation. Endian reverse the keys if on a
994 * little-endian computer.
995 */
996 memcpy(ahc->poly_key_8+i, buf+24*i, 8);
997 endian_convert_if_le(ahc->poly_key_8+i, 8, 8);
998 /* Mask the 64-bit keys to their special domain */
999 ahc->poly_key_8[i] &= ((UINT64)0x01ffffffu << 32) + 0x01ffffffu;
1000 ahc->poly_accum[i] = 1; /* Our polyhash prepends a non-zero word */
1001 }
1002
1003 /* Setup L3-1 hash variables */
1004 kdf(buf, prf_key, 3, sizeof(buf)); /* Fill buffer with index 2 key */
1005 for (i = 0; i < STREAMS; i++)
1006 memcpy(ahc->ip_keys+4*i, buf+(8*i+4)*sizeof(UINT64),
1007 4*sizeof(UINT64));
1008 endian_convert_if_le(ahc->ip_keys, sizeof(UINT64),
1009 sizeof(ahc->ip_keys));
1010 for (i = 0; i < STREAMS*4; i++)
1011 ahc->ip_keys[i] %= p36; /* Bring into Z_p36 */
1012
1013 /* Setup L3-2 hash variables */
1014 /* Fill buffer with index 4 key */
1015 kdf(ahc->ip_trans, prf_key, 4, STREAMS * sizeof(UINT32));
1016 endian_convert_if_le(ahc->ip_trans, sizeof(UINT32),
1017 STREAMS * sizeof(UINT32));
1018 explicit_bzero(buf, sizeof(buf));
1019 }
1020
1021 /* ---------------------------------------------------------------------- */
1022
1023 #if 0
1024 static uhash_ctx_t uhash_alloc(u_char key[])
1025 {
1026 /* Allocate memory and force to a 16-byte boundary. */
1027 uhash_ctx_t ctx;
1028 u_char bytes_to_add;
1029 aes_int_key prf_key;
1030
1031 ctx = (uhash_ctx_t)malloc(sizeof(uhash_ctx)+ALLOC_BOUNDARY);
1032 if (ctx) {
1033 if (ALLOC_BOUNDARY) {
1034 bytes_to_add = ALLOC_BOUNDARY -
1035 ((ptrdiff_t)ctx & (ALLOC_BOUNDARY -1));
1036 ctx = (uhash_ctx_t)((u_char *)ctx + bytes_to_add);
1037 *((u_char *)ctx - 1) = bytes_to_add;
1038 }
1039 aes_key_setup(key,prf_key);
1040 uhash_init(ctx, prf_key);
1041 }
1042 return (ctx);
1043 }
1044 #endif
1045
1046 /* ---------------------------------------------------------------------- */
1047
1048 #if 0
1049 static int uhash_free(uhash_ctx_t ctx)
1050 {
1051 /* Free memory allocated by uhash_alloc */
1052 u_char bytes_to_sub;
1053
1054 if (ctx) {
1055 if (ALLOC_BOUNDARY) {
1056 bytes_to_sub = *((u_char *)ctx - 1);
1057 ctx = (uhash_ctx_t)((u_char *)ctx - bytes_to_sub);
1058 }
1059 free(ctx);
1060 }
1061 return (1);
1062 }
1063 #endif
1064 /* ---------------------------------------------------------------------- */
1065
1066 static int uhash_update(uhash_ctx_t ctx, const u_char *input, long len)
1067 /* Given len bytes of data, we parse it into L1_KEY_LEN chunks and
1068 * hash each one with NH, calling the polyhash on each NH output.
1069 */
1070 {
1071 UWORD bytes_hashed, bytes_remaining;
1072 UINT64 result_buf[STREAMS];
1073 UINT8 *nh_result = (UINT8 *)&result_buf;
1074
1075 if (ctx->msg_len + len <= L1_KEY_LEN) {
1076 nh_update(&ctx->hash, (const UINT8 *)input, len);
1077 ctx->msg_len += len;
1078 } else {
1079
1080 bytes_hashed = ctx->msg_len % L1_KEY_LEN;
1081 if (ctx->msg_len == L1_KEY_LEN)
1082 bytes_hashed = L1_KEY_LEN;
1083
1084 if (bytes_hashed + len >= L1_KEY_LEN) {
1085
1086 /* If some bytes have been passed to the hash function */
1087 /* then we want to pass at most (L1_KEY_LEN - bytes_hashed) */
1088 /* bytes to complete the current nh_block. */
1089 if (bytes_hashed) {
1090 bytes_remaining = (L1_KEY_LEN - bytes_hashed);
1091 nh_update(&ctx->hash, (const UINT8 *)input, bytes_remaining);
1092 nh_final(&ctx->hash, nh_result);
1093 ctx->msg_len += bytes_remaining;
1094 poly_hash(ctx,(UINT32 *)nh_result);
1095 len -= bytes_remaining;
1096 input += bytes_remaining;
1097 }
1098
1099 /* Hash directly from input stream if enough bytes */
1100 while (len >= L1_KEY_LEN) {
1101 nh(&ctx->hash, (const UINT8 *)input, L1_KEY_LEN,
1102 L1_KEY_LEN, nh_result);
1103 ctx->msg_len += L1_KEY_LEN;
1104 len -= L1_KEY_LEN;
1105 input += L1_KEY_LEN;
1106 poly_hash(ctx,(UINT32 *)nh_result);
1107 }
1108 }
1109
1110 /* pass remaining < L1_KEY_LEN bytes of input data to NH */
1111 if (len) {
1112 nh_update(&ctx->hash, (const UINT8 *)input, len);
1113 ctx->msg_len += len;
1114 }
1115 }
1116
1117 return (1);
1118 }
1119
1120 /* ---------------------------------------------------------------------- */
1121
1122 static int uhash_final(uhash_ctx_t ctx, u_char *res)
1123 /* Incorporate any pending data, pad, and generate tag */
1124 {
1125 UINT64 result_buf[STREAMS];
1126 UINT8 *nh_result = (UINT8 *)&result_buf;
1127
1128 if (ctx->msg_len > L1_KEY_LEN) {
1129 if (ctx->msg_len % L1_KEY_LEN) {
1130 nh_final(&ctx->hash, nh_result);
1131 poly_hash(ctx,(UINT32 *)nh_result);
1132 }
1133 ip_long(ctx, res);
1134 } else {
1135 nh_final(&ctx->hash, nh_result);
1136 ip_short(ctx,nh_result, res);
1137 }
1138 uhash_reset(ctx);
1139 return (1);
1140 }
1141
1142 /* ---------------------------------------------------------------------- */
1143
1144 #if 0
1145 static int uhash(uhash_ctx_t ahc, u_char *msg, long len, u_char *res)
1146 /* assumes that msg is in a writable buffer of length divisible by */
1147 /* L1_PAD_BOUNDARY. Bytes beyond msg[len] may be zeroed. */
1148 {
1149 UINT8 nh_result[STREAMS*sizeof(UINT64)];
1150 UINT32 nh_len;
1151 int extra_zeroes_needed;
1152
1153 /* If the message to be hashed is no longer than L1_HASH_LEN, we skip
1154 * the polyhash.
1155 */
1156 if (len <= L1_KEY_LEN) {
1157 if (len == 0) /* If zero length messages will not */
1158 nh_len = L1_PAD_BOUNDARY; /* be seen, comment out this case */
1159 else
1160 nh_len = ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1));
1161 extra_zeroes_needed = nh_len - len;
1162 zero_pad((UINT8 *)msg + len, extra_zeroes_needed);
1163 nh(&ahc->hash, (UINT8 *)msg, nh_len, len, nh_result);
1164 ip_short(ahc,nh_result, res);
1165 } else {
1166 /* Otherwise, we hash each L1_KEY_LEN chunk with NH, passing the NH
1167 * output to poly_hash().
1168 */
1169 do {
1170 nh(&ahc->hash, (UINT8 *)msg, L1_KEY_LEN, L1_KEY_LEN, nh_result);
1171 poly_hash(ahc,(UINT32 *)nh_result);
1172 len -= L1_KEY_LEN;
1173 msg += L1_KEY_LEN;
1174 } while (len >= L1_KEY_LEN);
1175 if (len) {
1176 nh_len = ((len + (L1_PAD_BOUNDARY - 1)) & ~(L1_PAD_BOUNDARY - 1));
1177 extra_zeroes_needed = nh_len - len;
1178 zero_pad((UINT8 *)msg + len, extra_zeroes_needed);
1179 nh(&ahc->hash, (UINT8 *)msg, nh_len, len, nh_result);
1180 poly_hash(ahc,(UINT32 *)nh_result);
1181 }
1182
1183 ip_long(ahc, res);
1184 }
1185
1186 uhash_reset(ahc);
1187 return 1;
1188 }
1189 #endif
1190
1191 /* ---------------------------------------------------------------------- */
1192 /* ---------------------------------------------------------------------- */
1193 /* ----- Begin UMAC Section --------------------------------------------- */
1194 /* ---------------------------------------------------------------------- */
1195 /* ---------------------------------------------------------------------- */
1196
/* UMAC offers two interfaces: an all-at-once interface, where the entire
 * message to be authenticated is passed to UMAC in one buffer, and a
 * sequential interface, where the message is presented a little at a
 * time. The all-at-once interface is more optimized than the sequential
 * one and should be preferred when the sequential interface is not
 * required.
 */
1203 struct umac_ctx {
1204 uhash_ctx hash; /* Hash function for message compression */
1205 pdf_ctx pdf; /* PDF for hashed output */
1206 void *free_ptr; /* Address to free this struct via */
1207 } umac_ctx;
1208
1209 /* ---------------------------------------------------------------------- */
1210
1211 #if 0
1212 int umac_reset(struct umac_ctx *ctx)
1213 /* Reset the hash function to begin a new authentication. */
1214 {
1215 uhash_reset(&ctx->hash);
1216 return (1);
1217 }
1218 #endif
1219
1220 /* ---------------------------------------------------------------------- */
1221
1222 int umac_delete(struct umac_ctx *ctx)
1223 /* Deallocate the ctx structure */
1224 {
1225 if (ctx) {
1226 if (ALLOC_BOUNDARY)
1227 ctx = (struct umac_ctx *)ctx->free_ptr;
1228 explicit_bzero(ctx, sizeof(*ctx) + ALLOC_BOUNDARY);
1229 free(ctx);
1230 }
1231 return (1);
1232 }
1233
1234 /* ---------------------------------------------------------------------- */
1235
1236 struct umac_ctx *umac_new(const u_char key[])
1237 /* Dynamically allocate a umac_ctx struct, initialize variables,
1238 * generate subkeys from key. Align to 16-byte boundary.
1239 */
1240 {
1241 struct umac_ctx *ctx, *octx;
1242 size_t bytes_to_add;
1243 aes_int_key prf_key;
1244
1245 octx = ctx = xcalloc(1, sizeof(*ctx) + ALLOC_BOUNDARY);
1246 if (ctx) {
1247 if (ALLOC_BOUNDARY) {
1248 bytes_to_add = ALLOC_BOUNDARY -
1249 ((ptrdiff_t)ctx & (ALLOC_BOUNDARY - 1));
1250 ctx = (struct umac_ctx *)((u_char *)ctx + bytes_to_add);
1251 }
1252 ctx->free_ptr = octx;
1253 aes_key_setup(key, prf_key);
1254 pdf_init(&ctx->pdf, prf_key);
1255 uhash_init(&ctx->hash, prf_key);
1256 explicit_bzero(prf_key, sizeof(prf_key));
1257 }
1258
1259 return (ctx);
1260 }
1261
1262 /* ---------------------------------------------------------------------- */
1263
1264 int umac_final(struct umac_ctx *ctx, u_char tag[], const u_char nonce[8])
1265 /* Incorporate any pending data, pad, and generate tag */
1266 {
1267 uhash_final(&ctx->hash, (u_char *)tag);
1268 pdf_gen_xor(&ctx->pdf, (const UINT8 *)nonce, (UINT8 *)tag);
1269
1270 return (1);
1271 }
1272
1273 /* ---------------------------------------------------------------------- */
1274
1275 int umac_update(struct umac_ctx *ctx, const u_char *input, long len)
1276 /* Given len bytes of data, we parse it into L1_KEY_LEN chunks and */
1277 /* hash each one, calling the PDF on the hashed output whenever the hash- */
1278 /* output buffer is full. */
1279 {
1280 uhash_update(&ctx->hash, input, len);
1281 return (1);
1282 }
1283
1284 /* ---------------------------------------------------------------------- */
1285
1286 #if 0
1287 int umac(struct umac_ctx *ctx, u_char *input,
1288 long len, u_char tag[],
1289 u_char nonce[8])
1290 /* All-in-one version simply calls umac_update() and umac_final(). */
1291 {
1292 uhash(&ctx->hash, input, len, (u_char *)tag);
1293 pdf_gen_xor(&ctx->pdf, (UINT8 *)nonce, (UINT8 *)tag);
1294
1295 return (1);
1296 }
1297 #endif
1298
1299 /* ---------------------------------------------------------------------- */
1300 /* ---------------------------------------------------------------------- */
1301 /* ----- End UMAC Section ----------------------------------------------- */
1302 /* ---------------------------------------------------------------------- */
1303 /* ---------------------------------------------------------------------- */
1304