e_padlock.c revision 1.1.1.1 1 1.1 christos /*-
2 1.1 christos * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 1.1 christos * Written by Michal Ludvig <michal (at) logix.cz>
4 1.1 christos * http://www.logix.cz/michal
5 1.1 christos *
6 1.1 christos * Big thanks to Andy Polyakov for a help with optimization,
7 1.1 christos * assembler fixes, port to MS Windows and a lot of other
8 1.1 christos * valuable work on this engine!
9 1.1 christos */
10 1.1 christos
11 1.1 christos /* ====================================================================
12 1.1 christos * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
13 1.1 christos *
14 1.1 christos * Redistribution and use in source and binary forms, with or without
15 1.1 christos * modification, are permitted provided that the following conditions
16 1.1 christos * are met:
17 1.1 christos *
18 1.1 christos * 1. Redistributions of source code must retain the above copyright
19 1.1 christos * notice, this list of conditions and the following disclaimer.
20 1.1 christos *
21 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
22 1.1 christos * notice, this list of conditions and the following disclaimer in
23 1.1 christos * the documentation and/or other materials provided with the
24 1.1 christos * distribution.
25 1.1 christos *
26 1.1 christos * 3. All advertising materials mentioning features or use of this
27 1.1 christos * software must display the following acknowledgment:
28 1.1 christos * "This product includes software developed by the OpenSSL Project
29 1.1 christos * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
30 1.1 christos *
31 1.1 christos * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 1.1 christos * endorse or promote products derived from this software without
33 1.1 christos * prior written permission. For written permission, please contact
34 1.1 christos * licensing (at) OpenSSL.org.
35 1.1 christos *
36 1.1 christos * 5. Products derived from this software may not be called "OpenSSL"
37 1.1 christos * nor may "OpenSSL" appear in their names without prior written
38 1.1 christos * permission of the OpenSSL Project.
39 1.1 christos *
40 1.1 christos * 6. Redistributions of any form whatsoever must retain the following
41 1.1 christos * acknowledgment:
42 1.1 christos * "This product includes software developed by the OpenSSL Project
43 1.1 christos * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
44 1.1 christos *
45 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 1.1 christos * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 1.1 christos * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 1.1 christos * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 1.1 christos * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 1.1 christos * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 1.1 christos * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 1.1 christos * OF THE POSSIBILITY OF SUCH DAMAGE.
57 1.1 christos * ====================================================================
58 1.1 christos *
59 1.1 christos * This product includes cryptographic software written by Eric Young
60 1.1 christos * (eay (at) cryptsoft.com). This product includes software written by Tim
61 1.1 christos * Hudson (tjh (at) cryptsoft.com).
62 1.1 christos *
63 1.1 christos */
64 1.1 christos
65 1.1 christos #include <stdio.h>
66 1.1 christos #include <string.h>
67 1.1 christos
68 1.1 christos #include <openssl/opensslconf.h>
69 1.1 christos #include <openssl/crypto.h>
70 1.1 christos #include <openssl/dso.h>
71 1.1 christos #include <openssl/engine.h>
72 1.1 christos #include <openssl/evp.h>
73 1.1 christos #ifndef OPENSSL_NO_AES
74 1.1 christos # include <openssl/aes.h>
75 1.1 christos #endif
76 1.1 christos #include <openssl/rand.h>
77 1.1 christos #include <openssl/err.h>
78 1.1 christos
79 1.1 christos #ifndef OPENSSL_NO_HW
80 1.1 christos # ifndef OPENSSL_NO_HW_PADLOCK
81 1.1 christos
82 1.1 christos /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
83 1.1 christos # if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
84 1.1 christos # ifndef OPENSSL_NO_DYNAMIC_ENGINE
85 1.1 christos # define DYNAMIC_ENGINE
86 1.1 christos # endif
87 1.1 christos # elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
88 1.1 christos # ifdef ENGINE_DYNAMIC_SUPPORT
89 1.1 christos # define DYNAMIC_ENGINE
90 1.1 christos # endif
91 1.1 christos # else
92 1.1 christos # error "Only OpenSSL >= 0.9.7 is supported"
93 1.1 christos # endif
94 1.1 christos
95 1.1 christos /*
96 1.1 christos * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only does it
97 1.1 christos * not exist elsewhere, it cannot even be compiled on other platforms!
98 1.1 christos *
99 1.1 christos * In addition, because of the heavy use of inline assembler, compiler choice
100 1.1 christos * is limited to GCC and Microsoft C.
101 1.1 christos */
102 1.1 christos # undef COMPILE_HW_PADLOCK
103 1.1 christos # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 1.1 christos # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105 1.1 christos (defined(_MSC_VER) && defined(_M_IX86))
106 1.1 christos # define COMPILE_HW_PADLOCK
107 1.1 christos # endif
108 1.1 christos # endif
109 1.1 christos
110 1.1 christos # ifdef OPENSSL_NO_DYNAMIC_ENGINE
111 1.1 christos # ifdef COMPILE_HW_PADLOCK
112 1.1 christos static ENGINE *ENGINE_padlock(void);
113 1.1 christos # endif
114 1.1 christos
/*
 * Loader entry point for builds without dynamic-engine support.
 *
 * When PadLock support is compiled in, constructs the engine with
 * ENGINE_padlock(), passes it to ENGINE_add(), releases the local reference
 * with ENGINE_free() and finally calls ERR_clear_error().  When PadLock
 * support is not compiled in (e.g. non-x86 targets) the body is empty.
 */
void ENGINE_load_padlock(void)
{
# ifdef COMPILE_HW_PADLOCK
    ENGINE *padlock_engine = ENGINE_padlock();

    if (padlock_engine != NULL) {
        /* The result of ENGINE_add() is not inspected. */
        ENGINE_add(padlock_engine);
        ENGINE_free(padlock_engine);
        ERR_clear_error();
    }
# endif
}
127 1.1 christos
128 1.1 christos # endif
129 1.1 christos
130 1.1 christos # ifdef COMPILE_HW_PADLOCK
131 1.1 christos /*
132 1.1 christos * We do these includes here to avoid header problems on platforms that do
133 1.1 christos * not have the VIA padlock anyway...
134 1.1 christos */
135 1.1 christos # include <stdlib.h>
136 1.1 christos # ifdef _WIN32
137 1.1 christos # include <malloc.h>
138 1.1 christos # ifndef alloca
139 1.1 christos # define alloca _alloca
140 1.1 christos # endif
141 1.1 christos # elif defined(__GNUC__)
142 1.1 christos # ifndef alloca
143 1.1 christos # define alloca(s) __builtin_alloca(s)
144 1.1 christos # endif
145 1.1 christos # endif
146 1.1 christos
147 1.1 christos /* Function for ENGINE detection and control */
148 1.1 christos static int padlock_available(void);
149 1.1 christos static int padlock_init(ENGINE *e);
150 1.1 christos
151 1.1 christos /* RNG Stuff */
152 1.1 christos static RAND_METHOD padlock_rand;
153 1.1 christos
154 1.1 christos /* Cipher Stuff */
155 1.1 christos # ifndef OPENSSL_NO_AES
156 1.1 christos static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
157 1.1 christos const int **nids, int nid);
158 1.1 christos # endif
159 1.1 christos
/*
 * padlock_id is the identifier passed to ENGINE_set_id() and compared
 * against the requested id in padlock_bind_fn().  padlock_name is a scratch
 * buffer that padlock_bind_helper() fills with BIO_snprintf() before passing
 * it to ENGINE_set_name().
 */
160 1.1 christos /* Engine names */
161 1.1 christos static const char *padlock_id = "padlock";
162 1.1 christos static char padlock_name[100];
163 1.1 christos
/*
 * Both flags start at 0 and are set by padlock_available() from the CPU's
 * feature bits; padlock_bind_helper() currently forces padlock_use_rng
 * back to 0.
 */
164 1.1 christos /* Available features */
165 1.1 christos static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
166 1.1 christos static int padlock_use_rng = 0; /* Random Number Generator */
167 1.1 christos # ifndef OPENSSL_NO_AES
/* NOTE(review): consumers of this flag are not visible in this part of the file. */
168 1.1 christos static int padlock_aes_align_required = 1;
169 1.1 christos # endif
170 1.1 christos
171 1.1 christos /* ===== Engine "management" functions ===== */
172 1.1 christos
173 1.1 christos /* Prepare the ENGINE structure for registration */
174 1.1 christos static int padlock_bind_helper(ENGINE *e)
175 1.1 christos {
176 1.1 christos /* Check available features */
177 1.1 christos padlock_available();
178 1.1 christos
179 1.1 christos # if 1 /* disable RNG for now, see commentary in
180 1.1 christos * vicinity of RNG code */
181 1.1 christos padlock_use_rng = 0;
182 1.1 christos # endif
183 1.1 christos
184 1.1 christos /* Generate a nice engine name with available features */
185 1.1 christos BIO_snprintf(padlock_name, sizeof(padlock_name),
186 1.1 christos "VIA PadLock (%s, %s)",
187 1.1 christos padlock_use_rng ? "RNG" : "no-RNG",
188 1.1 christos padlock_use_ace ? "ACE" : "no-ACE");
189 1.1 christos
190 1.1 christos /* Register everything or return with an error */
191 1.1 christos if (!ENGINE_set_id(e, padlock_id) ||
192 1.1 christos !ENGINE_set_name(e, padlock_name) ||
193 1.1 christos !ENGINE_set_init_function(e, padlock_init) ||
194 1.1 christos # ifndef OPENSSL_NO_AES
195 1.1 christos (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
196 1.1 christos # endif
197 1.1 christos (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
198 1.1 christos return 0;
199 1.1 christos }
200 1.1 christos
201 1.1 christos /* Everything looks good */
202 1.1 christos return 1;
203 1.1 christos }
204 1.1 christos
205 1.1 christos # ifdef OPENSSL_NO_DYNAMIC_ENGINE
206 1.1 christos
207 1.1 christos /* Constructor */
208 1.1 christos static ENGINE *ENGINE_padlock(void)
209 1.1 christos {
210 1.1 christos ENGINE *eng = ENGINE_new();
211 1.1 christos
212 1.1 christos if (!eng) {
213 1.1 christos return NULL;
214 1.1 christos }
215 1.1 christos
216 1.1 christos if (!padlock_bind_helper(eng)) {
217 1.1 christos ENGINE_free(eng);
218 1.1 christos return NULL;
219 1.1 christos }
220 1.1 christos
221 1.1 christos return eng;
222 1.1 christos }
223 1.1 christos
224 1.1 christos # endif
225 1.1 christos
226 1.1 christos /* Check availability of the engine */
227 1.1 christos static int padlock_init(ENGINE *e)
228 1.1 christos {
229 1.1 christos return (padlock_use_rng || padlock_use_ace);
230 1.1 christos }
231 1.1 christos
232 1.1 christos /*
233 1.1 christos * This stuff is needed if this ENGINE is being compiled into a
234 1.1 christos * self-contained shared-library.
235 1.1 christos */
236 1.1 christos # ifdef DYNAMIC_ENGINE
237 1.1 christos static int padlock_bind_fn(ENGINE *e, const char *id)
238 1.1 christos {
239 1.1 christos if (id && (strcmp(id, padlock_id) != 0)) {
240 1.1 christos return 0;
241 1.1 christos }
242 1.1 christos
243 1.1 christos if (!padlock_bind_helper(e)) {
244 1.1 christos return 0;
245 1.1 christos }
246 1.1 christos
247 1.1 christos return 1;
248 1.1 christos }
249 1.1 christos
250 1.1 christos IMPLEMENT_DYNAMIC_CHECK_FN()
251 1.1 christos IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
252 1.1 christos # endif /* DYNAMIC_ENGINE */
253 1.1 christos /* ===== Here comes the "real" engine ===== */
254 1.1 christos # ifndef OPENSSL_NO_AES
255 1.1 christos /* Some AES-related constants */
256 1.1 christos # define AES_BLOCK_SIZE 16
257 1.1 christos # define AES_KEY_SIZE_128 16
258 1.1 christos # define AES_KEY_SIZE_192 24
259 1.1 christos # define AES_KEY_SIZE_256 32
260 1.1 christos /*
261 1.1 christos * Here we store the status information relevant to the current context.
262 1.1 christos */
263 1.1 christos /*
264 1.1 christos * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
265 1.1 christos * the order of items in this structure. Don't blindly modify, reorder,
266 1.1 christos * etc!
267 1.1 christos */
/*
 * Per-context state handed to the xcrypt instructions.
 *
 * The assembler templates address the members by fixed offsets from the
 * start of the structure: 16 for the control word and 32 for the key
 * ("leal 16(...)" / "leal 32(...)" in PADLOCK_XCRYPT_ASM), with the IV
 * occupying the first AES_BLOCK_SIZE bytes.  ALIGNED_CIPHER_DATA() is used
 * to obtain a 16-byte aligned instance of this structure.
 */
268 1.1 christos struct padlock_cipher_data {
269 1.1 christos unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
270 1.1 christos union {
/* pad[] fixes the size of the control word at four unsigned ints */
271 1.1 christos unsigned int pad[4];
272 1.1 christos struct {
/*
 * NOTE(review): several fields are plain (signed) int bit-fields; the
 * visible part of padlock_aes_init_key() stores 10 + (key_len - 128) / 32
 * in the 4-bit 'rounds' field - confirm the target compilers keep the
 * bit pattern.
 */
273 1.1 christos int rounds:4;
274 1.1 christos int dgst:1; /* n/a in C3 */
275 1.1 christos int align:1; /* n/a in C3 */
276 1.1 christos int ciphr:1; /* n/a in C3 */
277 1.1 christos unsigned int keygen:1;
278 1.1 christos int interm:1;
279 1.1 christos unsigned int encdec:1;
280 1.1 christos int ksize:2;
281 1.1 christos } b;
282 1.1 christos } cword; /* Control word */
283 1.1 christos AES_KEY ks; /* Encryption key */
284 1.1 christos };
285 1.1 christos
286 1.1 christos /*
287 1.1 christos * Essentially this variable belongs in thread local storage.
288 1.1 christos * Having this variable global on the other hand can only cause
289 1.1 christos * few bogus key reloads [if any at all on single-CPU system],
290 1.1 christos * so we accept the penalty...
291 1.1 christos */
/*
 * Cipher context most recently passed to padlock_verify_context(), which
 * compares it with the incoming context and then overwrites it.
 */
292 1.1 christos static volatile struct padlock_cipher_data *padlock_saved_context;
293 1.1 christos # endif
294 1.1 christos
295 1.1 christos /*-
296 1.1 christos * =======================================================
297 1.1 christos * Inline assembler section(s).
298 1.1 christos * =======================================================
299 1.1 christos * Order of arguments is chosen to facilitate Windows port
300 1.1 christos * using __fastcall calling convention. If you wish to add
301 1.1 christos * more routines, keep in mind that first __fastcall
302 1.1 christos * argument is passed in %ecx and second - in %edx.
303 1.1 christos * =======================================================
304 1.1 christos */
305 1.1 christos # if defined(__GNUC__) && __GNUC__>=2
306 1.1 christos /*
307 1.1 christos * As for excessive "push %ebx"/"pop %ebx" found all over.
308 1.1 christos * When generating position-independent code GCC won't let
309 1.1 christos * us use "b" in assembler templates nor even respect "ebx"
310 1.1 christos * in "clobber description." Therefore the trouble...
311 1.1 christos */
312 1.1 christos
313 1.1 christos /*
314 1.1 christos * Helper function - check if a CPUID instruction is available on this CPU
315 1.1 christos */
/*
 * Returns 1 when bit 21 (0x200000) of EFLAGS can be toggled, 0 otherwise.
 * Takes no arguments and touches no global state.
 */
316 1.1 christos static int padlock_insn_cpuid_available(void)
317 1.1 christos {
318 1.1 christos int result = -1;
319 1.1 christos
320 1.1 christos /*
321 1.1 christos * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
322 1.1 christos * CPUID is available.
323 1.1 christos */
/*
 * Sequence: read EFLAGS, flip bit 21 and remember the flipped bit in %ecx,
 * write the modified value back, read EFLAGS again and XOR the bit that
 * was read back with the remembered one.  A zero result means the bit
 * kept the value that was written.
 */
324 1.1 christos asm volatile ("pushf\n"
325 1.1 christos "popl %%eax\n"
326 1.1 christos "xorl $0x200000, %%eax\n"
327 1.1 christos "movl %%eax, %%ecx\n"
328 1.1 christos "andl $0x200000, %%ecx\n"
329 1.1 christos "pushl %%eax\n"
330 1.1 christos "popf\n"
331 1.1 christos "pushf\n"
332 1.1 christos "popl %%eax\n"
333 1.1 christos "andl $0x200000, %%eax\n"
334 1.1 christos "xorl %%eax, %%ecx\n"
335 1.1 christos "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");
336 1.1 christos
/* result == 0 <=> the toggled bit stuck <=> CPUID is available */
337 1.1 christos return (result == 0);
338 1.1 christos }
339 1.1 christos
340 1.1 christos /*
341 1.1 christos * Load supported features of the CPU to see if the PadLock is available.
342 1.1 christos */
343 1.1 christos static int padlock_available(void)
344 1.1 christos {
345 1.1 christos char vendor_string[16];
346 1.1 christos unsigned int eax, edx;
347 1.1 christos
348 1.1 christos /* First check if the CPUID instruction is available at all... */
349 1.1 christos if (!padlock_insn_cpuid_available())
350 1.1 christos return 0;
351 1.1 christos
352 1.1 christos /* Are we running on the Centaur (VIA) CPU? */
353 1.1 christos eax = 0x00000000;
354 1.1 christos vendor_string[12] = 0;
355 1.1 christos asm volatile ("pushl %%ebx\n"
356 1.1 christos "cpuid\n"
357 1.1 christos "movl %%ebx,(%%edi)\n"
358 1.1 christos "movl %%edx,4(%%edi)\n"
359 1.1 christos "movl %%ecx,8(%%edi)\n"
360 1.1 christos "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
361 1.1 christos if (strcmp(vendor_string, "CentaurHauls") != 0)
362 1.1 christos return 0;
363 1.1 christos
364 1.1 christos /* Check for Centaur Extended Feature Flags presence */
365 1.1 christos eax = 0xC0000000;
366 1.1 christos asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
367 1.1 christos if (eax < 0xC0000001)
368 1.1 christos return 0;
369 1.1 christos
370 1.1 christos /* Read the Centaur Extended Feature Flags */
371 1.1 christos eax = 0xC0000001;
372 1.1 christos asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
373 1.1 christos "=d"(edx)::"ecx");
374 1.1 christos
375 1.1 christos /* Fill up some flags */
376 1.1 christos padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
377 1.1 christos padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));
378 1.1 christos
379 1.1 christos return padlock_use_ace + padlock_use_rng;
380 1.1 christos }
381 1.1 christos
382 1.1 christos # ifndef OPENSSL_NO_AES
383 1.1 christos # ifndef AES_ASM
384 1.1 christos /* Our own htonl()/ntohl() */
385 1.1 christos static inline void padlock_bswapl(AES_KEY *ks)
386 1.1 christos {
387 1.1 christos size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
388 1.1 christos unsigned int *key = ks->rd_key;
389 1.1 christos
390 1.1 christos while (i--) {
391 1.1 christos asm volatile ("bswapl %0":"+r" (*key));
392 1.1 christos key++;
393 1.1 christos }
394 1.1 christos }
395 1.1 christos # endif
396 1.1 christos # endif
397 1.1 christos
398 1.1 christos /*
399 1.1 christos * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
400 1.1 christos * stack clears EFLAGS[30] which does the trick.
401 1.1 christos */
402 1.1 christos static inline void padlock_reload_key(void)
403 1.1 christos {
/* Push EFLAGS and pop it straight back; no operands or clobbers are declared */
404 1.1 christos asm volatile ("pushfl; popfl");
405 1.1 christos }
406 1.1 christos
407 1.1 christos # ifndef OPENSSL_NO_AES
408 1.1 christos /*
409 1.1 christos * This is heuristic key context tracing. At first one
410 1.1 christos * believes that one should use atomic swap instructions,
411 1.1 christos * but it's not actually necessary. Point is that if
412 1.1 christos * padlock_saved_context was changed by another thread
413 1.1 christos * after we've read it and before we compare it with cdata,
414 1.1 christos * our key *shall* be reloaded upon thread context switch
415 1.1 christos * and we are therefore set in either case...
416 1.1 christos */
/*
 * Compare cdata with padlock_saved_context and record cdata as the new
 * saved context.
 *
 * Operands: %0 is padlock_saved_context as a memory operand, %1 is its
 * current value in a register, %2 is cdata.
 *
 * The template pushes EFLAGS and tests bit 30 of the pushed copy.  Only
 * when that bit is set AND the saved context differs from cdata does it
 * execute "popfl" (followed by "subl $4" so the common "addl $4" at label 1
 * leaves the stack pointer balanced); otherwise the pushed word is simply
 * discarded.  In every case cdata is then stored into
 * padlock_saved_context.
 */
417 1.1 christos static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
418 1.1 christos {
419 1.1 christos asm volatile ("pushfl\n"
420 1.1 christos " btl $30,(%%esp)\n"
421 1.1 christos " jnc 1f\n"
422 1.1 christos " cmpl %2,%1\n"
423 1.1 christos " je 1f\n"
424 1.1 christos " popfl\n"
425 1.1 christos " subl $4,%%esp\n"
426 1.1 christos "1: addl $4,%%esp\n"
427 1.1 christos " movl %2,%0":"+m" (padlock_saved_context)
428 1.1 christos :"r"(padlock_saved_context), "r"(cdata):"cc");
429 1.1 christos }
430 1.1 christos
431 1.1 christos /* Template for padlock_xcrypt_* modes */
432 1.1 christos /*
433 1.1 christos * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
434 1.1 christos * of the 'padlock_cipher_data' structure.
435 1.1 christos */
/*
 * PADLOCK_XCRYPT_ASM(name, rep_xcrypt) defines
 *
 *     static inline void *name(size_t cnt, struct padlock_cipher_data *cdata,
 *                              void *out, const void *inp);
 *
 * Register set-up performed by the template before the opcode bytes given
 * in rep_xcrypt are executed:
 *     %eax = cdata, %edx = cdata + 16, %ebx = cdata + 32,
 *     %ecx = cnt,   %edi = out,        %esi = inp.
 * %ebx is saved and restored by hand around the instruction.  The value
 * left in %eax afterwards is returned as 'iv'.
 * NOTE(review): the unit of cnt (presumably 16-byte blocks) is not visible
 * in this part of the file - confirm against the callers.
 */
436 1.1 christos # define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
437 1.1 christos static inline void *name(size_t cnt, \
438 1.1 christos struct padlock_cipher_data *cdata, \
439 1.1 christos void *out, const void *inp) \
440 1.1 christos { void *iv; \
441 1.1 christos asm volatile ( "pushl %%ebx\n" \
442 1.1 christos " leal 16(%0),%%edx\n" \
443 1.1 christos " leal 32(%0),%%ebx\n" \
444 1.1 christos rep_xcrypt "\n" \
445 1.1 christos " popl %%ebx" \
446 1.1 christos : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
447 1.1 christos : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
448 1.1 christos : "edx", "cc", "memory"); \
449 1.1 christos return iv; \
450 1.1 christos }
451 1.1 christos
/* The four instantiations differ only in the last opcode byte. */
452 1.1 christos /* Generate all functions with appropriate opcodes */
453 1.1 christos /* rep xcryptecb */
454 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
455 1.1 christos /* rep xcryptcbc */
456 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
457 1.1 christos /* rep xcryptcfb */
458 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
459 1.1 christos /* rep xcryptofb */
460 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
461 1.1 christos # endif
462 1.1 christos /* The RNG call itself */
/*
 * Execute the xstore instruction.
 *
 * addr    - destination, passed in %edi; the "=m" output tells the compiler
 *           that at least the unsigned int at *addr is written.
 * edx_in  - value loaded into %edx before the instruction.
 * Returns the contents of %eax after the instruction.
 * NOTE(review): the meaning of edx_in and of the returned status word is
 * defined by the hardware and is not visible here.
 */
463 1.1 christos static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
464 1.1 christos {
465 1.1 christos unsigned int eax_out;
466 1.1 christos
467 1.1 christos asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
468 1.1 christos :"=a" (eax_out), "=m"(*(unsigned *)addr)
469 1.1 christos :"D"(addr), "d"(edx_in)
470 1.1 christos );
471 1.1 christos
472 1.1 christos return eax_out;
473 1.1 christos }
474 1.1 christos
475 1.1 christos /*
476 1.1 christos * Why not inline 'rep movsd'? I failed to find information on what value in
477 1.1 christos * Direction Flag one can expect and consequently have to apply
478 1.1 christos * "better-safe-than-sorry" approach and assume "undefined." I could
479 1.1 christos * explicitly clear it and restore the original value upon return from
480 1.1 christos * padlock_aes_cipher, but it's presumably too much trouble for too little
481 1.1 christos * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
482 1.1 christos * affected by the Direction Flag and pointers advance toward larger
483 1.1 christos * addresses unconditionally.
484 1.1 christos */
/*
 * Word-wise forward copy used instead of 'rep movsd'.
 *
 * Copies n / sizeof(long) whole 'long' words from src to dst, lowest address
 * first; any trailing n % sizeof(long) bytes are NOT copied.  Both pointers
 * are accessed as 'long', so they must be suitably aligned for that type.
 *
 * A length smaller than one word (including 0) copies nothing.  The loop is
 * tested before the first store so that such a length cannot wrap the
 * counter and run off the end of the buffers.
 *
 * Returns dst.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;
    size_t words = n / sizeof(*d);

    while (words-- > 0)
        *d++ = *s++;

    return dst;
}
498 1.1 christos
499 1.1 christos # elif defined(_MSC_VER)
500 1.1 christos /*
501 1.1 christos * Unlike GCC these are real functions. In order to minimize impact
502 1.1 christos * on performance we adhere to __fastcall calling convention in
503 1.1 christos * order to get two first arguments passed through %ecx and %edx.
504 1.1 christos * Which kind of suits very well, as instructions in question use
505 1.1 christos * both %ecx and %edx as input:-)
506 1.1 christos */
/*
 * REP_XCRYPT(code) emits the four opcode bytes 0xf3 0x0f 0xa7 <code>,
 * i.e. the same byte sequences the GCC variant spells with ".byte".
 */
507 1.1 christos # define REP_XCRYPT(code) \
508 1.1 christos _asm _emit 0xf3 \
509 1.1 christos _asm _emit 0x0f _asm _emit 0xa7 \
510 1.1 christos _asm _emit code
511 1.1 christos
512 1.1 christos /*
513 1.1 christos * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
514 1.1 christos * of the 'padlock_cipher_data' structure.
515 1.1 christos */
/*
 * PADLOCK_XCRYPT_ASM(name, code) defines a __fastcall function
 * name(cnt, cdata, outp, inp).  Per the calling-convention note earlier in
 * this file, cnt arrives in ecx and cdata in edx; the body moves cdata to
 * eax, points edx at cdata + 16 and ebx at cdata + 32, loads edi/esi from
 * outp/inp and executes the instruction.  There is no return statement:
 * the function's result is whatever the instruction leaves in eax.
 */
516 1.1 christos # define PADLOCK_XCRYPT_ASM(name,code) \
517 1.1 christos static void * __fastcall \
518 1.1 christos name (size_t cnt, void *cdata, \
519 1.1 christos void *outp, const void *inp) \
520 1.1 christos { _asm mov eax,edx \
521 1.1 christos _asm lea edx,[eax+16] \
522 1.1 christos _asm lea ebx,[eax+32] \
523 1.1 christos _asm mov edi,outp \
524 1.1 christos _asm mov esi,inp \
525 1.1 christos REP_XCRYPT(code) \
526 1.1 christos }
527 1.1 christos
/* ECB, CBC, CFB and OFB variants; only the last opcode byte differs. */
528 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
529 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
530 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
531 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
532 1.1 christos
/*
 * MSVC variant of the xstore wrapper (opcode bytes 0x0f 0xa7 0xc0).
 * outp arrives in ecx and is copied to edi; code arrives in edx and is left
 * there.  There is no return statement: the result is the value the
 * instruction leaves in eax.
 */
533 1.1 christos static int __fastcall padlock_xstore(void *outp, unsigned int code)
534 1.1 christos {
535 1.1 christos _asm mov edi,ecx
536 1.1 christos _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
537 1.1 christos }
538 1.1 christos
/*
 * MSVC variant of padlock_reload_key(): pushes EFLAGS and pops it straight
 * back, mirroring the "pushfl; popfl" of the GCC variant.
 */
539 1.1 christos static void __fastcall padlock_reload_key(void)
540 1.1 christos {
541 1.1 christos _asm pushfd
542 1.1 christos _asm popfd
543 1.1 christos }
544 1.1 christos
/*
 * MSVC variant of padlock_verify_context(); cdata arrives in ecx.
 *
 * Pushes EFLAGS and tests bit 30 of the pushed copy.  Only when that bit is
 * set and ecx differs from padlock_saved_context is "popfd" executed
 * (followed by "sub esp,4" so that the shared "add esp,4" at 'skip' keeps
 * the stack balanced).  In every case ecx is then stored into
 * padlock_saved_context.
 */
545 1.1 christos static void __fastcall padlock_verify_context(void *cdata)
546 1.1 christos {
547 1.1 christos _asm {
548 1.1 christos pushfd
549 1.1 christos bt DWORD PTR[esp],30
550 1.1 christos jnc skip
551 1.1 christos cmp ecx,padlock_saved_context
552 1.1 christos je skip
553 1.1 christos popfd
554 1.1 christos sub esp,4
555 1.1 christos skip: add esp,4
556 1.1 christos mov padlock_saved_context,ecx
557 1.1 christos }
558 1.1 christos }
559 1.1 christos
/*
 * MSVC variant of padlock_available().
 *
 * Steps, all in one _asm block:
 *   1. Toggle EFLAGS bit 21 and check that the change is visible when the
 *      flags are read back; otherwise jump to 'noluck'.
 *   2. CPUID leaf 0: compare ebx/edx/ecx with 'tneC', 'Hrua', 'slua'
 *      (the string "CentaurHauls" as three little-endian dwords).
 *   3. CPUID leaf 0xC0000000: require the returned eax to be at least
 *      0xC0000001.
 *   4. CPUID leaf 0xC0000001: if edx bits 6 and 7 are both set, store 1 in
 *      padlock_use_ace; if bits 2 and 3 are both set, store 1 in
 *      padlock_use_rng.
 *
 * There is no return statement; the result is the value left in eax, which
 * is zeroed before each test after the first CPUID and incremented once
 * per detected unit.  The flags are only ever set to 1 here, never cleared.
 * NOTE(review): on the early 'jnc noluck' exit of step 1 eax has not been
 * zeroed yet - confirm it is 0 at that point.
 */
560 1.1 christos static int
561 1.1 christos padlock_available(void)
562 1.1 christos {
563 1.1 christos _asm {
564 1.1 christos pushfd
565 1.1 christos pop eax
566 1.1 christos mov ecx,eax
567 1.1 christos xor eax,1<<21
568 1.1 christos push eax
569 1.1 christos popfd
570 1.1 christos pushfd
571 1.1 christos pop eax
572 1.1 christos xor eax,ecx
573 1.1 christos bt eax,21
574 1.1 christos jnc noluck
575 1.1 christos mov eax,0
576 1.1 christos cpuid
577 1.1 christos xor eax,eax
578 1.1 christos cmp ebx,'tneC'
579 1.1 christos jne noluck
580 1.1 christos cmp edx,'Hrua'
581 1.1 christos jne noluck
582 1.1 christos cmp ecx,'slua'
583 1.1 christos jne noluck
584 1.1 christos mov eax,0xC0000000
585 1.1 christos cpuid
586 1.1 christos mov edx,eax
587 1.1 christos xor eax,eax
588 1.1 christos cmp edx,0xC0000001
589 1.1 christos jb noluck
590 1.1 christos mov eax,0xC0000001
591 1.1 christos cpuid
592 1.1 christos xor eax,eax
593 1.1 christos bt edx,6
594 1.1 christos jnc skip_a
595 1.1 christos bt edx,7
596 1.1 christos jnc skip_a
597 1.1 christos mov padlock_use_ace,1
598 1.1 christos inc eax
599 1.1 christos skip_a: bt edx,2
600 1.1 christos jnc skip_r
601 1.1 christos bt edx,3
602 1.1 christos jnc skip_r
603 1.1 christos mov padlock_use_rng,1
604 1.1 christos inc eax
605 1.1 christos skip_r:
606 1.1 christos noluck:
607 1.1 christos }
608 1.1 christos }
609 1.1 christos
/*
 * MSVC variant of padlock_bswapl(): byte-swaps 60 consecutive dwords in
 * place, starting at 'key' (which arrives in ecx and is copied to both esi
 * and edi).  EFLAGS is saved and restored around the loop because the
 * direction flag is cleared with 'cld' for lodsd/stosd.
 * NOTE(review): the count 60 is hard-coded here, whereas the GCC variant
 * derives it from sizeof(ks->rd_key) - confirm the two agree.
 */
610 1.1 christos static void __fastcall padlock_bswapl(void *key)
611 1.1 christos {
612 1.1 christos _asm {
613 1.1 christos pushfd
614 1.1 christos cld
615 1.1 christos mov esi,ecx
616 1.1 christos mov edi,ecx
617 1.1 christos mov ecx,60
618 1.1 christos up: lodsd
619 1.1 christos bswap eax
620 1.1 christos stosd
621 1.1 christos loop up
622 1.1 christos popfd
623 1.1 christos }
624 1.1 christos }
625 1.1 christos
626 1.1 christos /*
627 1.1 christos * MS actually specifies status of Direction Flag and compiler even manages
628 1.1 christos * to compile following as 'rep movsd' all by itself...
629 1.1 christos */
/*
 * padlock_memcpy(o, i, n): memcpy() with the byte count rounded down to a
 * multiple of 4 ((n) & ~3U); the result is cast to unsigned char *.
 */
630 1.1 christos # define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
631 1.1 christos # endif
632 1.1 christos /* ===== AES encryption/decryption ===== */
633 1.1 christos # ifndef OPENSSL_NO_AES
/*
 * Compatibility aliases: when only the NID_aes_<bits>_cfb128 /
 * NID_aes_<bits>_ofb128 names are defined, provide the shorter
 * NID_aes_<bits>_cfb / NID_aes_<bits>_ofb names used by the rest of this
 * file.
 */
634 1.1 christos # if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
635 1.1 christos # define NID_aes_128_cfb NID_aes_128_cfb128
636 1.1 christos # endif
637 1.1 christos # if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
638 1.1 christos # define NID_aes_128_ofb NID_aes_128_ofb128
639 1.1 christos # endif
640 1.1 christos # if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
641 1.1 christos # define NID_aes_192_cfb NID_aes_192_cfb128
642 1.1 christos # endif
643 1.1 christos # if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
644 1.1 christos # define NID_aes_192_ofb NID_aes_192_ofb128
645 1.1 christos # endif
646 1.1 christos # if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
647 1.1 christos # define NID_aes_256_cfb NID_aes_256_cfb128
648 1.1 christos # endif
649 1.1 christos # if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
650 1.1 christos # define NID_aes_256_ofb NID_aes_256_ofb128
651 1.1 christos # endif
/*
 * padlock_cipher_nids is the array handed back through *nids by
 * padlock_ciphers() when it is called without a cipher pointer; it lists
 * ECB, CBC, CFB and OFB for each of the 128-, 192- and 256-bit key sizes.
 * Every entry has a matching case in padlock_ciphers().
 */
652 1.1 christos /*
653 1.1 christos * List of supported ciphers.
654 1.1 christos */ static int padlock_cipher_nids[] = {
655 1.1 christos NID_aes_128_ecb,
656 1.1 christos NID_aes_128_cbc,
657 1.1 christos NID_aes_128_cfb,
658 1.1 christos NID_aes_128_ofb,
659 1.1 christos
660 1.1 christos NID_aes_192_ecb,
661 1.1 christos NID_aes_192_cbc,
662 1.1 christos NID_aes_192_cfb,
663 1.1 christos NID_aes_192_ofb,
664 1.1 christos
665 1.1 christos NID_aes_256_ecb,
666 1.1 christos NID_aes_256_cbc,
667 1.1 christos NID_aes_256_cfb,
668 1.1 christos NID_aes_256_ofb,
669 1.1 christos };
670 1.1 christos
/* Number of entries in padlock_cipher_nids, returned by padlock_ciphers() */
671 1.1 christos static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
672 1.1 christos sizeof(padlock_cipher_nids[0]));
673 1.1 christos
674 1.1 christos /* Function prototypes ... */
675 1.1 christos static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
676 1.1 christos const unsigned char *iv, int enc);
677 1.1 christos static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
678 1.1 christos const unsigned char *in, size_t nbytes);
679 1.1 christos
/*
 * NEAREST_ALIGNED(ptr): ptr rounded up to the next 16-byte boundary, as an
 * unsigned char *.  An already aligned pointer is returned unchanged; the
 * adjustment is at most 15 bytes.  The argument is evaluated twice.
 */
# define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
                ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
/*
 * ALIGNED_CIPHER_DATA(ctx): the 16-byte aligned struct padlock_cipher_data
 * located inside ctx->cipher_data.  The macro argument is parenthesised so
 * that any pointer expression (e.g. &object), not only a plain identifier,
 * expands correctly.
 */
# define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
                NEAREST_ALIGNED((ctx)->cipher_data))
684 1.1 christos
/*
 * Per-mode block sizes, selected by DECLARE_AES_EVP() through token pasting
 * (EVP_CIPHER_block_size_##umode): a full AES block for ECB and CBC, 1 for
 * OFB and CFB.
 */
685 1.1 christos # define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
686 1.1 christos # define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
687 1.1 christos # define EVP_CIPHER_block_size_OFB 1
688 1.1 christos # define EVP_CIPHER_block_size_CFB 1
689 1.1 christos
690 1.1 christos /*
691 1.1 christos * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
692 1.1 christos * of preprocessor magic :-)
693 1.1 christos */
/*
 * DECLARE_AES_EVP(ksize, lmode, umode) defines the constant
 * padlock_aes_<ksize>_<lmode> of type EVP_CIPHER.
 *
 *   ksize - key size in bits (128, 192, 256); selects AES_KEY_SIZE_<ksize>.
 *   lmode - lower-case mode name, used in the NID and the object name.
 *   umode - upper-case mode name, used for EVP_CIPHER_block_size_<umode>
 *           and EVP_CIPH_<umode>_MODE.
 *
 * All variants share padlock_aes_init_key and padlock_aes_cipher.  The size
 * entry is sizeof(struct padlock_cipher_data) + 16; the extra 16 bytes give
 * ALIGNED_CIPHER_DATA() room to round the pointer up to a 16-byte boundary.
 * The initialiser is positional, so it relies on the member order of
 * EVP_CIPHER in the OpenSSL headers being built against.
 */
694 1.1 christos # define DECLARE_AES_EVP(ksize,lmode,umode) \
695 1.1 christos static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
696 1.1 christos NID_aes_##ksize##_##lmode, \
697 1.1 christos EVP_CIPHER_block_size_##umode, \
698 1.1 christos AES_KEY_SIZE_##ksize, \
699 1.1 christos AES_BLOCK_SIZE, \
700 1.1 christos 0 | EVP_CIPH_##umode##_MODE, \
701 1.1 christos padlock_aes_init_key, \
702 1.1 christos padlock_aes_cipher, \
703 1.1 christos NULL, \
704 1.1 christos sizeof(struct padlock_cipher_data) + 16, \
705 1.1 christos EVP_CIPHER_set_asn1_iv, \
706 1.1 christos EVP_CIPHER_get_asn1_iv, \
707 1.1 christos NULL, \
708 1.1 christos NULL \
709 1.1 christos }
710 1.1 christos
/* One EVP_CIPHER per (key size, mode) pair listed in padlock_cipher_nids */
711 1.1 christos DECLARE_AES_EVP(128, ecb, ECB);
712 1.1 christos DECLARE_AES_EVP(128, cbc, CBC);
713 1.1 christos DECLARE_AES_EVP(128, cfb, CFB);
714 1.1 christos DECLARE_AES_EVP(128, ofb, OFB);
715 1.1 christos
716 1.1 christos DECLARE_AES_EVP(192, ecb, ECB);
717 1.1 christos DECLARE_AES_EVP(192, cbc, CBC);
718 1.1 christos DECLARE_AES_EVP(192, cfb, CFB);
719 1.1 christos DECLARE_AES_EVP(192, ofb, OFB);
720 1.1 christos
721 1.1 christos DECLARE_AES_EVP(256, ecb, ECB);
722 1.1 christos DECLARE_AES_EVP(256, cbc, CBC);
723 1.1 christos DECLARE_AES_EVP(256, cfb, CFB);
724 1.1 christos DECLARE_AES_EVP(256, ofb, OFB);
725 1.1 christos
726 1.1 christos static int
727 1.1 christos padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
728 1.1 christos int nid)
729 1.1 christos {
730 1.1 christos /* No specific cipher => return a list of supported nids ... */
731 1.1 christos if (!cipher) {
732 1.1 christos *nids = padlock_cipher_nids;
733 1.1 christos return padlock_cipher_nids_num;
734 1.1 christos }
735 1.1 christos
736 1.1 christos /* ... or the requested "cipher" otherwise */
737 1.1 christos switch (nid) {
738 1.1 christos case NID_aes_128_ecb:
739 1.1 christos *cipher = &padlock_aes_128_ecb;
740 1.1 christos break;
741 1.1 christos case NID_aes_128_cbc:
742 1.1 christos *cipher = &padlock_aes_128_cbc;
743 1.1 christos break;
744 1.1 christos case NID_aes_128_cfb:
745 1.1 christos *cipher = &padlock_aes_128_cfb;
746 1.1 christos break;
747 1.1 christos case NID_aes_128_ofb:
748 1.1 christos *cipher = &padlock_aes_128_ofb;
749 1.1 christos break;
750 1.1 christos
751 1.1 christos case NID_aes_192_ecb:
752 1.1 christos *cipher = &padlock_aes_192_ecb;
753 1.1 christos break;
754 1.1 christos case NID_aes_192_cbc:
755 1.1 christos *cipher = &padlock_aes_192_cbc;
756 1.1 christos break;
757 1.1 christos case NID_aes_192_cfb:
758 1.1 christos *cipher = &padlock_aes_192_cfb;
759 1.1 christos break;
760 1.1 christos case NID_aes_192_ofb:
761 1.1 christos *cipher = &padlock_aes_192_ofb;
762 1.1 christos break;
763 1.1 christos
764 1.1 christos case NID_aes_256_ecb:
765 1.1 christos *cipher = &padlock_aes_256_ecb;
766 1.1 christos break;
767 1.1 christos case NID_aes_256_cbc:
768 1.1 christos *cipher = &padlock_aes_256_cbc;
769 1.1 christos break;
770 1.1 christos case NID_aes_256_cfb:
771 1.1 christos *cipher = &padlock_aes_256_cfb;
772 1.1 christos break;
773 1.1 christos case NID_aes_256_ofb:
774 1.1 christos *cipher = &padlock_aes_256_ofb;
775 1.1 christos break;
776 1.1 christos
777 1.1 christos default:
778 1.1 christos /* Sorry, we don't support this NID */
779 1.1 christos *cipher = NULL;
780 1.1 christos return 0;
781 1.1 christos }
782 1.1 christos
783 1.1 christos return 1;
784 1.1 christos }
785 1.1 christos
786 1.1 christos /* Prepare the encryption key for PadLock usage */
787 1.1 christos static int
788 1.1 christos padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
789 1.1 christos const unsigned char *iv, int enc)
790 1.1 christos {
791 1.1 christos struct padlock_cipher_data *cdata;
792 1.1 christos int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
793 1.1 christos
794 1.1 christos if (key == NULL)
795 1.1 christos return 0; /* ERROR */
796 1.1 christos
797 1.1 christos cdata = ALIGNED_CIPHER_DATA(ctx);
798 1.1 christos memset(cdata, 0, sizeof(struct padlock_cipher_data));
799 1.1 christos
800 1.1 christos /* Prepare Control word. */
801 1.1 christos if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
802 1.1 christos cdata->cword.b.encdec = 0;
803 1.1 christos else
804 1.1 christos cdata->cword.b.encdec = (ctx->encrypt == 0);
805 1.1 christos cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
806 1.1 christos cdata->cword.b.ksize = (key_len - 128) / 64;
807 1.1 christos
808 1.1 christos switch (key_len) {
809 1.1 christos case 128:
810 1.1 christos /*
811 1.1 christos * PadLock can generate an extended key for AES128 in hardware
812 1.1 christos */
813 1.1 christos memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
814 1.1 christos cdata->cword.b.keygen = 0;
815 1.1 christos break;
816 1.1 christos
817 1.1 christos case 192:
818 1.1 christos case 256:
819 1.1 christos /*
820 1.1 christos * Generate an extended AES key in software. Needed for AES192/AES256
821 1.1 christos */
822 1.1 christos /*
823 1.1 christos * Well, the above applies to Stepping 8 CPUs and is listed as
824 1.1 christos * hardware errata. They most likely will fix it at some point and
825 1.1 christos * then a check for stepping would be due here.
826 1.1 christos */
827 1.1 christos if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
828 1.1 christos EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
829 1.1 christos AES_set_encrypt_key(key, key_len, &cdata->ks);
830 1.1 christos else
831 1.1 christos AES_set_decrypt_key(key, key_len, &cdata->ks);
832 1.1 christos # ifndef AES_ASM
833 1.1 christos /*
834 1.1 christos * OpenSSL C functions use byte-swapped extended key.
835 1.1 christos */
836 1.1 christos padlock_bswapl(&cdata->ks);
837 1.1 christos # endif
838 1.1 christos cdata->cword.b.keygen = 1;
839 1.1 christos break;
840 1.1 christos
841 1.1 christos default:
842 1.1 christos /* ERROR */
843 1.1 christos return 0;
844 1.1 christos }
845 1.1 christos
846 1.1 christos /*
847 1.1 christos * This is done to cover for cases when user reuses the
848 1.1 christos * context for new key. The catch is that if we don't do
849 1.1 christos * this, padlock_eas_cipher might proceed with old key...
850 1.1 christos */
851 1.1 christos padlock_reload_key();
852 1.1 christos
853 1.1 christos return 1;
854 1.1 christos }
855 1.1 christos
856 1.1 christos /*-
857 1.1 christos * Simplified version of padlock_aes_cipher() used when
858 1.1 christos * 1) both input and output buffers are at aligned addresses.
859 1.1 christos * or when
860 1.1 christos * 2) running on a newer CPU that doesn't require aligned buffers.
861 1.1 christos */
862 1.1 christos static int
863 1.1 christos padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
864 1.1 christos const unsigned char *in_arg, size_t nbytes)
865 1.1 christos {
866 1.1 christos struct padlock_cipher_data *cdata;
867 1.1 christos void *iv;
868 1.1 christos
869 1.1 christos cdata = ALIGNED_CIPHER_DATA(ctx);
870 1.1 christos padlock_verify_context(cdata);
871 1.1 christos
872 1.1 christos switch (EVP_CIPHER_CTX_mode(ctx)) {
873 1.1 christos case EVP_CIPH_ECB_MODE:
874 1.1 christos padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
875 1.1 christos break;
876 1.1 christos
877 1.1 christos case EVP_CIPH_CBC_MODE:
878 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
879 1.1 christos iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
880 1.1 christos in_arg);
881 1.1 christos memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
882 1.1 christos break;
883 1.1 christos
884 1.1 christos case EVP_CIPH_CFB_MODE:
885 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
886 1.1 christos iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
887 1.1 christos in_arg);
888 1.1 christos memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
889 1.1 christos break;
890 1.1 christos
891 1.1 christos case EVP_CIPH_OFB_MODE:
892 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
893 1.1 christos padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
894 1.1 christos memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
895 1.1 christos break;
896 1.1 christos
897 1.1 christos default:
898 1.1 christos return 0;
899 1.1 christos }
900 1.1 christos
901 1.1 christos memset(cdata->iv, 0, AES_BLOCK_SIZE);
902 1.1 christos
903 1.1 christos return 1;
904 1.1 christos }
905 1.1 christos
/*
 * Number of bytes handed to the hardware per pass when buffers have to
 * be re-aligned. Must be a power of 2 and at least 16.
 */
# ifndef PADLOCK_CHUNK
#  define PADLOCK_CHUNK  512
# endif
# if (PADLOCK_CHUNK < 16) || ((PADLOCK_CHUNK & (PADLOCK_CHUNK - 1)) != 0)
#  error "insane PADLOCK_CHUNK..."
# endif
912 1.1 christos
913 1.1 christos /*
914 1.1 christos * Re-align the arguments to 16-Bytes boundaries and run the encryption
915 1.1 christos * function itself. This function is not AES-specific.
916 1.1 christos */
917 1.1 christos static int
918 1.1 christos padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
919 1.1 christos const unsigned char *in_arg, size_t nbytes)
920 1.1 christos {
921 1.1 christos struct padlock_cipher_data *cdata;
922 1.1 christos const void *inp;
923 1.1 christos unsigned char *out, *tofree;
924 1.1 christos void *iv;
925 1.1 christos int inp_misaligned, out_misaligned, realign_in_loop;
926 1.1 christos size_t chunk, allocated = 0;
927 1.1 christos
928 1.1 christos /*
929 1.1 christos * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
930 1.1 christos */
931 1.1 christos if ((chunk = ctx->num)) { /* borrow chunk variable */
932 1.1 christos unsigned char *ivp = ctx->iv;
933 1.1 christos
934 1.1 christos switch (EVP_CIPHER_CTX_mode(ctx)) {
935 1.1 christos case EVP_CIPH_CFB_MODE:
936 1.1 christos if (chunk >= AES_BLOCK_SIZE)
937 1.1 christos return 0; /* bogus value */
938 1.1 christos
939 1.1 christos if (ctx->encrypt)
940 1.1 christos while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
941 1.1 christos ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
942 1.1 christos chunk++, nbytes--;
943 1.1 christos } else
944 1.1 christos while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
945 1.1 christos unsigned char c = *(in_arg++);
946 1.1 christos *(out_arg++) = c ^ ivp[chunk];
947 1.1 christos ivp[chunk++] = c, nbytes--;
948 1.1 christos }
949 1.1 christos
950 1.1 christos ctx->num = chunk % AES_BLOCK_SIZE;
951 1.1 christos break;
952 1.1 christos case EVP_CIPH_OFB_MODE:
953 1.1 christos if (chunk >= AES_BLOCK_SIZE)
954 1.1 christos return 0; /* bogus value */
955 1.1 christos
956 1.1 christos while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
957 1.1 christos *(out_arg++) = *(in_arg++) ^ ivp[chunk];
958 1.1 christos chunk++, nbytes--;
959 1.1 christos }
960 1.1 christos
961 1.1 christos ctx->num = chunk % AES_BLOCK_SIZE;
962 1.1 christos break;
963 1.1 christos }
964 1.1 christos }
965 1.1 christos
966 1.1 christos if (nbytes == 0)
967 1.1 christos return 1;
968 1.1 christos # if 0
969 1.1 christos if (nbytes % AES_BLOCK_SIZE)
970 1.1 christos return 0; /* are we expected to do tail processing? */
971 1.1 christos # else
972 1.1 christos /*
973 1.1 christos * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
974 1.1 christos * arbitrary value in byte-oriented modes, such as CFB and OFB...
975 1.1 christos */
976 1.1 christos # endif
977 1.1 christos
978 1.1 christos /*
979 1.1 christos * VIA promises CPUs that won't require alignment in the future. For now
980 1.1 christos * padlock_aes_align_required is initialized to 1 and the condition is
981 1.1 christos * never met...
982 1.1 christos */
983 1.1 christos /*
984 1.1 christos * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
985 1.1 christos * performance penalties appear to be approximately same as for software
986 1.1 christos * alignment below or ~3x. They promise to improve it in the future, but
987 1.1 christos * for now we can just as well pretend that it can only handle aligned
988 1.1 christos * input...
989 1.1 christos */
990 1.1 christos if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
991 1.1 christos return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
992 1.1 christos
993 1.1 christos inp_misaligned = (((size_t)in_arg) & 0x0F);
994 1.1 christos out_misaligned = (((size_t)out_arg) & 0x0F);
995 1.1 christos
996 1.1 christos /*
997 1.1 christos * Note that even if output is aligned and input not, I still prefer to
998 1.1 christos * loop instead of copy the whole input and then encrypt in one stroke.
999 1.1 christos * This is done in order to improve L1 cache utilization...
1000 1.1 christos */
1001 1.1 christos realign_in_loop = out_misaligned | inp_misaligned;
1002 1.1 christos
1003 1.1 christos if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
1004 1.1 christos return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
1005 1.1 christos
1006 1.1 christos /* this takes one "if" out of the loops */
1007 1.1 christos chunk = nbytes;
1008 1.1 christos chunk %= PADLOCK_CHUNK;
1009 1.1 christos if (chunk == 0)
1010 1.1 christos chunk = PADLOCK_CHUNK;
1011 1.1 christos
1012 1.1 christos if (out_misaligned) {
1013 1.1 christos /* optmize for small input */
1014 1.1 christos allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
1015 1.1 christos tofree = malloc(0x10 + allocated);
1016 1.1 christos if (tofree == NULL)
1017 1.1 christos return 0;
1018 1.1 christos out = NEAREST_ALIGNED(tofree);
1019 1.1 christos } else {
1020 1.1 christos out = out_arg;
1021 1.1 christos tofree = NULL;
1022 1.1 christos }
1023 1.1 christos
1024 1.1 christos cdata = ALIGNED_CIPHER_DATA(ctx);
1025 1.1 christos padlock_verify_context(cdata);
1026 1.1 christos
1027 1.1 christos switch (EVP_CIPHER_CTX_mode(ctx)) {
1028 1.1 christos case EVP_CIPH_ECB_MODE:
1029 1.1 christos do {
1030 1.1 christos if (inp_misaligned)
1031 1.1 christos inp = padlock_memcpy(out, in_arg, chunk);
1032 1.1 christos else
1033 1.1 christos inp = in_arg;
1034 1.1 christos in_arg += chunk;
1035 1.1 christos
1036 1.1 christos padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1037 1.1 christos
1038 1.1 christos if (out_misaligned)
1039 1.1 christos out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1040 1.1 christos else
1041 1.1 christos out = out_arg += chunk;
1042 1.1 christos
1043 1.1 christos nbytes -= chunk;
1044 1.1 christos chunk = PADLOCK_CHUNK;
1045 1.1 christos } while (nbytes);
1046 1.1 christos break;
1047 1.1 christos
1048 1.1 christos case EVP_CIPH_CBC_MODE:
1049 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1050 1.1 christos goto cbc_shortcut;
1051 1.1 christos do {
1052 1.1 christos if (iv != cdata->iv)
1053 1.1 christos memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1054 1.1 christos chunk = PADLOCK_CHUNK;
1055 1.1 christos cbc_shortcut: /* optimize for small input */
1056 1.1 christos if (inp_misaligned)
1057 1.1 christos inp = padlock_memcpy(out, in_arg, chunk);
1058 1.1 christos else
1059 1.1 christos inp = in_arg;
1060 1.1 christos in_arg += chunk;
1061 1.1 christos
1062 1.1 christos iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1063 1.1 christos
1064 1.1 christos if (out_misaligned)
1065 1.1 christos out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1066 1.1 christos else
1067 1.1 christos out = out_arg += chunk;
1068 1.1 christos
1069 1.1 christos } while (nbytes -= chunk);
1070 1.1 christos memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1071 1.1 christos break;
1072 1.1 christos
1073 1.1 christos case EVP_CIPH_CFB_MODE:
1074 1.1 christos memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1075 1.1 christos chunk &= ~(AES_BLOCK_SIZE - 1);
1076 1.1 christos if (chunk)
1077 1.1 christos goto cfb_shortcut;
1078 1.1 christos else
1079 1.1 christos goto cfb_skiploop;
1080 1.1 christos do {
1081 1.1 christos if (iv != cdata->iv)
1082 1.1 christos memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1083 1.1 christos chunk = PADLOCK_CHUNK;
1084 1.1 christos cfb_shortcut: /* optimize for small input */
1085 1.1 christos if (inp_misaligned)
1086 1.1 christos inp = padlock_memcpy(out, in_arg, chunk);
1087 1.1 christos else
1088 1.1 christos inp = in_arg;
1089 1.1 christos in_arg += chunk;
1090 1.1 christos
1091 1.1 christos iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1092 1.1 christos
1093 1.1 christos if (out_misaligned)
1094 1.1 christos out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1095 1.1 christos else
1096 1.1 christos out = out_arg += chunk;
1097 1.1 christos
1098 1.1 christos nbytes -= chunk;
1099 1.1 christos } while (nbytes >= AES_BLOCK_SIZE);
1100 1.1 christos
1101 1.1 christos cfb_skiploop:
1102 1.1 christos if (nbytes) {
1103 1.1 christos unsigned char *ivp = cdata->iv;
1104 1.1 christos
1105 1.1 christos if (iv != ivp) {
1106 1.1 christos memcpy(ivp, iv, AES_BLOCK_SIZE);
1107 1.1 christos iv = ivp;
1108 1.1 christos }
1109 1.1 christos ctx->num = nbytes;
1110 1.1 christos if (cdata->cword.b.encdec) {
1111 1.1 christos cdata->cword.b.encdec = 0;
1112 1.1 christos padlock_reload_key();
1113 1.1 christos padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1114 1.1 christos cdata->cword.b.encdec = 1;
1115 1.1 christos padlock_reload_key();
1116 1.1 christos while (nbytes) {
1117 1.1 christos unsigned char c = *(in_arg++);
1118 1.1 christos *(out_arg++) = c ^ *ivp;
1119 1.1 christos *(ivp++) = c, nbytes--;
1120 1.1 christos }
1121 1.1 christos } else {
1122 1.1 christos padlock_reload_key();
1123 1.1 christos padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1124 1.1 christos padlock_reload_key();
1125 1.1 christos while (nbytes) {
1126 1.1 christos *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
1127 1.1 christos ivp++, nbytes--;
1128 1.1 christos }
1129 1.1 christos }
1130 1.1 christos }
1131 1.1 christos
1132 1.1 christos memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1133 1.1 christos break;
1134 1.1 christos
1135 1.1 christos case EVP_CIPH_OFB_MODE:
1136 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1137 1.1 christos chunk &= ~(AES_BLOCK_SIZE - 1);
1138 1.1 christos if (chunk)
1139 1.1 christos do {
1140 1.1 christos if (inp_misaligned)
1141 1.1 christos inp = padlock_memcpy(out, in_arg, chunk);
1142 1.1 christos else
1143 1.1 christos inp = in_arg;
1144 1.1 christos in_arg += chunk;
1145 1.1 christos
1146 1.1 christos padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1147 1.1 christos
1148 1.1 christos if (out_misaligned)
1149 1.1 christos out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1150 1.1 christos else
1151 1.1 christos out = out_arg += chunk;
1152 1.1 christos
1153 1.1 christos nbytes -= chunk;
1154 1.1 christos chunk = PADLOCK_CHUNK;
1155 1.1 christos } while (nbytes >= AES_BLOCK_SIZE);
1156 1.1 christos
1157 1.1 christos if (nbytes) {
1158 1.1 christos unsigned char *ivp = cdata->iv;
1159 1.1 christos
1160 1.1 christos ctx->num = nbytes;
1161 1.1 christos padlock_reload_key(); /* empirically found */
1162 1.1 christos padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1163 1.1 christos padlock_reload_key(); /* empirically found */
1164 1.1 christos while (nbytes) {
1165 1.1 christos *(out_arg++) = *(in_arg++) ^ *ivp;
1166 1.1 christos ivp++, nbytes--;
1167 1.1 christos }
1168 1.1 christos }
1169 1.1 christos
1170 1.1 christos memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
1171 1.1 christos break;
1172 1.1 christos
1173 1.1 christos default:
1174 1.1 christos free(tofree);
1175 1.1 christos return 0;
1176 1.1 christos }
1177 1.1 christos
1178 1.1 christos /* Clean the realign buffer if it was used */
1179 1.1 christos if (out_misaligned) {
1180 1.1 christos volatile unsigned long *p = (void *)out;
1181 1.1 christos size_t n = allocated / sizeof(*p);
1182 1.1 christos while (n--)
1183 1.1 christos *p++ = 0;
1184 1.1 christos }
1185 1.1 christos
1186 1.1 christos memset(cdata->iv, 0, AES_BLOCK_SIZE);
1187 1.1 christos free(tofree);
1188 1.1 christos
1189 1.1 christos return 1;
1190 1.1 christos }
1191 1.1 christos
1192 1.1 christos # endif /* OPENSSL_NO_AES */
1193 1.1 christos
1194 1.1 christos /* ===== Random Number Generator ===== */
1195 1.1 christos /*
1196 1.1 christos * This code is not engaged. The reason is that it does not comply
1197 1.1 christos * with recommendations for VIA RNG usage for secure applications
1198 1.1 christos * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1199 1.1 christos * provide meaningful error control...
1200 1.1 christos */
/*
 * Classify the status word returned by padlock_xstore().
 * Returns 1 when exactly `want` bytes were delivered, 0 when no data was
 * available yet (caller should retry) and -1 on any failure.
 */
static int padlock_rng_check(unsigned int eax, unsigned int want)
{
    unsigned int got = eax & 0x1F;

    if (!(eax & (1 << 6)))
        return -1;              /* RNG disabled */
    /* this ---vv--- covers DC bias, Raw Bits and String Filter */
    if (eax & (0x1F << 10))
        return -1;
    if (got == 0)
        return 0;               /* no data, retry... */
    return (got == want) ? 1 : -1;
}

/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG.
 *
 * Fills output with count bytes: eight at a time straight into the
 * caller's buffer while at least eight remain, then one byte at a time
 * through a local scratch word. Returns 1 on success, 0 on failure.
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int eax, buf;
    int rc;

    while (count >= 8) {
        eax = padlock_xstore(output, 0);
        rc = padlock_rng_check(eax, 8);
        if (rc < 0)
            return 0;           /* fatal failure... */
        if (rc == 0)
            continue;
        output += 8;
        count -= 8;
    }

    while (count > 0) {
        eax = padlock_xstore(&buf, 3);
        rc = padlock_rng_check(eax, 1);
        if (rc < 0)
            return 0;           /* fatal failure... */
        if (rc == 0)
            continue;
        *output++ = (unsigned char)buf;
        count--;
    }

    /* Wipe the scratch word; volatile keeps the store from being elided */
    *(volatile unsigned int *)&buf = 0;

    return 1;
}
1241 1.1 christos
/*
 * Status callback for the RAND_METHOD table below.
 * Dummy but necessary: unconditionally returns 1.
 */
static int padlock_rand_status(void)
{
    return 1;
}
1247 1.1 christos
1248 1.1 christos /* Prepare structure for registration */
1249 1.1 christos static RAND_METHOD padlock_rand = {
1250 1.1 christos NULL, /* seed */
1251 1.1 christos padlock_rand_bytes, /* bytes */
1252 1.1 christos NULL, /* cleanup */
1253 1.1 christos NULL, /* add */
1254 1.1 christos padlock_rand_bytes, /* pseudorand */
1255 1.1 christos padlock_rand_status, /* rand status */
1256 1.1 christos };
1257 1.1 christos
1258 1.1 christos # else /* !COMPILE_HW_PADLOCK */
1259 1.1 christos # ifndef OPENSSL_NO_DYNAMIC_ENGINE
1260 1.1 christos OPENSSL_EXPORT
1261 1.1 christos int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
1262 1.1 christos OPENSSL_EXPORT
1263 1.1 christos int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
1264 1.1 christos {
1265 1.1 christos return 0;
1266 1.1 christos }
1267 1.1 christos
1268 1.1 christos IMPLEMENT_DYNAMIC_CHECK_FN()
1269 1.1 christos # endif
1270 1.1 christos # endif /* COMPILE_HW_PADLOCK */
1271 1.1 christos # endif /* !OPENSSL_NO_HW_PADLOCK */
1272 1.1 christos #endif /* !OPENSSL_NO_HW */
1273