e_padlock.c revision 1.2 1 1.1 christos /*
2 1.1 christos * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 1.1 christos * Written by Michal Ludvig <michal (at) logix.cz>
4 1.1 christos * http://www.logix.cz/michal
5 1.1 christos *
6 1.1 christos * Big thanks to Andy Polyakov for a help with optimization,
7 1.1 christos * assembler fixes, port to MS Windows and a lot of other
8 1.1 christos * valuable work on this engine!
9 1.1 christos */
10 1.1 christos
11 1.1 christos /* ====================================================================
12 1.1 christos * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
13 1.1 christos *
14 1.1 christos * Redistribution and use in source and binary forms, with or without
15 1.1 christos * modification, are permitted provided that the following conditions
16 1.1 christos * are met:
17 1.1 christos *
18 1.1 christos * 1. Redistributions of source code must retain the above copyright
19 1.1 christos * notice, this list of conditions and the following disclaimer.
20 1.1 christos *
21 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
22 1.1 christos * notice, this list of conditions and the following disclaimer in
23 1.1 christos * the documentation and/or other materials provided with the
24 1.1 christos * distribution.
25 1.1 christos *
26 1.1 christos * 3. All advertising materials mentioning features or use of this
27 1.1 christos * software must display the following acknowledgment:
28 1.1 christos * "This product includes software developed by the OpenSSL Project
29 1.1 christos * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
30 1.1 christos *
31 1.1 christos * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 1.1 christos * endorse or promote products derived from this software without
33 1.1 christos * prior written permission. For written permission, please contact
34 1.1 christos * licensing (at) OpenSSL.org.
35 1.1 christos *
36 1.1 christos * 5. Products derived from this software may not be called "OpenSSL"
37 1.1 christos * nor may "OpenSSL" appear in their names without prior written
38 1.1 christos * permission of the OpenSSL Project.
39 1.1 christos *
40 1.1 christos * 6. Redistributions of any form whatsoever must retain the following
41 1.1 christos * acknowledgment:
42 1.1 christos * "This product includes software developed by the OpenSSL Project
43 1.1 christos * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
44 1.1 christos *
45 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 1.1 christos * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 1.1 christos * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 1.1 christos * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 1.1 christos * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 1.1 christos * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 1.1 christos * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 1.1 christos * OF THE POSSIBILITY OF SUCH DAMAGE.
57 1.1 christos * ====================================================================
58 1.1 christos *
59 1.1 christos * This product includes cryptographic software written by Eric Young
60 1.1 christos * (eay (at) cryptsoft.com). This product includes software written by Tim
61 1.1 christos * Hudson (tjh (at) cryptsoft.com).
62 1.1 christos *
63 1.1 christos */
64 1.1 christos
65 1.1 christos
66 1.1 christos #include <stdio.h>
67 1.1 christos #include <string.h>
68 1.1 christos
69 1.1 christos #include <openssl/opensslconf.h>
70 1.1 christos #include <openssl/crypto.h>
71 1.1 christos #include <openssl/dso.h>
72 1.1 christos #include <openssl/engine.h>
73 1.1 christos #include <openssl/evp.h>
74 1.1 christos #ifndef OPENSSL_NO_AES
75 1.1 christos #include <openssl/aes.h>
76 1.1 christos #endif
77 1.1 christos #include <openssl/rand.h>
78 1.1 christos #include <openssl/err.h>
79 1.1 christos
80 1.1 christos #ifndef OPENSSL_NO_HW
81 1.1 christos #ifndef OPENSSL_NO_HW_PADLOCK
82 1.1 christos
/*
 * Decide whether this file is built as a dynamically loadable engine.
 * DYNAMIC_ENGINE gets defined for OpenSSL >= 0.9.8 unless
 * OPENSSL_NO_DYNAMIC_ENGINE is set, and for 0.9.7 only when
 * ENGINE_DYNAMIC_SUPPORT is set.  Anything older is rejected with #error.
 */
83 1.1 christos /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
84 1.1 christos #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
85 1.1 christos # ifndef OPENSSL_NO_DYNAMIC_ENGINE
86 1.1 christos # define DYNAMIC_ENGINE
87 1.1 christos # endif
88 1.1 christos #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
89 1.1 christos # ifdef ENGINE_DYNAMIC_SUPPORT
90 1.1 christos # define DYNAMIC_ENGINE
91 1.1 christos # endif
92 1.1 christos #else
93 1.1 christos # error "Only OpenSSL >= 0.9.7 is supported"
94 1.1 christos #endif
95 1.1 christos
96 1.1 christos /* VIA PadLock AES is available *ONLY* on some x86 CPUs.
97 1.1 christos Not only does it not exist elsewhere, it
98 1.1 christos cannot even be compiled on other platforms!
99 1.1 christos
100 1.1 christos In addition, because of the heavy use of inline assembler,
101 1.1 christos compiler choice is limited to GCC and Microsoft C. */
/*
 * COMPILE_HW_PADLOCK ends up defined only when neither I386_ONLY nor
 * OPENSSL_NO_INLINE_ASM is set and the compiler/target pair is GCC on
 * i386 or MSVC on x86 (_M_IX86).  Everything below that touches the
 * hardware is guarded by this macro.
 */
102 1.1 christos #undef COMPILE_HW_PADLOCK
103 1.1 christos #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 1.2 christos # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105 1.1 christos (defined(_MSC_VER) && defined(_M_IX86))
106 1.1 christos # define COMPILE_HW_PADLOCK
107 1.2 christos # endif
108 1.2 christos #endif
109 1.2 christos
#ifdef OPENSSL_NO_DYNAMIC_ENGINE
# ifdef COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock (void);
# endif

/*
 * Static-build entry point: construct the PadLock engine and add it to
 * the engine list.  When COMPILE_HW_PADLOCK is not defined (non-x86 or
 * unsupported compiler) the body is empty and the call is a no-op.
 */
void ENGINE_load_padlock (void)
{
# ifdef COMPILE_HW_PADLOCK
	ENGINE *engine = ENGINE_padlock ();

	if (engine != NULL) {
		ENGINE_add (engine);
		/* Release the local handle, then clear the error queue. */
		ENGINE_free (engine);
		ERR_clear_error ();
	}
# endif
}

#endif
128 1.2 christos
129 1.1 christos #ifdef COMPILE_HW_PADLOCK
130 1.1 christos /* We do these includes here to avoid header problems on platforms that
131 1.1 christos do not have the VIA padlock anyway... */
/*
 * Make sure alloca() is usable: on _WIN32 it is mapped onto _alloca from
 * <malloc.h>; with GCC it is mapped onto __builtin_alloca unless a macro
 * named alloca already exists.
 */
132 1.1 christos #include <stdlib.h>
133 1.1 christos #ifdef _WIN32
134 1.1 christos # include <malloc.h>
135 1.1 christos # ifndef alloca
136 1.1 christos # define alloca _alloca
137 1.1 christos # endif
138 1.1 christos #elif defined(__GNUC__)
139 1.1 christos # ifndef alloca
140 1.2 christos # define alloca(s) __builtin_alloca(s)
141 1.1 christos # endif
142 1.1 christos #endif
143 1.1 christos
144 1.1 christos /* Function for ENGINE detection and control */
145 1.1 christos static int padlock_available(void);
146 1.1 christos static int padlock_init(ENGINE *e);
147 1.1 christos
148 1.1 christos /* RNG Stuff */
/* Registered with ENGINE_set_RAND() only when padlock_use_rng is non-zero. */
149 1.1 christos static RAND_METHOD padlock_rand;
150 1.1 christos
151 1.1 christos /* Cipher Stuff */
152 1.1 christos #ifndef OPENSSL_NO_AES
153 1.1 christos static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
154 1.1 christos #endif
155 1.1 christos
156 1.1 christos /* Engine names */
157 1.1 christos static const char *padlock_id = "padlock";
/* Human-readable name; formatted by padlock_bind_helper() at bind time. */
158 1.1 christos static char padlock_name[100];
159 1.1 christos
160 1.1 christos /* Available features */
/* Both flags start at 0 and are set by padlock_available(). */
161 1.1 christos static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
162 1.1 christos static int padlock_use_rng = 0; /* Random Number Generator */
163 1.1 christos #ifndef OPENSSL_NO_AES
/* NOTE(review): presumably tells the cipher path to realign buffers; its
 * readers are not in this part of the file - confirm. */
164 1.1 christos static int padlock_aes_align_required = 1;
165 1.1 christos #endif
166 1.1 christos
167 1.1 christos /* ===== Engine "management" functions ===== */
168 1.1 christos
169 1.1 christos /* Prepare the ENGINE structure for registration */
170 1.1 christos static int
171 1.1 christos padlock_bind_helper(ENGINE *e)
172 1.1 christos {
173 1.1 christos /* Check available features */
174 1.1 christos padlock_available();
175 1.1 christos
176 1.1 christos #if 1 /* disable RNG for now, see commentary in vicinity of RNG code */
177 1.1 christos padlock_use_rng=0;
178 1.1 christos #endif
179 1.1 christos
180 1.1 christos /* Generate a nice engine name with available features */
181 1.1 christos BIO_snprintf(padlock_name, sizeof(padlock_name),
182 1.1 christos "VIA PadLock (%s, %s)",
183 1.1 christos padlock_use_rng ? "RNG" : "no-RNG",
184 1.1 christos padlock_use_ace ? "ACE" : "no-ACE");
185 1.1 christos
186 1.1 christos /* Register everything or return with an error */
187 1.1 christos if (!ENGINE_set_id(e, padlock_id) ||
188 1.1 christos !ENGINE_set_name(e, padlock_name) ||
189 1.1 christos
190 1.1 christos !ENGINE_set_init_function(e, padlock_init) ||
191 1.1 christos #ifndef OPENSSL_NO_AES
192 1.1 christos (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
193 1.1 christos #endif
194 1.1 christos (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
195 1.1 christos return 0;
196 1.1 christos }
197 1.1 christos
198 1.1 christos /* Everything looks good */
199 1.1 christos return 1;
200 1.1 christos }
201 1.1 christos
202 1.2 christos #ifdef OPENSSL_NO_DYNAMIC_ENGINE
203 1.2 christos
204 1.1 christos /* Constructor */
205 1.1 christos static ENGINE *
206 1.1 christos ENGINE_padlock(void)
207 1.1 christos {
208 1.1 christos ENGINE *eng = ENGINE_new();
209 1.1 christos
210 1.1 christos if (!eng) {
211 1.1 christos return NULL;
212 1.1 christos }
213 1.1 christos
214 1.1 christos if (!padlock_bind_helper(eng)) {
215 1.1 christos ENGINE_free(eng);
216 1.1 christos return NULL;
217 1.1 christos }
218 1.1 christos
219 1.1 christos return eng;
220 1.1 christos }
221 1.1 christos
222 1.2 christos #endif
223 1.2 christos
224 1.1 christos /* Check availability of the engine */
225 1.1 christos static int
226 1.1 christos padlock_init(ENGINE *e)
227 1.1 christos {
228 1.1 christos return (padlock_use_rng || padlock_use_ace);
229 1.1 christos }
230 1.1 christos
231 1.1 christos /* This stuff is needed if this ENGINE is being compiled into a self-contained
232 1.1 christos * shared-library.
233 1.1 christos */
234 1.1 christos #ifdef DYNAMIC_ENGINE
235 1.1 christos static int
236 1.1 christos padlock_bind_fn(ENGINE *e, const char *id)
237 1.1 christos {
238 1.1 christos if (id && (strcmp(id, padlock_id) != 0)) {
239 1.1 christos return 0;
240 1.1 christos }
241 1.1 christos
242 1.1 christos if (!padlock_bind_helper(e)) {
243 1.1 christos return 0;
244 1.1 christos }
245 1.1 christos
246 1.1 christos return 1;
247 1.1 christos }
248 1.1 christos
249 1.1 christos IMPLEMENT_DYNAMIC_CHECK_FN()
250 1.1 christos IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn)
251 1.1 christos #endif /* DYNAMIC_ENGINE */
252 1.1 christos
253 1.1 christos /* ===== Here comes the "real" engine ===== */
254 1.1 christos
255 1.1 christos #ifndef OPENSSL_NO_AES
256 1.1 christos /* Some AES-related constants */
/* All values are in bytes; the key sizes correspond to 128/192/256 bits. */
257 1.1 christos #define AES_BLOCK_SIZE 16
258 1.1 christos #define AES_KEY_SIZE_128 16
259 1.1 christos #define AES_KEY_SIZE_192 24
260 1.1 christos #define AES_KEY_SIZE_256 32
261 1.1 christos
262 1.1 christos /* Here we store the status information relevant to the
263 1.1 christos current context. */
264 1.1 christos /* BIG FAT WARNING:
265 1.1 christos * Inline assembler in PADLOCK_XCRYPT_ASM()
266 1.1 christos * depends on the order of items in this structure.
267 1.1 christos * Don't blindly modify, reorder, etc!
268 1.1 christos */
/*
 * Layout the assembler relies on (with 4-byte unsigned int):
 *   offset  0: iv     (AES_BLOCK_SIZE = 16 bytes)
 *   offset 16: cword  (pad[4] fixes the union at 16 bytes)
 *   offset 32: ks
 * PADLOCK_XCRYPT_ASM() addresses cword and ks as cdata+16 and cdata+32.
 *
 * NOTE(review): rounds and ksize are signed bit-fields, yet init_key stores
 * 10..14 and 0..2 in them; this relies on the implementation keeping the
 * low-order bit pattern - confirm before touching the types.
 */
269 1.1 christos struct padlock_cipher_data
270 1.1 christos {
271 1.1 christos unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
272 1.1 christos union { unsigned int pad[4];
273 1.1 christos struct {
274 1.1 christos int rounds:4;
275 1.1 christos int dgst:1; /* n/a in C3 */
276 1.1 christos int align:1; /* n/a in C3 */
277 1.1 christos int ciphr:1; /* n/a in C3 */
278 1.1 christos unsigned int keygen:1;
279 1.1 christos int interm:1;
280 1.1 christos unsigned int encdec:1;
281 1.1 christos int ksize:2;
282 1.1 christos } b;
283 1.1 christos } cword; /* Control word */
284 1.1 christos AES_KEY ks; /* Encryption key */
285 1.1 christos };
286 1.1 christos
287 1.1 christos /*
288 1.1 christos * Essentially this variable belongs in thread local storage.
289 1.1 christos * Having this variable global on the other hand can only cause
290 1.1 christos * few bogus key reloads [if any at all on single-CPU system],
291 1.1 christos * so we accept the penalty...
292 1.1 christos */
/*
 * Context most recently passed to padlock_verify_context(), which both
 * compares against and overwrites this pointer.  Note that "volatile"
 * qualifies the pointed-to structure here, not the pointer itself.
 */
293 1.1 christos static volatile struct padlock_cipher_data *padlock_saved_context;
294 1.1 christos #endif
295 1.1 christos
296 1.1 christos /*
297 1.1 christos * =======================================================
298 1.1 christos * Inline assembler section(s).
299 1.1 christos * =======================================================
300 1.1 christos * Order of arguments is chosen to facilitate Windows port
301 1.1 christos * using __fastcall calling convention. If you wish to add
302 1.1 christos * more routines, keep in mind that first __fastcall
303 1.1 christos * argument is passed in %ecx and second - in %edx.
304 1.1 christos * =======================================================
305 1.1 christos */
306 1.1 christos #if defined(__GNUC__) && __GNUC__>=2
307 1.1 christos /*
308 1.1 christos * As for excessive "push %ebx"/"pop %ebx" found all over.
309 1.1 christos * When generating position-independent code GCC won't let
310 1.1 christos * us use "b" in assembler templates nor even respect "ebx"
311 1.1 christos * in "clobber description." Therefore the trouble...
312 1.1 christos */
313 1.1 christos
314 1.1 christos /* Helper function - check if a CPUID instruction
315 1.1 christos is available on this CPU */
/*
 * Returns 1 if the CPUID instruction can be used, 0 otherwise.
 * Takes no arguments; the asm leaves EFLAGS with bit 21 in whatever
 * state the toggle attempt produced.
 */
316 1.1 christos static int
317 1.1 christos padlock_insn_cpuid_available(void)
318 1.1 christos {
319 1.1 christos int result = -1;
320 1.1 christos
321 1.1 christos /* We're checking if the bit #21 of EFLAGS
322 1.1 christos can be toggled. If yes = CPUID is available. */
/* %ecx keeps the bit-21 value we try to write, %eax ends up with the
 * bit-21 value read back; their XOR is 0 only if the write stuck. */
323 1.1 christos asm volatile (
324 1.1 christos "pushf\n"
325 1.1 christos "popl %%eax\n"
326 1.1 christos "xorl $0x200000, %%eax\n"
327 1.1 christos "movl %%eax, %%ecx\n"
328 1.1 christos "andl $0x200000, %%ecx\n"
329 1.1 christos "pushl %%eax\n"
330 1.1 christos "popf\n"
331 1.1 christos "pushf\n"
332 1.1 christos "popl %%eax\n"
333 1.1 christos "andl $0x200000, %%eax\n"
334 1.1 christos "xorl %%eax, %%ecx\n"
335 1.1 christos "movl %%ecx, %0\n"
336 1.1 christos : "=r" (result) : : "eax", "ecx");
337 1.1 christos
/* result == 0 means requested and observed bit values matched */
338 1.1 christos return (result == 0);
339 1.1 christos }
340 1.1 christos
341 1.1 christos /* Load supported features of the CPU to see if
342 1.1 christos the PadLock is available. */
343 1.1 christos static int
344 1.1 christos padlock_available(void)
345 1.1 christos {
346 1.1 christos char vendor_string[16];
347 1.1 christos unsigned int eax, edx;
348 1.1 christos
349 1.1 christos /* First check if the CPUID instruction is available at all... */
350 1.1 christos if (! padlock_insn_cpuid_available())
351 1.1 christos return 0;
352 1.1 christos
353 1.1 christos /* Are we running on the Centaur (VIA) CPU? */
354 1.1 christos eax = 0x00000000;
355 1.1 christos vendor_string[12] = 0;
356 1.1 christos asm volatile (
357 1.1 christos "pushl %%ebx\n"
358 1.1 christos "cpuid\n"
359 1.1 christos "movl %%ebx,(%%edi)\n"
360 1.1 christos "movl %%edx,4(%%edi)\n"
361 1.1 christos "movl %%ecx,8(%%edi)\n"
362 1.1 christos "popl %%ebx"
363 1.1 christos : "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
364 1.1 christos if (strcmp(vendor_string, "CentaurHauls") != 0)
365 1.1 christos return 0;
366 1.1 christos
367 1.1 christos /* Check for Centaur Extended Feature Flags presence */
368 1.1 christos eax = 0xC0000000;
369 1.1 christos asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
370 1.1 christos : "+a"(eax) : : "ecx", "edx");
371 1.1 christos if (eax < 0xC0000001)
372 1.1 christos return 0;
373 1.1 christos
374 1.1 christos /* Read the Centaur Extended Feature Flags */
375 1.1 christos eax = 0xC0000001;
376 1.1 christos asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
377 1.1 christos : "+a"(eax), "=d"(edx) : : "ecx");
378 1.1 christos
379 1.1 christos /* Fill up some flags */
380 1.1 christos padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
381 1.1 christos padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
382 1.1 christos
383 1.1 christos return padlock_use_ace + padlock_use_rng;
384 1.1 christos }
385 1.1 christos
386 1.2 christos #ifndef OPENSSL_NO_AES
387 1.2 christos /* Our own htonl()/ntohl() */
388 1.2 christos static inline void
389 1.2 christos padlock_bswapl(AES_KEY *ks)
390 1.2 christos {
391 1.2 christos size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
392 1.2 christos unsigned int *key = ks->rd_key;
393 1.2 christos
394 1.2 christos while (i--) {
395 1.2 christos asm volatile ("bswapl %0" : "+r"(*key));
396 1.2 christos key++;
397 1.2 christos }
398 1.2 christos }
399 1.2 christos #endif
400 1.2 christos
401 1.1 christos /* Force key reload from memory to the CPU microcode.
402 1.1 christos Loading EFLAGS from the stack clears EFLAGS[30]
403 1.1 christos which does the trick. */
/* Takes and returns nothing; the push/pop pair leaves the stack balanced. */
404 1.1 christos static inline void
405 1.1 christos padlock_reload_key(void)
406 1.1 christos {
407 1.1 christos asm volatile ("pushfl; popfl");
408 1.1 christos }
409 1.1 christos
410 1.1 christos #ifndef OPENSSL_NO_AES
411 1.1 christos /*
412 1.1 christos * This is heuristic key context tracing. At first one
413 1.1 christos * believes that one should use atomic swap instructions,
414 1.1 christos * but it's not actually necessary. Point is that if
415 1.1 christos * padlock_saved_context was changed by another thread
416 1.1 christos * after we've read it and before we compare it with cdata,
417 1.1 christos * our key *shall* be reloaded upon thread context switch
418 1.1 christos * and we are therefore set in either case...
419 1.1 christos */
/*
 * Step by step:
 *   - push EFLAGS and test bit 30 of the pushed copy;
 *   - if the bit is clear, or cdata equals the saved context, just drop
 *     the pushed word (label 1);
 *   - otherwise pop the word back into EFLAGS (the same reload trick as
 *     padlock_reload_key()), and pre-subtract 4 from %esp so the shared
 *     "addl $4" at label 1 leaves the stack balanced;
 *   - finally record cdata in padlock_saved_context.
 * Operands: %0 = padlock_saved_context (memory), %1 = its value in a
 * register, %2 = cdata.
 */
420 1.1 christos static inline void
421 1.1 christos padlock_verify_context(struct padlock_cipher_data *cdata)
422 1.1 christos {
423 1.1 christos asm volatile (
424 1.1 christos "pushfl\n"
425 1.1 christos " btl $30,(%%esp)\n"
426 1.1 christos " jnc 1f\n"
427 1.1 christos " cmpl %2,%1\n"
428 1.1 christos " je 1f\n"
429 1.1 christos " popfl\n"
430 1.1 christos " subl $4,%%esp\n"
431 1.1 christos "1: addl $4,%%esp\n"
432 1.1 christos " movl %2,%0"
433 1.1 christos :"+m"(padlock_saved_context)
434 1.1 christos : "r"(padlock_saved_context), "r"(cdata) : "cc");
435 1.1 christos }
436 1.1 christos
437 1.1 christos /* Template for padlock_xcrypt_* modes */
438 1.1 christos /* BIG FAT WARNING:
439 1.1 christos * The offsets used with 'leal' instructions
440 1.1 christos * describe items of the 'padlock_cipher_data'
441 1.1 christos * structure.
442 1.1 christos */
/*
 * Generated function: void *name(cnt, cdata, out, inp).
 * Register set-up performed by the template:
 *   %eax = cdata on entry (start of the structure, i.e. the iv field)
 *   %edx = cdata + 16     (cword)
 *   %ebx = cdata + 32     (ks); %ebx is saved/restored by hand
 *   %ecx = cnt, %edi = out, %esi = inp
 * rep_xcrypt is the raw opcode string for the desired mode.  The function
 * returns whatever the instruction leaves in %eax (held in the local
 * named iv).  NOTE(review): cnt is presumably a block count rather than a
 * byte count - confirm against the callers.
 */
443 1.1 christos #define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
444 1.1 christos static inline void *name(size_t cnt, \
445 1.1 christos struct padlock_cipher_data *cdata, \
446 1.1 christos void *out, const void *inp) \
447 1.1 christos { void *iv; \
448 1.1 christos asm volatile ( "pushl %%ebx\n" \
449 1.1 christos " leal 16(%0),%%edx\n" \
450 1.1 christos " leal 32(%0),%%ebx\n" \
451 1.1 christos rep_xcrypt "\n" \
452 1.1 christos " popl %%ebx" \
453 1.1 christos : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
454 1.1 christos : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
455 1.1 christos : "edx", "cc", "memory"); \
456 1.1 christos return iv; \
457 1.1 christos }
458 1.1 christos
459 1.1 christos /* Generate all functions with appropriate opcodes */
460 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
461 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
462 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
463 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
464 1.1 christos #endif
465 1.1 christos
466 1.1 christos /* The RNG call itself */
/*
 * Issue one "xstore" with %edi = addr and %edx = edx_in and return the
 * value the instruction leaves in %eax.  The "=m" output tells the
 * compiler that (at least) one unsigned at addr is written.
 * NOTE(review): addr is passed as an input-only "D" operand; if xstore
 * advances %edi this constraint is too weak - check the PadLock
 * programming guide.
 */
467 1.1 christos static inline unsigned int
468 1.1 christos padlock_xstore(void *addr, unsigned int edx_in)
469 1.1 christos {
470 1.1 christos unsigned int eax_out;
471 1.1 christos
472 1.1 christos asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
473 1.1 christos : "=a"(eax_out),"=m"(*(unsigned *)addr)
474 1.1 christos : "D"(addr), "d" (edx_in)
475 1.1 christos );
476 1.1 christos
477 1.1 christos return eax_out;
478 1.1 christos }
479 1.1 christos
/* Why not inline 'rep movsd'? I failed to find information on what
 * value in Direction Flag one can expect and consequently have to
 * apply "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 */
/*
 * Copy n bytes from src to dst one long at a time and return dst.
 *
 * Only whole longs are copied: any trailing n % sizeof(long) bytes are
 * left untouched, so callers are expected to pass multiples of the word
 * size.  A count smaller than sizeof(long) (including 0) copies nothing;
 * the loop is pre-tested because a do/while would copy one word and then
 * wrap the counter to SIZE_MAX in that case.
 * dst and src must be suitably aligned for long and must not overlap.
 */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long *d=dst;
	const long *s=src;

	for (n /= sizeof(*d); n != 0; n--)
		*d++ = *s++;

	return dst;
}
502 1.1 christos
503 1.1 christos #elif defined(_MSC_VER)
504 1.1 christos /*
505 1.1 christos * Unlike GCC these are real functions. In order to minimize impact
506 1.1 christos * on performance we adhere to __fastcall calling convention in
507 1.1 christos * order to get two first arguments passed through %ecx and %edx.
508 1.1 christos * Which kind of suits very well, as instructions in question use
509 1.1 christos * both %ecx and %edx as input:-)
510 1.1 christos */
/* Emit the four opcode bytes f3 0f a7 <code>, i.e. a "rep xcrypt*"
 * instruction whose last byte selects the mode. */
511 1.1 christos #define REP_XCRYPT(code) \
512 1.1 christos _asm _emit 0xf3 \
513 1.1 christos _asm _emit 0x0f _asm _emit 0xa7 \
514 1.1 christos _asm _emit code
515 1.1 christos
516 1.1 christos /* BIG FAT WARNING:
517 1.1 christos * The offsets used with 'lea' instructions
518 1.1 christos * describe items of the 'padlock_cipher_data'
519 1.1 christos * structure.
520 1.1 christos */
/*
 * MSVC flavour of the xcrypt wrappers.  With __fastcall the first two
 * arguments arrive in ecx (cnt) and edx (cdata), as described above; the
 * body moves cdata to eax, points edx at cdata+16 (cword) and ebx at
 * cdata+32 (ks), loads edi/esi from outp/inp and issues the instruction.
 * There is no C return statement: the result is whatever is left in eax.
 */
521 1.1 christos #define PADLOCK_XCRYPT_ASM(name,code) \
522 1.1 christos static void * __fastcall \
523 1.1 christos name (size_t cnt, void *cdata, \
524 1.1 christos void *outp, const void *inp) \
525 1.1 christos { _asm mov eax,edx \
526 1.1 christos _asm lea edx,[eax+16] \
527 1.1 christos _asm lea ebx,[eax+32] \
528 1.1 christos _asm mov edi,outp \
529 1.1 christos _asm mov esi,inp \
530 1.1 christos REP_XCRYPT(code) \
531 1.1 christos }
532 1.1 christos
/* Mode bytes match the GCC variants: ecb=c8, cbc=d0, cfb=e0, ofb=e8 */
533 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
534 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
535 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
536 1.1 christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
537 1.1 christos
/*
 * MSVC flavour of the RNG call.  outp arrives in ecx and is moved to edi;
 * code arrives in edx (__fastcall) and is left there for the instruction.
 * The emitted bytes 0f a7 c0 are "xstore"; the function result is
 * whatever the instruction leaves in eax (no C return statement).
 */
538 1.1 christos static int __fastcall
539 1.1 christos padlock_xstore(void *outp,unsigned int code)
540 1.1 christos { _asm mov edi,ecx
541 1.1 christos _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
542 1.1 christos }
543 1.1 christos
/* MSVC flavour of padlock_reload_key(): push EFLAGS and pop it straight
 * back, the same sequence the GCC variant uses to force a key reload. */
544 1.1 christos static void __fastcall
545 1.1 christos padlock_reload_key(void)
546 1.1 christos { _asm pushfd _asm popfd }
547 1.1 christos
/*
 * MSVC flavour of padlock_verify_context(); cdata arrives in ecx.
 * If bit 30 of the pushed EFLAGS is set and cdata differs from
 * padlock_saved_context, EFLAGS is reloaded via popfd (with esp
 * pre-adjusted so the shared "add esp,4" at skip balances the stack).
 * In every case cdata is then stored in padlock_saved_context.
 */
548 1.1 christos static void __fastcall
549 1.1 christos padlock_verify_context(void *cdata)
550 1.1 christos { _asm {
551 1.1 christos pushfd
552 1.1 christos bt DWORD PTR[esp],30
553 1.1 christos jnc skip
554 1.1 christos cmp ecx,padlock_saved_context
555 1.1 christos je skip
556 1.1 christos popfd
557 1.1 christos sub esp,4
558 1.1 christos skip: add esp,4
559 1.1 christos mov padlock_saved_context,ecx
560 1.1 christos }
561 1.1 christos }
562 1.1 christos
/*
 * MSVC flavour of padlock_available().  In order it:
 *   1. toggles EFLAGS bit 21 to see whether CPUID exists;
 *   2. runs CPUID leaf 0 and compares ebx/edx/ecx with the three
 *      multi-character constants that spell "CentaurHauls";
 *   3. runs leaf 0xC0000000 and requires a maximum leaf >= 0xC0000001;
 *   4. runs leaf 0xC0000001 and sets padlock_use_ace when edx bits 6 and 7
 *      are both set, padlock_use_rng when bits 2 and 3 are both set.
 * There is no C return statement: the count of detected units is built up
 * in eax (zeroed before every early exit to noluck) and left there.
 */
563 1.1 christos static int
564 1.1 christos padlock_available(void)
565 1.1 christos { _asm {
566 1.1 christos pushfd
567 1.1 christos pop eax
568 1.1 christos mov ecx,eax
569 1.1 christos xor eax,1<<21
570 1.1 christos push eax
571 1.1 christos popfd
572 1.1 christos pushfd
573 1.1 christos pop eax
574 1.1 christos xor eax,ecx
575 1.1 christos bt eax,21
576 1.1 christos jnc noluck
577 1.1 christos mov eax,0
578 1.1 christos cpuid
579 1.1 christos xor eax,eax
580 1.1 christos cmp ebx,'tneC'
581 1.1 christos jne noluck
582 1.1 christos cmp edx,'Hrua'
583 1.1 christos jne noluck
584 1.1 christos cmp ecx,'slua'
585 1.1 christos jne noluck
586 1.1 christos mov eax,0xC0000000
587 1.1 christos cpuid
588 1.1 christos mov edx,eax
589 1.1 christos xor eax,eax
590 1.1 christos cmp edx,0xC0000001
591 1.1 christos jb noluck
592 1.1 christos mov eax,0xC0000001
593 1.1 christos cpuid
594 1.1 christos xor eax,eax
595 1.1 christos bt edx,6
596 1.1 christos jnc skip_a
597 1.1 christos bt edx,7
598 1.1 christos jnc skip_a
599 1.1 christos mov padlock_use_ace,1
600 1.1 christos inc eax
601 1.1 christos skip_a: bt edx,2
602 1.1 christos jnc skip_r
603 1.1 christos bt edx,3
604 1.1 christos jnc skip_r
605 1.1 christos mov padlock_use_rng,1
606 1.1 christos inc eax
607 1.1 christos skip_r:
608 1.1 christos noluck:
609 1.1 christos }
610 1.1 christos }
611 1.1 christos
/*
 * MSVC flavour of padlock_bswapl(): byte-swap 60 consecutive 32-bit words
 * in place, starting at key (passed in ecx).  EFLAGS is saved around the
 * loop because cld changes the Direction Flag.
 * NOTE(review): the hard-coded 60 presumably equals the number of words
 * in AES_KEY.rd_key (the GCC variant derives it with sizeof) - confirm.
 */
612 1.1 christos static void __fastcall
613 1.1 christos padlock_bswapl(void *key)
614 1.1 christos { _asm {
615 1.1 christos pushfd
616 1.1 christos cld
617 1.1 christos mov esi,ecx
618 1.1 christos mov edi,ecx
619 1.1 christos mov ecx,60
620 1.1 christos up: lodsd
621 1.1 christos bswap eax
622 1.1 christos stosd
623 1.1 christos loop up
624 1.1 christos popfd
625 1.1 christos }
626 1.1 christos }
627 1.1 christos
628 1.1 christos /* MS actually specifies status of Direction Flag and compiler even
629 1.1 christos * manages to compile following as 'rep movsd' all by itself...
630 1.1 christos */
/* Plain memcpy with the length rounded down to a multiple of 4 (n & ~3U);
 * the result is cast to unsigned char * to match the GCC helper. */
631 1.1 christos #define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
632 1.1 christos #endif
633 1.1 christos
634 1.1 christos /* ===== AES encryption/decryption ===== */
635 1.1 christos #ifndef OPENSSL_NO_AES
636 1.1 christos
/*
 * Compatibility aliases: when the headers provide only the
 * NID_aes_<bits>_cfb128 / NID_aes_<bits>_ofb128 spellings, define the
 * short NID_aes_<bits>_cfb / NID_aes_<bits>_ofb names used below in terms
 * of them.  One pair per key size (128, 192, 256).
 */
637 1.1 christos #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
638 1.1 christos #define NID_aes_128_cfb NID_aes_128_cfb128
639 1.1 christos #endif
640 1.1 christos
641 1.1 christos #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
642 1.1 christos #define NID_aes_128_ofb NID_aes_128_ofb128
643 1.1 christos #endif
644 1.1 christos
645 1.1 christos #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
646 1.1 christos #define NID_aes_192_cfb NID_aes_192_cfb128
647 1.1 christos #endif
648 1.1 christos
649 1.1 christos #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
650 1.1 christos #define NID_aes_192_ofb NID_aes_192_ofb128
651 1.1 christos #endif
652 1.1 christos
653 1.1 christos #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
654 1.1 christos #define NID_aes_256_cfb NID_aes_256_cfb128
655 1.1 christos #endif
656 1.1 christos
657 1.1 christos #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
658 1.1 christos #define NID_aes_256_ofb NID_aes_256_ofb128
659 1.1 christos #endif
660 1.1 christos
661 1.1 christos /* List of supported ciphers. */
662 1.1 christos static int padlock_cipher_nids[] = {
663 1.1 christos NID_aes_128_ecb,
664 1.1 christos NID_aes_128_cbc,
665 1.1 christos NID_aes_128_cfb,
666 1.1 christos NID_aes_128_ofb,
667 1.1 christos
668 1.1 christos NID_aes_192_ecb,
669 1.1 christos NID_aes_192_cbc,
670 1.1 christos NID_aes_192_cfb,
671 1.1 christos NID_aes_192_ofb,
672 1.1 christos
673 1.1 christos NID_aes_256_ecb,
674 1.1 christos NID_aes_256_cbc,
675 1.1 christos NID_aes_256_cfb,
676 1.1 christos NID_aes_256_ofb,
677 1.1 christos };
678 1.1 christos static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
679 1.1 christos sizeof(padlock_cipher_nids[0]));
680 1.1 christos
681 1.1 christos /* Function prototypes ... */
/* Forward declarations of the two callbacks that DECLARE_AES_EVP() plugs
 * into every EVP_CIPHER table entry below. */
682 1.1 christos static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
683 1.1 christos const unsigned char *iv, int enc);
684 1.1 christos static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
685 1.1 christos const unsigned char *in, size_t nbytes);
686 1.1 christos
/*
 * Round ptr up to the next 16-byte boundary; an already aligned pointer
 * is returned unchanged, otherwise it is advanced by 1..15 bytes.
 * The result has type unsigned char *.  Note: ptr is evaluated twice.
 */
#define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
		( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )   )
/*
 * 16-byte aligned view of ctx->cipher_data as struct padlock_cipher_data.
 * The argument is parenthesized so that expressions such as "p + 1" or
 * "&c" expand correctly.
 */
#define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
		NEAREST_ALIGNED((ctx)->cipher_data))
691 1.1 christos
/* Per-mode block size pasted into DECLARE_AES_EVP() via its umode
 * argument: a full AES block for ECB/CBC, 1 for OFB/CFB. */
692 1.1 christos #define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
693 1.1 christos #define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
694 1.1 christos #define EVP_CIPHER_block_size_OFB 1
695 1.1 christos #define EVP_CIPHER_block_size_CFB 1
696 1.1 christos
697 1.1 christos /* Declaring so many ciphers by hand would be a pain.
698 1.1 christos Instead introduce a bit of preprocessor magic :-) */
/*
 * DECLARE_AES_EVP(ksize, lmode, umode) defines the static const
 * EVP_CIPHER object padlock_aes_<ksize>_<lmode>.
 *   ksize - 128, 192 or 256; selects NID_aes_<ksize>_<lmode> and
 *           AES_KEY_SIZE_<ksize>
 *   lmode - lower-case mode name used in the identifier and the NID
 *   umode - upper-case mode name used for EVP_CIPHER_block_size_<umode>
 *           and EVP_CIPH_<umode>_MODE
 * The context size is sizeof(struct padlock_cipher_data) + 16 so that
 * ALIGNED_CIPHER_DATA() can move the pointer forward to a 16-byte
 * boundary and still fit the structure.
 * NOTE(review): the initializer is positional; the meaning of each slot
 * follows the EVP_CIPHER declaration in <openssl/evp.h> - confirm the
 * order there when porting to another OpenSSL version.
 */
699 1.1 christos #define DECLARE_AES_EVP(ksize,lmode,umode) \
700 1.1 christos static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
701 1.1 christos NID_aes_##ksize##_##lmode, \
702 1.1 christos EVP_CIPHER_block_size_##umode, \
703 1.1 christos AES_KEY_SIZE_##ksize, \
704 1.1 christos AES_BLOCK_SIZE, \
705 1.1 christos 0 | EVP_CIPH_##umode##_MODE, \
706 1.1 christos padlock_aes_init_key, \
707 1.1 christos padlock_aes_cipher, \
708 1.1 christos NULL, \
709 1.1 christos sizeof(struct padlock_cipher_data) + 16, \
710 1.1 christos EVP_CIPHER_set_asn1_iv, \
711 1.1 christos EVP_CIPHER_get_asn1_iv, \
712 1.1 christos NULL, \
713 1.1 christos NULL \
714 1.1 christos }
715 1.1 christos
716 1.1 christos DECLARE_AES_EVP(128,ecb,ECB);
717 1.1 christos DECLARE_AES_EVP(128,cbc,CBC);
718 1.1 christos DECLARE_AES_EVP(128,cfb,CFB);
719 1.1 christos DECLARE_AES_EVP(128,ofb,OFB);
720 1.1 christos
721 1.1 christos DECLARE_AES_EVP(192,ecb,ECB);
722 1.1 christos DECLARE_AES_EVP(192,cbc,CBC);
723 1.1 christos DECLARE_AES_EVP(192,cfb,CFB);
724 1.1 christos DECLARE_AES_EVP(192,ofb,OFB);
725 1.1 christos
726 1.1 christos DECLARE_AES_EVP(256,ecb,ECB);
727 1.1 christos DECLARE_AES_EVP(256,cbc,CBC);
728 1.1 christos DECLARE_AES_EVP(256,cfb,CFB);
729 1.1 christos DECLARE_AES_EVP(256,ofb,OFB);
730 1.1 christos
731 1.1 christos static int
732 1.1 christos padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
733 1.1 christos {
734 1.1 christos /* No specific cipher => return a list of supported nids ... */
735 1.1 christos if (!cipher) {
736 1.1 christos *nids = padlock_cipher_nids;
737 1.1 christos return padlock_cipher_nids_num;
738 1.1 christos }
739 1.1 christos
740 1.1 christos /* ... or the requested "cipher" otherwise */
741 1.1 christos switch (nid) {
742 1.1 christos case NID_aes_128_ecb:
743 1.1 christos *cipher = &padlock_aes_128_ecb;
744 1.1 christos break;
745 1.1 christos case NID_aes_128_cbc:
746 1.1 christos *cipher = &padlock_aes_128_cbc;
747 1.1 christos break;
748 1.1 christos case NID_aes_128_cfb:
749 1.1 christos *cipher = &padlock_aes_128_cfb;
750 1.1 christos break;
751 1.1 christos case NID_aes_128_ofb:
752 1.1 christos *cipher = &padlock_aes_128_ofb;
753 1.1 christos break;
754 1.1 christos
755 1.1 christos case NID_aes_192_ecb:
756 1.1 christos *cipher = &padlock_aes_192_ecb;
757 1.1 christos break;
758 1.1 christos case NID_aes_192_cbc:
759 1.1 christos *cipher = &padlock_aes_192_cbc;
760 1.1 christos break;
761 1.1 christos case NID_aes_192_cfb:
762 1.1 christos *cipher = &padlock_aes_192_cfb;
763 1.1 christos break;
764 1.1 christos case NID_aes_192_ofb:
765 1.1 christos *cipher = &padlock_aes_192_ofb;
766 1.1 christos break;
767 1.1 christos
768 1.1 christos case NID_aes_256_ecb:
769 1.1 christos *cipher = &padlock_aes_256_ecb;
770 1.1 christos break;
771 1.1 christos case NID_aes_256_cbc:
772 1.1 christos *cipher = &padlock_aes_256_cbc;
773 1.1 christos break;
774 1.1 christos case NID_aes_256_cfb:
775 1.1 christos *cipher = &padlock_aes_256_cfb;
776 1.1 christos break;
777 1.1 christos case NID_aes_256_ofb:
778 1.1 christos *cipher = &padlock_aes_256_ofb;
779 1.1 christos break;
780 1.1 christos
781 1.1 christos default:
782 1.1 christos /* Sorry, we don't support this NID */
783 1.1 christos *cipher = NULL;
784 1.1 christos return 0;
785 1.1 christos }
786 1.1 christos
787 1.1 christos return 1;
788 1.1 christos }
789 1.1 christos
790 1.1 christos /* Prepare the encryption key for PadLock usage */
791 1.1 christos static int
792 1.1 christos padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
793 1.1 christos const unsigned char *iv, int enc)
794 1.1 christos {
795 1.1 christos struct padlock_cipher_data *cdata;
796 1.1 christos int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
797 1.1 christos
798 1.1 christos if (key==NULL) return 0; /* ERROR */
799 1.1 christos
800 1.1 christos cdata = ALIGNED_CIPHER_DATA(ctx);
801 1.1 christos memset(cdata, 0, sizeof(struct padlock_cipher_data));
802 1.1 christos
803 1.1 christos /* Prepare Control word. */
804 1.1 christos if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
805 1.1 christos cdata->cword.b.encdec = 0;
806 1.1 christos else
807 1.1 christos cdata->cword.b.encdec = (ctx->encrypt == 0);
808 1.1 christos cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
809 1.1 christos cdata->cword.b.ksize = (key_len - 128) / 64;
810 1.1 christos
811 1.1 christos switch(key_len) {
812 1.1 christos case 128:
813 1.1 christos /* PadLock can generate an extended key for
814 1.1 christos AES128 in hardware */
815 1.1 christos memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
816 1.1 christos cdata->cword.b.keygen = 0;
817 1.1 christos break;
818 1.1 christos
819 1.1 christos case 192:
820 1.1 christos case 256:
821 1.1 christos /* Generate an extended AES key in software.
822 1.1 christos Needed for AES192/AES256 */
823 1.1 christos /* Well, the above applies to Stepping 8 CPUs
824 1.1 christos and is listed as hardware errata. They most
825 1.1 christos likely will fix it at some point and then
826 1.1 christos a check for stepping would be due here. */
827 1.1 christos if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
828 1.1 christos EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
829 1.1 christos enc)
830 1.1 christos AES_set_encrypt_key(key, key_len, &cdata->ks);
831 1.1 christos else
832 1.1 christos AES_set_decrypt_key(key, key_len, &cdata->ks);
833 1.1 christos #ifndef AES_ASM
834 1.1 christos /* OpenSSL C functions use byte-swapped extended key. */
835 1.1 christos padlock_bswapl(&cdata->ks);
836 1.1 christos #endif
837 1.1 christos cdata->cword.b.keygen = 1;
838 1.1 christos break;
839 1.1 christos
840 1.1 christos default:
841 1.1 christos /* ERROR */
842 1.1 christos return 0;
843 1.1 christos }
844 1.1 christos
845 1.1 christos /*
846 1.1 christos * This is done to cover for cases when user reuses the
847 1.1 christos * context for new key. The catch is that if we don't do
848 1.1 christos * this, padlock_eas_cipher might proceed with old key...
849 1.1 christos */
850 1.1 christos padlock_reload_key ();
851 1.1 christos
852 1.1 christos return 1;
853 1.1 christos }
854 1.1 christos
855 1.1 christos /*
856 1.1 christos * Simplified version of padlock_aes_cipher() used when
857 1.1 christos * 1) both input and output buffers are at aligned addresses.
858 1.1 christos * or when
859 1.1 christos * 2) running on a newer CPU that doesn't require aligned buffers.
860 1.1 christos */
861 1.1 christos static int
862 1.1 christos padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
863 1.1 christos const unsigned char *in_arg, size_t nbytes)
864 1.1 christos {
865 1.1 christos struct padlock_cipher_data *cdata;
866 1.1 christos void *iv;
867 1.1 christos
868 1.1 christos cdata = ALIGNED_CIPHER_DATA(ctx);
869 1.1 christos padlock_verify_context(cdata);
870 1.1 christos
871 1.1 christos switch (EVP_CIPHER_CTX_mode(ctx)) {
872 1.1 christos case EVP_CIPH_ECB_MODE:
873 1.1 christos padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
874 1.1 christos break;
875 1.1 christos
876 1.1 christos case EVP_CIPH_CBC_MODE:
877 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
878 1.1 christos iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
879 1.1 christos memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
880 1.1 christos break;
881 1.1 christos
882 1.1 christos case EVP_CIPH_CFB_MODE:
883 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
884 1.1 christos iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
885 1.1 christos memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
886 1.1 christos break;
887 1.1 christos
888 1.1 christos case EVP_CIPH_OFB_MODE:
889 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
890 1.1 christos padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
891 1.1 christos memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
892 1.1 christos break;
893 1.1 christos
894 1.1 christos default:
895 1.1 christos return 0;
896 1.1 christos }
897 1.1 christos
898 1.1 christos memset(cdata->iv, 0, AES_BLOCK_SIZE);
899 1.1 christos
900 1.1 christos return 1;
901 1.1 christos }
902 1.1 christos
/*
 * Size in bytes of one realignment chunk. May be overridden at build
 * time; must be a power of 2 and no smaller than 16.
 */
#if !defined(PADLOCK_CHUNK)
# define PADLOCK_CHUNK	512
#endif
#if (PADLOCK_CHUNK < 16) || (PADLOCK_CHUNK & (PADLOCK_CHUNK - 1))
# error "insane PADLOCK_CHUNK..."
#endif
909 1.1 christos
910 1.1 christos /* Re-align the arguments to 16-Bytes boundaries and run the
911 1.1 christos encryption function itself. This function is not AES-specific. */
912 1.1 christos static int
913 1.1 christos padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
914 1.1 christos const unsigned char *in_arg, size_t nbytes)
915 1.1 christos {
916 1.1 christos struct padlock_cipher_data *cdata;
917 1.1 christos const void *inp;
918 1.2 christos unsigned char *out, *tofree;
919 1.1 christos void *iv;
920 1.1 christos int inp_misaligned, out_misaligned, realign_in_loop;
921 1.1 christos size_t chunk, allocated=0;
922 1.1 christos
923 1.1 christos /* ctx->num is maintained in byte-oriented modes,
924 1.1 christos such as CFB and OFB... */
925 1.1 christos if ((chunk = ctx->num)) { /* borrow chunk variable */
926 1.1 christos unsigned char *ivp=ctx->iv;
927 1.1 christos
928 1.1 christos switch (EVP_CIPHER_CTX_mode(ctx)) {
929 1.1 christos case EVP_CIPH_CFB_MODE:
930 1.1 christos if (chunk >= AES_BLOCK_SIZE)
931 1.1 christos return 0; /* bogus value */
932 1.1 christos
933 1.1 christos if (ctx->encrypt)
934 1.1 christos while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
935 1.1 christos ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
936 1.1 christos chunk++, nbytes--;
937 1.1 christos }
938 1.1 christos else while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
939 1.1 christos unsigned char c = *(in_arg++);
940 1.1 christos *(out_arg++) = c ^ ivp[chunk];
941 1.1 christos ivp[chunk++] = c, nbytes--;
942 1.1 christos }
943 1.1 christos
944 1.1 christos ctx->num = chunk%AES_BLOCK_SIZE;
945 1.1 christos break;
946 1.1 christos case EVP_CIPH_OFB_MODE:
947 1.1 christos if (chunk >= AES_BLOCK_SIZE)
948 1.1 christos return 0; /* bogus value */
949 1.1 christos
950 1.1 christos while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
951 1.1 christos *(out_arg++) = *(in_arg++) ^ ivp[chunk];
952 1.1 christos chunk++, nbytes--;
953 1.1 christos }
954 1.1 christos
955 1.1 christos ctx->num = chunk%AES_BLOCK_SIZE;
956 1.1 christos break;
957 1.1 christos }
958 1.1 christos }
959 1.1 christos
960 1.1 christos if (nbytes == 0)
961 1.1 christos return 1;
962 1.1 christos #if 0
963 1.1 christos if (nbytes % AES_BLOCK_SIZE)
964 1.1 christos return 0; /* are we expected to do tail processing? */
965 1.1 christos #else
966 1.1 christos /* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
967 1.1 christos modes and arbitrary value in byte-oriented modes, such as
968 1.1 christos CFB and OFB... */
969 1.1 christos #endif
970 1.1 christos
971 1.1 christos /* VIA promises CPUs that won't require alignment in the future.
972 1.1 christos For now padlock_aes_align_required is initialized to 1 and
973 1.1 christos the condition is never met... */
974 1.1 christos /* C7 core is capable to manage unaligned input in non-ECB[!]
975 1.1 christos mode, but performance penalties appear to be approximately
976 1.1 christos same as for software alignment below or ~3x. They promise to
977 1.1 christos improve it in the future, but for now we can just as well
978 1.1 christos pretend that it can only handle aligned input... */
979 1.1 christos if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
980 1.1 christos return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
981 1.1 christos
982 1.1 christos inp_misaligned = (((size_t)in_arg) & 0x0F);
983 1.1 christos out_misaligned = (((size_t)out_arg) & 0x0F);
984 1.1 christos
985 1.1 christos /* Note that even if output is aligned and input not,
986 1.1 christos * I still prefer to loop instead of copy the whole
987 1.1 christos * input and then encrypt in one stroke. This is done
988 1.1 christos * in order to improve L1 cache utilization... */
989 1.1 christos realign_in_loop = out_misaligned|inp_misaligned;
990 1.1 christos
991 1.1 christos if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
992 1.1 christos return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
993 1.1 christos
994 1.1 christos /* this takes one "if" out of the loops */
995 1.1 christos chunk = nbytes;
996 1.1 christos chunk %= PADLOCK_CHUNK;
997 1.1 christos if (chunk==0) chunk = PADLOCK_CHUNK;
998 1.1 christos
999 1.1 christos if (out_misaligned) {
1000 1.1 christos /* optmize for small input */
1001 1.1 christos allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
1002 1.2 christos tofree = malloc(0x10 + allocated);
1003 1.2 christos if (tofree == NULL)
1004 1.2 christos return 0;
1005 1.2 christos out = NEAREST_ALIGNED(tofree);
1006 1.1 christos }
1007 1.2 christos else {
1008 1.1 christos out = out_arg;
1009 1.2 christos tofree = NULL;
1010 1.2 christos }
1011 1.1 christos
1012 1.1 christos cdata = ALIGNED_CIPHER_DATA(ctx);
1013 1.1 christos padlock_verify_context(cdata);
1014 1.1 christos
1015 1.1 christos switch (EVP_CIPHER_CTX_mode(ctx)) {
1016 1.1 christos case EVP_CIPH_ECB_MODE:
1017 1.1 christos do {
1018 1.1 christos if (inp_misaligned)
1019 1.1 christos inp = padlock_memcpy(out, in_arg, chunk);
1020 1.1 christos else
1021 1.1 christos inp = in_arg;
1022 1.1 christos in_arg += chunk;
1023 1.1 christos
1024 1.1 christos padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1025 1.1 christos
1026 1.1 christos if (out_misaligned)
1027 1.1 christos out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1028 1.1 christos else
1029 1.1 christos out = out_arg+=chunk;
1030 1.1 christos
1031 1.1 christos nbytes -= chunk;
1032 1.1 christos chunk = PADLOCK_CHUNK;
1033 1.1 christos } while (nbytes);
1034 1.1 christos break;
1035 1.1 christos
1036 1.1 christos case EVP_CIPH_CBC_MODE:
1037 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1038 1.1 christos goto cbc_shortcut;
1039 1.1 christos do {
1040 1.1 christos if (iv != cdata->iv)
1041 1.1 christos memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1042 1.1 christos chunk = PADLOCK_CHUNK;
1043 1.1 christos cbc_shortcut: /* optimize for small input */
1044 1.1 christos if (inp_misaligned)
1045 1.1 christos inp = padlock_memcpy(out, in_arg, chunk);
1046 1.1 christos else
1047 1.1 christos inp = in_arg;
1048 1.1 christos in_arg += chunk;
1049 1.1 christos
1050 1.1 christos iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1051 1.1 christos
1052 1.1 christos if (out_misaligned)
1053 1.1 christos out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1054 1.1 christos else
1055 1.1 christos out = out_arg+=chunk;
1056 1.1 christos
1057 1.1 christos } while (nbytes -= chunk);
1058 1.1 christos memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1059 1.1 christos break;
1060 1.1 christos
1061 1.1 christos case EVP_CIPH_CFB_MODE:
1062 1.1 christos memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1063 1.1 christos chunk &= ~(AES_BLOCK_SIZE-1);
1064 1.1 christos if (chunk) goto cfb_shortcut;
1065 1.1 christos else goto cfb_skiploop;
1066 1.1 christos do {
1067 1.1 christos if (iv != cdata->iv)
1068 1.1 christos memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1069 1.1 christos chunk = PADLOCK_CHUNK;
1070 1.1 christos cfb_shortcut: /* optimize for small input */
1071 1.1 christos if (inp_misaligned)
1072 1.1 christos inp = padlock_memcpy(out, in_arg, chunk);
1073 1.1 christos else
1074 1.1 christos inp = in_arg;
1075 1.1 christos in_arg += chunk;
1076 1.1 christos
1077 1.1 christos iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1078 1.1 christos
1079 1.1 christos if (out_misaligned)
1080 1.1 christos out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1081 1.1 christos else
1082 1.1 christos out = out_arg+=chunk;
1083 1.1 christos
1084 1.1 christos nbytes -= chunk;
1085 1.1 christos } while (nbytes >= AES_BLOCK_SIZE);
1086 1.1 christos
1087 1.1 christos cfb_skiploop:
1088 1.1 christos if (nbytes) {
1089 1.1 christos unsigned char *ivp = cdata->iv;
1090 1.1 christos
1091 1.1 christos if (iv != ivp) {
1092 1.1 christos memcpy(ivp, iv, AES_BLOCK_SIZE);
1093 1.1 christos iv = ivp;
1094 1.1 christos }
1095 1.1 christos ctx->num = nbytes;
1096 1.1 christos if (cdata->cword.b.encdec) {
1097 1.1 christos cdata->cword.b.encdec=0;
1098 1.1 christos padlock_reload_key();
1099 1.1 christos padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1100 1.1 christos cdata->cword.b.encdec=1;
1101 1.1 christos padlock_reload_key();
1102 1.1 christos while(nbytes) {
1103 1.1 christos unsigned char c = *(in_arg++);
1104 1.1 christos *(out_arg++) = c ^ *ivp;
1105 1.1 christos *(ivp++) = c, nbytes--;
1106 1.1 christos }
1107 1.1 christos }
1108 1.1 christos else { padlock_reload_key();
1109 1.1 christos padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1110 1.1 christos padlock_reload_key();
1111 1.1 christos while (nbytes) {
1112 1.1 christos *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
1113 1.1 christos ivp++, nbytes--;
1114 1.1 christos }
1115 1.1 christos }
1116 1.1 christos }
1117 1.1 christos
1118 1.1 christos memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1119 1.1 christos break;
1120 1.1 christos
1121 1.1 christos case EVP_CIPH_OFB_MODE:
1122 1.1 christos memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1123 1.1 christos chunk &= ~(AES_BLOCK_SIZE-1);
1124 1.1 christos if (chunk) do {
1125 1.1 christos if (inp_misaligned)
1126 1.1 christos inp = padlock_memcpy(out, in_arg, chunk);
1127 1.1 christos else
1128 1.1 christos inp = in_arg;
1129 1.1 christos in_arg += chunk;
1130 1.1 christos
1131 1.1 christos padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
1132 1.1 christos
1133 1.1 christos if (out_misaligned)
1134 1.1 christos out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1135 1.1 christos else
1136 1.1 christos out = out_arg+=chunk;
1137 1.1 christos
1138 1.1 christos nbytes -= chunk;
1139 1.1 christos chunk = PADLOCK_CHUNK;
1140 1.1 christos } while (nbytes >= AES_BLOCK_SIZE);
1141 1.1 christos
1142 1.1 christos if (nbytes) {
1143 1.1 christos unsigned char *ivp = cdata->iv;
1144 1.1 christos
1145 1.1 christos ctx->num = nbytes;
1146 1.1 christos padlock_reload_key(); /* empirically found */
1147 1.1 christos padlock_xcrypt_ecb(1,cdata,ivp,ivp);
1148 1.1 christos padlock_reload_key(); /* empirically found */
1149 1.1 christos while (nbytes) {
1150 1.1 christos *(out_arg++) = *(in_arg++) ^ *ivp;
1151 1.1 christos ivp++, nbytes--;
1152 1.1 christos }
1153 1.1 christos }
1154 1.1 christos
1155 1.1 christos memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
1156 1.1 christos break;
1157 1.1 christos
1158 1.1 christos default:
1159 1.2 christos free(tofree);
1160 1.1 christos return 0;
1161 1.1 christos }
1162 1.1 christos
1163 1.1 christos /* Clean the realign buffer if it was used */
1164 1.1 christos if (out_misaligned) {
1165 1.1 christos volatile unsigned long *p=(void *)out;
1166 1.1 christos size_t n = allocated/sizeof(*p);
1167 1.1 christos while (n--) *p++=0;
1168 1.1 christos }
1169 1.1 christos
1170 1.1 christos memset(cdata->iv, 0, AES_BLOCK_SIZE);
1171 1.2 christos free(tofree);
1172 1.1 christos
1173 1.1 christos return 1;
1174 1.1 christos }
1175 1.1 christos
1176 1.1 christos #endif /* OPENSSL_NO_AES */
1177 1.1 christos
1178 1.1 christos /* ===== Random Number Generator ===== */
1179 1.1 christos /*
1180 1.1 christos * This code is not engaged. The reason is that it does not comply
1181 1.1 christos * with recommendations for VIA RNG usage for secure applications
1182 1.1 christos * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1183 1.1 christos * provide meaningful error control...
1184 1.1 christos */
/*
 * Issue one padlock_xstore() and classify the status word it returns.
 *
 * Returns  1 if exactly "expected" bytes were reported as stored,
 *          0 if no bytes were reported (caller should retry),
 *         -1 if the RNG-enabled bit (bit 6) is clear, any of bits
 *            10..14 is set, or an unexpected byte count is reported.
 */
static int
padlock_xstore_checked(void *addr, unsigned int edx_in, unsigned int expected)
{
	unsigned int status = padlock_xstore(addr, edx_in);
	unsigned int stored = status & 0x1F;

	if ((status & (1<<6)) == 0)
		return -1;	/* RNG disabled */
	/* this ---vv--- covers DC bias, Raw Bits and String Filter */
	if ((status & (0x1F<<10)) != 0)
		return -1;
	if (stored == 0)
		return 0;	/* no data, retry... */
	if (stored != expected)
		return -1;	/* fatal failure... */

	return 1;
}

/*
 * Wrapper that provides an interface between the API and
 * the raw PadLock RNG.
 *
 * Fills "output" with "count" bytes: eight at a time written directly
 * into the caller's buffer, then one at a time through a local word
 * for the remainder. Returns 1 on success and 0 on any RNG failure.
 */
static int
padlock_rand_bytes(unsigned char *output, int count)
{
	unsigned int buf;
	int rc;

	/* Bulk part: 8 bytes per xstore, straight into the output */
	while (count >= 8) {
		rc = padlock_xstore_checked(output, 0, 8);
		if (rc < 0)
			return 0;
		if (rc == 0)
			continue;
		output += 8;
		count  -= 8;
	}

	/* Remainder: 1 byte per xstore, bounced through buf */
	while (count > 0) {
		rc = padlock_xstore_checked(&buf, 3, 1);
		if (rc < 0)
			return 0;
		if (rc == 0)
			continue;
		*output++ = (unsigned char)buf;
		count--;
	}

	/* Wipe the bounce word; volatile keeps the store from being elided */
	*(volatile unsigned int *)&buf=0;

	return 1;
}
1216 1.1 christos
/*
 * Status callback for the RAND_METHOD table below. Dummy but
 * necessary: it performs no check and always returns 1.
 */
static int
padlock_rand_status(void)
{
	return 1;
}
1223 1.1 christos
1224 1.1 christos /* Prepare structure for registration */
1225 1.1 christos static RAND_METHOD padlock_rand = {
1226 1.1 christos NULL, /* seed */
1227 1.1 christos padlock_rand_bytes, /* bytes */
1228 1.1 christos NULL, /* cleanup */
1229 1.1 christos NULL, /* add */
1230 1.1 christos padlock_rand_bytes, /* pseudorand */
1231 1.1 christos padlock_rand_status, /* rand status */
1232 1.1 christos };
1233 1.1 christos
1234 1.1 christos #else /* !COMPILE_HW_PADLOCK */
1235 1.1 christos #ifndef OPENSSL_NO_DYNAMIC_ENGINE
1236 1.1 christos OPENSSL_EXPORT
1237 1.2 christos int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
1238 1.2 christos OPENSSL_EXPORT
1239 1.1 christos int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; }
1240 1.1 christos IMPLEMENT_DYNAMIC_CHECK_FN()
1241 1.1 christos #endif
1242 1.1 christos #endif /* COMPILE_HW_PADLOCK */
1243 1.1 christos
1244 1.1 christos #endif /* !OPENSSL_NO_HW_PADLOCK */
1245 1.1 christos #endif /* !OPENSSL_NO_HW */
1246