Home | History | Annotate | Line # | Download | only in engines
e_padlock.c revision 1.2
      1  1.1  christos /*
      2  1.1  christos  * Support for VIA PadLock Advanced Cryptography Engine (ACE)
      3  1.1  christos  * Written by Michal Ludvig <michal (at) logix.cz>
      4  1.1  christos  *            http://www.logix.cz/michal
      5  1.1  christos  *
      6  1.1  christos  * Big thanks to Andy Polyakov for a help with optimization,
      7  1.1  christos  * assembler fixes, port to MS Windows and a lot of other
      8  1.1  christos  * valuable work on this engine!
      9  1.1  christos  */
     10  1.1  christos 
     11  1.1  christos /* ====================================================================
     12  1.1  christos  * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
     13  1.1  christos  *
     14  1.1  christos  * Redistribution and use in source and binary forms, with or without
     15  1.1  christos  * modification, are permitted provided that the following conditions
     16  1.1  christos  * are met:
     17  1.1  christos  *
     18  1.1  christos  * 1. Redistributions of source code must retain the above copyright
     19  1.1  christos  *    notice, this list of conditions and the following disclaimer.
     20  1.1  christos  *
     21  1.1  christos  * 2. Redistributions in binary form must reproduce the above copyright
     22  1.1  christos  *    notice, this list of conditions and the following disclaimer in
     23  1.1  christos  *    the documentation and/or other materials provided with the
     24  1.1  christos  *    distribution.
     25  1.1  christos  *
     26  1.1  christos  * 3. All advertising materials mentioning features or use of this
     27  1.1  christos  *    software must display the following acknowledgment:
     28  1.1  christos  *    "This product includes software developed by the OpenSSL Project
     29  1.1  christos  *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
     30  1.1  christos  *
     31  1.1  christos  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
     32  1.1  christos  *    endorse or promote products derived from this software without
     33  1.1  christos  *    prior written permission. For written permission, please contact
     34  1.1  christos  *    licensing (at) OpenSSL.org.
     35  1.1  christos  *
     36  1.1  christos  * 5. Products derived from this software may not be called "OpenSSL"
     37  1.1  christos  *    nor may "OpenSSL" appear in their names without prior written
     38  1.1  christos  *    permission of the OpenSSL Project.
     39  1.1  christos  *
     40  1.1  christos  * 6. Redistributions of any form whatsoever must retain the following
     41  1.1  christos  *    acknowledgment:
     42  1.1  christos  *    "This product includes software developed by the OpenSSL Project
     43  1.1  christos  *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
     44  1.1  christos  *
     45  1.1  christos  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
     46  1.1  christos  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  1.1  christos  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     48  1.1  christos  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
     49  1.1  christos  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     50  1.1  christos  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     51  1.1  christos  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     52  1.1  christos  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     53  1.1  christos  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     54  1.1  christos  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     55  1.1  christos  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     56  1.1  christos  * OF THE POSSIBILITY OF SUCH DAMAGE.
     57  1.1  christos  * ====================================================================
     58  1.1  christos  *
     59  1.1  christos  * This product includes cryptographic software written by Eric Young
     60  1.1  christos  * (eay (at) cryptsoft.com).  This product includes software written by Tim
     61  1.1  christos  * Hudson (tjh (at) cryptsoft.com).
     62  1.1  christos  *
     63  1.1  christos  */
     64  1.1  christos 
     65  1.1  christos 
     66  1.1  christos #include <stdio.h>
     67  1.1  christos #include <string.h>
     68  1.1  christos 
     69  1.1  christos #include <openssl/opensslconf.h>
     70  1.1  christos #include <openssl/crypto.h>
     71  1.1  christos #include <openssl/dso.h>
     72  1.1  christos #include <openssl/engine.h>
     73  1.1  christos #include <openssl/evp.h>
     74  1.1  christos #ifndef OPENSSL_NO_AES
     75  1.1  christos #include <openssl/aes.h>
     76  1.1  christos #endif
     77  1.1  christos #include <openssl/rand.h>
     78  1.1  christos #include <openssl/err.h>
     79  1.1  christos 
     80  1.1  christos #ifndef OPENSSL_NO_HW
     81  1.1  christos #ifndef OPENSSL_NO_HW_PADLOCK
     82  1.1  christos 
     83  1.1  christos /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
     84  1.1  christos #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
     85  1.1  christos #  ifndef OPENSSL_NO_DYNAMIC_ENGINE
     86  1.1  christos #    define DYNAMIC_ENGINE
     87  1.1  christos #  endif
     88  1.1  christos #elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
     89  1.1  christos #  ifdef ENGINE_DYNAMIC_SUPPORT
     90  1.1  christos #    define DYNAMIC_ENGINE
     91  1.1  christos #  endif
     92  1.1  christos #else
     93  1.1  christos #  error "Only OpenSSL >= 0.9.7 is supported"
     94  1.1  christos #endif
     95  1.1  christos 
     96  1.1  christos /* VIA PadLock AES is available *ONLY* on some x86 CPUs.
     97  1.1  christos    Not only does it not exist elsewhere, but it
     98  1.1  christos    can't even be compiled on other platforms!
     99  1.1  christos 
    100  1.1  christos    In addition, because of the heavy use of inline assembler,
    101  1.1  christos    compiler choice is limited to GCC and Microsoft C. */
    102  1.1  christos #undef COMPILE_HW_PADLOCK
    103  1.1  christos #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
    104  1.2  christos # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
    105  1.1  christos      (defined(_MSC_VER) && defined(_M_IX86))
    106  1.1  christos #  define COMPILE_HW_PADLOCK
    107  1.2  christos # endif
    108  1.2  christos #endif
    109  1.2  christos 
    110  1.2  christos #ifdef OPENSSL_NO_DYNAMIC_ENGINE
    111  1.2  christos #ifdef COMPILE_HW_PADLOCK
    112  1.1  christos static ENGINE *ENGINE_padlock (void);
    113  1.1  christos #endif
    114  1.1  christos 
/* Build the PadLock ENGINE and add it to the engine list.
 * When COMPILE_HW_PADLOCK is not defined (e.g. on non-x86 CPUs)
 * the body is empty and the function just returns. */
void ENGINE_load_padlock (void)
{
#ifdef COMPILE_HW_PADLOCK
	ENGINE *engine = ENGINE_padlock ();

	if (engine != NULL) {
		ENGINE_add (engine);
		ENGINE_free (engine);
		ERR_clear_error ();
	}
#endif
}
    126  1.1  christos 
    127  1.2  christos #endif
    128  1.2  christos 
    129  1.1  christos #ifdef COMPILE_HW_PADLOCK
    130  1.1  christos /* We do these includes here to avoid header problems on platforms that
    131  1.1  christos    do not have the VIA padlock anyway... */
    132  1.1  christos #include <stdlib.h>
    133  1.1  christos #ifdef _WIN32
    134  1.1  christos # include <malloc.h>
    135  1.1  christos # ifndef alloca
    136  1.1  christos #  define alloca _alloca
    137  1.1  christos # endif
    138  1.1  christos #elif defined(__GNUC__)
    139  1.1  christos # ifndef alloca
    140  1.2  christos #  define alloca(s) __builtin_alloca(s)
    141  1.1  christos # endif
    142  1.1  christos #endif
    143  1.1  christos 
    144  1.1  christos /* Function for ENGINE detection and control */
    145  1.1  christos static int padlock_available(void);
    146  1.1  christos static int padlock_init(ENGINE *e);
    147  1.1  christos 
    148  1.1  christos /* RNG Stuff */
    149  1.1  christos static RAND_METHOD padlock_rand;
    150  1.1  christos 
    151  1.1  christos /* Cipher Stuff */
    152  1.1  christos #ifndef OPENSSL_NO_AES
    153  1.1  christos static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
    154  1.1  christos #endif
    155  1.1  christos 
    156  1.1  christos /* Engine names */
    157  1.1  christos static const char *padlock_id = "padlock";
    158  1.1  christos static char padlock_name[100];
    159  1.1  christos 
    160  1.1  christos /* Available features */
    161  1.1  christos static int padlock_use_ace = 0;	/* Advanced Cryptography Engine */
    162  1.1  christos static int padlock_use_rng = 0;	/* Random Number Generator */
    163  1.1  christos #ifndef OPENSSL_NO_AES
    164  1.1  christos static int padlock_aes_align_required = 1;
    165  1.1  christos #endif
    166  1.1  christos 
    167  1.1  christos /* ===== Engine "management" functions ===== */
    168  1.1  christos 
    169  1.1  christos /* Prepare the ENGINE structure for registration */
    170  1.1  christos static int
    171  1.1  christos padlock_bind_helper(ENGINE *e)
    172  1.1  christos {
    173  1.1  christos 	/* Check available features */
    174  1.1  christos 	padlock_available();
    175  1.1  christos 
    176  1.1  christos #if 1	/* disable RNG for now, see commentary in vicinity of RNG code */
    177  1.1  christos 	padlock_use_rng=0;
    178  1.1  christos #endif
    179  1.1  christos 
    180  1.1  christos 	/* Generate a nice engine name with available features */
    181  1.1  christos 	BIO_snprintf(padlock_name, sizeof(padlock_name),
    182  1.1  christos 		"VIA PadLock (%s, %s)",
    183  1.1  christos 		 padlock_use_rng ? "RNG" : "no-RNG",
    184  1.1  christos 		 padlock_use_ace ? "ACE" : "no-ACE");
    185  1.1  christos 
    186  1.1  christos 	/* Register everything or return with an error */
    187  1.1  christos 	if (!ENGINE_set_id(e, padlock_id) ||
    188  1.1  christos 	    !ENGINE_set_name(e, padlock_name) ||
    189  1.1  christos 
    190  1.1  christos 	    !ENGINE_set_init_function(e, padlock_init) ||
    191  1.1  christos #ifndef OPENSSL_NO_AES
    192  1.1  christos 	    (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
    193  1.1  christos #endif
    194  1.1  christos 	    (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
    195  1.1  christos 		return 0;
    196  1.1  christos 	}
    197  1.1  christos 
    198  1.1  christos 	/* Everything looks good */
    199  1.1  christos 	return 1;
    200  1.1  christos }
    201  1.1  christos 
    202  1.2  christos #ifdef OPENSSL_NO_DYNAMIC_ENGINE
    203  1.2  christos 
    204  1.1  christos /* Constructor */
    205  1.1  christos static ENGINE *
    206  1.1  christos ENGINE_padlock(void)
    207  1.1  christos {
    208  1.1  christos 	ENGINE *eng = ENGINE_new();
    209  1.1  christos 
    210  1.1  christos 	if (!eng) {
    211  1.1  christos 		return NULL;
    212  1.1  christos 	}
    213  1.1  christos 
    214  1.1  christos 	if (!padlock_bind_helper(eng)) {
    215  1.1  christos 		ENGINE_free(eng);
    216  1.1  christos 		return NULL;
    217  1.1  christos 	}
    218  1.1  christos 
    219  1.1  christos 	return eng;
    220  1.1  christos }
    221  1.1  christos 
    222  1.2  christos #endif
    223  1.2  christos 
    224  1.1  christos /* Check availability of the engine */
    225  1.1  christos static int
    226  1.1  christos padlock_init(ENGINE *e)
    227  1.1  christos {
    228  1.1  christos 	return (padlock_use_rng || padlock_use_ace);
    229  1.1  christos }
    230  1.1  christos 
    231  1.1  christos /* This stuff is needed if this ENGINE is being compiled into a self-contained
    232  1.1  christos  * shared-library.
    233  1.1  christos  */
    234  1.1  christos #ifdef DYNAMIC_ENGINE
    235  1.1  christos static int
    236  1.1  christos padlock_bind_fn(ENGINE *e, const char *id)
    237  1.1  christos {
    238  1.1  christos 	if (id && (strcmp(id, padlock_id) != 0)) {
    239  1.1  christos 		return 0;
    240  1.1  christos 	}
    241  1.1  christos 
    242  1.1  christos 	if (!padlock_bind_helper(e))  {
    243  1.1  christos 		return 0;
    244  1.1  christos 	}
    245  1.1  christos 
    246  1.1  christos 	return 1;
    247  1.1  christos }
    248  1.1  christos 
    249  1.1  christos IMPLEMENT_DYNAMIC_CHECK_FN()
    250  1.1  christos IMPLEMENT_DYNAMIC_BIND_FN (padlock_bind_fn)
    251  1.1  christos #endif /* DYNAMIC_ENGINE */
    252  1.1  christos 
    253  1.1  christos /* ===== Here comes the "real" engine ===== */
    254  1.1  christos 
    255  1.1  christos #ifndef OPENSSL_NO_AES
    256  1.1  christos /* Some AES-related constants */
    257  1.1  christos #define AES_BLOCK_SIZE		16
    258  1.1  christos #define AES_KEY_SIZE_128	16
    259  1.1  christos #define AES_KEY_SIZE_192	24
    260  1.1  christos #define AES_KEY_SIZE_256	32
    261  1.1  christos 
/* Here we store the status information relevant to the
   current context: the IV, the control word and the AES
   key schedule. */
/* BIG FAT WARNING:
 * 	Inline assembler in PADLOCK_XCRYPT_ASM()
 * 	depends on the order of items in this structure:
 * 	it takes the address 16 bytes into the structure
 * 	(cword, right after the 16-byte iv) and the address
 * 	32 bytes into it (ks, right after the cword union,
 * 	assuming 4-byte unsigned int).
 * 	Don't blindly modify, reorder, etc!
 */
struct padlock_cipher_data
{
	unsigned char iv[AES_BLOCK_SIZE];	/* Initialization vector */
	union {	unsigned int pad[4];	/* sizes the control word to four ints */
		struct {
			int rounds:4;
			int dgst:1;	/* n/a in C3 */
			int align:1;	/* n/a in C3 */
			int ciphr:1;	/* n/a in C3 */
			unsigned int keygen:1;
			int interm:1;
			unsigned int encdec:1;
			int ksize:2;
		} b;
	} cword;		/* Control word */
	AES_KEY ks;		/* Encryption key */
};
    286  1.1  christos 
    287  1.1  christos /*
    288  1.1  christos  * Essentially this variable belongs in thread local storage.
    289  1.1  christos  * Having this variable global on the other hand can only cause
    290  1.1  christos  * few bogus key reloads [if any at all on single-CPU system],
    291  1.1  christos  * so we accept the penalty...
    292  1.1  christos  */
    293  1.1  christos static volatile struct padlock_cipher_data *padlock_saved_context;
    294  1.1  christos #endif
    295  1.1  christos 
    296  1.1  christos /*
    297  1.1  christos  * =======================================================
    298  1.1  christos  * Inline assembler section(s).
    299  1.1  christos  * =======================================================
    300  1.1  christos  * Order of arguments is chosen to facilitate Windows port
    301  1.1  christos  * using __fastcall calling convention. If you wish to add
    302  1.1  christos  * more routines, keep in mind that first __fastcall
    303  1.1  christos  * argument is passed in %ecx and second - in %edx.
    304  1.1  christos  * =======================================================
    305  1.1  christos  */
    306  1.1  christos #if defined(__GNUC__) && __GNUC__>=2
    307  1.1  christos /*
    308  1.1  christos  * As for excessive "push %ebx"/"pop %ebx" found all over.
    309  1.1  christos  * When generating position-independent code GCC won't let
    310  1.1  christos  * us use "b" in assembler templates nor even respect "ebx"
    311  1.1  christos  * in "clobber description." Therefore the trouble...
    312  1.1  christos  */
    313  1.1  christos 
/* Helper function - check if a CPUID instruction
   is available on this CPU.
   Returns 1 if bit #21 of EFLAGS could be toggled, 0 otherwise.
   Note: the original EFLAGS value is not written back afterwards. */
static int
padlock_insn_cpuid_available(void)
{
	int result = -1;

	/* We're checking if the bit #21 of EFLAGS
	   can be toggled. If yes = CPUID is available. */
	asm volatile (
		"pushf\n"
		"popl %%eax\n"			/* eax = current EFLAGS */
		"xorl $0x200000, %%eax\n"	/* flip bit #21 */
		"movl %%eax, %%ecx\n"
		"andl $0x200000, %%ecx\n"	/* ecx = bit #21 value we want */
		"pushl %%eax\n"
		"popf\n"			/* try to write it to EFLAGS */
		"pushf\n"
		"popl %%eax\n"			/* read EFLAGS back */
		"andl $0x200000, %%eax\n"	/* eax = bit #21 value we got */
		"xorl %%eax, %%ecx\n"		/* 0 when wanted == got */
		"movl %%ecx, %0\n"
		: "=r" (result) : : "eax", "ecx");

	/* result is 0 exactly when the toggle took effect */
	return (result == 0);
}
    340  1.1  christos 
    341  1.1  christos /* Load supported features of the CPU to see if
    342  1.1  christos    the PadLock is available. */
    343  1.1  christos static int
    344  1.1  christos padlock_available(void)
    345  1.1  christos {
    346  1.1  christos 	char vendor_string[16];
    347  1.1  christos 	unsigned int eax, edx;
    348  1.1  christos 
    349  1.1  christos 	/* First check if the CPUID instruction is available at all... */
    350  1.1  christos 	if (! padlock_insn_cpuid_available())
    351  1.1  christos 		return 0;
    352  1.1  christos 
    353  1.1  christos 	/* Are we running on the Centaur (VIA) CPU? */
    354  1.1  christos 	eax = 0x00000000;
    355  1.1  christos 	vendor_string[12] = 0;
    356  1.1  christos 	asm volatile (
    357  1.1  christos 		"pushl	%%ebx\n"
    358  1.1  christos 		"cpuid\n"
    359  1.1  christos 		"movl	%%ebx,(%%edi)\n"
    360  1.1  christos 		"movl	%%edx,4(%%edi)\n"
    361  1.1  christos 		"movl	%%ecx,8(%%edi)\n"
    362  1.1  christos 		"popl	%%ebx"
    363  1.1  christos 		: "+a"(eax) : "D"(vendor_string) : "ecx", "edx");
    364  1.1  christos 	if (strcmp(vendor_string, "CentaurHauls") != 0)
    365  1.1  christos 		return 0;
    366  1.1  christos 
    367  1.1  christos 	/* Check for Centaur Extended Feature Flags presence */
    368  1.1  christos 	eax = 0xC0000000;
    369  1.1  christos 	asm volatile ("pushl %%ebx; cpuid; popl	%%ebx"
    370  1.1  christos 		: "+a"(eax) : : "ecx", "edx");
    371  1.1  christos 	if (eax < 0xC0000001)
    372  1.1  christos 		return 0;
    373  1.1  christos 
    374  1.1  christos 	/* Read the Centaur Extended Feature Flags */
    375  1.1  christos 	eax = 0xC0000001;
    376  1.1  christos 	asm volatile ("pushl %%ebx; cpuid; popl %%ebx"
    377  1.1  christos 		: "+a"(eax), "=d"(edx) : : "ecx");
    378  1.1  christos 
    379  1.1  christos 	/* Fill up some flags */
    380  1.1  christos 	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
    381  1.1  christos 	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
    382  1.1  christos 
    383  1.1  christos 	return padlock_use_ace + padlock_use_rng;
    384  1.1  christos }
    385  1.1  christos 
    386  1.2  christos #ifndef OPENSSL_NO_AES
    387  1.2  christos /* Our own htonl()/ntohl() */
    388  1.2  christos static inline void
    389  1.2  christos padlock_bswapl(AES_KEY *ks)
    390  1.2  christos {
    391  1.2  christos 	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
    392  1.2  christos 	unsigned int *key = ks->rd_key;
    393  1.2  christos 
    394  1.2  christos 	while (i--) {
    395  1.2  christos 		asm volatile ("bswapl %0" : "+r"(*key));
    396  1.2  christos 		key++;
    397  1.2  christos 	}
    398  1.2  christos }
    399  1.2  christos #endif
    400  1.2  christos 
/* Force key reload from memory to the CPU microcode.
   Loading EFLAGS from the stack clears EFLAGS[30]
   which does the trick.
   The function simply pushes EFLAGS and pops it straight back. */
static inline void
padlock_reload_key(void)
{
	asm volatile ("pushfl; popfl");
}
    409  1.1  christos 
    410  1.1  christos #ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 *
 * In short: if EFLAGS[30] is set and cdata differs from
 * padlock_saved_context, EFLAGS is reloaded from the stack;
 * in every case padlock_saved_context is then set to cdata.
 */
static inline void
padlock_verify_context(struct padlock_cipher_data *cdata)
{
	asm volatile (
	"pushfl\n"			/* save EFLAGS on the stack */
"	btl	$30,(%%esp)\n"		/* CF = saved EFLAGS[30] */
"	jnc	1f\n"			/* bit clear: skip the reload */
"	cmpl	%2,%1\n"		/* cdata vs. padlock_saved_context */
"	je	1f\n"			/* same context: skip the reload */
"	popfl\n"			/* reload EFLAGS from the stack */
"	subl	$4,%%esp\n"		/* re-balance for the addl below */
"1:	addl	$4,%%esp\n"		/* drop the saved EFLAGS slot */
"	movl	%2,%0"			/* padlock_saved_context = cdata */
	:"+m"(padlock_saved_context)
	: "r"(padlock_saved_context), "r"(cdata) : "cc");
}
    436  1.1  christos 
/* Template for padlock_xcrypt_* modes */
/* BIG FAT WARNING:
 * 	The offsets used with 'leal' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 *
 * Each expansion defines
 *	static inline void *name(size_t cnt, struct padlock_cipher_data *cdata,
 *				 void *out, const void *inp)
 * which enters the asm with cdata in %eax, cnt in %ecx, out in %edi
 * and inp in %esi, points %edx at cdata+16 and %ebx at cdata+32,
 * executes the instruction given as 'rep_xcrypt' and returns whatever
 * is left in %eax.  %ebx is saved and restored around the instruction.
 */
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
static inline void *name(size_t cnt,		\
	struct padlock_cipher_data *cdata,	\
	void *out, const void *inp) 		\
{	void *iv; 				\
	asm volatile ( "pushl	%%ebx\n"	\
		"	leal	16(%0),%%edx\n"	\
		"	leal	32(%0),%%ebx\n"	\
			rep_xcrypt "\n"		\
		"	popl	%%ebx"		\
		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
		: "edx", "cc", "memory");	\
	return iv;				\
}
    458  1.1  christos 
    459  1.1  christos /* Generate all functions with appropriate opcodes */
    460  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
    461  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
    462  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
    463  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
    464  1.1  christos #endif
    465  1.1  christos 
/* The RNG call itself.
   Executes the xstore opcode with addr in %edi and edx_in in %edx
   and returns the value the instruction leaves in %eax.  The "=m"
   output tells the compiler that the unsigned at *addr is written. */
static inline unsigned int
padlock_xstore(void *addr, unsigned int edx_in)
{
	unsigned int eax_out;

	asm volatile (".byte 0x0f,0xa7,0xc0"	/* xstore */
	    : "=a"(eax_out),"=m"(*(unsigned *)addr)
	    : "D"(addr), "d" (edx_in)
	    );

	return eax_out;
}
    479  1.1  christos 
/* Why not inline 'rep movsd'? I failed to find information on what
 * value in Direction Flag one can expect and consequently have to
 * apply "better-safe-than-sorry" approach and assume "undefined."
 * I could explicitly clear it and restore the original value upon
 * return from padlock_aes_cipher, but it's presumably too much
 * trouble for too little gain...
 *
 * In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward
 * larger addresses unconditionally.
 *
 * Copies n bytes from src to dst one 'long' at a time and returns dst.
 * Only whole words are copied: n is rounded down to a multiple of
 * sizeof(long), and nothing at all is copied when n < sizeof(long).
 * The buffers are accessed as arrays of long.
 */
static inline unsigned char *
padlock_memcpy(void *dst,const void *src,size_t n)
{
	long       *d=dst;
	const long *s=src;

	/* Test the word count before every copy: a do/while here would
	   copy one word even for a zero count and then wrap the counter
	   around, running far past both buffers. */
	for (n /= sizeof(*d); n != 0; n--)
		*d++ = *s++;

	return dst;
}
    502  1.1  christos 
    503  1.1  christos #elif defined(_MSC_VER)
    504  1.1  christos /*
    505  1.1  christos  * Unlike GCC these are real functions. In order to minimize impact
    506  1.1  christos  * on performance we adhere to __fastcall calling convention in
    507  1.1  christos  * order to get two first arguments passed through %ecx and %edx.
    508  1.1  christos  * Which kind of suits very well, as instructions in question use
    509  1.1  christos  * both %ecx and %edx as input:-)
    510  1.1  christos  */
/* Emit the four opcode bytes 0xf3 0x0f 0xa7 <code> into the
 * instruction stream; 'code' is the last byte and selects which
 * instruction is emitted. */
#define REP_XCRYPT(code)		\
	_asm _emit 0xf3			\
	_asm _emit 0x0f _asm _emit 0xa7	\
	_asm _emit code
    515  1.1  christos 
/* BIG FAT WARNING:
 * 	The offsets used with 'lea' instructions
 * 	describe items of the 'padlock_cipher_data'
 * 	structure.
 *
 * Each expansion defines a __fastcall function
 *	name(cnt, cdata, outp, inp)
 * that copies edx into eax, points edx at eax+16 and ebx at eax+32,
 * loads edi/esi with outp/inp and then emits the instruction selected
 * by 'code'.  There is no return statement; the result is whatever
 * the instruction sequence leaves in eax.
 */
#define PADLOCK_XCRYPT_ASM(name,code)	\
static void * __fastcall 		\
	name (size_t cnt, void *cdata,	\
	void *outp, const void *inp)	\
{	_asm	mov	eax,edx		\
	_asm	lea	edx,[eax+16]	\
	_asm	lea	ebx,[eax+32]	\
	_asm	mov	edi,outp	\
	_asm	mov	esi,inp		\
	REP_XCRYPT(code)		\
}
    532  1.1  christos 
    533  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
    534  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
    535  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
    536  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
    537  1.1  christos 
/* The RNG call itself: moves ecx into edi and emits the opcode bytes
 * 0x0f 0xa7 0xc0.  There is no return statement; the result is
 * whatever is left in eax. */
static int __fastcall
padlock_xstore(void *outp,unsigned int code)
{	_asm	mov	edi,ecx
	_asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}
    543  1.1  christos 
/* Force a key reload by pushing EFLAGS and popping it straight back. */
static void __fastcall
padlock_reload_key(void)
{	_asm pushfd _asm popfd		}
    547  1.1  christos 
/* Key context tracing: if bit 30 of EFLAGS is set and ecx (cdata)
 * differs from padlock_saved_context, EFLAGS is reloaded from the
 * stack; in every case padlock_saved_context is then set to ecx.
 * The sub/add pair keeps the stack balanced on both paths. */
static void __fastcall
padlock_verify_context(void *cdata)
{	_asm	{
		pushfd
		bt	DWORD PTR[esp],30
		jnc	skip
		cmp	ecx,padlock_saved_context
		je	skip
		popfd
		sub	esp,4
	skip:	add	esp,4
		mov	padlock_saved_context,ecx
		}
}
    562  1.1  christos 
/* Load supported features of the CPU to see if the PadLock is
 * available.  Steps, in order:
 *   - check that bit 21 of EFLAGS can be toggled (CPUID present);
 *   - CPUID leaf 0: ebx/edx/ecx must spell "CentaurHauls";
 *   - CPUID leaf 0xC0000000: eax must be at least 0xC0000001;
 *   - CPUID leaf 0xC0000001: edx bits 6 and 7 both set stores 1 in
 *     padlock_use_ace, edx bits 2 and 3 both set stores 1 in
 *     padlock_use_rng.
 * eax is incremented once per feature found and there is no return
 * statement, so the result is whatever is in eax at the end.  The
 * flags are only ever set to 1 here, never cleared. */
static int
padlock_available(void)
{	_asm	{
		pushfd
		pop	eax
		mov	ecx,eax
		xor	eax,1<<21
		push	eax
		popfd
		pushfd
		pop	eax
		xor	eax,ecx
		bt	eax,21
		jnc	noluck
		mov	eax,0
		cpuid
		xor	eax,eax
		cmp	ebx,'tneC'
		jne	noluck
		cmp	edx,'Hrua'
		jne	noluck
		cmp	ecx,'slua'
		jne	noluck
		mov	eax,0xC0000000
		cpuid
		mov	edx,eax
		xor	eax,eax
		cmp	edx,0xC0000001
		jb	noluck
		mov	eax,0xC0000001
		cpuid
		xor	eax,eax
		bt	edx,6
		jnc	skip_a
		bt	edx,7
		jnc	skip_a
		mov	padlock_use_ace,1
		inc	eax
	skip_a:	bt	edx,2
		jnc	skip_r
		bt	edx,3
		jnc	skip_r
		mov	padlock_use_rng,1
		inc	eax
	skip_r:
	noluck:
		}
}
    611  1.1  christos 
/* Byte-swap 60 consecutive 32-bit words in place, starting at the
 * address passed in ecx (key).  EFLAGS is saved and restored around
 * the loop because the Direction Flag is cleared for lodsd/stosd. */
static void __fastcall
padlock_bswapl(void *key)
{	_asm	{
		pushfd
		cld
		mov	esi,ecx
		mov	edi,ecx
		mov	ecx,60
	up:	lodsd
		bswap	eax
		stosd
		loop	up
		popfd
		}
}
    627  1.1  christos 
/* MS actually specifies status of Direction Flag and compiler even
 * manages to compile following as 'rep movsd' all by itself...
 *
 * The byte count is rounded down to a multiple of 4 before it is
 * handed to memcpy(); the result is the destination pointer cast to
 * unsigned char *.
 */
#define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
    632  1.1  christos #endif
    633  1.1  christos 
    634  1.1  christos /* ===== AES encryption/decryption ===== */
    635  1.1  christos #ifndef OPENSSL_NO_AES
    636  1.1  christos 
     637  1.1  christos #if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
                        /* The cipher tables below use the short NID_aes_<bits>_cfb and
                         * NID_aes_<bits>_ofb names.  Each #if in this group defines the
                         * short name as an alias of the *_cfb128 / *_ofb128 spelling when
                         * only the long spelling is available. */
     638  1.1  christos #define NID_aes_128_cfb	NID_aes_128_cfb128
     639  1.1  christos #endif
     640  1.1  christos 
     641  1.1  christos #if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
     642  1.1  christos #define NID_aes_128_ofb	NID_aes_128_ofb128
     643  1.1  christos #endif
     644  1.1  christos 
     645  1.1  christos #if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
     646  1.1  christos #define NID_aes_192_cfb	NID_aes_192_cfb128
     647  1.1  christos #endif
     648  1.1  christos 
     649  1.1  christos #if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
     650  1.1  christos #define NID_aes_192_ofb	NID_aes_192_ofb128
     651  1.1  christos #endif
     652  1.1  christos 
     653  1.1  christos #if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
     654  1.1  christos #define NID_aes_256_cfb	NID_aes_256_cfb128
     655  1.1  christos #endif
     656  1.1  christos 
     657  1.1  christos #if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
     658  1.1  christos #define NID_aes_256_ofb	NID_aes_256_ofb128
     659  1.1  christos #endif
    660  1.1  christos 
     661  1.1  christos /* List of supported ciphers: AES with 128, 192 and 256 bit keys, each
                         * in ECB, CBC, CFB and OFB mode.  padlock_ciphers() returns this
                         * array when asked for the NID list and has one switch case per
                         * entry, so the two must be kept in sync. */
     662  1.1  christos static int padlock_cipher_nids[] = {
     663  1.1  christos 	NID_aes_128_ecb,
     664  1.1  christos 	NID_aes_128_cbc,
     665  1.1  christos 	NID_aes_128_cfb,
     666  1.1  christos 	NID_aes_128_ofb,
     667  1.1  christos 
     668  1.1  christos 	NID_aes_192_ecb,
     669  1.1  christos 	NID_aes_192_cbc,
     670  1.1  christos 	NID_aes_192_cfb,
     671  1.1  christos 	NID_aes_192_ofb,
     672  1.1  christos 
     673  1.1  christos 	NID_aes_256_ecb,
     674  1.1  christos 	NID_aes_256_cbc,
     675  1.1  christos 	NID_aes_256_cfb,
     676  1.1  christos 	NID_aes_256_ofb,
     677  1.1  christos };
                        /* Number of entries in padlock_cipher_nids[]. */
     678  1.1  christos static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids)/
     679  1.1  christos 				      sizeof(padlock_cipher_nids[0]));
    680  1.1  christos 
     681  1.1  christos /* Function prototypes: forward declarations of the two callbacks that
                         * the EVP_CIPHER tables built by DECLARE_AES_EVP refer to; both
                         * functions are defined further down in this file. */
     682  1.1  christos static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     683  1.1  christos 				const unsigned char *iv, int enc);
     684  1.1  christos static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     685  1.1  christos 			      const unsigned char *in, size_t nbytes);
     686  1.1  christos 
                        /* NEAREST_ALIGNED(ptr): ptr rounded up to the next 16-byte boundary
                         * (0 to 15 bytes are added), as unsigned char *.  ptr is evaluated
                         * twice.
                         * ALIGNED_CIPHER_DATA(ctx): the 16-byte aligned
                         * struct padlock_cipher_data located inside ctx->cipher_data.  The
                         * EVP_CIPHER definitions below ask for
                         * sizeof(struct padlock_cipher_data) + 16 bytes of context, which
                         * leaves room for that adjustment. */
     687  1.1  christos #define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +		\
     688  1.1  christos 	( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )	)
     689  1.1  christos #define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
     690  1.1  christos 	NEAREST_ALIGNED(ctx->cipher_data))
     691  1.1  christos 
                        /* Block size placed in each EVP_CIPHER by DECLARE_AES_EVP, selected
                         * by its umode argument: a full AES block for ECB and CBC, 1 for the
                         * byte-oriented OFB and CFB modes. */
     692  1.1  christos #define EVP_CIPHER_block_size_ECB	AES_BLOCK_SIZE
     693  1.1  christos #define EVP_CIPHER_block_size_CBC	AES_BLOCK_SIZE
     694  1.1  christos #define EVP_CIPHER_block_size_OFB	1
     695  1.1  christos #define EVP_CIPHER_block_size_CFB	1
    696  1.1  christos 
     697  1.1  christos /* Declaring so many ciphers by hand would be a pain.
     698  1.1  christos    Instead introduce a bit of preprocessor magic :-)
                        
                           DECLARE_AES_EVP(ksize,lmode,umode) defines one
                           static const EVP_CIPHER named padlock_aes_<ksize>_<lmode>:
                             ksize  key size in bits (128, 192 or 256); selects the NID and
                                    AES_KEY_SIZE_<ksize>
                             lmode  lower-case mode name used in the NID and object name
                             umode  upper-case mode name used for the block-size macro and
                                    the EVP_CIPH_<umode>_MODE flag
                           Every instance shares padlock_aes_init_key and
                           padlock_aes_cipher and reserves
                           sizeof(struct padlock_cipher_data) + 16 bytes so that
                           ALIGNED_CIPHER_DATA() can align the data to 16 bytes.
                           NOTE(review): the initialiser is positional; the meaning of the
                           NULL slots depends on the EVP_CIPHER layout, which is not visible
                           here - confirm against the EVP header when that struct changes. */
     699  1.1  christos #define	DECLARE_AES_EVP(ksize,lmode,umode)	\
     700  1.1  christos static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {	\
     701  1.1  christos 	NID_aes_##ksize##_##lmode,		\
     702  1.1  christos 	EVP_CIPHER_block_size_##umode,	\
     703  1.1  christos 	AES_KEY_SIZE_##ksize,		\
     704  1.1  christos 	AES_BLOCK_SIZE,			\
     705  1.1  christos 	0 | EVP_CIPH_##umode##_MODE,	\
     706  1.1  christos 	padlock_aes_init_key,		\
     707  1.1  christos 	padlock_aes_cipher,		\
     708  1.1  christos 	NULL,				\
     709  1.1  christos 	sizeof(struct padlock_cipher_data) + 16,	\
     710  1.1  christos 	EVP_CIPHER_set_asn1_iv,		\
     711  1.1  christos 	EVP_CIPHER_get_asn1_iv,		\
     712  1.1  christos 	NULL,				\
     713  1.1  christos 	NULL				\
     714  1.1  christos }
     715  1.1  christos 
     716  1.1  christos DECLARE_AES_EVP(128,ecb,ECB);
     717  1.1  christos DECLARE_AES_EVP(128,cbc,CBC);
     718  1.1  christos DECLARE_AES_EVP(128,cfb,CFB);
     719  1.1  christos DECLARE_AES_EVP(128,ofb,OFB);
     720  1.1  christos 
     721  1.1  christos DECLARE_AES_EVP(192,ecb,ECB);
     722  1.1  christos DECLARE_AES_EVP(192,cbc,CBC);
     723  1.1  christos DECLARE_AES_EVP(192,cfb,CFB);
     724  1.1  christos DECLARE_AES_EVP(192,ofb,OFB);
     725  1.1  christos 
     726  1.1  christos DECLARE_AES_EVP(256,ecb,ECB);
     727  1.1  christos DECLARE_AES_EVP(256,cbc,CBC);
     728  1.1  christos DECLARE_AES_EVP(256,cfb,CFB);
     729  1.1  christos DECLARE_AES_EVP(256,ofb,OFB);
    730  1.1  christos 
     731  1.1  christos static int
     732  1.1  christos padlock_ciphers (ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid)
     733  1.1  christos {
                        	/*
                        	 * Cipher selector for this engine.
                        	 *
                        	 * cipher == NULL: store padlock_cipher_nids in *nids and return
                        	 *                 the number of entries.
                        	 * cipher != NULL: store the EVP_CIPHER matching nid in *cipher
                        	 *                 and return 1; for an unknown nid store NULL
                        	 *                 and return 0.
                        	 * The e argument is not used.
                        	 * NOTE(review): nids is dereferenced without a NULL check in
                        	 * the list branch - confirm callers always pass it.
                        	 */
     734  1.1  christos 	/* No specific cipher => return a list of supported nids ... */
     735  1.1  christos 	if (!cipher) {
     736  1.1  christos 		*nids = padlock_cipher_nids;
     737  1.1  christos 		return padlock_cipher_nids_num;
     738  1.1  christos 	}
     739  1.1  christos 
     740  1.1  christos 	/* ... or the requested "cipher" otherwise */
     741  1.1  christos 	switch (nid) {
     742  1.1  christos 	  case NID_aes_128_ecb:
     743  1.1  christos 	    *cipher = &padlock_aes_128_ecb;
     744  1.1  christos 	    break;
     745  1.1  christos 	  case NID_aes_128_cbc:
     746  1.1  christos 	    *cipher = &padlock_aes_128_cbc;
     747  1.1  christos 	    break;
     748  1.1  christos 	  case NID_aes_128_cfb:
     749  1.1  christos 	    *cipher = &padlock_aes_128_cfb;
     750  1.1  christos 	    break;
     751  1.1  christos 	  case NID_aes_128_ofb:
     752  1.1  christos 	    *cipher = &padlock_aes_128_ofb;
     753  1.1  christos 	    break;
     754  1.1  christos 
     755  1.1  christos 	  case NID_aes_192_ecb:
     756  1.1  christos 	    *cipher = &padlock_aes_192_ecb;
     757  1.1  christos 	    break;
     758  1.1  christos 	  case NID_aes_192_cbc:
     759  1.1  christos 	    *cipher = &padlock_aes_192_cbc;
     760  1.1  christos 	    break;
     761  1.1  christos 	  case NID_aes_192_cfb:
     762  1.1  christos 	    *cipher = &padlock_aes_192_cfb;
     763  1.1  christos 	    break;
     764  1.1  christos 	  case NID_aes_192_ofb:
     765  1.1  christos 	    *cipher = &padlock_aes_192_ofb;
     766  1.1  christos 	    break;
     767  1.1  christos 
     768  1.1  christos 	  case NID_aes_256_ecb:
     769  1.1  christos 	    *cipher = &padlock_aes_256_ecb;
     770  1.1  christos 	    break;
     771  1.1  christos 	  case NID_aes_256_cbc:
     772  1.1  christos 	    *cipher = &padlock_aes_256_cbc;
     773  1.1  christos 	    break;
     774  1.1  christos 	  case NID_aes_256_cfb:
     775  1.1  christos 	    *cipher = &padlock_aes_256_cfb;
     776  1.1  christos 	    break;
     777  1.1  christos 	  case NID_aes_256_ofb:
     778  1.1  christos 	    *cipher = &padlock_aes_256_ofb;
     779  1.1  christos 	    break;
     780  1.1  christos 
     781  1.1  christos 	  default:
     782  1.1  christos 	    /* Sorry, we don't support this NID */
     783  1.1  christos 	    *cipher = NULL;
     784  1.1  christos 	    return 0;
     785  1.1  christos 	}
     786  1.1  christos 
     787  1.1  christos 	return 1;
     788  1.1  christos }
    789  1.1  christos 
     790  1.1  christos /* Prepare the encryption key for PadLock usage.
                         *
                         * ctx  cipher context; its aligned padlock_cipher_data is cleared
                         *      and then filled with the control word and key material
                         * key  raw AES key; its length is taken from the context
                         * iv   not used by this function
                         * enc  non-zero for encryption; selects the software key schedule
                         *      direction for 192/256-bit keys in ECB and CBC mode
                         *
                         * Returns 1 on success, 0 when key is NULL or the context key
                         * length is not 128, 192 or 256 bits.
                         *
                         * NOTE(review): the return values of AES_set_encrypt_key() and
                         * AES_set_decrypt_key() are ignored.
                         * NOTE(review): cword.b.encdec is derived from ctx->encrypt while
                         * the key schedule direction uses the enc argument; presumably the
                         * two always agree - confirm.
                         */
     791  1.1  christos static int
     792  1.1  christos padlock_aes_init_key (EVP_CIPHER_CTX *ctx, const unsigned char *key,
     793  1.1  christos 		      const unsigned char *iv, int enc)
     794  1.1  christos {
     795  1.1  christos 	struct padlock_cipher_data *cdata;
     796  1.1  christos 	int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;	/* in bits */
     797  1.1  christos 
     798  1.1  christos 	if (key==NULL) return 0;	/* ERROR */
     799  1.1  christos 
     800  1.1  christos 	cdata = ALIGNED_CIPHER_DATA(ctx);
     801  1.1  christos 	memset(cdata, 0, sizeof(struct padlock_cipher_data));
     802  1.1  christos 
     803  1.1  christos 	/* Prepare Control word. */
                        	/* OFB always runs with encdec == 0; every other mode sets encdec
                        	   to 1 when decrypting. */
     804  1.1  christos 	if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
     805  1.1  christos 		cdata->cword.b.encdec = 0;
     806  1.1  christos 	else
     807  1.1  christos 		cdata->cword.b.encdec = (ctx->encrypt == 0);
                        	/* 128/192/256 bits give rounds = 10/12/14 and ksize = 0/1/2.
                        	   An unsupported key_len is only rejected by the switch below. */
     808  1.1  christos 	cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
     809  1.1  christos 	cdata->cword.b.ksize = (key_len - 128) / 64;
     810  1.1  christos 
     811  1.1  christos 	switch(key_len) {
     812  1.1  christos 		case 128:
     813  1.1  christos 			/* PadLock can generate an extended key for
     814  1.1  christos 			   AES128 in hardware */
     815  1.1  christos 			memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
     816  1.1  christos 			cdata->cword.b.keygen = 0;
     817  1.1  christos 			break;
     818  1.1  christos 
     819  1.1  christos 		case 192:
     820  1.1  christos 		case 256:
     821  1.1  christos 			/* Generate an extended AES key in software.
     822  1.1  christos 			   Needed for AES192/AES256 */
     823  1.1  christos 			/* Well, the above applies to Stepping 8 CPUs
     824  1.1  christos 			   and is listed as hardware errata. They most
     825  1.1  christos 			   likely will fix it at some point and then
     826  1.1  christos 			   a check for stepping would be due here. */
                        			/* CFB and OFB use the encryption key schedule
                        			   in both directions. */
     827  1.1  christos 			if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
     828  1.1  christos 			    EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE ||
     829  1.1  christos 			    enc)
     830  1.1  christos 				AES_set_encrypt_key(key, key_len, &cdata->ks);
     831  1.1  christos 			else
     832  1.1  christos 				AES_set_decrypt_key(key, key_len, &cdata->ks);
     833  1.1  christos #ifndef AES_ASM
     834  1.1  christos 			/* OpenSSL C functions use byte-swapped extended key. */
     835  1.1  christos 			padlock_bswapl(&cdata->ks);
     836  1.1  christos #endif
     837  1.1  christos 			cdata->cword.b.keygen = 1;
     838  1.1  christos 			break;
     839  1.1  christos 
     840  1.1  christos 		default:
     841  1.1  christos 			/* ERROR */
     842  1.1  christos 			return 0;
     843  1.1  christos 	}
     844  1.1  christos 
     845  1.1  christos 	/*
     846  1.1  christos 	 * This is done to cover for cases when user reuses the
     847  1.1  christos 	 * context for new key. The catch is that if we don't do
     848  1.1  christos 	 * this, padlock_aes_cipher might proceed with old key...
     849  1.1  christos 	 */
     850  1.1  christos 	padlock_reload_key ();
     851  1.1  christos 
     852  1.1  christos 	return 1;
     853  1.1  christos }
    854  1.1  christos 
     855  1.1  christos /*
     856  1.1  christos  * Simplified version of padlock_aes_cipher() used when
     857  1.1  christos  * 1) both input and output buffers are at aligned addresses.
     858  1.1  christos  * or when
     859  1.1  christos  * 2) running on a newer CPU that doesn't require aligned buffers.
                         *
                         * Processes nbytes/AES_BLOCK_SIZE whole blocks from in_arg to
                         * out_arg in a single hardware call; padlock_aes_cipher() only
                         * calls it when nbytes is a multiple of AES_BLOCK_SIZE.  For CBC,
                         * CFB and OFB the IV is copied from ctx->iv into the aligned
                         * cdata->iv before the call and the resulting IV is copied back to
                         * ctx->iv afterwards.  cdata->iv is wiped before returning.
                         *
                         * Returns 1 on success, 0 when the cipher mode is not one of
                         * ECB, CBC, CFB or OFB.
     860  1.1  christos  */
     861  1.1  christos static int
     862  1.1  christos padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
     863  1.1  christos 		const unsigned char *in_arg, size_t nbytes)
     864  1.1  christos {
     865  1.1  christos 	struct padlock_cipher_data *cdata;
     866  1.1  christos 	void  *iv;	/* IV pointer handed back by the cbc/cfb primitives */
     867  1.1  christos 
     868  1.1  christos 	cdata = ALIGNED_CIPHER_DATA(ctx);
     869  1.1  christos 	padlock_verify_context(cdata);
     870  1.1  christos 
     871  1.1  christos 	switch (EVP_CIPHER_CTX_mode(ctx)) {
     872  1.1  christos 	case EVP_CIPH_ECB_MODE:
     873  1.1  christos 		padlock_xcrypt_ecb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
     874  1.1  christos 		break;
     875  1.1  christos 
     876  1.1  christos 	case EVP_CIPH_CBC_MODE:
     877  1.1  christos 		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
     878  1.1  christos 		iv = padlock_xcrypt_cbc(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
     879  1.1  christos 		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
     880  1.1  christos 		break;
     881  1.1  christos 
     882  1.1  christos 	case EVP_CIPH_CFB_MODE:
     883  1.1  christos 		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
     884  1.1  christos 		iv = padlock_xcrypt_cfb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
     885  1.1  christos 		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
     886  1.1  christos 		break;
     887  1.1  christos 
     888  1.1  christos 	case EVP_CIPH_OFB_MODE:
                        		/* unlike CBC/CFB the updated IV is read back from
                        		   cdata->iv itself */
     889  1.1  christos 		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
     890  1.1  christos 		padlock_xcrypt_ofb(nbytes/AES_BLOCK_SIZE, cdata, out_arg, in_arg);
     891  1.1  christos 		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
     892  1.1  christos 		break;
     893  1.1  christos 
     894  1.1  christos 	default:
     895  1.1  christos 		return 0;
     896  1.1  christos 	}
     897  1.1  christos 
                        	/* do not leave a copy of the IV in the context data */
     898  1.1  christos 	memset(cdata->iv, 0, AES_BLOCK_SIZE);
     899  1.1  christos 
     900  1.1  christos 	return 1;
     901  1.1  christos }
    902  1.1  christos 
     903  1.1  christos #ifndef  PADLOCK_CHUNK
                        /* Size in bytes of the pieces padlock_aes_cipher() works on when it
                         * has to realign its buffers.  May be overridden at build time; the
                         * #if below rejects values under 16 or not a power of 2. */
     904  1.1  christos # define PADLOCK_CHUNK	512	/* Must be a power of 2, not smaller than 16 */
     905  1.1  christos #endif
     906  1.1  christos #if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
     907  1.1  christos # error "insane PADLOCK_CHUNK..."
     908  1.1  christos #endif
    909  1.1  christos 
     910  1.1  christos /* Re-align the arguments to 16-byte boundaries and run the
     911  1.1  christos    encryption function itself. This function is not AES-specific.
                        
                           ctx      cipher context; ctx->iv and ctx->num are read and updated
                           out_arg  destination buffer, any alignment
                           in_arg   source buffer, any alignment
                           nbytes   number of bytes to process
                        
                           Returns 1 on success.  Returns 0 when ctx->num is not below
                           AES_BLOCK_SIZE in CFB/OFB mode, when the scratch buffer cannot be
                           allocated, or when the cipher mode is not ECB, CBC, CFB or OFB.
                        
                           Outline:
                           1. In CFB/OFB mode, if ctx->num is non-zero, first use up the
                              remaining bytes of the current IV block.
                           2. If nothing needs realigning and nbytes is a whole number of
                              blocks, hand over to padlock_aes_cipher_omnivorous().
                           3. Otherwise work in pieces of at most PADLOCK_CHUNK bytes.  When
                              the output is misaligned the hardware writes into an aligned
                              heap scratch buffer that is copied out afterwards; when only
                              the input is misaligned it is first copied into the output
                              buffer and transformed in place.
                           4. In CFB/OFB mode a trailing partial block is handled byte by
                              byte and its length is stored in ctx->num.
                           5. The scratch buffer and cdata->iv are wiped before returning. */
     912  1.1  christos static int
     913  1.1  christos padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
     914  1.1  christos 		   const unsigned char *in_arg, size_t nbytes)
     915  1.1  christos {
     916  1.1  christos 	struct padlock_cipher_data *cdata;
     917  1.1  christos 	const  void *inp;	/* aligned input handed to the hardware */
     918  1.2  christos 	unsigned char  *out, *tofree;	/* aligned output; heap block to free (or NULL) */
     919  1.1  christos 	void  *iv;
     920  1.1  christos 	int    inp_misaligned, out_misaligned, realign_in_loop;
     921  1.1  christos 	size_t chunk, allocated=0;
     922  1.1  christos 
     923  1.1  christos 	/* ctx->num is maintained in byte-oriented modes,
     924  1.1  christos 	   such as CFB and OFB... */
     925  1.1  christos 	if ((chunk = ctx->num)) { /* borrow chunk variable */
     926  1.1  christos 		unsigned char *ivp=ctx->iv;
     927  1.1  christos 
     928  1.1  christos 		switch (EVP_CIPHER_CTX_mode(ctx)) {
     929  1.1  christos 		case EVP_CIPH_CFB_MODE:
     930  1.1  christos 			if (chunk >= AES_BLOCK_SIZE)
     931  1.1  christos 				return 0; /* bogus value */
     932  1.1  christos 
                        			/* in both directions the ciphertext byte ends up
                        			   in the IV block */
     933  1.1  christos 			if (ctx->encrypt)
     934  1.1  christos 				while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
     935  1.1  christos 					ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
     936  1.1  christos 					chunk++, nbytes--;
     937  1.1  christos 				}
     938  1.1  christos 			else	while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
     939  1.1  christos 					unsigned char c = *(in_arg++);
     940  1.1  christos 					*(out_arg++) = c ^ ivp[chunk];
     941  1.1  christos 					ivp[chunk++] = c, nbytes--;
     942  1.1  christos 				}
     943  1.1  christos 
                        			/* wraps to 0 once the block is used up */
     944  1.1  christos 			ctx->num = chunk%AES_BLOCK_SIZE;
     945  1.1  christos 			break;
     946  1.1  christos 		case EVP_CIPH_OFB_MODE:
     947  1.1  christos 			if (chunk >= AES_BLOCK_SIZE)
     948  1.1  christos 				return 0; /* bogus value */
     949  1.1  christos 
     950  1.1  christos 			while (chunk<AES_BLOCK_SIZE && nbytes!=0) {
     951  1.1  christos 				*(out_arg++) = *(in_arg++) ^ ivp[chunk];
     952  1.1  christos 				chunk++, nbytes--;
     953  1.1  christos 			}
     954  1.1  christos 
     955  1.1  christos 			ctx->num = chunk%AES_BLOCK_SIZE;
     956  1.1  christos 			break;
     957  1.1  christos 		}
     958  1.1  christos 	}
     959  1.1  christos 
                        	/* everything was consumed by the partial-block step above,
                        	   or there was nothing to do in the first place */
     960  1.1  christos 	if (nbytes == 0)
     961  1.1  christos 		return 1;
     962  1.1  christos #if 0
     963  1.1  christos 	if (nbytes % AES_BLOCK_SIZE)
     964  1.1  christos 		return 0; /* are we expected to do tail processing? */
     965  1.1  christos #else
     966  1.1  christos 	/* nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC
     967  1.1  christos 	   modes and arbitrary value in byte-oriented modes, such as
     968  1.1  christos 	   CFB and OFB... */
     969  1.1  christos #endif
     970  1.1  christos 
     971  1.1  christos 	/* VIA promises CPUs that won't require alignment in the future.
     972  1.1  christos 	   For now padlock_aes_align_required is initialized to 1 and
     973  1.1  christos 	   the condition is never met... */
     974  1.1  christos 	/* C7 core is capable to manage unaligned input in non-ECB[!]
     975  1.1  christos 	   mode, but performance penalties appear to be approximately
     976  1.1  christos 	   same as for software alignment below or ~3x. They promise to
     977  1.1  christos 	   improve it in the future, but for now we can just as well
     978  1.1  christos 	   pretend that it can only handle aligned input... */
     979  1.1  christos 	if (!padlock_aes_align_required && (nbytes%AES_BLOCK_SIZE)==0)
     980  1.1  christos 		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
     981  1.1  christos 
                        	/* non-zero when the pointer is not on a 16-byte boundary */
     982  1.1  christos 	inp_misaligned = (((size_t)in_arg) & 0x0F);
     983  1.1  christos 	out_misaligned = (((size_t)out_arg) & 0x0F);
     984  1.1  christos 
     985  1.1  christos 	/* Note that even if output is aligned and input not,
     986  1.1  christos 	 * I still prefer to loop instead of copy the whole
     987  1.1  christos 	 * input and then encrypt in one stroke. This is done
     988  1.1  christos 	 * in order to improve L1 cache utilization... */
     989  1.1  christos 	realign_in_loop = out_misaligned|inp_misaligned;
     990  1.1  christos 
     991  1.1  christos 	if (!realign_in_loop && (nbytes%AES_BLOCK_SIZE)==0)
     992  1.1  christos 		return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
     993  1.1  christos 
     994  1.1  christos 	/* this takes one "if" out of the loops */
                        	/* The first piece is nbytes % PADLOCK_CHUNK (or a full chunk when
                        	   that is zero); every later piece is exactly PADLOCK_CHUNK. */
     995  1.1  christos 	chunk  = nbytes;
     996  1.1  christos 	chunk %= PADLOCK_CHUNK;
     997  1.1  christos 	if (chunk==0) chunk = PADLOCK_CHUNK;
     998  1.1  christos 
     999  1.1  christos 	if (out_misaligned) {
    1000  1.1  christos 		/* optimize for small input */
    1001  1.1  christos 		allocated = (chunk<nbytes?PADLOCK_CHUNK:nbytes);
                        		/* 0x10 spare bytes so NEAREST_ALIGNED() can round up */
    1002  1.2  christos 		tofree = malloc(0x10 + allocated);
    1003  1.2  christos 		if (tofree == NULL)
    1004  1.2  christos 			return 0;
    1005  1.2  christos 		out = NEAREST_ALIGNED(tofree);
    1006  1.1  christos 	}
    1007  1.2  christos 	else {
    1008  1.1  christos 		out = out_arg;
    1009  1.2  christos 		tofree = NULL;
    1010  1.2  christos 	}
    1011  1.1  christos 
    1012  1.1  christos 	cdata = ALIGNED_CIPHER_DATA(ctx);
    1013  1.1  christos 	padlock_verify_context(cdata);
    1014  1.1  christos 
    1015  1.1  christos 	switch (EVP_CIPHER_CTX_mode(ctx)) {
    1016  1.1  christos 	case EVP_CIPH_ECB_MODE:
    1017  1.1  christos 		do	{
                        			/* misaligned input is staged in out and then
                        			   transformed in place */
    1018  1.1  christos 			if (inp_misaligned)
    1019  1.1  christos 				inp = padlock_memcpy(out, in_arg, chunk);
    1020  1.1  christos 			else
    1021  1.1  christos 				inp = in_arg;
    1022  1.1  christos 			in_arg += chunk;
    1023  1.1  christos 
    1024  1.1  christos 			padlock_xcrypt_ecb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
    1025  1.1  christos 
                        			/* scratch buffer in use: copy the result out and
                        			   reuse the buffer; otherwise just advance */
    1026  1.1  christos 			if (out_misaligned)
    1027  1.1  christos 				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
    1028  1.1  christos 			else
    1029  1.1  christos 				out     = out_arg+=chunk;
    1030  1.1  christos 
    1031  1.1  christos 			nbytes -= chunk;
    1032  1.1  christos 			chunk   = PADLOCK_CHUNK;
    1033  1.1  christos 		} while (nbytes);
    1034  1.1  christos 		break;
    1035  1.1  christos 
    1036  1.1  christos 	case EVP_CIPH_CBC_MODE:
    1037  1.1  christos 		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
                        		/* enter the loop in the middle: the first pass keeps
                        		   the short first chunk and needs no IV reload */
    1038  1.1  christos 		goto cbc_shortcut;
    1039  1.1  christos 		do	{
    1040  1.1  christos 			if (iv != cdata->iv)
    1041  1.1  christos 				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
    1042  1.1  christos 			chunk = PADLOCK_CHUNK;
    1043  1.1  christos 		cbc_shortcut: /* optimize for small input */
    1044  1.1  christos 			if (inp_misaligned)
    1045  1.1  christos 				inp = padlock_memcpy(out, in_arg, chunk);
    1046  1.1  christos 			else
    1047  1.1  christos 				inp = in_arg;
    1048  1.1  christos 			in_arg += chunk;
    1049  1.1  christos 
    1050  1.1  christos 			iv = padlock_xcrypt_cbc(chunk/AES_BLOCK_SIZE, cdata, out, inp);
    1051  1.1  christos 
    1052  1.1  christos 			if (out_misaligned)
    1053  1.1  christos 				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
    1054  1.1  christos 			else
    1055  1.1  christos 				out     = out_arg+=chunk;
    1056  1.1  christos 
    1057  1.1  christos 		} while (nbytes -= chunk);
    1058  1.1  christos 		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
    1059  1.1  christos 		break;
    1060  1.1  christos 
    1061  1.1  christos 	case EVP_CIPH_CFB_MODE:
    1062  1.1  christos 		memcpy (iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
                        		/* only whole blocks go through the hardware loop; a
                        		   remainder is handled at cfb_skiploop */
    1063  1.1  christos 		chunk &= ~(AES_BLOCK_SIZE-1);
    1064  1.1  christos 		if (chunk)	goto cfb_shortcut;
    1065  1.1  christos 		else		goto cfb_skiploop;
    1066  1.1  christos 		do	{
    1067  1.1  christos 			if (iv != cdata->iv)
    1068  1.1  christos 				memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
    1069  1.1  christos 			chunk = PADLOCK_CHUNK;
    1070  1.1  christos 		cfb_shortcut: /* optimize for small input */
    1071  1.1  christos 			if (inp_misaligned)
    1072  1.1  christos 				inp = padlock_memcpy(out, in_arg, chunk);
    1073  1.1  christos 			else
    1074  1.1  christos 				inp = in_arg;
    1075  1.1  christos 			in_arg += chunk;
    1076  1.1  christos 
    1077  1.1  christos 			iv = padlock_xcrypt_cfb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
    1078  1.1  christos 
    1079  1.1  christos 			if (out_misaligned)
    1080  1.1  christos 				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
    1081  1.1  christos 			else
    1082  1.1  christos 				out     = out_arg+=chunk;
    1083  1.1  christos 
    1084  1.1  christos 			nbytes -= chunk;
    1085  1.1  christos 		} while (nbytes >= AES_BLOCK_SIZE);
    1086  1.1  christos 
    1087  1.1  christos 		cfb_skiploop:
    1088  1.1  christos 		if (nbytes) {
    1089  1.1  christos 			unsigned char *ivp = cdata->iv;
    1090  1.1  christos 
    1091  1.1  christos 			if (iv != ivp) {
    1092  1.1  christos 				memcpy(ivp, iv, AES_BLOCK_SIZE);
    1093  1.1  christos 				iv = ivp;
    1094  1.1  christos 			}
                        			/* remember how many bytes of this block are used
                        			   so the next call can continue from there */
    1095  1.1  christos 			ctx->num = nbytes;
    1096  1.1  christos 			if (cdata->cword.b.encdec) {
                        				/* decrypting: run the single ECB call on
                        				   the IV with encdec cleared, then restore
                        				   the control word */
    1097  1.1  christos 				cdata->cword.b.encdec=0;
    1098  1.1  christos 				padlock_reload_key();
    1099  1.1  christos 				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
    1100  1.1  christos 				cdata->cword.b.encdec=1;
    1101  1.1  christos 				padlock_reload_key();
    1102  1.1  christos 				while(nbytes) {
    1103  1.1  christos 					unsigned char c = *(in_arg++);
    1104  1.1  christos 					*(out_arg++) = c ^ *ivp;
    1105  1.1  christos 					*(ivp++) = c, nbytes--;
    1106  1.1  christos 				}
    1107  1.1  christos 			}
    1108  1.1  christos 			else {	padlock_reload_key();
    1109  1.1  christos 				padlock_xcrypt_ecb(1,cdata,ivp,ivp);
    1110  1.1  christos 				padlock_reload_key();
    1111  1.1  christos 				while (nbytes) {
    1112  1.1  christos 					*ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
    1113  1.1  christos 					ivp++, nbytes--;
    1114  1.1  christos 				}
    1115  1.1  christos 			}
    1116  1.1  christos 		}
    1117  1.1  christos 
    1118  1.1  christos 		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
    1119  1.1  christos 		break;
    1120  1.1  christos 
    1121  1.1  christos 	case EVP_CIPH_OFB_MODE:
    1122  1.1  christos 		memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
                        		/* as in CFB, only whole blocks are processed by the
                        		   loop; the tail follows below */
    1123  1.1  christos 		chunk &= ~(AES_BLOCK_SIZE-1);
    1124  1.1  christos 		if (chunk) do	{
    1125  1.1  christos 			if (inp_misaligned)
    1126  1.1  christos 				inp = padlock_memcpy(out, in_arg, chunk);
    1127  1.1  christos 			else
    1128  1.1  christos 				inp = in_arg;
    1129  1.1  christos 			in_arg += chunk;
    1130  1.1  christos 
    1131  1.1  christos 			padlock_xcrypt_ofb(chunk/AES_BLOCK_SIZE, cdata, out, inp);
    1132  1.1  christos 
    1133  1.1  christos 			if (out_misaligned)
    1134  1.1  christos 				out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
    1135  1.1  christos 			else
    1136  1.1  christos 				out     = out_arg+=chunk;
    1137  1.1  christos 
    1138  1.1  christos 			nbytes -= chunk;
    1139  1.1  christos 			chunk   = PADLOCK_CHUNK;
    1140  1.1  christos 		} while (nbytes >= AES_BLOCK_SIZE);
    1141  1.1  christos 
    1142  1.1  christos 		if (nbytes) {
    1143  1.1  christos 			unsigned char *ivp = cdata->iv;
    1144  1.1  christos 
    1145  1.1  christos 			ctx->num = nbytes;
    1146  1.1  christos 			padlock_reload_key();	/* empirically found */
    1147  1.1  christos 			padlock_xcrypt_ecb(1,cdata,ivp,ivp);
    1148  1.1  christos 			padlock_reload_key();	/* empirically found */
    1149  1.1  christos 			while (nbytes) {
    1150  1.1  christos 				*(out_arg++) = *(in_arg++) ^ *ivp;
    1151  1.1  christos 				ivp++, nbytes--;
    1152  1.1  christos 			}
    1153  1.1  christos 		}
    1154  1.1  christos 
    1155  1.1  christos 		memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
    1156  1.1  christos 		break;
    1157  1.1  christos 
    1158  1.1  christos 	default:
                        		/* unsupported mode: release the scratch buffer, if any */
    1159  1.2  christos 		free(tofree);
    1160  1.1  christos 		return 0;
    1161  1.1  christos 	}
    1162  1.1  christos 
    1163  1.1  christos 	/* Clean the realign buffer if it was used */
                        	/* volatile stores so the wipe is not optimized away */
    1164  1.1  christos 	if (out_misaligned) {
    1165  1.1  christos 		volatile unsigned long *p=(void *)out;
    1166  1.1  christos 		size_t   n = allocated/sizeof(*p);
    1167  1.1  christos 		while (n--) *p++=0;
    1168  1.1  christos 	}
    1169  1.1  christos 
    1170  1.1  christos 	memset(cdata->iv, 0, AES_BLOCK_SIZE);
    1171  1.2  christos 	free(tofree);
    1172  1.1  christos 
    1173  1.1  christos 	return 1;
    1174  1.1  christos }
   1175  1.1  christos 
   1176  1.1  christos #endif /* OPENSSL_NO_AES */
   1177  1.1  christos 
   1178  1.1  christos /* ===== Random Number Generator ===== */
   1179  1.1  christos /*
   1180  1.1  christos  * This code is not engaged. The reason is that it does not comply
   1181  1.1  christos  * with recommendations for VIA RNG usage for secure applications
   1182  1.1  christos  * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
   1183  1.1  christos  * provide meaningful error control...
   1184  1.1  christos  */
    1185  1.1  christos /* Wrapper that provides an interface between the API and
    1186  1.1  christos    the raw PadLock RNG.
                        
                           Fills output with count bytes: 8 bytes per padlock_xstore() call
                           written straight into output while at least 8 are needed, then one
                           byte at a time through the local buf for the remainder.
                        
                           Returns 1 on success and 0 as soon as the status word returned by
                           padlock_xstore() fails a check: bit 6 clear, any of bits 10..14
                           set, or a byte count in the low 5 bits that is neither 0 nor the
                           expected 8 (respectively 1).
                        
                           NOTE(review): a byte count of 0 is retried without any bound, so
                           the loops spin for as long as the RNG reports no data. */
    1187  1.1  christos static int
    1188  1.1  christos padlock_rand_bytes(unsigned char *output, int count)
    1189  1.1  christos {
    1190  1.1  christos 	unsigned int eax, buf;
    1191  1.1  christos 
    1192  1.1  christos 	while (count >= 8) {
    1193  1.1  christos 		eax = padlock_xstore(output, 0);
    1194  1.1  christos 		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
    1195  1.1  christos 		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
    1196  1.1  christos 		if (eax&(0x1F<<10))	return 0;
    1197  1.1  christos 		if ((eax&0x1F)==0)	continue; /* no data, retry... */
    1198  1.1  christos 		if ((eax&0x1F)!=8)	return 0; /* fatal failure...  */
    1199  1.1  christos 		output += 8;
    1200  1.1  christos 		count  -= 8;
    1201  1.1  christos 	}
    1202  1.1  christos 	while (count > 0) {
    1203  1.1  christos 		eax = padlock_xstore(&buf, 3);
    1204  1.1  christos 		if (!(eax&(1<<6)))	return 0; /* RNG disabled */
    1205  1.1  christos 		/* this ---vv--- covers DC bias, Raw Bits and String Filter */
    1206  1.1  christos 		if (eax&(0x1F<<10))	return 0;
    1207  1.1  christos 		if ((eax&0x1F)==0)	continue; /* no data, retry... */
    1208  1.1  christos 		if ((eax&0x1F)!=1)	return 0; /* fatal failure...  */
    1209  1.1  christos 		*output++ = (unsigned char)buf;
    1210  1.1  christos 		count--;
    1211  1.1  christos 	}
                        	/* volatile store: wipe the last random byte from the stack */
    1212  1.1  christos 	*(volatile unsigned int *)&buf=0;
    1213  1.1  christos 
    1214  1.1  christos 	return 1;
    1215  1.1  christos }
   1216  1.1  christos 
    1217  1.1  christos /* Dummy but necessary function: fills the "rand status" slot of
                           padlock_rand and unconditionally returns 1. */
    1218  1.1  christos static int
    1219  1.1  christos padlock_rand_status(void)
    1220  1.1  christos {
    1221  1.1  christos 	return 1;
    1222  1.1  christos }
   1223  1.1  christos 
    1224  1.1  christos /* Prepare structure for registration: RAND_METHOD table for the
                           PadLock RNG.  Both the "bytes" and "pseudorand" slots point at
                           padlock_rand_bytes; seed, cleanup and add are left NULL. */
    1225  1.1  christos static RAND_METHOD padlock_rand = {
    1226  1.1  christos 	NULL,			/* seed */
    1227  1.1  christos 	padlock_rand_bytes,	/* bytes */
    1228  1.1  christos 	NULL,			/* cleanup */
    1229  1.1  christos 	NULL,			/* add */
    1230  1.1  christos 	padlock_rand_bytes,	/* pseudorand */
    1231  1.1  christos 	padlock_rand_status,	/* rand status */
    1232  1.1  christos };
   1233  1.1  christos 
   1234  1.1  christos #else  /* !COMPILE_HW_PADLOCK */
    1235  1.1  christos #ifndef OPENSSL_NO_DYNAMIC_ENGINE
                        /* Stub used when PadLock support is not compiled in (the #else branch
                         * of COMPILE_HW_PADLOCK): bind_engine() ignores its arguments and
                         * always returns 0.  The separate prototype precedes the definition
                         * so that the exported function has a prior declaration.
                         * NOTE(review): 0 presumably tells the dynamic engine loader that
                         * binding failed - confirm against the ENGINE documentation. */
    1236  1.1  christos OPENSSL_EXPORT
    1237  1.2  christos int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
    1238  1.2  christos OPENSSL_EXPORT
    1239  1.1  christos int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) { return 0; }
    1240  1.1  christos IMPLEMENT_DYNAMIC_CHECK_FN()
    1241  1.1  christos #endif
   1242  1.1  christos #endif /* COMPILE_HW_PADLOCK */
   1243  1.1  christos 
   1244  1.1  christos #endif /* !OPENSSL_NO_HW_PADLOCK */
   1245  1.1  christos #endif /* !OPENSSL_NO_HW */
   1246