Home | History | Annotate | Line # | Download | only in engines
e_padlock.c revision 1.1.1.1
      1  1.1  christos /*-
      2  1.1  christos  * Support for VIA PadLock Advanced Cryptography Engine (ACE)
      3  1.1  christos  * Written by Michal Ludvig <michal (at) logix.cz>
      4  1.1  christos  *            http://www.logix.cz/michal
      5  1.1  christos  *
      6  1.1  christos  * Big thanks to Andy Polyakov for a help with optimization,
      7  1.1  christos  * assembler fixes, port to MS Windows and a lot of other
      8  1.1  christos  * valuable work on this engine!
      9  1.1  christos  */
     10  1.1  christos 
     11  1.1  christos /* ====================================================================
     12  1.1  christos  * Copyright (c) 1999-2001 The OpenSSL Project.  All rights reserved.
     13  1.1  christos  *
     14  1.1  christos  * Redistribution and use in source and binary forms, with or without
     15  1.1  christos  * modification, are permitted provided that the following conditions
     16  1.1  christos  * are met:
     17  1.1  christos  *
     18  1.1  christos  * 1. Redistributions of source code must retain the above copyright
     19  1.1  christos  *    notice, this list of conditions and the following disclaimer.
     20  1.1  christos  *
     21  1.1  christos  * 2. Redistributions in binary form must reproduce the above copyright
     22  1.1  christos  *    notice, this list of conditions and the following disclaimer in
     23  1.1  christos  *    the documentation and/or other materials provided with the
     24  1.1  christos  *    distribution.
     25  1.1  christos  *
     26  1.1  christos  * 3. All advertising materials mentioning features or use of this
     27  1.1  christos  *    software must display the following acknowledgment:
     28  1.1  christos  *    "This product includes software developed by the OpenSSL Project
     29  1.1  christos  *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
     30  1.1  christos  *
     31  1.1  christos  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
     32  1.1  christos  *    endorse or promote products derived from this software without
     33  1.1  christos  *    prior written permission. For written permission, please contact
     34  1.1  christos  *    licensing (at) OpenSSL.org.
     35  1.1  christos  *
     36  1.1  christos  * 5. Products derived from this software may not be called "OpenSSL"
     37  1.1  christos  *    nor may "OpenSSL" appear in their names without prior written
     38  1.1  christos  *    permission of the OpenSSL Project.
     39  1.1  christos  *
     40  1.1  christos  * 6. Redistributions of any form whatsoever must retain the following
     41  1.1  christos  *    acknowledgment:
     42  1.1  christos  *    "This product includes software developed by the OpenSSL Project
     43  1.1  christos  *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
     44  1.1  christos  *
     45  1.1  christos  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
     46  1.1  christos  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     47  1.1  christos  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     48  1.1  christos  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
     49  1.1  christos  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     50  1.1  christos  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     51  1.1  christos  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     52  1.1  christos  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     53  1.1  christos  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     54  1.1  christos  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     55  1.1  christos  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     56  1.1  christos  * OF THE POSSIBILITY OF SUCH DAMAGE.
     57  1.1  christos  * ====================================================================
     58  1.1  christos  *
     59  1.1  christos  * This product includes cryptographic software written by Eric Young
     60  1.1  christos  * (eay (at) cryptsoft.com).  This product includes software written by Tim
     61  1.1  christos  * Hudson (tjh (at) cryptsoft.com).
     62  1.1  christos  *
     63  1.1  christos  */
     64  1.1  christos 
     65  1.1  christos #include <stdio.h>
     66  1.1  christos #include <string.h>
     67  1.1  christos 
     68  1.1  christos #include <openssl/opensslconf.h>
     69  1.1  christos #include <openssl/crypto.h>
     70  1.1  christos #include <openssl/dso.h>
     71  1.1  christos #include <openssl/engine.h>
     72  1.1  christos #include <openssl/evp.h>
     73  1.1  christos #ifndef OPENSSL_NO_AES
     74  1.1  christos # include <openssl/aes.h>
     75  1.1  christos #endif
     76  1.1  christos #include <openssl/rand.h>
     77  1.1  christos #include <openssl/err.h>
     78  1.1  christos 
     79  1.1  christos #ifndef OPENSSL_NO_HW
     80  1.1  christos # ifndef OPENSSL_NO_HW_PADLOCK
     81  1.1  christos 
     82  1.1  christos /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
     83  1.1  christos #  if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
     84  1.1  christos #   ifndef OPENSSL_NO_DYNAMIC_ENGINE
     85  1.1  christos #    define DYNAMIC_ENGINE
     86  1.1  christos #   endif
     87  1.1  christos #  elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
     88  1.1  christos #   ifdef ENGINE_DYNAMIC_SUPPORT
     89  1.1  christos #    define DYNAMIC_ENGINE
     90  1.1  christos #   endif
     91  1.1  christos #  else
     92  1.1  christos #   error "Only OpenSSL >= 0.9.7 is supported"
     93  1.1  christos #  endif
     94  1.1  christos 
     95  1.1  christos /*
     96  1.1  christos  * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it
     97  1.1  christos  * doesn't exist elsewhere, but it even can't be compiled on other platforms!
     98  1.1  christos  *
     99  1.1  christos  * In addition, because of the heavy use of inline assembler, compiler choice
    100  1.1  christos  * is limited to GCC and Microsoft C.
    101  1.1  christos  */
    102  1.1  christos #  undef COMPILE_HW_PADLOCK
    103  1.1  christos #  if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
    104  1.1  christos #   if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
    105  1.1  christos      (defined(_MSC_VER) && defined(_M_IX86))
    106  1.1  christos #    define COMPILE_HW_PADLOCK
    107  1.1  christos #   endif
    108  1.1  christos #  endif
    109  1.1  christos 
    110  1.1  christos #  ifdef OPENSSL_NO_DYNAMIC_ENGINE
    111  1.1  christos #   ifdef COMPILE_HW_PADLOCK
    112  1.1  christos static ENGINE *ENGINE_padlock(void);
    113  1.1  christos #   endif
    114  1.1  christos 
    115  1.1  christos void ENGINE_load_padlock(void)
    116  1.1  christos {
    117  1.1  christos /* On non-x86 CPUs it just returns. */
    118  1.1  christos #   ifdef COMPILE_HW_PADLOCK
    119  1.1  christos     ENGINE *toadd = ENGINE_padlock();
    120  1.1  christos     if (!toadd)
    121  1.1  christos         return;
    122  1.1  christos     ENGINE_add(toadd);
    123  1.1  christos     ENGINE_free(toadd);
    124  1.1  christos     ERR_clear_error();
    125  1.1  christos #   endif
    126  1.1  christos }
    127  1.1  christos 
    128  1.1  christos #  endif
    129  1.1  christos 
    130  1.1  christos #  ifdef COMPILE_HW_PADLOCK
    131  1.1  christos /*
    132  1.1  christos  * We do these includes here to avoid header problems on platforms that do
    133  1.1  christos  * not have the VIA padlock anyway...
    134  1.1  christos  */
    135  1.1  christos #   include <stdlib.h>
    136  1.1  christos #   ifdef _WIN32
    137  1.1  christos #    include <malloc.h>
    138  1.1  christos #    ifndef alloca
    139  1.1  christos #     define alloca _alloca
    140  1.1  christos #    endif
    141  1.1  christos #   elif defined(__GNUC__)
    142  1.1  christos #    ifndef alloca
    143  1.1  christos #     define alloca(s) __builtin_alloca(s)
    144  1.1  christos #    endif
    145  1.1  christos #   endif
    146  1.1  christos 
    147  1.1  christos /* Function for ENGINE detection and control */
    148  1.1  christos static int padlock_available(void);
    149  1.1  christos static int padlock_init(ENGINE *e);
    150  1.1  christos 
    151  1.1  christos /* RNG Stuff */
    152  1.1  christos static RAND_METHOD padlock_rand;
    153  1.1  christos 
    154  1.1  christos /* Cipher Stuff */
    155  1.1  christos #   ifndef OPENSSL_NO_AES
    156  1.1  christos static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
    157  1.1  christos                            const int **nids, int nid);
    158  1.1  christos #   endif
    159  1.1  christos 
    160  1.1  christos /* Engine names */
    161  1.1  christos static const char *padlock_id = "padlock";
    162  1.1  christos static char padlock_name[100];
    163  1.1  christos 
    164  1.1  christos /* Available features */
    165  1.1  christos static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
    166  1.1  christos static int padlock_use_rng = 0; /* Random Number Generator */
    167  1.1  christos #   ifndef OPENSSL_NO_AES
    168  1.1  christos static int padlock_aes_align_required = 1;
    169  1.1  christos #   endif
    170  1.1  christos 
    171  1.1  christos /* ===== Engine "management" functions ===== */
    172  1.1  christos 
    173  1.1  christos /* Prepare the ENGINE structure for registration */
    174  1.1  christos static int padlock_bind_helper(ENGINE *e)
    175  1.1  christos {
    176  1.1  christos     /* Check available features */
    177  1.1  christos     padlock_available();
    178  1.1  christos 
    179  1.1  christos #   if 1                        /* disable RNG for now, see commentary in
    180  1.1  christos                                  * vicinity of RNG code */
    181  1.1  christos     padlock_use_rng = 0;
    182  1.1  christos #   endif
    183  1.1  christos 
    184  1.1  christos     /* Generate a nice engine name with available features */
    185  1.1  christos     BIO_snprintf(padlock_name, sizeof(padlock_name),
    186  1.1  christos                  "VIA PadLock (%s, %s)",
    187  1.1  christos                  padlock_use_rng ? "RNG" : "no-RNG",
    188  1.1  christos                  padlock_use_ace ? "ACE" : "no-ACE");
    189  1.1  christos 
    190  1.1  christos     /* Register everything or return with an error */
    191  1.1  christos     if (!ENGINE_set_id(e, padlock_id) ||
    192  1.1  christos         !ENGINE_set_name(e, padlock_name) ||
    193  1.1  christos         !ENGINE_set_init_function(e, padlock_init) ||
    194  1.1  christos #   ifndef OPENSSL_NO_AES
    195  1.1  christos         (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
    196  1.1  christos #   endif
    197  1.1  christos         (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
    198  1.1  christos         return 0;
    199  1.1  christos     }
    200  1.1  christos 
    201  1.1  christos     /* Everything looks good */
    202  1.1  christos     return 1;
    203  1.1  christos }
    204  1.1  christos 
    205  1.1  christos #   ifdef OPENSSL_NO_DYNAMIC_ENGINE
    206  1.1  christos 
    207  1.1  christos /* Constructor */
    208  1.1  christos static ENGINE *ENGINE_padlock(void)
    209  1.1  christos {
    210  1.1  christos     ENGINE *eng = ENGINE_new();
    211  1.1  christos 
    212  1.1  christos     if (!eng) {
    213  1.1  christos         return NULL;
    214  1.1  christos     }
    215  1.1  christos 
    216  1.1  christos     if (!padlock_bind_helper(eng)) {
    217  1.1  christos         ENGINE_free(eng);
    218  1.1  christos         return NULL;
    219  1.1  christos     }
    220  1.1  christos 
    221  1.1  christos     return eng;
    222  1.1  christos }
    223  1.1  christos 
    224  1.1  christos #   endif
    225  1.1  christos 
    226  1.1  christos /* Check availability of the engine */
    227  1.1  christos static int padlock_init(ENGINE *e)
    228  1.1  christos {
    229  1.1  christos     return (padlock_use_rng || padlock_use_ace);
    230  1.1  christos }
    231  1.1  christos 
    232  1.1  christos /*
    233  1.1  christos  * This stuff is needed if this ENGINE is being compiled into a
    234  1.1  christos  * self-contained shared-library.
    235  1.1  christos  */
    236  1.1  christos #   ifdef DYNAMIC_ENGINE
    237  1.1  christos static int padlock_bind_fn(ENGINE *e, const char *id)
    238  1.1  christos {
    239  1.1  christos     if (id && (strcmp(id, padlock_id) != 0)) {
    240  1.1  christos         return 0;
    241  1.1  christos     }
    242  1.1  christos 
    243  1.1  christos     if (!padlock_bind_helper(e)) {
    244  1.1  christos         return 0;
    245  1.1  christos     }
    246  1.1  christos 
    247  1.1  christos     return 1;
    248  1.1  christos }
    249  1.1  christos 
    250  1.1  christos IMPLEMENT_DYNAMIC_CHECK_FN()
    251  1.1  christos     IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
    252  1.1  christos #   endif                       /* DYNAMIC_ENGINE */
    253  1.1  christos /* ===== Here comes the "real" engine ===== */
    254  1.1  christos #   ifndef OPENSSL_NO_AES
    255  1.1  christos /* Some AES-related constants */
    256  1.1  christos #    define AES_BLOCK_SIZE          16
    257  1.1  christos #    define AES_KEY_SIZE_128        16
    258  1.1  christos #    define AES_KEY_SIZE_192        24
    259  1.1  christos #    define AES_KEY_SIZE_256        32
    260  1.1  christos     /*
    261  1.1  christos      * Here we store the status information relevant to the current context.
    262  1.1  christos      */
    263  1.1  christos     /*
    264  1.1  christos      * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
    265  1.1  christos      * the order of items in this structure.  Don't blindly modify, reorder,
    266  1.1  christos      * etc!
    267  1.1  christos      */
    /*
     * Per-context state handed to the xcrypt routines.  Member order fixes
     * the offsets the assembler templates rely on: 'iv' occupies the first
     * AES_BLOCK_SIZE (16) bytes, 'cword' follows it (the lea instructions in
     * PADLOCK_XCRYPT_ASM address it at offset 16) and 'ks' follows 'cword'
     * (addressed at offset 32).  The 'pad' member pads the control word union
     * to four unsigned ints; this yields the 16..32 range only if unsigned int
     * is 4 bytes wide - assumed here, since the file is compiled for i386 only.
     * The bit-fields in 'b' give names to individual control word bits.
     */
    268  1.1  christos struct padlock_cipher_data {
    269  1.1  christos     unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    270  1.1  christos     union {
    271  1.1  christos         unsigned int pad[4];
    272  1.1  christos         struct {
    273  1.1  christos             int rounds:4;
    274  1.1  christos             int dgst:1;         /* n/a in C3 */
    275  1.1  christos             int align:1;        /* n/a in C3 */
    276  1.1  christos             int ciphr:1;        /* n/a in C3 */
    277  1.1  christos             unsigned int keygen:1;
    278  1.1  christos             int interm:1;
    279  1.1  christos             unsigned int encdec:1;
    280  1.1  christos             int ksize:2;
    281  1.1  christos         } b;
    282  1.1  christos     } cword;                    /* Control word */
    283  1.1  christos     AES_KEY ks;                 /* Encryption key */
    284  1.1  christos };
    285  1.1  christos 
    286  1.1  christos /*
    287  1.1  christos  * Essentially this variable belongs in thread local storage.
    288  1.1  christos  * Having this variable global on the other hand can only cause
    289  1.1  christos  * few bogus key reloads [if any at all on single-CPU system],
    290  1.1  christos  * so we accept the penalty...
    291  1.1  christos  */
    292  1.1  christos static volatile struct padlock_cipher_data *padlock_saved_context;
    293  1.1  christos #   endif
    294  1.1  christos 
    295  1.1  christos /*-
    296  1.1  christos  * =======================================================
    297  1.1  christos  * Inline assembler section(s).
    298  1.1  christos  * =======================================================
    299  1.1  christos  * Order of arguments is chosen to facilitate Windows port
    300  1.1  christos  * using __fastcall calling convention. If you wish to add
    301  1.1  christos  * more routines, keep in mind that first __fastcall
    302  1.1  christos  * argument is passed in %ecx and second - in %edx.
    303  1.1  christos  * =======================================================
    304  1.1  christos  */
    305  1.1  christos #   if defined(__GNUC__) && __GNUC__>=2
    306  1.1  christos /*
    307  1.1  christos  * As for excessive "push %ebx"/"pop %ebx" found all over.
    308  1.1  christos  * When generating position-independent code GCC won't let
    309  1.1  christos  * us use "b" in assembler templates nor even respect "ebx"
    310  1.1  christos  * in "clobber description." Therefore the trouble...
    311  1.1  christos  */
    312  1.1  christos 
    313  1.1  christos /*
    314  1.1  christos  * Helper function - check if a CPUID instruction is available on this CPU
    315  1.1  christos  */
    /*
     * Returns 1 when EFLAGS bit 21 (mask 0x200000) can be flipped, 0 otherwise.
     *
     * Sequence performed by the asm statement:
     *   1. read EFLAGS into %eax and invert bit 21;
     *   2. keep the wanted value of bit 21 in %ecx;
     *   3. write the modified value back to EFLAGS and read EFLAGS again;
     *   4. xor the bit actually read back with the wanted bit - the outcome
     *      is 0 only if the write took effect.
     * The outcome is copied to 'result'; %eax and %ecx are declared clobbered.
     * The original EFLAGS value is not written back afterwards.
     */
    316  1.1  christos static int padlock_insn_cpuid_available(void)
    317  1.1  christos {
    318  1.1  christos     int result = -1;
    319  1.1  christos 
    320  1.1  christos     /*
    321  1.1  christos      * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
    322  1.1  christos      * CPUID is available.
    323  1.1  christos      */
    324  1.1  christos     asm volatile ("pushf\n"
    325  1.1  christos                   "popl %%eax\n"
    326  1.1  christos                   "xorl $0x200000, %%eax\n"
    327  1.1  christos                   "movl %%eax, %%ecx\n"
    328  1.1  christos                   "andl $0x200000, %%ecx\n"
    329  1.1  christos                   "pushl %%eax\n"
    330  1.1  christos                   "popf\n"
    331  1.1  christos                   "pushf\n"
    332  1.1  christos                   "popl %%eax\n"
    333  1.1  christos                   "andl $0x200000, %%eax\n"
    334  1.1  christos                   "xorl %%eax, %%ecx\n"
    335  1.1  christos                   "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");
    336  1.1  christos 
    337  1.1  christos     return (result == 0);
    338  1.1  christos }
    339  1.1  christos 
    340  1.1  christos /*
    341  1.1  christos  * Load supported features of the CPU to see if the PadLock is available.
    342  1.1  christos  */
    343  1.1  christos static int padlock_available(void)
    344  1.1  christos {
    345  1.1  christos     char vendor_string[16];
    346  1.1  christos     unsigned int eax, edx;
    347  1.1  christos 
    348  1.1  christos     /* First check if the CPUID instruction is available at all... */
    349  1.1  christos     if (!padlock_insn_cpuid_available())
    350  1.1  christos         return 0;
    351  1.1  christos 
    352  1.1  christos     /* Are we running on the Centaur (VIA) CPU? */
    353  1.1  christos     eax = 0x00000000;
    354  1.1  christos     vendor_string[12] = 0;
    355  1.1  christos     asm volatile ("pushl  %%ebx\n"
    356  1.1  christos                   "cpuid\n"
    357  1.1  christos                   "movl   %%ebx,(%%edi)\n"
    358  1.1  christos                   "movl   %%edx,4(%%edi)\n"
    359  1.1  christos                   "movl   %%ecx,8(%%edi)\n"
    360  1.1  christos                   "popl   %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    361  1.1  christos     if (strcmp(vendor_string, "CentaurHauls") != 0)
    362  1.1  christos         return 0;
    363  1.1  christos 
    364  1.1  christos     /* Check for Centaur Extended Feature Flags presence */
    365  1.1  christos     eax = 0xC0000000;
    366  1.1  christos     asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    367  1.1  christos     if (eax < 0xC0000001)
    368  1.1  christos         return 0;
    369  1.1  christos 
    370  1.1  christos     /* Read the Centaur Extended Feature Flags */
    371  1.1  christos     eax = 0xC0000001;
    372  1.1  christos     asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
    373  1.1  christos                   "=d"(edx)::"ecx");
    374  1.1  christos 
    375  1.1  christos     /* Fill up some flags */
    376  1.1  christos     padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    377  1.1  christos     padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));
    378  1.1  christos 
    379  1.1  christos     return padlock_use_ace + padlock_use_rng;
    380  1.1  christos }
    381  1.1  christos 
    382  1.1  christos #    ifndef OPENSSL_NO_AES
    383  1.1  christos #     ifndef AES_ASM
    384  1.1  christos /* Our own htonl()/ntohl() */
    385  1.1  christos static inline void padlock_bswapl(AES_KEY *ks)
    386  1.1  christos {
    387  1.1  christos     size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
    388  1.1  christos     unsigned int *key = ks->rd_key;
    389  1.1  christos 
    390  1.1  christos     while (i--) {
    391  1.1  christos         asm volatile ("bswapl %0":"+r" (*key));
    392  1.1  christos         key++;
    393  1.1  christos     }
    394  1.1  christos }
    395  1.1  christos #     endif
    396  1.1  christos #    endif
    397  1.1  christos 
    398  1.1  christos /*
    399  1.1  christos  * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
    400  1.1  christos  * stack clears EFLAGS[30] which does the trick.
    401  1.1  christos  */
    /*
     * Pushes EFLAGS onto the stack and immediately pops it back.  The asm
     * statement takes no operands and declares no clobbers; the stack pointer
     * is balanced when it completes.
     */
    402  1.1  christos static inline void padlock_reload_key(void)
    403  1.1  christos {
    404  1.1  christos     asm volatile ("pushfl; popfl");
    405  1.1  christos }
    406  1.1  christos 
    407  1.1  christos #    ifndef OPENSSL_NO_AES
    408  1.1  christos /*
    409  1.1  christos  * This is heuristic key context tracing. At first one
    410  1.1  christos  * believes that one should use atomic swap instructions,
    411  1.1  christos  * but it's not actually necessary. Point is that if
    412  1.1  christos  * padlock_saved_context was changed by another thread
    413  1.1  christos  * after we've read it and before we compare it with cdata,
    414  1.1  christos  * our key *shall* be reloaded upon thread context switch
    415  1.1  christos  * and we are therefore set in either case...
    416  1.1  christos  */
    /*
     * Record 'cdata' as the current context in padlock_saved_context and, if
     * needed, reload EFLAGS from the stack first.
     *
     * Operands: %0 is padlock_saved_context in memory (read/write), %1 is its
     * previous value in a register, %2 is cdata in a register.
     *
     * Flow of the template:
     *   - push EFLAGS and test bit 30 of the pushed copy;
     *   - if the bit is clear, or if the previous context equals cdata, jump
     *     to label 1 and simply discard the pushed copy;
     *   - otherwise pop the copy back into EFLAGS (the same push/pop pair
     *     padlock_reload_key() uses) and pre-subtract 4 from %esp so that the
     *     shared "addl $4" at label 1 leaves the stack balanced;
     *   - finally store cdata into padlock_saved_context.
     */
    417  1.1  christos static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
    418  1.1  christos {
    419  1.1  christos     asm volatile ("pushfl\n"
    420  1.1  christos                   "       btl     $30,(%%esp)\n"
    421  1.1  christos                   "       jnc     1f\n"
    422  1.1  christos                   "       cmpl    %2,%1\n"
    423  1.1  christos                   "       je      1f\n"
    424  1.1  christos                   "       popfl\n"
    425  1.1  christos                   "       subl    $4,%%esp\n"
    426  1.1  christos                   "1:     addl    $4,%%esp\n"
    427  1.1  christos                   "       movl    %2,%0":"+m" (padlock_saved_context)
    428  1.1  christos                   :"r"(padlock_saved_context), "r"(cdata):"cc");
    429  1.1  christos }
    430  1.1  christos 
    431  1.1  christos /* Template for padlock_xcrypt_* modes */
    432  1.1  christos /*
    433  1.1  christos  * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
    434  1.1  christos  * of the 'padlock_cipher_data' structure.
    435  1.1  christos  */
    /*
     * PADLOCK_XCRYPT_ASM(name, rep_xcrypt) defines
     *
     *   static inline void *name(size_t cnt, struct padlock_cipher_data *cdata,
     *                            void *out, const void *inp);
     *
     * Register set-up performed by the template and its constraints:
     *   %eax = cdata (start of the structure, i.e. the iv member)
     *   %ecx = cnt
     *   %edi = out
     *   %esi = inp
     *   %edx = cdata + 16 (the cword member)
     *   %ebx = cdata + 32 (the ks member); saved and restored by hand
     * 'rep_xcrypt' is the string holding the raw opcode bytes to emit.
     * After the instruction, cnt/out/inp receive the final register values
     * and the function returns whatever is left in %eax.
     *
     * Four instances are generated below, one per opcode string.
     */
    436  1.1  christos #     define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
    437  1.1  christos static inline void *name(size_t cnt,            \
    438  1.1  christos         struct padlock_cipher_data *cdata,      \
    439  1.1  christos         void *out, const void *inp)             \
    440  1.1  christos {       void *iv;                               \
    441  1.1  christos         asm volatile ( "pushl   %%ebx\n"        \
    442  1.1  christos                 "       leal    16(%0),%%edx\n" \
    443  1.1  christos                 "       leal    32(%0),%%ebx\n" \
    444  1.1  christos                         rep_xcrypt "\n"         \
    445  1.1  christos                 "       popl    %%ebx"          \
    446  1.1  christos                 : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
    447  1.1  christos                 : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
    448  1.1  christos                 : "edx", "cc", "memory");       \
    449  1.1  christos         return iv;                              \
    450  1.1  christos }
    451  1.1  christos 
    452  1.1  christos /* Generate all functions with appropriate opcodes */
    453  1.1  christos /* rep xcryptecb */
    454  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
    455  1.1  christos /* rep xcryptcbc */
    456  1.1  christos     PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
    457  1.1  christos /* rep xcryptcfb */
    458  1.1  christos     PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
    459  1.1  christos /* rep xcryptofb */
    460  1.1  christos     PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
    461  1.1  christos #    endif
    462  1.1  christos /* The RNG call itself */
    /*
     * Emit the three opcode bytes 0x0f,0xa7,0xc0 (xstore) with %edi = addr and
     * %edx = edx_in.  The object at *addr (viewed as one unsigned) is declared
     * as written by the instruction.  Returns the value left in %eax.
     * NOTE(review): only a single unsigned at addr is declared as output;
     * confirm against the instruction's documentation how many bytes it may
     * store and whether it modifies %edi.
     */
    463  1.1  christos static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
    464  1.1  christos {
    465  1.1  christos     unsigned int eax_out;
    466  1.1  christos 
    467  1.1  christos     asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
    468  1.1  christos                   :"=a" (eax_out), "=m"(*(unsigned *)addr)
    469  1.1  christos                   :"D"(addr), "d"(edx_in)
    470  1.1  christos         );
    471  1.1  christos 
    472  1.1  christos     return eax_out;
    473  1.1  christos }
    474  1.1  christos 
    475  1.1  christos /*
    476  1.1  christos  * Why not inline 'rep movsd'? I failed to find information on what value in
    477  1.1  christos  * Direction Flag one can expect and consequently have to apply
    478  1.1  christos  * "better-safe-than-sorry" approach and assume "undefined." I could
    479  1.1  christos  * explicitly clear it and restore the original value upon return from
    480  1.1  christos  * padlock_aes_cipher, but it's presumably too much trouble for too little
    481  1.1  christos  * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
    482  1.1  christos  * affected by the Direction Flag and pointers advance toward larger
    483  1.1  christos  * addresses unconditionally.
    484  1.1  christos  */
    485  1.1  christos static inline unsigned char *padlock_memcpy(void *dst, const void *src,
    486  1.1  christos                                             size_t n)
    487  1.1  christos {
    488  1.1  christos     long *d = dst;
    489  1.1  christos     const long *s = src;
    490  1.1  christos 
    491  1.1  christos     n /= sizeof(*d);
    492  1.1  christos     do {
    493  1.1  christos         *d++ = *s++;
    494  1.1  christos     } while (--n);
    495  1.1  christos 
    496  1.1  christos     return dst;
    497  1.1  christos }
    498  1.1  christos 
    499  1.1  christos #   elif defined(_MSC_VER)
    500  1.1  christos /*
    501  1.1  christos  * Unlike GCC these are real functions. In order to minimize impact
    502  1.1  christos  * on performance we adhere to __fastcall calling convention in
    503  1.1  christos  * order to get two first arguments passed through %ecx and %edx.
    504  1.1  christos  * Which kind of suits very well, as instructions in question use
    505  1.1  christos  * both %ecx and %edx as input:-)
    506  1.1  christos  */
    /*
     * REP_XCRYPT(code) emits four raw bytes into the instruction stream:
     * 0xf3, 0x0f, 0xa7 and then 'code', which selects the variant.
     */
    507  1.1  christos #    define REP_XCRYPT(code)                \
    508  1.1  christos         _asm _emit 0xf3                 \
    509  1.1  christos         _asm _emit 0x0f _asm _emit 0xa7 \
    510  1.1  christos         _asm _emit code
    511  1.1  christos 
    512  1.1  christos /*
    513  1.1  christos  * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
    514  1.1  christos  * of the 'padlock_cipher_data' structure.
    515  1.1  christos  */
    /*
     * PADLOCK_XCRYPT_ASM(name, code) defines
     *
     *   static void * __fastcall name(size_t cnt, void *cdata,
     *                                 void *outp, const void *inp);
     *
     * With __fastcall the first two arguments arrive in ecx (cnt) and edx
     * (cdata).  The body copies cdata to eax, points edx at cdata+16 and ebx
     * at cdata+32, loads edi/esi from outp/inp and then emits the opcode via
     * REP_XCRYPT(code).
     * NOTE(review): there is no return statement; the function relies on the
     * value left in eax being treated as the return value - confirm.
     *
     * Four instances are generated below, one per opcode byte.
     */
    516  1.1  christos #    define PADLOCK_XCRYPT_ASM(name,code)   \
    517  1.1  christos static void * __fastcall                \
    518  1.1  christos         name (size_t cnt, void *cdata,  \
    519  1.1  christos         void *outp, const void *inp)    \
    520  1.1  christos {       _asm    mov     eax,edx         \
    521  1.1  christos         _asm    lea     edx,[eax+16]    \
    522  1.1  christos         _asm    lea     ebx,[eax+32]    \
    523  1.1  christos         _asm    mov     edi,outp        \
    524  1.1  christos         _asm    mov     esi,inp         \
    525  1.1  christos         REP_XCRYPT(code)                \
    526  1.1  christos }
    527  1.1  christos 
    528  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
    529  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
    530  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
    531  1.1  christos PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
    532  1.1  christos 
    /*
     * Copies the first __fastcall argument (outp, in ecx) to edi and emits
     * the bytes 0x0f,0xa7,0xc0.  The second argument (code) is already in
     * edx on entry.
     * NOTE(review): no return statement; the int result is presumably
     * whatever the instruction leaves in eax - confirm.
     */
    533  1.1  christos static int __fastcall padlock_xstore(void *outp, unsigned int code)
    534  1.1  christos {
    535  1.1  christos     _asm    mov edi,ecx
    536  1.1  christos     _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
    537  1.1  christos }
    538  1.1  christos 
    /*
     * Pushes EFLAGS onto the stack and pops it straight back, leaving the
     * stack pointer unchanged.
     */
    539  1.1  christos static void __fastcall padlock_reload_key(void)
    540  1.1  christos {
    541  1.1  christos     _asm pushfd
    542  1.1  christos     _asm popfd
    543  1.1  christos }
    544  1.1  christos 
    /*
     * Record cdata (first __fastcall argument, in ecx) in
     * padlock_saved_context, reloading EFLAGS from the stack first when
     * required.
     *
     * Flow: push EFLAGS and test bit 30 of the pushed copy.  If the bit is
     * clear, or if ecx already equals padlock_saved_context, jump to 'skip'
     * and discard the copy.  Otherwise pop the copy back into EFLAGS and
     * subtract 4 from esp so the shared "add esp,4" at 'skip' keeps the stack
     * balanced.  In every case ecx is then stored to padlock_saved_context.
     */
    545  1.1  christos static void __fastcall padlock_verify_context(void *cdata)
    546  1.1  christos {
    547  1.1  christos     _asm    {
    548  1.1  christos         pushfd
    549  1.1  christos         bt  DWORD PTR[esp],30
    550  1.1  christos         jnc skip
    551  1.1  christos         cmp ecx,padlock_saved_context
    552  1.1  christos         je  skip
    553  1.1  christos         popfd
    554  1.1  christos         sub esp,4
    555  1.1  christos     skip:   add esp,4
    556  1.1  christos         mov padlock_saved_context,ecx
    557  1.1  christos     }
    558  1.1  christos }
    559  1.1  christos 
    /*
     * Probe the CPU and set padlock_use_ace / padlock_use_rng.
     *
     * Steps carried out by the asm block:
     *   1. Flip EFLAGS bit 21, read EFLAGS back and jump to 'noluck' when the
     *      bit did not change.
     *   2. CPUID with eax = 0: ebx, edx and ecx must equal the multi-character
     *      constants 'tneC', 'Hrua' and 'slua'.
     *   3. CPUID with eax = 0xC0000000: the returned eax must not be below
     *      0xC0000001.
     *   4. CPUID with eax = 0xC0000001: when edx bits 6 and 7 are both set,
     *      padlock_use_ace is set to 1; when bits 2 and 3 are both set,
     *      padlock_use_rng is set to 1.  eax is incremented once per feature.
     *
     * eax is zeroed before each 'noluck' jump taken after a CPUID.  On the
     * very first exit eax instead holds old-xor-new EFLAGS, in which bit 21
     * is clear.  The flags are only ever set to 1 here, never cleared.
     * NOTE(review): there is no return statement; the int result is
     * presumably the value left in eax - confirm.
     */
    560  1.1  christos static int
    561  1.1  christos padlock_available(void)
    562  1.1  christos {
    563  1.1  christos     _asm    {
    564  1.1  christos         pushfd
    565  1.1  christos         pop eax
    566  1.1  christos         mov ecx,eax
    567  1.1  christos         xor eax,1<<21
    568  1.1  christos         push    eax
    569  1.1  christos         popfd
    570  1.1  christos         pushfd
    571  1.1  christos         pop eax
    572  1.1  christos         xor eax,ecx
    573  1.1  christos         bt  eax,21
    574  1.1  christos         jnc noluck
    575  1.1  christos         mov eax,0
    576  1.1  christos         cpuid
    577  1.1  christos         xor eax,eax
    578  1.1  christos         cmp ebx,'tneC'
    579  1.1  christos         jne noluck
    580  1.1  christos         cmp edx,'Hrua'
    581  1.1  christos         jne noluck
    582  1.1  christos         cmp ecx,'slua'
    583  1.1  christos         jne noluck
    584  1.1  christos         mov eax,0xC0000000
    585  1.1  christos         cpuid
    586  1.1  christos         mov edx,eax
    587  1.1  christos         xor eax,eax
    588  1.1  christos         cmp edx,0xC0000001
    589  1.1  christos         jb  noluck
    590  1.1  christos         mov eax,0xC0000001
    591  1.1  christos         cpuid
    592  1.1  christos         xor eax,eax
    593  1.1  christos         bt  edx,6
    594  1.1  christos         jnc skip_a
    595  1.1  christos         bt  edx,7
    596  1.1  christos         jnc skip_a
    597  1.1  christos         mov padlock_use_ace,1
    598  1.1  christos         inc eax
    599  1.1  christos     skip_a: bt  edx,2
    600  1.1  christos         jnc skip_r
    601  1.1  christos         bt  edx,3
    602  1.1  christos         jnc skip_r
    603  1.1  christos         mov padlock_use_rng,1
    604  1.1  christos         inc eax
    605  1.1  christos     skip_r:
    606  1.1  christos     noluck:
    607  1.1  christos     }
    608  1.1  christos }
    609  1.1  christos 
     610  1.1  christos static void __fastcall padlock_bswapl(void *key)
     611  1.1  christos {
                            /*
                             * Byte-swap, in place, the 60 consecutive 32-bit words starting at
                             * "key". The pointer is taken from ecx (the function is declared
                             * __fastcall) and used as both source (esi) and destination (edi);
                             * each word is loaded, run through bswap and stored back.
                             * EFLAGS is saved and restored around the loop because cld is
                             * executed to make lodsd/stosd walk upwards.
                             */
     612  1.1  christos     _asm    {
     613  1.1  christos         pushfd
     614  1.1  christos         cld
     615  1.1  christos         mov esi,ecx
     616  1.1  christos         mov edi,ecx
     617  1.1  christos         mov ecx,60
     618  1.1  christos     up: lodsd
     619  1.1  christos         bswap   eax
     620  1.1  christos         stosd
     621  1.1  christos         loop    up
     622  1.1  christos         popfd
     623  1.1  christos     }
     624  1.1  christos }
    625  1.1  christos 
     626  1.1  christos /*
     627  1.1  christos  * MS actually specifies status of Direction Flag and compiler even manages
     628  1.1  christos  * to compile following as 'rep movsd' all by itself...
                         *
                         * Note that the byte count is rounded down to a multiple of 4 before
                         * the copy, and that the result is the destination pointer cast to
                         * unsigned char *.
     629  1.1  christos  */
     630  1.1  christos #    define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
    631  1.1  christos #   endif
     632  1.1  christos /* ===== AES encryption/decryption ===== */
     633  1.1  christos #   ifndef OPENSSL_NO_AES
                        /*
                         * Provide the short NID_aes_<bits>_cfb / NID_aes_<bits>_ofb names as
                         * aliases of the *_cfb128 / *_ofb128 ones when only the latter are
                         * defined. The NID table and DECLARE_AES_EVP() below are written
                         * against the short names.
                         */
     634  1.1  christos #    if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
     635  1.1  christos #     define NID_aes_128_cfb NID_aes_128_cfb128
     636  1.1  christos #    endif
     637  1.1  christos #    if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
     638  1.1  christos #     define NID_aes_128_ofb NID_aes_128_ofb128
     639  1.1  christos #    endif
     640  1.1  christos #    if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
     641  1.1  christos #     define NID_aes_192_cfb NID_aes_192_cfb128
     642  1.1  christos #    endif
     643  1.1  christos #    if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
     644  1.1  christos #     define NID_aes_192_ofb NID_aes_192_ofb128
     645  1.1  christos #    endif
     646  1.1  christos #    if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
     647  1.1  christos #     define NID_aes_256_cfb NID_aes_256_cfb128
     648  1.1  christos #    endif
     649  1.1  christos #    if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
     650  1.1  christos #     define NID_aes_256_ofb NID_aes_256_ofb128
     651  1.1  christos #    endif
     652  1.1  christos /*
     653  1.1  christos  * List of supported ciphers.
                         *
                         * padlock_ciphers() hands this table out when it is called without a
                         * cipher pointer; it serves one EVP_CIPHER for each NID listed here.
     654  1.1  christos  */ static int padlock_cipher_nids[] = {
     655  1.1  christos     NID_aes_128_ecb,
     656  1.1  christos     NID_aes_128_cbc,
     657  1.1  christos     NID_aes_128_cfb,
     658  1.1  christos     NID_aes_128_ofb,
     659  1.1  christos 
     660  1.1  christos     NID_aes_192_ecb,
     661  1.1  christos     NID_aes_192_cbc,
     662  1.1  christos     NID_aes_192_cfb,
     663  1.1  christos     NID_aes_192_ofb,
     664  1.1  christos 
     665  1.1  christos     NID_aes_256_ecb,
     666  1.1  christos     NID_aes_256_cbc,
     667  1.1  christos     NID_aes_256_cfb,
     668  1.1  christos     NID_aes_256_ofb,
     669  1.1  christos };
     670  1.1  christos 
                        /* Number of entries in padlock_cipher_nids[]. */
     671  1.1  christos static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
     672  1.1  christos                                       sizeof(padlock_cipher_nids[0]));
    673  1.1  christos 
     674  1.1  christos /* Function prototypes ... */
     675  1.1  christos static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
     676  1.1  christos                                 const unsigned char *iv, int enc);
     677  1.1  christos static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
     678  1.1  christos                               const unsigned char *in, size_t nbytes);
     679  1.1  christos 
                        /*
                         * NEAREST_ALIGNED() rounds a pointer up to the next 16-byte boundary; a
                         * pointer that is already aligned is returned unchanged.
                         * ALIGNED_CIPHER_DATA() applies that to ctx->cipher_data and yields the
                         * result as a struct padlock_cipher_data pointer.
                         */
     680  1.1  christos #    define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +         \
     681  1.1  christos         ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F )      )
     682  1.1  christos #    define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
     683  1.1  christos         NEAREST_ALIGNED(ctx->cipher_data))
     684  1.1  christos 
                        /*
                         * Block size per mode, picked by name pasting in DECLARE_AES_EVP():
                         * a full AES block for ECB and CBC, 1 for the byte-oriented OFB and CFB.
                         */
     685  1.1  christos #    define EVP_CIPHER_block_size_ECB       AES_BLOCK_SIZE
     686  1.1  christos #    define EVP_CIPHER_block_size_CBC       AES_BLOCK_SIZE
     687  1.1  christos #    define EVP_CIPHER_block_size_OFB       1
     688  1.1  christos #    define EVP_CIPHER_block_size_CFB       1
    689  1.1  christos 
     690  1.1  christos /*
     691  1.1  christos  * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
     692  1.1  christos  * of preprocessor magic :-)
                         *
                         * DECLARE_AES_EVP(ksize, lmode, umode) defines the static EVP_CIPHER
                         * named padlock_aes_<ksize>_<lmode>. ksize is the key size as it appears
                         * in the NID_aes_* and AES_KEY_SIZE_* names; lmode and umode are the mode
                         * name in lower and upper case. Every variant uses padlock_aes_init_key()
                         * and padlock_aes_cipher(). The context size is padded by 16 bytes so
                         * that ALIGNED_CIPHER_DATA() can round the data pointer up to a 16-byte
                         * boundary.
     693  1.1  christos  */
     694  1.1  christos #    define DECLARE_AES_EVP(ksize,lmode,umode)      \
     695  1.1  christos static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {       \
     696  1.1  christos         NID_aes_##ksize##_##lmode,              \
     697  1.1  christos         EVP_CIPHER_block_size_##umode,  \
     698  1.1  christos         AES_KEY_SIZE_##ksize,           \
     699  1.1  christos         AES_BLOCK_SIZE,                 \
     700  1.1  christos         0 | EVP_CIPH_##umode##_MODE,    \
     701  1.1  christos         padlock_aes_init_key,           \
     702  1.1  christos         padlock_aes_cipher,             \
     703  1.1  christos         NULL,                           \
     704  1.1  christos         sizeof(struct padlock_cipher_data) + 16,        \
     705  1.1  christos         EVP_CIPHER_set_asn1_iv,         \
     706  1.1  christos         EVP_CIPHER_get_asn1_iv,         \
     707  1.1  christos         NULL,                           \
     708  1.1  christos         NULL                            \
     709  1.1  christos }
     710  1.1  christos 
     711  1.1  christos DECLARE_AES_EVP(128, ecb, ECB);
     712  1.1  christos DECLARE_AES_EVP(128, cbc, CBC);
     713  1.1  christos DECLARE_AES_EVP(128, cfb, CFB);
     714  1.1  christos DECLARE_AES_EVP(128, ofb, OFB);
     715  1.1  christos 
     716  1.1  christos DECLARE_AES_EVP(192, ecb, ECB);
     717  1.1  christos DECLARE_AES_EVP(192, cbc, CBC);
     718  1.1  christos DECLARE_AES_EVP(192, cfb, CFB);
     719  1.1  christos DECLARE_AES_EVP(192, ofb, OFB);
     720  1.1  christos 
     721  1.1  christos DECLARE_AES_EVP(256, ecb, ECB);
     722  1.1  christos DECLARE_AES_EVP(256, cbc, CBC);
     723  1.1  christos DECLARE_AES_EVP(256, cfb, CFB);
     724  1.1  christos DECLARE_AES_EVP(256, ofb, OFB);
    725  1.1  christos 
    726  1.1  christos static int
    727  1.1  christos padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
    728  1.1  christos                 int nid)
    729  1.1  christos {
    730  1.1  christos     /* No specific cipher => return a list of supported nids ... */
    731  1.1  christos     if (!cipher) {
    732  1.1  christos         *nids = padlock_cipher_nids;
    733  1.1  christos         return padlock_cipher_nids_num;
    734  1.1  christos     }
    735  1.1  christos 
    736  1.1  christos     /* ... or the requested "cipher" otherwise */
    737  1.1  christos     switch (nid) {
    738  1.1  christos     case NID_aes_128_ecb:
    739  1.1  christos         *cipher = &padlock_aes_128_ecb;
    740  1.1  christos         break;
    741  1.1  christos     case NID_aes_128_cbc:
    742  1.1  christos         *cipher = &padlock_aes_128_cbc;
    743  1.1  christos         break;
    744  1.1  christos     case NID_aes_128_cfb:
    745  1.1  christos         *cipher = &padlock_aes_128_cfb;
    746  1.1  christos         break;
    747  1.1  christos     case NID_aes_128_ofb:
    748  1.1  christos         *cipher = &padlock_aes_128_ofb;
    749  1.1  christos         break;
    750  1.1  christos 
    751  1.1  christos     case NID_aes_192_ecb:
    752  1.1  christos         *cipher = &padlock_aes_192_ecb;
    753  1.1  christos         break;
    754  1.1  christos     case NID_aes_192_cbc:
    755  1.1  christos         *cipher = &padlock_aes_192_cbc;
    756  1.1  christos         break;
    757  1.1  christos     case NID_aes_192_cfb:
    758  1.1  christos         *cipher = &padlock_aes_192_cfb;
    759  1.1  christos         break;
    760  1.1  christos     case NID_aes_192_ofb:
    761  1.1  christos         *cipher = &padlock_aes_192_ofb;
    762  1.1  christos         break;
    763  1.1  christos 
    764  1.1  christos     case NID_aes_256_ecb:
    765  1.1  christos         *cipher = &padlock_aes_256_ecb;
    766  1.1  christos         break;
    767  1.1  christos     case NID_aes_256_cbc:
    768  1.1  christos         *cipher = &padlock_aes_256_cbc;
    769  1.1  christos         break;
    770  1.1  christos     case NID_aes_256_cfb:
    771  1.1  christos         *cipher = &padlock_aes_256_cfb;
    772  1.1  christos         break;
    773  1.1  christos     case NID_aes_256_ofb:
    774  1.1  christos         *cipher = &padlock_aes_256_ofb;
    775  1.1  christos         break;
    776  1.1  christos 
    777  1.1  christos     default:
    778  1.1  christos         /* Sorry, we don't support this NID */
    779  1.1  christos         *cipher = NULL;
    780  1.1  christos         return 0;
    781  1.1  christos     }
    782  1.1  christos 
    783  1.1  christos     return 1;
    784  1.1  christos }
    785  1.1  christos 
    786  1.1  christos /* Prepare the encryption key for PadLock usage */
    787  1.1  christos static int
    788  1.1  christos padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
    789  1.1  christos                      const unsigned char *iv, int enc)
    790  1.1  christos {
    791  1.1  christos     struct padlock_cipher_data *cdata;
    792  1.1  christos     int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
    793  1.1  christos 
    794  1.1  christos     if (key == NULL)
    795  1.1  christos         return 0;               /* ERROR */
    796  1.1  christos 
    797  1.1  christos     cdata = ALIGNED_CIPHER_DATA(ctx);
    798  1.1  christos     memset(cdata, 0, sizeof(struct padlock_cipher_data));
    799  1.1  christos 
    800  1.1  christos     /* Prepare Control word. */
    801  1.1  christos     if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
    802  1.1  christos         cdata->cword.b.encdec = 0;
    803  1.1  christos     else
    804  1.1  christos         cdata->cword.b.encdec = (ctx->encrypt == 0);
    805  1.1  christos     cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
    806  1.1  christos     cdata->cword.b.ksize = (key_len - 128) / 64;
    807  1.1  christos 
    808  1.1  christos     switch (key_len) {
    809  1.1  christos     case 128:
    810  1.1  christos         /*
    811  1.1  christos          * PadLock can generate an extended key for AES128 in hardware
    812  1.1  christos          */
    813  1.1  christos         memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
    814  1.1  christos         cdata->cword.b.keygen = 0;
    815  1.1  christos         break;
    816  1.1  christos 
    817  1.1  christos     case 192:
    818  1.1  christos     case 256:
    819  1.1  christos         /*
    820  1.1  christos          * Generate an extended AES key in software. Needed for AES192/AES256
    821  1.1  christos          */
    822  1.1  christos         /*
    823  1.1  christos          * Well, the above applies to Stepping 8 CPUs and is listed as
    824  1.1  christos          * hardware errata. They most likely will fix it at some point and
    825  1.1  christos          * then a check for stepping would be due here.
    826  1.1  christos          */
    827  1.1  christos         if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
    828  1.1  christos             EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
    829  1.1  christos             AES_set_encrypt_key(key, key_len, &cdata->ks);
    830  1.1  christos         else
    831  1.1  christos             AES_set_decrypt_key(key, key_len, &cdata->ks);
    832  1.1  christos #    ifndef AES_ASM
    833  1.1  christos         /*
    834  1.1  christos          * OpenSSL C functions use byte-swapped extended key.
    835  1.1  christos          */
    836  1.1  christos         padlock_bswapl(&cdata->ks);
    837  1.1  christos #    endif
    838  1.1  christos         cdata->cword.b.keygen = 1;
    839  1.1  christos         break;
    840  1.1  christos 
    841  1.1  christos     default:
    842  1.1  christos         /* ERROR */
    843  1.1  christos         return 0;
    844  1.1  christos     }
    845  1.1  christos 
    846  1.1  christos     /*
    847  1.1  christos      * This is done to cover for cases when user reuses the
    848  1.1  christos      * context for new key. The catch is that if we don't do
    849  1.1  christos      * this, padlock_eas_cipher might proceed with old key...
    850  1.1  christos      */
    851  1.1  christos     padlock_reload_key();
    852  1.1  christos 
    853  1.1  christos     return 1;
    854  1.1  christos }
    855  1.1  christos 
    856  1.1  christos /*-
    857  1.1  christos  * Simplified version of padlock_aes_cipher() used when
    858  1.1  christos  * 1) both input and output buffers are at aligned addresses.
    859  1.1  christos  * or when
    860  1.1  christos  * 2) running on a newer CPU that doesn't require aligned buffers.
    861  1.1  christos  */
    862  1.1  christos static int
    863  1.1  christos padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
    864  1.1  christos                               const unsigned char *in_arg, size_t nbytes)
    865  1.1  christos {
    866  1.1  christos     struct padlock_cipher_data *cdata;
    867  1.1  christos     void *iv;
    868  1.1  christos 
    869  1.1  christos     cdata = ALIGNED_CIPHER_DATA(ctx);
    870  1.1  christos     padlock_verify_context(cdata);
    871  1.1  christos 
    872  1.1  christos     switch (EVP_CIPHER_CTX_mode(ctx)) {
    873  1.1  christos     case EVP_CIPH_ECB_MODE:
    874  1.1  christos         padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
    875  1.1  christos         break;
    876  1.1  christos 
    877  1.1  christos     case EVP_CIPH_CBC_MODE:
    878  1.1  christos         memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
    879  1.1  christos         iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
    880  1.1  christos                                 in_arg);
    881  1.1  christos         memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
    882  1.1  christos         break;
    883  1.1  christos 
    884  1.1  christos     case EVP_CIPH_CFB_MODE:
    885  1.1  christos         memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
    886  1.1  christos         iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
    887  1.1  christos                                 in_arg);
    888  1.1  christos         memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
    889  1.1  christos         break;
    890  1.1  christos 
    891  1.1  christos     case EVP_CIPH_OFB_MODE:
    892  1.1  christos         memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
    893  1.1  christos         padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
    894  1.1  christos         memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
    895  1.1  christos         break;
    896  1.1  christos 
    897  1.1  christos     default:
    898  1.1  christos         return 0;
    899  1.1  christos     }
    900  1.1  christos 
    901  1.1  christos     memset(cdata->iv, 0, AES_BLOCK_SIZE);
    902  1.1  christos 
    903  1.1  christos     return 1;
    904  1.1  christos }
    905  1.1  christos 
     906  1.1  christos #    ifndef  PADLOCK_CHUNK
     907  1.1  christos #     define PADLOCK_CHUNK  512 /* Must be a power of 2, at least 16 */
     908  1.1  christos #    endif
                        /* Reject values below 16 or with more than one bit set. */
     909  1.1  christos #    if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
     910  1.1  christos #     error "insane PADLOCK_CHUNK..."
     911  1.1  christos #    endif
    912  1.1  christos 
    913  1.1  christos /*
    914  1.1  christos  * Re-align the arguments to 16-Bytes boundaries and run the encryption
    915  1.1  christos  * function itself. This function is not AES-specific.
    916  1.1  christos  */
    917  1.1  christos static int
    918  1.1  christos padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
    919  1.1  christos                    const unsigned char *in_arg, size_t nbytes)
    920  1.1  christos {
    921  1.1  christos     struct padlock_cipher_data *cdata;
    922  1.1  christos     const void *inp;
    923  1.1  christos     unsigned char *out, *tofree;
    924  1.1  christos     void *iv;
    925  1.1  christos     int inp_misaligned, out_misaligned, realign_in_loop;
    926  1.1  christos     size_t chunk, allocated = 0;
    927  1.1  christos 
    928  1.1  christos     /*
    929  1.1  christos      * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
    930  1.1  christos      */
    931  1.1  christos     if ((chunk = ctx->num)) {   /* borrow chunk variable */
    932  1.1  christos         unsigned char *ivp = ctx->iv;
    933  1.1  christos 
    934  1.1  christos         switch (EVP_CIPHER_CTX_mode(ctx)) {
    935  1.1  christos         case EVP_CIPH_CFB_MODE:
    936  1.1  christos             if (chunk >= AES_BLOCK_SIZE)
    937  1.1  christos                 return 0;       /* bogus value */
    938  1.1  christos 
    939  1.1  christos             if (ctx->encrypt)
    940  1.1  christos                 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
    941  1.1  christos                     ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
    942  1.1  christos                     chunk++, nbytes--;
    943  1.1  christos             } else
    944  1.1  christos                 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
    945  1.1  christos                     unsigned char c = *(in_arg++);
    946  1.1  christos                     *(out_arg++) = c ^ ivp[chunk];
    947  1.1  christos                     ivp[chunk++] = c, nbytes--;
    948  1.1  christos                 }
    949  1.1  christos 
    950  1.1  christos             ctx->num = chunk % AES_BLOCK_SIZE;
    951  1.1  christos             break;
    952  1.1  christos         case EVP_CIPH_OFB_MODE:
    953  1.1  christos             if (chunk >= AES_BLOCK_SIZE)
    954  1.1  christos                 return 0;       /* bogus value */
    955  1.1  christos 
    956  1.1  christos             while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
    957  1.1  christos                 *(out_arg++) = *(in_arg++) ^ ivp[chunk];
    958  1.1  christos                 chunk++, nbytes--;
    959  1.1  christos             }
    960  1.1  christos 
    961  1.1  christos             ctx->num = chunk % AES_BLOCK_SIZE;
    962  1.1  christos             break;
    963  1.1  christos         }
    964  1.1  christos     }
    965  1.1  christos 
    966  1.1  christos     if (nbytes == 0)
    967  1.1  christos         return 1;
    968  1.1  christos #    if 0
    969  1.1  christos     if (nbytes % AES_BLOCK_SIZE)
    970  1.1  christos         return 0;               /* are we expected to do tail processing? */
    971  1.1  christos #    else
    972  1.1  christos     /*
    973  1.1  christos      * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
    974  1.1  christos      * arbitrary value in byte-oriented modes, such as CFB and OFB...
    975  1.1  christos      */
    976  1.1  christos #    endif
    977  1.1  christos 
    978  1.1  christos     /*
    979  1.1  christos      * VIA promises CPUs that won't require alignment in the future. For now
    980  1.1  christos      * padlock_aes_align_required is initialized to 1 and the condition is
    981  1.1  christos      * never met...
    982  1.1  christos      */
    983  1.1  christos     /*
    984  1.1  christos      * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
    985  1.1  christos      * performance penalties appear to be approximately same as for software
    986  1.1  christos      * alignment below or ~3x. They promise to improve it in the future, but
    987  1.1  christos      * for now we can just as well pretend that it can only handle aligned
    988  1.1  christos      * input...
    989  1.1  christos      */
    990  1.1  christos     if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
    991  1.1  christos         return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
    992  1.1  christos 
    993  1.1  christos     inp_misaligned = (((size_t)in_arg) & 0x0F);
    994  1.1  christos     out_misaligned = (((size_t)out_arg) & 0x0F);
    995  1.1  christos 
    996  1.1  christos     /*
    997  1.1  christos      * Note that even if output is aligned and input not, I still prefer to
    998  1.1  christos      * loop instead of copy the whole input and then encrypt in one stroke.
    999  1.1  christos      * This is done in order to improve L1 cache utilization...
   1000  1.1  christos      */
   1001  1.1  christos     realign_in_loop = out_misaligned | inp_misaligned;
   1002  1.1  christos 
   1003  1.1  christos     if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
   1004  1.1  christos         return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
   1005  1.1  christos 
   1006  1.1  christos     /* this takes one "if" out of the loops */
   1007  1.1  christos     chunk = nbytes;
   1008  1.1  christos     chunk %= PADLOCK_CHUNK;
   1009  1.1  christos     if (chunk == 0)
   1010  1.1  christos         chunk = PADLOCK_CHUNK;
   1011  1.1  christos 
   1012  1.1  christos     if (out_misaligned) {
   1013  1.1  christos         /* optmize for small input */
   1014  1.1  christos         allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
   1015  1.1  christos         tofree = malloc(0x10 + allocated);
   1016  1.1  christos         if (tofree == NULL)
   1017  1.1  christos             return 0;
   1018  1.1  christos         out = NEAREST_ALIGNED(tofree);
   1019  1.1  christos     } else {
   1020  1.1  christos         out = out_arg;
   1021  1.1  christos         tofree = NULL;
   1022  1.1  christos     }
   1023  1.1  christos 
   1024  1.1  christos     cdata = ALIGNED_CIPHER_DATA(ctx);
   1025  1.1  christos     padlock_verify_context(cdata);
   1026  1.1  christos 
   1027  1.1  christos     switch (EVP_CIPHER_CTX_mode(ctx)) {
   1028  1.1  christos     case EVP_CIPH_ECB_MODE:
   1029  1.1  christos         do {
   1030  1.1  christos             if (inp_misaligned)
   1031  1.1  christos                 inp = padlock_memcpy(out, in_arg, chunk);
   1032  1.1  christos             else
   1033  1.1  christos                 inp = in_arg;
   1034  1.1  christos             in_arg += chunk;
   1035  1.1  christos 
   1036  1.1  christos             padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
   1037  1.1  christos 
   1038  1.1  christos             if (out_misaligned)
   1039  1.1  christos                 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
   1040  1.1  christos             else
   1041  1.1  christos                 out = out_arg += chunk;
   1042  1.1  christos 
   1043  1.1  christos             nbytes -= chunk;
   1044  1.1  christos             chunk = PADLOCK_CHUNK;
   1045  1.1  christos         } while (nbytes);
   1046  1.1  christos         break;
   1047  1.1  christos 
   1048  1.1  christos     case EVP_CIPH_CBC_MODE:
   1049  1.1  christos         memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
   1050  1.1  christos         goto cbc_shortcut;
   1051  1.1  christos         do {
   1052  1.1  christos             if (iv != cdata->iv)
   1053  1.1  christos                 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
   1054  1.1  christos             chunk = PADLOCK_CHUNK;
   1055  1.1  christos  cbc_shortcut:                 /* optimize for small input */
   1056  1.1  christos             if (inp_misaligned)
   1057  1.1  christos                 inp = padlock_memcpy(out, in_arg, chunk);
   1058  1.1  christos             else
   1059  1.1  christos                 inp = in_arg;
   1060  1.1  christos             in_arg += chunk;
   1061  1.1  christos 
   1062  1.1  christos             iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);
   1063  1.1  christos 
   1064  1.1  christos             if (out_misaligned)
   1065  1.1  christos                 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
   1066  1.1  christos             else
   1067  1.1  christos                 out = out_arg += chunk;
   1068  1.1  christos 
   1069  1.1  christos         } while (nbytes -= chunk);
   1070  1.1  christos         memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
   1071  1.1  christos         break;
   1072  1.1  christos 
   1073  1.1  christos     case EVP_CIPH_CFB_MODE:
   1074  1.1  christos         memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
   1075  1.1  christos         chunk &= ~(AES_BLOCK_SIZE - 1);
   1076  1.1  christos         if (chunk)
   1077  1.1  christos             goto cfb_shortcut;
   1078  1.1  christos         else
   1079  1.1  christos             goto cfb_skiploop;
   1080  1.1  christos         do {
   1081  1.1  christos             if (iv != cdata->iv)
   1082  1.1  christos                 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
   1083  1.1  christos             chunk = PADLOCK_CHUNK;
   1084  1.1  christos  cfb_shortcut:                 /* optimize for small input */
   1085  1.1  christos             if (inp_misaligned)
   1086  1.1  christos                 inp = padlock_memcpy(out, in_arg, chunk);
   1087  1.1  christos             else
   1088  1.1  christos                 inp = in_arg;
   1089  1.1  christos             in_arg += chunk;
   1090  1.1  christos 
   1091  1.1  christos             iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
   1092  1.1  christos 
   1093  1.1  christos             if (out_misaligned)
   1094  1.1  christos                 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
   1095  1.1  christos             else
   1096  1.1  christos                 out = out_arg += chunk;
   1097  1.1  christos 
   1098  1.1  christos             nbytes -= chunk;
   1099  1.1  christos         } while (nbytes >= AES_BLOCK_SIZE);
   1100  1.1  christos 
   1101  1.1  christos  cfb_skiploop:
   1102  1.1  christos         if (nbytes) {
   1103  1.1  christos             unsigned char *ivp = cdata->iv;
   1104  1.1  christos 
   1105  1.1  christos             if (iv != ivp) {
   1106  1.1  christos                 memcpy(ivp, iv, AES_BLOCK_SIZE);
   1107  1.1  christos                 iv = ivp;
   1108  1.1  christos             }
   1109  1.1  christos             ctx->num = nbytes;
   1110  1.1  christos             if (cdata->cword.b.encdec) {
   1111  1.1  christos                 cdata->cword.b.encdec = 0;
   1112  1.1  christos                 padlock_reload_key();
   1113  1.1  christos                 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
   1114  1.1  christos                 cdata->cword.b.encdec = 1;
   1115  1.1  christos                 padlock_reload_key();
   1116  1.1  christos                 while (nbytes) {
   1117  1.1  christos                     unsigned char c = *(in_arg++);
   1118  1.1  christos                     *(out_arg++) = c ^ *ivp;
   1119  1.1  christos                     *(ivp++) = c, nbytes--;
   1120  1.1  christos                 }
   1121  1.1  christos             } else {
   1122  1.1  christos                 padlock_reload_key();
   1123  1.1  christos                 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
   1124  1.1  christos                 padlock_reload_key();
   1125  1.1  christos                 while (nbytes) {
   1126  1.1  christos                     *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
   1127  1.1  christos                     ivp++, nbytes--;
   1128  1.1  christos                 }
   1129  1.1  christos             }
   1130  1.1  christos         }
   1131  1.1  christos 
   1132  1.1  christos         memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
   1133  1.1  christos         break;
   1134  1.1  christos 
   1135  1.1  christos     case EVP_CIPH_OFB_MODE:
   1136  1.1  christos         memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
   1137  1.1  christos         chunk &= ~(AES_BLOCK_SIZE - 1);
   1138  1.1  christos         if (chunk)
   1139  1.1  christos             do {
   1140  1.1  christos                 if (inp_misaligned)
   1141  1.1  christos                     inp = padlock_memcpy(out, in_arg, chunk);
   1142  1.1  christos                 else
   1143  1.1  christos                     inp = in_arg;
   1144  1.1  christos                 in_arg += chunk;
   1145  1.1  christos 
   1146  1.1  christos                 padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
   1147  1.1  christos 
   1148  1.1  christos                 if (out_misaligned)
   1149  1.1  christos                     out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
   1150  1.1  christos                 else
   1151  1.1  christos                     out = out_arg += chunk;
   1152  1.1  christos 
   1153  1.1  christos                 nbytes -= chunk;
   1154  1.1  christos                 chunk = PADLOCK_CHUNK;
   1155  1.1  christos             } while (nbytes >= AES_BLOCK_SIZE);
   1156  1.1  christos 
   1157  1.1  christos         if (nbytes) {
   1158  1.1  christos             unsigned char *ivp = cdata->iv;
   1159  1.1  christos 
   1160  1.1  christos             ctx->num = nbytes;
   1161  1.1  christos             padlock_reload_key(); /* empirically found */
   1162  1.1  christos             padlock_xcrypt_ecb(1, cdata, ivp, ivp);
   1163  1.1  christos             padlock_reload_key(); /* empirically found */
   1164  1.1  christos             while (nbytes) {
   1165  1.1  christos                 *(out_arg++) = *(in_arg++) ^ *ivp;
   1166  1.1  christos                 ivp++, nbytes--;
   1167  1.1  christos             }
   1168  1.1  christos         }
   1169  1.1  christos 
   1170  1.1  christos         memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
   1171  1.1  christos         break;
   1172  1.1  christos 
   1173  1.1  christos     default:
   1174  1.1  christos         free(tofree);
   1175  1.1  christos         return 0;
   1176  1.1  christos     }
   1177  1.1  christos 
   1178  1.1  christos     /* Clean the realign buffer if it was used */
   1179  1.1  christos     if (out_misaligned) {
   1180  1.1  christos         volatile unsigned long *p = (void *)out;
   1181  1.1  christos         size_t n = allocated / sizeof(*p);
   1182  1.1  christos         while (n--)
   1183  1.1  christos             *p++ = 0;
   1184  1.1  christos     }
   1185  1.1  christos 
   1186  1.1  christos     memset(cdata->iv, 0, AES_BLOCK_SIZE);
   1187  1.1  christos     free(tofree);
   1188  1.1  christos 
   1189  1.1  christos     return 1;
   1190  1.1  christos }
   1191  1.1  christos 
   1192  1.1  christos #   endif                       /* OPENSSL_NO_AES */
   1193  1.1  christos 
/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. It neither complies with the recommendations
 * for VIA RNG usage in secure applications (posted at
 * http://www.via.com.tw/en/viac3/c3.jsp) nor provides meaningful error
 * control...
 */
/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG.
 *
 * Produces |count| bytes at |output| by calling padlock_xstore()
 * repeatedly.  While at least 8 bytes are still wanted, padlock_xstore()
 * is handed |output| directly and is expected to report 8 bytes per call;
 * the remaining tail (fewer than 8 bytes) is fetched one byte per call
 * through a local scratch word.
 *
 * Returns 1 once |count| bytes have been produced (also when |count| <= 0,
 * in which case padlock_xstore() is never called).  Returns 0 as soon as a
 * status word reports a disabled RNG, a failure flag, or an unexpected byte
 * count; in that case |output| may have been partially written.
 *
 * NOTE(review): the second argument to padlock_xstore() (0 in the bulk
 * loop, 3 in the tail loop) appears to select how many bytes a single call
 * delivers -- confirm against the padlock_xstore() definition.
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int eax, buf;

    /* Bulk phase: 8 bytes per successful call, straight into |output|. */
    while (count >= 8) {
        eax = padlock_xstore(output, 0);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        /* Low five bits of the status word: bytes delivered by this call. */
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 8)
            return 0;           /* fatal failure...  */
        output += 8;
        count -= 8;
    }

    /* Tail phase: one byte per successful call, via the scratch word. */
    while (count > 0) {
        eax = padlock_xstore(&buf, 3);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 1)
            return 0;           /* fatal failure...  */
        /* Only the low byte of the scratch word is handed to the caller. */
        *output++ = (unsigned char)buf;
        count--;
    }

    /*
     * Wipe the scratch word; the volatile access keeps the store from
     * being optimized away.  Only reached on the success path.
     */
    *(volatile unsigned int *)&buf = 0;

    return 1;
}
   1241  1.1  christos 
/*
 * Dummy but necessary function: fills the "rand status" slot of the
 * method table.  It performs no check and unconditionally returns 1.
 */
static int padlock_rand_status(void)
{
    return 1;
}
   1247  1.1  christos 
   1248  1.1  christos /* Prepare structure for registration */
   1249  1.1  christos static RAND_METHOD padlock_rand = {
   1250  1.1  christos     NULL,                       /* seed */
   1251  1.1  christos     padlock_rand_bytes,         /* bytes */
   1252  1.1  christos     NULL,                       /* cleanup */
   1253  1.1  christos     NULL,                       /* add */
   1254  1.1  christos     padlock_rand_bytes,         /* pseudorand */
   1255  1.1  christos     padlock_rand_status,        /* rand status */
   1256  1.1  christos };
   1257  1.1  christos 
   1258  1.1  christos #  else                         /* !COMPILE_HW_PADLOCK */
   1259  1.1  christos #   ifndef OPENSSL_NO_DYNAMIC_ENGINE
   1260  1.1  christos OPENSSL_EXPORT
   1261  1.1  christos     int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
   1262  1.1  christos OPENSSL_EXPORT
   1263  1.1  christos     int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
   1264  1.1  christos {
   1265  1.1  christos     return 0;
   1266  1.1  christos }
   1267  1.1  christos 
   1268  1.1  christos IMPLEMENT_DYNAMIC_CHECK_FN()
   1269  1.1  christos #   endif
   1270  1.1  christos #  endif                        /* COMPILE_HW_PADLOCK */
   1271  1.1  christos # endif                         /* !OPENSSL_NO_HW_PADLOCK */
   1272  1.1  christos #endif                          /* !OPENSSL_NO_HW */
   1273