1 /* $NetBSD: aes_ct_dec.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $ */ 2 3 /* 4 * Copyright (c) 2016 Thomas Pornin <pornin (at) bolet.org> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be 15 * included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 * SOFTWARE. 25 */ 26 27 #include <sys/cdefs.h> 28 __KERNEL_RCSID(1, "$NetBSD: aes_ct_dec.c,v 1.2 2020/06/29 23:36:59 riastradh Exp $"); 29 30 #include <sys/types.h> 31 32 #include <crypto/aes/aes_bear.h> 33 34 /* see inner.h */ 35 void 36 br_aes_ct_bitslice_invSbox(uint32_t *q) 37 { 38 /* 39 * AES S-box is: 40 * S(x) = A(I(x)) ^ 0x63 41 * where I() is inversion in GF(256), and A() is a linear 42 * transform (0 is formally defined to be its own inverse). 43 * Since inversion is an involution, the inverse S-box can be 44 * computed from the S-box as: 45 * iS(x) = B(S(B(x ^ 0x63)) ^ 0x63) 46 * where B() is the inverse of A(). Indeed, for any y in GF(256): 47 * iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y 48 * 49 * Note: we reuse the implementation of the forward S-box, 50 * instead of duplicating it here, so that total code size is 51 * lower. By merging the B() transforms into the S-box circuit 52 * we could make faster CBC decryption, but CBC decryption is 53 * already quite faster than CBC encryption because we can 54 * process two blocks in parallel. 55 */ 56 uint32_t q0, q1, q2, q3, q4, q5, q6, q7; 57 58 q0 = ~q[0]; 59 q1 = ~q[1]; 60 q2 = q[2]; 61 q3 = q[3]; 62 q4 = q[4]; 63 q5 = ~q[5]; 64 q6 = ~q[6]; 65 q7 = q[7]; 66 q[7] = q1 ^ q4 ^ q6; 67 q[6] = q0 ^ q3 ^ q5; 68 q[5] = q7 ^ q2 ^ q4; 69 q[4] = q6 ^ q1 ^ q3; 70 q[3] = q5 ^ q0 ^ q2; 71 q[2] = q4 ^ q7 ^ q1; 72 q[1] = q3 ^ q6 ^ q0; 73 q[0] = q2 ^ q5 ^ q7; 74 75 br_aes_ct_bitslice_Sbox(q); 76 77 q0 = ~q[0]; 78 q1 = ~q[1]; 79 q2 = q[2]; 80 q3 = q[3]; 81 q4 = q[4]; 82 q5 = ~q[5]; 83 q6 = ~q[6]; 84 q7 = q[7]; 85 q[7] = q1 ^ q4 ^ q6; 86 q[6] = q0 ^ q3 ^ q5; 87 q[5] = q7 ^ q2 ^ q4; 88 q[4] = q6 ^ q1 ^ q3; 89 q[3] = q5 ^ q0 ^ q2; 90 q[2] = q4 ^ q7 ^ q1; 91 q[1] = q3 ^ q6 ^ q0; 92 q[0] = q2 ^ q5 ^ q7; 93 } 94 95 static void 96 add_round_key(uint32_t *q, const uint32_t *sk) 97 { 98 int i; 99 100 for (i = 0; i < 8; i ++) { 101 q[i] ^= sk[i]; 102 } 103 } 104 105 static void 106 inv_shift_rows(uint32_t *q) 107 { 108 int i; 109 110 for (i = 0; i < 8; i ++) { 111 uint32_t x; 112 113 x = q[i]; 114 q[i] = (x & 0x000000FF) 115 | ((x & 0x00003F00) << 2) | ((x & 0x0000C000) >> 6) 116 | ((x & 0x000F0000) << 4) | ((x & 0x00F00000) >> 4) 117 | ((x & 0x03000000) << 6) | ((x & 0xFC000000) >> 2); 118 } 119 } 120 121 static inline uint32_t 122 rotr16(uint32_t x) 123 { 124 return (x << 16) | (x >> 16); 125 } 126 127 static void 128 inv_mix_columns(uint32_t *q) 129 { 130 uint32_t q0, q1, q2, q3, q4, q5, q6, q7; 131 uint32_t r0, r1, r2, r3, r4, r5, r6, r7; 132 133 q0 = q[0]; 134 q1 = q[1]; 135 q2 = q[2]; 136 q3 = q[3]; 137 q4 = q[4]; 138 q5 = q[5]; 139 q6 = q[6]; 140 q7 = q[7]; 141 r0 = (q0 >> 8) | (q0 << 24); 142 r1 = (q1 >> 8) | (q1 << 24); 143 r2 = (q2 >> 8) | (q2 << 24); 144 r3 = (q3 >> 8) | (q3 << 24); 145 r4 = (q4 >> 8) | (q4 << 24); 146 r5 = (q5 >> 8) | (q5 << 24); 147 r6 = (q6 >> 8) | (q6 << 24); 148 r7 = (q7 >> 8) | (q7 << 24); 149 150 q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr16(q0 ^ q5 ^ q6 ^ r0 ^ r5); 151 q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6); 152 q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr16(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7); 153 q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr16(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7); 154 q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6); 155 q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7); 156 q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr16(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7); 157 q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr16(q4 ^ q5 ^ q7 ^ r4 ^ r7); 158 } 159 160 /* see inner.h */ 161 void 162 br_aes_ct_bitslice_decrypt(unsigned num_rounds, 163 const uint32_t *skey, uint32_t *q) 164 { 165 unsigned u; 166 167 add_round_key(q, skey + (num_rounds << 3)); 168 for (u = num_rounds - 1; u > 0; u --) { 169 inv_shift_rows(q); 170 br_aes_ct_bitslice_invSbox(q); 171 add_round_key(q, skey + (u << 3)); 172 inv_mix_columns(q); 173 } 174 inv_shift_rows(q); 175 br_aes_ct_bitslice_invSbox(q); 176 add_round_key(q, skey); 177 } 178 179 /* NetBSD addition, for generating compatible decryption keys */ 180 void 181 br_aes_ct_inv_mix_columns(uint32_t *q) 182 { 183 184 inv_mix_columns(q); 185 } 186