/*	$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <aarch64/asm.h>

RCSID("$NetBSD: aes_armv8_64.S,v 1.15 2020/09/08 23:58:09 riastradh Exp $")

	.arch_extension	aes

/*
 * uint32_t rcon[10]
 *
 *	Table mapping n ---> x^n mod (x^8 + x^4 + x^3 + x + 1) in GF(2).
 *	Such elements of GF(2^8) need only eight bits to be represented,
 *	but we store them in 4-byte units so we can copy one into all
 *	four 4-byte lanes of a vector register with a single LD1R.  The
 *	access pattern is fixed, so indices into this table are never
 *	secret.
 */
	.section .rodata
	.p2align 2
	.type	rcon,@object
rcon:
	.long	0x01
	.long	0x02
	.long	0x04
	.long	0x08
	.long	0x10
	.long	0x20
	.long	0x40
	.long	0x80
	.long	0x1b
	.long	0x36
END(rcon)

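/*
 * Reference for the rcon table above: each entry is the previous
 * entry doubled in GF(2^8).  A hedged C sketch (not part of the
 * build) that regenerates the table:
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint8_t x = 1;
 *
 *		for (int n = 0; n < 10; n++) {
 *			printf(".long 0x%02x\n", x);
 *			// multiply by x; reduce by
 *			// x^8 = x^4 + x^3 + x + 1 (0x1b) on carry out
 *			x = (x << 1) ^ ((x & 0x80) ? 0x1b : 0);
 *		}
 *		return 0;
 *	}
 */
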
/*
 * uint128_t unshiftrows_rotword_1
 *
 *	Table for TBL instruction to undo ShiftRows, and then do
 *	RotWord on word 1, and then copy it into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_rotword_1,@object
unshiftrows_rotword_1:
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
END(unshiftrows_rotword_1)

/*
 * uint128_t unshiftrows_3
 *
 *	Table for TBL instruction to undo ShiftRows, and then copy word
 *	3 into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_3,@object
unshiftrows_3:
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
END(unshiftrows_3)

/*
 * uint128_t unshiftrows_rotword_3
 *
 *	Table for TBL instruction to undo ShiftRows, and then do
 *	RotWord on word 3, and then copy it into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_rotword_3,@object
unshiftrows_rotword_3:
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
END(unshiftrows_rotword_3)

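/*
 * Hedged C sketch (not part of the build) deriving the three TBL
 * index tables above.  AESE leaves the state in ShiftRows order: the
 * byte of column c, row r sits at index 4*((c - r) mod 4) + r.
 * Selecting a word back out of that order, optionally with RotWord,
 * gives the byte indices:
 *
 *	#include <stdio.h>
 *
 *	// print the TBL indices extracting word `col', rotated if asked
 *	static void
 *	gentab(unsigned col, int rotword)
 *	{
 *		for (unsigned i = 0; i < 4; i++) {
 *			unsigned r = rotword ? (i + 1) % 4 : i;
 *			printf("0x%02x,", 4*((col - r) & 3) + r);
 *		}
 *		printf("\n");
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		gentab(1, 1);	// unshiftrows_rotword_1: 01 0e 0b 04
 *		gentab(3, 0);	// unshiftrows_3:         0c 09 06 03
 *		gentab(3, 1);	// unshiftrows_rotword_3: 09 06 03 0c
 *		return 0;
 *	}
 */
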
/*
 * aesarmv8_setenckey128(struct aesenc *enckey@x0, const uint8_t key[16] @x1)
 *
 *	Expand a 16-byte AES-128 key into 11 round keys for 10 rounds.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey128)
	ld1	{v1.16b}, [x1]	/* q1 := master key */

	adrl	x4, unshiftrows_rotword_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 table */

	str	q1, [x0], #0x10	/* store master key as first round key */
	mov	x2, #10		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (prk[0], prk[1], prk[2], prk[3])
	 * x0 = pointer to round key to compute
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q1)) */
	mov	v3.16b, v1.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v1.4s := (rk[0], rk[1], rk[2], rk[3]) */
	eor	v1.16b, v1.16b, v3.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #1	/* count down rounds */
	str	q1, [x0], #0x10	/* store round key */
	b.ne	1b

	ret
END(aesarmv8_setenckey128)

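/*
 * Hedged C sketch (not part of the build) of the key-schedule step
 * the loop above computes; aes_sbox[] is an assumed-given S-box
 * table, and prk/rk are the previous/next 16-byte round keys:
 *
 *	#include <stdint.h>
 *
 *	extern const uint8_t aes_sbox[256];	// assumed given
 *
 *	static void
 *	expand128_step(uint8_t rk[16], const uint8_t prk[16], uint8_t rcon)
 *	{
 *		// word 0: prk word 0 ^ RotWord(SubWord(prk word 3)) ^ rcon
 *		rk[0] = prk[0] ^ aes_sbox[prk[13]] ^ rcon;
 *		rk[1] = prk[1] ^ aes_sbox[prk[14]];
 *		rk[2] = prk[2] ^ aes_sbox[prk[15]];
 *		rk[3] = prk[3] ^ aes_sbox[prk[12]];
 *		// words 1-3 chain off the previous word
 *		for (unsigned i = 4; i < 16; i++)
 *			rk[i] = prk[i] ^ rk[i - 4];
 *	}
 */
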
/*
 * aesarmv8_setenckey192(struct aesenc *enckey@x0, const uint8_t key[24] @x1)
 *
 *	Expand a 24-byte AES-192 key into 13 round keys for 12 rounds.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey192)
	ld1	{v1.16b}, [x1], #0x10	/* q1 := master key[0:128) */
	ld1	{v2.8b}, [x1]	/* d2 := master key[128:192) */

	adrl	x4, unshiftrows_rotword_1
	adrl	x5, unshiftrows_rotword_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_1 */
	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_rotword_3 */

	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
	mov	x2, #12		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (prk[0], prk[1], prk[2], prk[3])
	 * v2.4s = (rklo[0], rklo[1], xxx, xxx)
	 * x0 = pointer to three round keys to compute
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q2)) */
	mov	v3.16b, v2.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(rklo[1])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * We need to compute:
	 *
	 * rk[0] := rklo[0]
	 * rk[1] := rklo[1]
	 * rk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0]
	 * rk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1]
	 * nrk[0] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3]
	 * nrk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 * nrk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 *     ^ rklo[1]
	 */

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v5.4s := (rk[2], rk[3], nrk[0], nrk[1]) */
	eor	v5.16b, v5.16b, v1.16b
	eor	v5.16b, v5.16b, v3.16b
	eor	v5.16b, v5.16b, v6.16b
	eor	v5.16b, v5.16b, v7.16b

	/*
	 * At this point, rk is split across v2.4s = (rk[0],rk[1],...)
	 * and v5.4s = (rk[2],rk[3],...); nrk is in v5.4s =
	 * (...,nrk[0],nrk[1]); and we have yet to compute nrk[2] or
	 * nrk[3], which requires rklo[0] and rklo[1] in v2.4s =
	 * (rklo[0],rklo[1],...).
	 */

	/* v1.4s := (nrk[0], nrk[1], nrk[1], nrk[1]) */
	dup	v1.4s, v5.s[3]
	mov	v1.s[0], v5.s[2]

	/*
	 * v6.4s := (0, 0, rklo[0], rklo[1])
	 * v7.4s := (0, 0, 0, rklo[0])
	 */
	ext	v6.16b, v0.16b, v2.16b, #8
	ext	v7.16b, v0.16b, v2.16b, #4

	/* v3.4s := (nrk[0], nrk[1], nrk[2], nrk[3]) */
	eor	v3.16b, v1.16b, v6.16b
	eor	v3.16b, v3.16b, v7.16b

	/*
	 * Recall v2.4s = (rk[0], rk[1], xxx, xxx)
	 * and v5.4s = (rk[2], rk[3], xxx, xxx).  Set
	 * v2.4s := (rk[0], rk[1], rk[2], rk[3])
	 */
	mov	v2.d[1], v5.d[0]

	/* store two round keys */
	stp	q2, q3, [x0], #0x20

	/*
	 * Live vector registers at this point:
	 *
	 * q0 = zero
	 * q2 = rk
	 * q3 = nrk
	 * v5.4s = (rk[2], rk[3], nrk[0], nrk[1])
	 * q16 = unshiftrows_rotword_1
	 * q17 = unshiftrows_rotword_3
	 *
	 * We have to compute, in q1:
	 *
	 * nnrk[0] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2]
	 * nnrk[1] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3]
	 * nnrk[2] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 * nnrk[3] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1]
	 *
	 * And, if there's any more afterward, in q2:
	 *
	 * nnnrklo[0] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1] ^ nrk[2]
	 * nnnrklo[1] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1] ^ nrk[2] ^ nrk[3]
	 */

	/* q1 := ShiftRows(SubBytes(q3)) */
	mov	v1.16b, v3.16b
	aese	v1.16b, v0.16b

	/* v1.4s[i] := RotWords(SubBytes(nrk[3])) ^ RCON' */
	ld1r	{v4.4s}, [x3], #4
	tbl	v1.16b, {v1.16b}, v17.16b
	eor	v1.16b, v1.16b, v4.16b

	/*
	 * v5.4s := (rk[2], rk[3], nrk[0], nrk[1])	[already]
	 * v4.4s := (0, rk[2], rk[3], nrk[0])
	 * v6.4s := (0, 0, rk[2], rk[3])
	 * v7.4s := (0, 0, 0, rk[2])
	 */
	ext	v4.16b, v0.16b, v5.16b, #12
	ext	v6.16b, v0.16b, v5.16b, #8
	ext	v7.16b, v0.16b, v5.16b, #4

	/* v1.4s := (nnrk[0], nnrk[1], nnrk[2], nnrk[3]) */
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v4.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #3	/* count down three rounds */
	str	q1, [x0], #0x10	/* store third round key */
	b.eq	2f

	/*
	 * v4.4s := (nrk[2], nrk[3], xxx, xxx)
	 * v5.4s := (0, nrk[2], xxx, xxx)
	 */
	ext	v4.16b, v3.16b, v0.16b, #8
	ext	v5.16b, v0.16b, v4.16b, #12

	/* v2.4s := (nnrk[3], nnrk[3], xxx, xxx) */
	dup	v2.4s, v1.s[3]

	/*
	 * v2.4s := (nnnrklo[0] = nnrk[3] ^ nrk[2],
	 *     nnnrklo[1] = nnrk[3] ^ nrk[2] ^ nrk[3],
	 *     xxx, xxx)
	 */
	eor	v2.16b, v2.16b, v4.16b
	eor	v2.16b, v2.16b, v5.16b

	b	1b

2:	ret
END(aesarmv8_setenckey192)

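/*
 * Hedged C sketch (not part of the build) of the FIPS 197 word
 * recurrence (Nk = 6) that the three-round-keys-at-a-time loop above
 * implements; aes_sbox[] is an assumed-given S-box table:
 *
 *	#include <stdint.h>
 *
 *	extern const uint8_t aes_sbox[256];	// assumed given
 *
 *	// Expand a 24-byte key into 52 four-byte words (13 round keys).
 *	static void
 *	expand192(uint8_t w[52][4], const uint8_t key[24])
 *	{
 *		uint8_t rc = 0x01;
 *
 *		for (unsigned i = 0; i < 6; i++)
 *			for (unsigned b = 0; b < 4; b++)
 *				w[i][b] = key[4*i + b];
 *		for (unsigned i = 6; i < 52; i++) {
 *			if (i % 6 == 0) {	// SubWord(RotWord(.)) ^ rcon
 *				for (unsigned b = 0; b < 4; b++)
 *					w[i][b] = aes_sbox[w[i-1][(b + 1) % 4]];
 *				w[i][0] ^= rc;
 *				rc = (rc << 1) ^ ((rc & 0x80) ? 0x1b : 0);
 *			} else {
 *				for (unsigned b = 0; b < 4; b++)
 *					w[i][b] = w[i-1][b];
 *			}
 *			for (unsigned b = 0; b < 4; b++)
 *				w[i][b] ^= w[i-6][b];
 *		}
 *	}
 */
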
/*
 * aesarmv8_setenckey256(struct aesenc *enckey@x0, const uint8_t key[32] @x1)
 *
 *	Expand a 32-byte AES-256 key into 15 round keys for 14 rounds.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey256)
	/* q1 := key[0:128), q2 := key[128:256) */
	ld1	{v1.16b-v2.16b}, [x1], #0x20

	adrl	x4, unshiftrows_rotword_3
	adrl	x5, unshiftrows_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 */
	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_3 */

	/* store master key as first two round keys */
	stp	q1, q2, [x0], #0x20
	mov	x2, #14		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (pprk[0], pprk[1], pprk[2], pprk[3])
	 * v2.4s = (prk[0], prk[1], prk[2], prk[3])
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q2)) */
	mov	v3.16b, v2.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * v5.4s := (0,pprk[0],pprk[1],pprk[2])
	 * v6.4s := (0,0,pprk[0],pprk[1])
	 * v7.4s := (0,0,0,pprk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v1.4s := (rk[0], rk[1], rk[2], rk[3]) */
	eor	v1.16b, v1.16b, v3.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #2	/* count down two rounds */
	b.eq	2f		/* stop if this is the last one */

	/* q3 := ShiftRows(SubBytes(q1)) */
	mov	v3.16b, v1.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := SubBytes(rk[3]) */
	tbl	v3.16b, {v3.16b}, v17.16b

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v2.16b, #12
	ext	v6.16b, v0.16b, v2.16b, #8
	ext	v7.16b, v0.16b, v2.16b, #4

	/* v2.4s := (nrk[0], nrk[1], nrk[2], nrk[3]) */
	eor	v2.16b, v2.16b, v3.16b
	eor	v2.16b, v2.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	eor	v2.16b, v2.16b, v7.16b

	stp	q1, q2, [x0], #0x20	/* store two round keys */
	b	1b

2:	str	q1, [x0]	/* store last round key */
	ret
END(aesarmv8_setenckey256)

/*
 * aesarmv8_enctodec(const struct aesenc *enckey@x0, struct aesdec *deckey@x1,
 *     uint32_t nrounds@x2)
 *
 *	Convert AES encryption round keys to AES decryption round keys.
 *	`nrounds' must be between 10 and 14.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_enctodec)
	ldr	q0, [x0, x2, lsl #4]	/* load last round key */
	b	2f
	_ALIGN_TEXT
1:	aesimc	v0.16b, v0.16b	/* convert encryption to decryption */
2:	str	q0, [x1], #0x10	/* store round key */
	subs	x2, x2, #1	/* count down round */
	ldr	q0, [x0, x2, lsl #4]	/* load previous round key */
	b.ne	1b		/* repeat if there's more */
	str	q0, [x1]	/* store first round key verbatim */
	ret
END(aesarmv8_enctodec)

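/*
 * Hedged NEON-intrinsics sketch (not part of the build) of the
 * conversion above: for the AESD/AESIMC data path, the decryption
 * round keys are the encryption round keys in reverse order, with
 * InvMixColumns applied to all but the outermost two:
 *
 *	#include <arm_neon.h>
 *
 *	static void
 *	enctodec(const uint8x16_t *enc, uint8x16_t *dec, unsigned nrounds)
 *	{
 *		dec[0] = enc[nrounds];		// last round key, verbatim
 *		for (unsigned i = 1; i < nrounds; i++)
 *			dec[i] = vaesimcq_u8(enc[nrounds - i]);
 *		dec[nrounds] = enc[0];		// first round key, verbatim
 *	}
 */
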
/*
 * aesarmv8_enc(const struct aesenc *enckey@x0, const uint8_t in[16] @x1,
 *     uint8_t out[16] @x2, uint32_t nrounds@x3)
 *
 *	Encrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_enc)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v0.16b}, [x1]	/* q0 := ptxt */
	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
	st1	{v0.16b}, [x2]	/* store ctxt */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_enc)

/*
 * aesarmv8_dec(const struct aesdec *deckey@x0, const uint8_t in[16] @x1,
 *     uint8_t out[16] @x2, uint32_t nrounds@x3)
 *
 *	Decrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_dec)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v0.16b}, [x1]	/* q0 := ctxt */
	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
	st1	{v0.16b}, [x2]	/* store ptxt */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_dec)

/*
 * aesarmv8_cbc_enc(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be an integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_enc)
	cbz	x3, 2f		/* stop if nothing to do */
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v0.16b}, [x4]	/* q0 := chaining value */
	_ALIGN_TEXT
1:	ld1	{v1.16b}, [x1], #0x10	/* q1 := plaintext block */
	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
	subs	x10, x10, #0x10	/* count down nbytes */
	st1	{v0.16b}, [x2], #0x10	/* store ciphertext block */
	b.ne	1b		/* repeat if x10 is nonzero */
	st1	{v0.16b}, [x4]	/* store chaining value */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
2:	ret
END(aesarmv8_cbc_enc)

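/*
 * Hedged C sketch (not part of the build) of the CBC chaining the
 * loop above implements; aes_enc1() is a hypothetical stand-in for
 * aesarmv8_enc1:
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	void aes_enc1(const void *, const uint8_t[16], uint8_t[16],
 *	    uint32_t);	// assumed given
 *
 *	static void
 *	cbc_enc(const void *key, const uint8_t *in, uint8_t *out,
 *	    size_t nbytes, uint8_t iv[16], uint32_t nrounds)
 *	{
 *		uint8_t cv[16];
 *
 *		memcpy(cv, iv, 16);
 *		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *			for (unsigned i = 0; i < 16; i++)
 *				cv[i] ^= in[i];			// cv ^ ptxt
 *			aes_enc1(key, cv, cv, nrounds);		// cv := ctxt
 *			memcpy(out, cv, 16);
 *		}
 *		memcpy(iv, cv, 16);	// hand the chaining value back
 *	}
 */
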
/*
 * aesarmv8_cbc_dec1(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, const uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_cbc_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v24.16b}, [x4]	/* q24 := iv */
	mov	x9, x0		/* x9 := deckey */
	mov	x10, x3		/* x10 := nbytes */
	add	x1, x1, x3	/* x1 := pointer past end of in */
	add	x2, x2, x3	/* x2 := pointer past end of out */
	sub	x1, x1, #0x10
	ld1	{v0.16b}, [x1]	/* q0 := last ciphertext block */
	st1	{v0.16b}, [x4]	/* update iv */
	b	2f
	_ALIGN_TEXT
1:	sub	x1, x1, #0x10
	ld1	{v31.16b}, [x1]	/* q31 := chaining value */
	sub	x2, x2, #0x10
	eor	v0.16b, v0.16b, v31.16b	/* q0 := plaintext block */
	st1	{v0.16b}, [x2]	/* store plaintext block */
	mov	v0.16b, v31.16b	/* move cv = ciphertext block */
2:	mov	x0, x9		/* x0 := deckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_dec1	/* q0 := cv ^ ptxt; trash x0/x3/q16 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b		/* repeat if more blocks */
	eor	v0.16b, v0.16b, v24.16b	/* q0 := first plaintext block */
	sub	x2, x2, #0x10	/* store first plaintext block */
	st1	{v0.16b}, [x2]
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbc_dec1)

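/*
 * Hedged C sketch (not part of the build) of the back-to-front order
 * used above: decrypting from the last block toward the first lets
 * the loop reuse each ciphertext block as the next chaining value
 * without reloading it; aes_dec1() is a hypothetical stand-in for
 * aesarmv8_dec1:
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	void aes_dec1(const void *, const uint8_t[16], uint8_t[16],
 *	    uint32_t);	// assumed given
 *
 *	static void
 *	cbc_dec(const void *key, const uint8_t *in, uint8_t *out,
 *	    size_t nbytes, uint8_t iv[16], uint32_t nrounds)
 *	{
 *		size_t i = nbytes - 16;
 *		uint8_t tmp[16];
 *
 *		memcpy(tmp, in + i, 16);	// last ctxt block: next iv
 *		for (;; i -= 16) {
 *			aes_dec1(key, in + i, out + i, nrounds);
 *			if (i == 0)
 *				break;
 *			for (unsigned b = 0; b < 16; b++)
 *				out[i + b] ^= in[i - 16 + b];
 *		}
 *		for (unsigned b = 0; b < 16; b++)
 *			out[b] ^= iv[b];
 *		memcpy(iv, tmp, 16);
 *	}
 */
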
/*
 * aesarmv8_cbc_dec8(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, const uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of 8-block units with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_dec8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v24.16b}, [x4]	/* q24 := iv */
	mov	x9, x0		/* x9 := deckey */
	mov	x10, x3		/* x10 := nbytes */
	add	x1, x1, x3	/* x1 := pointer past end of in */
	add	x2, x2, x3	/* x2 := pointer past end of out */
	sub	x1, x1, #0x20
	ld1	{v6.16b, v7.16b}, [x1]	/* q6, q7 := last ciphertext blocks */
	st1	{v7.16b}, [x4]	/* update iv */
	b	2f
	_ALIGN_TEXT
1:	sub	x1, x1, #0x20
	ld1	{v6.16b, v7.16b}, [x1]
	eor	v0.16b, v0.16b, v7.16b	/* q0 := pt0 */
	sub	x2, x2, #0x20
	st1	{v0.16b, v1.16b}, [x2]
2:	sub	x1, x1, #0x20
	ld1	{v4.16b-v5.16b}, [x1]
	sub	x1, x1, #0x40
	ld1	{v0.16b-v3.16b}, [x1]

	mov	v31.16b, v6.16b	/* q[24+i] := cv[i], 0<i<8 */
	mov	v30.16b, v5.16b
	mov	v29.16b, v4.16b
	mov	v28.16b, v3.16b
	mov	v27.16b, v2.16b
	mov	v26.16b, v1.16b
	mov	v25.16b, v0.16b
	mov	x0, x9		/* x0 := deckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_dec8	/* q[i] := cv[i] ^ pt[i];
				 * trash x0/x3/q16 */
	eor	v7.16b, v7.16b, v31.16b	/* q[i] := pt[i] */
	eor	v6.16b, v6.16b, v30.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v1.16b, v1.16b, v25.16b
	subs	x10, x10, #0x80	/* count down nbytes */
	sub	x2, x2, #0x20	/* store plaintext blocks */
	st1	{v6.16b-v7.16b}, [x2]
	sub	x2, x2, #0x40
	st1	{v2.16b-v5.16b}, [x2]
	b.ne	1b		/* repeat if there's more */
	eor	v0.16b, v0.16b, v24.16b	/* q0 := pt0 */
	sub	x2, x2, #0x20
	st1	{v0.16b, v1.16b}, [x2]	/* store first two plaintext blocks */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbc_dec8)

/*
 * aesarmv8_xts_enc1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_xts_enc8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_enc1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v31.16b}, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	ld1	{v0.16b}, [x1], #0x10	/* q0 := ptxt */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := ptxt ^ tweak */
	bl	aesarmv8_enc1	/* q0 := AES(...); trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ptxt ^ tweak) ^ tweak */
	st1	{v0.16b}, [x2], #0x10	/* store ciphertext block */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b		/* repeat if more blocks */
	st1	{v31.16b}, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_enc1)

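/*
 * Hedged C sketch (not part of the build) of the per-block XTS step
 * above; aes_enc1() and xts_mulx() are hypothetical stand-ins for
 * aesarmv8_enc1 and aesarmv8_xts_mulx:
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void aes_enc1(const void *, const uint8_t[16], uint8_t[16],
 *	    uint32_t);			// assumed given
 *	void xts_mulx(uint8_t[16]);	// assumed given
 *
 *	static void
 *	xts_enc(const void *key, const uint8_t *in, uint8_t *out,
 *	    size_t nbytes, uint8_t tweak[16], uint32_t nrounds)
 *	{
 *		uint8_t b[16];
 *
 *		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *			for (unsigned i = 0; i < 16; i++)
 *				b[i] = in[i] ^ tweak[i];
 *			aes_enc1(key, b, b, nrounds);
 *			for (unsigned i = 0; i < 16; i++)
 *				out[i] = b[i] ^ tweak[i];
 *			xts_mulx(tweak);	// tweak *= x in GF(2^128)
 *		}
 *	}
 */
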
/*
 * aesarmv8_xts_enc8(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_enc8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v31.16b}, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
				/* q31 := tweak[7] */
	ld1	{v0.16b-v3.16b}, [x1], #0x40	/* q[i] := ptxt[i] */
	ld1	{v4.16b-v7.16b}, [x1], #0x40
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ptxt[i] ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_enc8	/* encrypt q0-q7; trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	st1	{v0.16b-v3.16b}, [x2], #0x40	/* store ciphertext blocks */
	st1	{v4.16b-v7.16b}, [x2], #0x40
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x80	/* count down nbytes */
	b.ne	1b		/* repeat if more block groups */
	st1	{v31.16b}, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_enc8)

/*
 * aesarmv8_xts_dec1(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_xts_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := deckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v31.16b}, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	ld1	{v0.16b}, [x1], #0x10	/* q0 := ctxt */
	mov	x0, x9		/* x0 := deckey */
	mov	x3, x5		/* x3 := nrounds */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := ctxt ^ tweak */
	bl	aesarmv8_dec1	/* q0 := AES(...); trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ctxt ^ tweak) ^ tweak */
	st1	{v0.16b}, [x2], #0x10	/* store plaintext block */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b		/* repeat if more blocks */
	st1	{v31.16b}, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_dec1)

/*
 * aesarmv8_xts_dec8(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_dec8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := deckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v31.16b}, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
				/* q31 := tweak[7] */
	ld1	{v0.16b-v3.16b}, [x1], #0x40	/* q[i] := ctxt[i] */
	ld1	{v4.16b-v7.16b}, [x1], #0x40
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ctxt[i] ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	mov	x0, x9		/* x0 := deckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_dec8	/* decrypt q0-q7; trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	st1	{v0.16b-v3.16b}, [x2], #0x40	/* store plaintext blocks */
	st1	{v4.16b-v7.16b}, [x2], #0x40
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x80	/* count down nbytes */
	b.ne	1b		/* repeat if more block groups */
	st1	{v31.16b}, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_dec8)

/*
 * aesarmv8_xts_mulx(tweak@q31)
 *
 *	Multiply q31 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
 *	Uses x0 and q0/q1 as temporaries.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_xts_mulx,@function
aesarmv8_xts_mulx:
	/*
	 * Simultaneously determine
	 * (a) whether the high bit of the low half must be
	 *     shifted into the low bit of the high half, and
	 * (b) whether the high bit of the high half must be
	 *     carried into x^128 = x^7 + x^2 + x + 1.
	 */
	adrl	x0, xtscarry
	cmlt	v1.2d, v31.2d, #0	/* v1.2d[i] := -1 if v31.2d[i] < 0, else 0 */
	ld1	{v0.16b}, [x0]	/* q0 := xtscarry */
	ext	v1.16b, v1.16b, v1.16b, #8	/* swap halves of q1 */
	shl	v31.2d, v31.2d, #1	/* shift */
	and	v0.16b, v0.16b, v1.16b	/* copy xtscarry according to mask */
	eor	v31.16b, v31.16b, v0.16b	/* incorporate (a) and (b) */
	ret
END(aesarmv8_xts_mulx)

	.section .rodata
	.p2align 4
	.type	xtscarry,@object
xtscarry:
	.byte	0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
END(xtscarry)

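/*
 * Hedged C sketch (not part of the build) of the tweak update above,
 * with the 128-bit tweak as 16 little-endian bytes:
 *
 *	#include <stdint.h>
 *
 *	static void
 *	xts_mulx(uint8_t t[16])
 *	{
 *		unsigned carry = 0;
 *
 *		for (unsigned i = 0; i < 16; i++) {
 *			unsigned c = t[i] >> 7;
 *			t[i] = (t[i] << 1) | carry;	// shift in carry
 *			carry = c;
 *		}
 *		if (carry)
 *			t[0] ^= 0x87;	// x^128 = x^7 + x^2 + x + 1
 *	}
 */
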
/*
 * aesarmv8_xts_update(const uint8_t in[16] @x0, uint8_t out[16] @x1)
 *
 *	Update an AES-XTS tweak.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_update)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v31.16b}, [x0]	/* load tweak */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	st1	{v31.16b}, [x1]	/* store tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_update)

/*
 * aesarmv8_cbcmac_update1(const struct aesenc *enckey@x0,
 *     const uint8_t *in@x1, size_t nbytes@x2, uint8_t auth[16] @x3,
 *     uint32_t nrounds@x4)
 *
 *	Update CBC-MAC.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbcmac_update1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v0.16b}, [x3]	/* q0 := initial authenticator */
	mov	x9, x0		/* x9 := enckey */
	mov	x5, x3		/* x5 := &auth (enc1 trashes x3) */
	_ALIGN_TEXT
1:	ld1	{v1.16b}, [x1], #0x10	/* q1 := plaintext block */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x4		/* x3 := nrounds */
	eor	v0.16b, v0.16b, v1.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc1	/* q0 := auth'; trash x0/x3/q16 */
	subs	x2, x2, #0x10	/* count down nbytes */
	b.ne	1b		/* repeat if x2 is nonzero */
	st1	{v0.16b}, [x5]	/* store updated authenticator */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbcmac_update1)

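/*
 * Hedged C sketch (not part of the build) of the CBC-MAC absorb loop
 * above; aes_enc1() is a hypothetical stand-in for aesarmv8_enc1:
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void aes_enc1(const void *, const uint8_t[16], uint8_t[16],
 *	    uint32_t);	// assumed given
 *
 *	static void
 *	cbcmac_update(const void *key, const uint8_t *in, size_t nbytes,
 *	    uint8_t auth[16], uint32_t nrounds)
 *	{
 *		for (; nbytes; nbytes -= 16, in += 16) {
 *			for (unsigned i = 0; i < 16; i++)
 *				auth[i] ^= in[i];		// auth ^ ptxt
 *			aes_enc1(key, auth, auth, nrounds);	// auth'
 *		}
 *	}
 */
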
/*
 * aesarmv8_ccm_enc1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t authctr[32] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Update CCM encryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_ccm_enc1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v0.16b-v1.16b}, [x4]	/* q0 := auth, q1 := ctr (be) */
	adrl	x11, ctr32_inc	/* x11 := &ctr32_inc */
	ld1	{v5.4s}, [x11]	/* q5 := (0,0,0,1) (host-endian) */
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	rev32	v2.16b, v1.16b	/* q2 := ctr (host-endian) */
	_ALIGN_TEXT
1:	ld1	{v3.16b}, [x1], #0x10	/* q3 := plaintext block */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	rev32	v1.16b, v2.16b	/* q1 := ctr (big-endian) */
	eor	v0.16b, v0.16b, v3.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc2	/* q0 := auth', q1 := pad;
				 * trash x0/x3/q16 */
	eor	v3.16b, v1.16b, v3.16b	/* q3 := ciphertext block */
	subs	x10, x10, #0x10	/* count down bytes */
	st1	{v3.16b}, [x2], #0x10	/* store ciphertext block */
	b.ne	1b		/* repeat if more blocks */
	rev32	v1.16b, v2.16b	/* q1 := ctr (big-endian) */
	st1	{v0.16b-v1.16b}, [x4]	/* store updated auth/ctr */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_ccm_enc1)

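/*
 * Hedged C sketch (not part of the build) of one CCM encryption step
 * above.  The asm issues the CBC-MAC and CTR AES calls as a single
 * two-block aesarmv8_enc2 pass; aes_enc1() is a hypothetical
 * stand-in used serially here:
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void aes_enc1(const void *, const uint8_t[16], uint8_t[16],
 *	    uint32_t);	// assumed given
 *
 *	static void
 *	ccm_enc(const void *key, const uint8_t *in, uint8_t *out,
 *	    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
 *	{
 *		uint8_t *auth = authctr, *ctr = authctr + 16, pad[16];
 *
 *		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *			// 32-bit big-endian increment of the counter word
 *			for (int i = 15; i >= 12 && ++ctr[i] == 0; i--)
 *				;
 *			for (unsigned i = 0; i < 16; i++)
 *				auth[i] ^= in[i];
 *			aes_enc1(key, auth, auth, nrounds);	// CBC-MAC half
 *			aes_enc1(key, ctr, pad, nrounds);	// CTR half
 *			for (unsigned i = 0; i < 16; i++)
 *				out[i] = in[i] ^ pad[i];
 *		}
 *	}
 */
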
/*
 * aesarmv8_ccm_dec1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t authctr[32] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Update CCM decryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_ccm_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v1.16b, v2.16b}, [x4]	/* q1 := auth, q2 := ctr (be) */
	adrl	x11, ctr32_inc	/* x11 := &ctr32_inc */
	ld1	{v5.4s}, [x11]	/* q5 := (0,0,0,1) (host-endian) */
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	rev32	v2.16b, v2.16b	/* q2 := ctr (host-endian) */

	/* Decrypt the first block. */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x3, x5		/* x3 := nrounds */
	rev32	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
	ld1	{v3.16b}, [x1], #0x10	/* q3 := ctxt */
	bl	aesarmv8_enc1	/* q0 := pad; trash x0/x3/q16 */
	b	2f

	_ALIGN_TEXT
1:	/*
	 * Authenticate the last block and decrypt the next block
	 * simultaneously.
	 *
	 * q1 = auth ^ ptxt[-1]
	 * q2 = ctr[-1] (le)
	 */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	rev32	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
	ld1	{v3.16b}, [x1], #0x10	/* q3 := ctxt */
	bl	aesarmv8_enc2	/* q0 := pad, q1 := auth';
				 * trash x0/x3/q16 */
2:	eor	v3.16b, v0.16b, v3.16b	/* q3 := plaintext block */
	subs	x10, x10, #0x10
	st1	{v3.16b}, [x2], #0x10	/* store plaintext */
	eor	v1.16b, v1.16b, v3.16b	/* q1 := auth ^ ptxt */
	b.ne	1b

	rev32	v2.16b, v2.16b	/* q2 := ctr (big-endian) */

	/* Authenticate the last block. */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	mov	v0.16b, v1.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc1	/* q0 := auth'; trash x0/x3/q16 */

	mov	v1.16b, v2.16b	/* store updated auth/ctr */
	st1	{v0.16b-v1.16b}, [x4]
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_ccm_dec1)

	.section .rodata
	.p2align 4
	.type	ctr32_inc,@object
ctr32_inc:
	.int	0, 0, 0, 1
END(ctr32_inc)

/*
 * aesarmv8_enc1(const struct aesenc *enckey@x0,
 *     uint128_t block@q0, uint32_t nrounds@x3)
 *
 *	Encrypt a single AES block in q0.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc1,@function
aesarmv8_enc1:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q0 := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q0)))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	ldr	q16, [x0], #0x10
	subs	x3, x3, #1
	b.ne	1b
	/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
	aese	v0.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q0 := AddRoundKey_q16(q0) */
	eor	v0.16b, v0.16b, v16.16b
	ret
END(aesarmv8_enc1)

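/*
 * Hedged NEON-intrinsics sketch (not part of the build) of the round
 * structure above: AESE performs AddRoundKey, SubBytes, and
 * ShiftRows in one go, AESMC supplies MixColumns, and the final
 * AddRoundKey is a plain XOR with the last round key:
 *
 *	#include <arm_neon.h>
 *
 *	static uint8x16_t
 *	enc1(const uint8x16_t *rk, uint8x16_t q0, unsigned nrounds)
 *	{
 *		for (unsigned i = 0; i < nrounds - 1; i++) {
 *			q0 = vaeseq_u8(q0, rk[i]);	// AddRoundKey+Sub+Shift
 *			q0 = vaesmcq_u8(q0);		// MixColumns
 *		}
 *		q0 = vaeseq_u8(q0, rk[nrounds - 1]);	// last round: no MixColumns
 *		return veorq_u8(q0, rk[nrounds]);	// final AddRoundKey
 *	}
 */
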
/*
 * aesarmv8_enc2(const struct aesenc *enckey@x0,
 *     uint128_t block@q0, uint128_t block@q1, uint32_t nrounds@x3)
 *
 *	Encrypt two AES blocks in q0 and q1.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc2,@function
aesarmv8_enc2:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v16.16b
	aesmc	v1.16b, v1.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
	aese	v0.16b, v16.16b
	aese	v1.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	ret
END(aesarmv8_enc2)

/*
 * aesarmv8_enc8(const struct aesenc *enckey@x0,
 *     uint128_t block0@q0, ..., uint128_t block7@q7,
 *     uint32_t nrounds@x3)
 *
 *	Encrypt eight AES blocks in q0 through q7 in parallel.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc8,@function
aesarmv8_enc8:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v16.16b
	aesmc	v1.16b, v1.16b
	aese	v2.16b, v16.16b
	aesmc	v2.16b, v2.16b
	aese	v3.16b, v16.16b
	aesmc	v3.16b, v3.16b
	aese	v4.16b, v16.16b
	aesmc	v4.16b, v4.16b
	aese	v5.16b, v16.16b
	aesmc	v5.16b, v5.16b
	aese	v6.16b, v16.16b
	aesmc	v6.16b, v6.16b
	aese	v7.16b, v16.16b
	aesmc	v7.16b, v7.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
	aese	v0.16b, v16.16b
	aese	v1.16b, v16.16b
	aese	v2.16b, v16.16b
	aese	v3.16b, v16.16b
	aese	v4.16b, v16.16b
	aese	v5.16b, v16.16b
	aese	v6.16b, v16.16b
	aese	v7.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v3.16b, v3.16b, v16.16b
	eor	v4.16b, v4.16b, v16.16b
	eor	v5.16b, v5.16b, v16.16b
	eor	v6.16b, v6.16b, v16.16b
	eor	v7.16b, v7.16b, v16.16b
	ret
END(aesarmv8_enc8)

/*
 * aesarmv8_dec1(const struct aesdec *deckey@x0,
 *     uint128_t block@q0, uint32_t nrounds@x3)
 *
 *	Decrypt a single AES block in q0.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_dec1,@function
aesarmv8_dec1:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
	aesd	v0.16b, v16.16b
	/* q0 := InMixColumns(q0) */
	aesimc	v0.16b, v0.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
	aesd	v0.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q0 := AddRoundKey_q16(q0) */
	eor	v0.16b, v0.16b, v16.16b
	ret
END(aesarmv8_dec1)

/*
 * aesarmv8_dec8(const struct aesdec *deckey@x0,
 *     uint128_t block0@q0, ..., uint128_t block7@q7,
 *     uint32_t nrounds@x3)
 *
 *	Decrypt eight AES blocks in q0 through q7 in parallel.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_dec8,@function
aesarmv8_dec8:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
	aesd	v0.16b, v16.16b
	/* q[i] := InMixColumns(q[i]) */
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v16.16b
	aesimc	v1.16b, v1.16b
	aesd	v2.16b, v16.16b
	aesimc	v2.16b, v2.16b
	aesd	v3.16b, v16.16b
	aesimc	v3.16b, v3.16b
	aesd	v4.16b, v16.16b
	aesimc	v4.16b, v4.16b
	aesd	v5.16b, v16.16b
	aesimc	v5.16b, v5.16b
	aesd	v6.16b, v16.16b
	aesimc	v6.16b, v6.16b
	aesd	v7.16b, v16.16b
	aesimc	v7.16b, v7.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
	aesd	v0.16b, v16.16b
	aesd	v1.16b, v16.16b
	aesd	v2.16b, v16.16b
	aesd	v3.16b, v16.16b
	aesd	v4.16b, v16.16b
	aesd	v5.16b, v16.16b
	aesd	v6.16b, v16.16b
	aesd	v7.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v3.16b, v3.16b, v16.16b
	eor	v4.16b, v4.16b, v16.16b
	eor	v5.16b, v5.16b, v16.16b
	eor	v6.16b, v6.16b, v16.16b
	eor	v7.16b, v7.16b, v16.16b
	ret
END(aesarmv8_dec8)