/*	$NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/endian.h>

#include <aarch64/asm.h>

RCSID("$NetBSD: aes_armv8_64.S,v 1.11 2020/07/27 20:57:23 riastradh Exp $")

	.arch_extension	aes

/*
 * uint32_t rcon[10]
 *
 * Table mapping n ---> x^n mod (x^8 + x^4 + x^3 + x + 1) in GF(2).
 * Such elements of GF(2^8) need only eight bits to be represented,
 * but we store them in 4-byte units so we can copy one into all
 * four 4-byte lanes of a vector register with a single LD1R.  The
 * access pattern is fixed, so indices into this table are never
 * secret.
 */
	.section .rodata
	.p2align 2
	.type	rcon,@object
rcon:
	.long	0x01
	.long	0x02
	.long	0x04
	.long	0x08
	.long	0x10
	.long	0x20
	.long	0x40
	.long	0x80
	.long	0x1b
	.long	0x36
END(rcon)
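
/*
 * For reference, the table above is just successive doublings in
 * GF(2^8).  A minimal C sketch that would generate the same ten
 * values (xtime is a hypothetical helper, not part of this file):
 *
 *	// multiply by x modulo x^8 + x^4 + x^3 + x + 1
 *	static uint8_t xtime(uint8_t a)
 *	{
 *		return (a << 1) ^ ((a & 0x80) ? 0x1b : 0);
 *	}
 *
 *	static void gen_rcon(uint32_t rcon[10])
 *	{
 *		uint8_t x = 1;
 *		for (unsigned i = 0; i < 10; i++, x = xtime(x))
 *			rcon[i] = x;	// 01 02 04 08 10 20 40 80 1b 36
 *	}
 */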

/*
 * uint128_t unshiftrows_rotword_1
 *
 * Table for TBL instruction to undo ShiftRows, and then do
 * RotWord on word 1, and then copy it into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_rotword_1,@object
unshiftrows_rotword_1:
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
END(unshiftrows_rotword_1)

/*
 * uint128_t unshiftrows_3
 *
 * Table for TBL instruction to undo ShiftRows, and then copy word
 * 3 into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_3,@object
unshiftrows_3:
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
END(unshiftrows_3)

/*
 * uint128_t unshiftrows_rotword_3
 *
 * Table for TBL instruction to undo ShiftRows, and then do
 * RotWord on word 3, and then copy it into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_rotword_3,@object
unshiftrows_rotword_3:
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
END(unshiftrows_rotword_3)
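
/*
 * These tables can be derived mechanically.  ShiftRows sends the byte
 * in row r, column c (index 4*c + r) to column (c - r) mod 4, so byte
 * 4*c + r of the pre-ShiftRows state is found at index
 * 4*((c - r) & 3) + r afterward; RotWord just starts the word one row
 * later.  A C sketch (hypothetical, for illustration only) that
 * reproduces unshiftrows_rotword_1/3 above for word w:
 *
 *	static void gen_unshiftrows_rotword(unsigned w, uint8_t tab[16])
 *	{
 *		for (unsigned i = 0; i < 16; i++) {
 *			unsigned r = (i + 1) & 3;	// RotWord: begin at row 1
 *			tab[i] = 4*((w - r) & 3) + r;	// undo ShiftRows on word w
 *		}
 *	}
 *
 * Dropping the +1 in r yields unshiftrows_3 for w = 3.
 */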

/*
 * aesarmv8_setenckey128(struct aesenc *enckey@x0, const uint8_t key[16] @x1)
 *
 *	Expand a 16-byte AES-128 key into 10 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey128)
	ldr	q1, [x1]	/* q1 := master key */

	adrl	x4, unshiftrows_rotword_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 table */

	str	q1, [x0], #0x10	/* store master key as first round key */
	mov	x2, #10		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (prk[0], prk[1], prk[2], prk[3])
	 * x0 = pointer to round key to compute
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q1)) */
	mov	v3.16b, v1.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v1.4s := (rk[0], rk[1], rk[2], rk[3]) */
	eor	v1.16b, v1.16b, v3.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #1	/* count down rounds */
	str	q1, [x0], #0x10	/* store round key */
	b.ne	1b

	ret
END(aesarmv8_setenckey128)
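
/*
 * For comparison, the word-oriented AES-128 schedule from FIPS 197
 * that the loop above vectorizes, as a C sketch (SubWord and RotWord
 * are hypothetical helpers; rcon is the table above):
 *
 *	void expand128(uint32_t rk[44], const uint32_t key[4])
 *	{
 *		memcpy(rk, key, 16);
 *		for (unsigned i = 4; i < 44; i++) {
 *			uint32_t t = rk[i - 1];
 *			if (i % 4 == 0)
 *				t = SubWord(RotWord(t)) ^ rcon[i/4 - 1];
 *			rk[i] = rk[i - 4] ^ t;
 *		}
 *	}
 *
 * One iteration of the assembly loop computes four of these words at
 * once: AESE+TBL supplies SubWord(RotWord(prk[3])), and the EXT/EOR
 * chain realizes the running prefix XOR rk[j] = t ^ prk[0] ^ ... ^
 * prk[j] without lane-by-lane shuffles.
 */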

/*
 * aesarmv8_setenckey192(struct aesenc *enckey@x0, const uint8_t key[24] @x1)
 *
 *	Expand a 24-byte AES-192 key into 12 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey192)
	ldr	q1, [x1], #0x10	/* q1 := master key[0:128) */
	ldr	d2, [x1]	/* d2 := master key[128:192) */

	adrl	x4, unshiftrows_rotword_1
	adrl	x5, unshiftrows_rotword_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_1 */
	ldr	q17, [x5]	/* q17 := unshiftrows_rotword_3 */

	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
	mov	x2, #12		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (prk[0], prk[1], prk[2], prk[3])
	 * v2.4s = (rklo[0], rklo[1], xxx, xxx)
	 * x0 = pointer to three round keys to compute
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q2)) */
	mov	v3.16b, v2.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(rklo[1])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * We need to compute:
	 *
	 * rk[0] := rklo[0]
	 * rk[1] := rklo[1]
	 * rk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0]
	 * rk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1]
	 * nrk[0] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3]
	 * nrk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 * nrk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 *     ^ rklo[1]
	 */

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v5.4s := (rk[2], rk[3], nrk[0], nrk[1]) */
	eor	v5.16b, v5.16b, v1.16b
	eor	v5.16b, v5.16b, v3.16b
	eor	v5.16b, v5.16b, v6.16b
	eor	v5.16b, v5.16b, v7.16b

	/*
	 * At this point, rk is split across v2.4s = (rk[0],rk[1],...)
	 * and v5.4s = (rk[2],rk[3],...); nrk is in v5.4s =
	 * (...,nrk[0],nrk[1]); and we have yet to compute nrk[2] or
	 * nrk[3], which requires rklo[0] and rklo[1] in v2.4s =
	 * (rklo[0],rklo[1],...).
	 */

	/* v1.4s := (nrk[0], nrk[1], nrk[1], nrk[1]) */
	dup	v1.4s, v5.s[3]
	mov	v1.s[0], v5.s[2]

	/*
	 * v6.4s := (0, 0, rklo[0], rklo[1])
	 * v7.4s := (0, 0, 0, rklo[0])
	 */
	ext	v6.16b, v0.16b, v2.16b, #8
	ext	v7.16b, v0.16b, v2.16b, #4

	/* v3.4s := (nrk[0], nrk[1], nrk[2], nrk[3]) */
	eor	v3.16b, v1.16b, v6.16b
	eor	v3.16b, v3.16b, v7.16b

	/*
	 * Recall v2.4s = (rk[0], rk[1], xxx, xxx)
	 * and v5.4s = (rk[2], rk[3], xxx, xxx).  Set
	 * v2.4s := (rk[0], rk[1], rk[2], rk[3])
	 */
	mov	v2.d[1], v5.d[0]

	/* store two round keys */
	stp	q2, q3, [x0], #0x20

	/*
	 * Live vector registers at this point:
	 *
	 * q0 = zero
	 * q2 = rk
	 * q3 = nrk
	 * v5.4s = (rk[2], rk[3], nrk[0], nrk[1])
	 * q16 = unshiftrows_rotword_1
	 * q17 = unshiftrows_rotword_3
	 *
	 * We have to compute, in q1:
	 *
	 * nnrk[0] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2]
	 * nnrk[1] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3]
	 * nnrk[2] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 * nnrk[3] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1]
	 *
	 * And, if there's any more afterward, in q2:
	 *
	 * nnnrklo[0] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1] ^ nrk[2]
	 * nnnrklo[1] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1] ^ nrk[2] ^ nrk[3]
	 */

	/* q1 := ShiftRows(SubBytes(q3)) */
	mov	v1.16b, v3.16b
	aese	v1.16b, v0.16b

	/* v1.4s[i] := RotWords(SubBytes(nrk[3])) ^ RCON' */
	ld1r	{v4.4s}, [x3], #4
	tbl	v1.16b, {v1.16b}, v17.16b
	eor	v1.16b, v1.16b, v4.16b

	/*
	 * v5.4s := (rk[2], rk[3], nrk[0], nrk[1])	[already]
	 * v4.4s := (0, rk[2], rk[3], nrk[0])
	 * v6.4s := (0, 0, rk[2], rk[3])
	 * v7.4s := (0, 0, 0, rk[2])
	 */
	ext	v4.16b, v0.16b, v5.16b, #12
	ext	v6.16b, v0.16b, v5.16b, #8
	ext	v7.16b, v0.16b, v5.16b, #4

	/* v1.4s := (nnrk[0], nnrk[1], nnrk[2], nnrk[3]) */
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v4.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #3	/* count down three rounds */
	str	q1, [x0], #0x10	/* store third round key */
	b.eq	2f

	/*
	 * v4.4s := (nrk[2], nrk[3], xxx, xxx)
	 * v5.4s := (0, nrk[2], xxx, xxx)
	 */
	ext	v4.16b, v3.16b, v0.16b, #8
	ext	v5.16b, v0.16b, v4.16b, #12

	/* v2.4s := (nnrk[3], nnrk[3], xxx, xxx) */
	dup	v2.4s, v1.s[3]

	/*
	 * v2.4s := (nnnrklo[0] = nnrk[3] ^ nrk[2],
	 *     nnnrklo[1] = nnrk[3] ^ nrk[2] ^ nrk[3],
	 *     xxx, xxx)
	 */
	eor	v2.16b, v2.16b, v4.16b
	eor	v2.16b, v2.16b, v5.16b

	b	1b

2:	ret
END(aesarmv8_setenckey192)
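
/*
 * In FIPS 197 word terms, AES-192 is the same recurrence with a
 * six-word key (sketch only; helpers hypothetical):
 *
 *	for (unsigned i = 6; i < 52; i++) {
 *		uint32_t t = w[i - 1];
 *		if (i % 6 == 0)
 *			t = SubWord(RotWord(t)) ^ rcon[i/6 - 1];
 *		w[i] = w[i - 6] ^ t;
 *	}
 *
 * Six fresh words make one and a half round keys, so each trip
 * through the loop above consumes two round constants and emits
 * twelve words: three whole round keys (rk, nrk, nnrk), plus, except
 * on the last trip, the two-word tail (nnnrklo) carried into the
 * next one.
 */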

/*
 * aesarmv8_setenckey256(struct aesenc *enckey@x0, const uint8_t key[32] @x1)
 *
 *	Expand a 32-byte AES-256 key into 14 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey256)
	/* q1 := key[0:128), q2 := key[128:256) */
	ldp	q1, q2, [x1], #0x20

	adrl	x4, unshiftrows_rotword_3
	adrl	x5, unshiftrows_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 */
	ldr	q17, [x5]	/* q17 := unshiftrows_3 */

	/* store master key as first two round keys */
	stp	q1, q2, [x0], #0x20
	mov	x2, #14		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (pprk[0], pprk[1], pprk[2], pprk[3])
	 * v2.4s = (prk[0], prk[1], prk[2], prk[3])
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q2)) */
	mov	v3.16b, v2.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * v5.4s := (0,pprk[0],pprk[1],pprk[2])
	 * v6.4s := (0,0,pprk[0],pprk[1])
	 * v7.4s := (0,0,0,pprk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v1.4s := (rk[0], rk[1], rk[2], rk[3]) */
	eor	v1.16b, v1.16b, v3.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #2	/* count down two rounds */
	b.eq	2f	/* stop if this is the last one */

	/* q3 := ShiftRows(SubBytes(q1)) */
	mov	v3.16b, v1.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := SubBytes(rk[3]) */
	tbl	v3.16b, {v3.16b}, v17.16b

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v2.16b, #12
	ext	v6.16b, v0.16b, v2.16b, #8
	ext	v7.16b, v0.16b, v2.16b, #4

	/* v2.4s := (nrk[0], nrk[1], nrk[2], nrk[3]) */
	eor	v2.16b, v2.16b, v3.16b
	eor	v2.16b, v2.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	eor	v2.16b, v2.16b, v7.16b

	stp	q1, q2, [x0], #0x20	/* store two round keys */
	b	1b

2:	str	q1, [x0]	/* store last round key */
	ret
END(aesarmv8_setenckey256)

/*
 * aesarmv8_enctodec(const struct aesenc *enckey@x0, struct aesdec *deckey@x1,
 *     uint32_t nrounds@x2)
 *
 *	Convert AES encryption round keys to AES decryption round keys.
 *	`nrounds' must be between 10 and 14.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_enctodec)
	ldr	q0, [x0, x2, lsl #4]	/* load last round key */
	b	2f
	_ALIGN_TEXT
1:	aesimc	v0.16b, v0.16b	/* convert encryption to decryption */
2:	str	q0, [x1], #0x10	/* store round key */
	subs	x2, x2, #1	/* count down round */
	ldr	q0, [x0, x2, lsl #4]	/* load previous round key */
	b.ne	1b	/* repeat if there's more */
	str	q0, [x1]	/* store first round key verbatim */
	ret
END(aesarmv8_enctodec)
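
/*
 * This is the standard "equivalent inverse cipher" schedule (FIPS 197
 * sec. 5.3.5): reverse the order of the round keys and push the inner
 * ones through InvMixColumns (AESIMC), so that decryption can
 * interleave AESD/AESIMC exactly the way encryption interleaves
 * AESE/AESMC.  A C sketch of the same transformation (InvMixColumns
 * hypothetical; ek/dk are arrays of 16-byte round keys):
 *
 *	for (unsigned i = 0; i <= nrounds; i++) {
 *		memcpy(dk + 16*i, ek + 16*(nrounds - i), 16);
 *		if (i != 0 && i != nrounds)
 *			InvMixColumns(dk + 16*i);	// aesimc
 *	}
 */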

/*
 * aesarmv8_enc(const struct aesenc *enckey@x0, const uint8_t in[16] @x1,
 *     uint8_t out[16] @x2, uint32_t nrounds@x3)
 *
 *	Encrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_enc)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q0, [x1]	/* q0 := ptxt */
	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
	str	q0, [x2]	/* store ctxt */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_enc)

/*
 * aesarmv8_dec(const struct aesdec *deckey@x0, const uint8_t in[16] @x1,
 *     uint8_t out[16] @x2, uint32_t nrounds@x3)
 *
 *	Decrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_dec)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q0, [x1]	/* q0 := ctxt */
	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
	str	q0, [x2]	/* store ptxt */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_dec)

/*
 * aesarmv8_cbc_enc(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be an integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_enc)
	cbz	x3, 2f	/* stop if nothing to do */
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q0, [x4]	/* q0 := chaining value */
	_ALIGN_TEXT
1:	ldr	q1, [x1], #0x10	/* q1 := plaintext block */
	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
	subs	x10, x10, #0x10	/* count down nbytes */
	str	q0, [x2], #0x10	/* store ciphertext block */
	b.ne	1b	/* repeat if x10 is nonzero */
	str	q0, [x4]	/* store chaining value */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
2:	ret
END(aesarmv8_cbc_enc)
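
/*
 * A byte-level C model of the loop above (E1 stands in for the
 * aesarmv8_enc1 call; all names hypothetical):
 *
 *	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *		xor16(cv, cv, in);		// cv ^= ptxt
 *		E1(enckey, cv, nrounds);	// cv = AES(key, cv)
 *		memcpy(out, cv, 16);		// ctxt, also next cv
 *	}
 *	memcpy(iv, cv, 16);			// resume point for caller
 *
 * CBC encryption is inherently serial: each block's input depends on
 * the previous block's output, which is why there is no eight-block
 * variant of it here, unlike CBC decryption below.
 */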

/*
 * aesarmv8_cbc_dec1(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, const uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_cbc_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q24, [x4]	/* q24 := iv */
	mov	x9, x0	/* x9 := deckey */
	mov	x10, x3	/* x10 := nbytes */
	add	x1, x1, x3	/* x1 := pointer past end of in */
	add	x2, x2, x3	/* x2 := pointer past end of out */
	ldr	q0, [x1, #-0x10]!	/* q0 := last ciphertext block */
	str	q0, [x4]	/* update iv */
	b	2f
	_ALIGN_TEXT
1:	ldr	q31, [x1, #-0x10]!	/* q31 := chaining value */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := plaintext block */
	str	q0, [x2, #-0x10]!	/* store plaintext block */
	mov	v0.16b, v31.16b	/* move cv = ciphertext block */
2:	mov	x0, x9	/* x0 := deckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_dec1	/* q0 := cv ^ ptxt; trash x0/x3/q16 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b	/* repeat if more blocks */
	eor	v0.16b, v0.16b, v24.16b	/* q0 := first plaintext block */
	str	q0, [x2, #-0x10]!	/* store first plaintext block */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbc_dec1)
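
/*
 * The loop above walks backward from the end of the buffer; in C
 * terms, roughly (D1 stands in for aesarmv8_dec1; names hypothetical):
 *
 *	memcpy(iv0, iv, 16);			// save the incoming iv
 *	memcpy(iv, in + nbytes - 16, 16);	// new iv = last ctxt block
 *	for (; nbytes > 16; nbytes -= 16) {
 *		memcpy(out + nbytes - 16, in + nbytes - 16, 16);
 *		D1(deckey, out + nbytes - 16, nrounds);
 *		xor16(out + nbytes - 16, out + nbytes - 16,
 *		    in + nbytes - 32);	// chain from previous ctxt block
 *	}
 *	memcpy(out, in, 16);
 *	D1(deckey, out, nrounds);
 *	xor16(out, out, iv0);		// first block chains from old iv
 *
 * Starting from the tail lets the routine update the caller's iv with
 * the last ciphertext block up front, and then re-read each chaining
 * value from the input as it passes rather than keeping it live
 * across the call that clobbers q0.
 */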

/*
 * aesarmv8_cbc_dec8(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, const uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of 8-block units with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_dec8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q24, [x4]	/* q24 := iv */
	mov	x9, x0	/* x9 := deckey */
	mov	x10, x3	/* x10 := nbytes */
	add	x1, x1, x3	/* x1 := pointer past end of in */
	add	x2, x2, x3	/* x2 := pointer past end of out */
	ldp	q6, q7, [x1, #-0x20]!	/* q6, q7 := last ciphertext blocks */
	str	q7, [x4]	/* update iv */
	b	2f
	_ALIGN_TEXT
1:	ldp	q6, q7, [x1, #-0x20]!
	eor	v0.16b, v0.16b, v7.16b	/* q0 := pt0 */
	stp	q0, q1, [x2, #-0x20]!
2:	ldp	q4, q5, [x1, #-0x20]!
	ldp	q2, q3, [x1, #-0x20]!
	ldp	q0, q1, [x1, #-0x20]!
	mov	v31.16b, v6.16b	/* q[24+i] := cv[i], 0<i<8 */
	mov	v30.16b, v5.16b
	mov	v29.16b, v4.16b
	mov	v28.16b, v3.16b
	mov	v27.16b, v2.16b
	mov	v26.16b, v1.16b
	mov	v25.16b, v0.16b
	mov	x0, x9	/* x0 := deckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_dec8	/* q[i] := cv[i] ^ pt[i];
				 * trash x0/x3/q16 */
	eor	v7.16b, v7.16b, v31.16b	/* q[i] := pt[i] */
	eor	v6.16b, v6.16b, v30.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v1.16b, v1.16b, v25.16b
	subs	x10, x10, #0x80	/* count down nbytes */
	stp	q6, q7, [x2, #-0x20]!	/* store plaintext blocks */
	stp	q4, q5, [x2, #-0x20]!
	stp	q2, q3, [x2, #-0x20]!
	b.ne	1b	/* repeat if there's more */
	eor	v0.16b, v0.16b, v24.16b	/* q0 := pt0 */
	stp	q0, q1, [x2, #-0x20]!	/* store first two plaintext blocks */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbc_dec8)

/*
 * aesarmv8_xts_enc1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_xts_enc8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_enc1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q31, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	ldr	q0, [x1], #0x10	/* q0 := ptxt */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := ptxt ^ tweak */
	bl	aesarmv8_enc1	/* q0 := AES(...); trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ptxt ^ tweak) ^ tweak */
	str	q0, [x2], #0x10	/* store ciphertext block */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b	/* repeat if more blocks */
	str	q31, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_enc1)
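
/*
 * One XTS step in C, per IEEE P1619 (E1 stands in for aesarmv8_enc1,
 * mulx for aesarmv8_xts_mulx below; names hypothetical):
 *
 *	memcpy(T, tweak, 16);
 *	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *		xor16(buf, in, T);		// ptxt ^ T
 *		E1(enckey, buf, nrounds);	// AES(key, ptxt ^ T)
 *		xor16(out, buf, T);		// ctxt = AES(ptxt ^ T) ^ T
 *		mulx(T);			// T *= x for the next block
 *	}
 *	memcpy(tweak, T, 16);			// resume point
 */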

/*
 * aesarmv8_xts_enc8(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_enc8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q31, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	/* q31 := tweak[7] */
	ldp	q0, q1, [x1], #0x20	/* q[i] := ptxt[i] */
	ldp	q2, q3, [x1], #0x20
	ldp	q4, q5, [x1], #0x20
	ldp	q6, q7, [x1], #0x20
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ptxt[i] ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_enc8	/* encrypt q0-q7; trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	stp	q0, q1, [x2], #0x20	/* store ciphertext blocks */
	stp	q2, q3, [x2], #0x20
	stp	q4, q5, [x2], #0x20
	stp	q6, q7, [x2], #0x20
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x80	/* count down nbytes */
	b.ne	1b	/* repeat if more block groups */
	str	q31, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_enc8)

/*
 * aesarmv8_xts_dec1(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_xts_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := deckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q31, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	ldr	q0, [x1], #0x10	/* q0 := ctxt */
	mov	x0, x9	/* x0 := deckey */
	mov	x3, x5	/* x3 := nrounds */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := ctxt ^ tweak */
	bl	aesarmv8_dec1	/* q0 := AES(...); trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ctxt ^ tweak) ^ tweak */
	str	q0, [x2], #0x10	/* store plaintext block */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b	/* repeat if more blocks */
	str	q31, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_dec1)

/*
 * aesarmv8_xts_dec8(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_dec8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := deckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q31, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	/* q31 := tweak[7] */
	ldp	q0, q1, [x1], #0x20	/* q[i] := ctxt[i] */
	ldp	q2, q3, [x1], #0x20
	ldp	q4, q5, [x1], #0x20
	ldp	q6, q7, [x1], #0x20
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ctxt[i] ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	mov	x0, x9	/* x0 := deckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_dec8	/* decrypt q0-q7; trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	stp	q0, q1, [x2], #0x20	/* store plaintext blocks */
	stp	q2, q3, [x2], #0x20
	stp	q4, q5, [x2], #0x20
	stp	q6, q7, [x2], #0x20
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x80	/* count down nbytes */
	b.ne	1b	/* repeat if more block groups */
	str	q31, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_dec8)

/*
 * aesarmv8_xts_mulx(tweak@q31)
 *
 *	Multiply q31 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
 *	Uses x0 and q0/q1 as temporaries.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_xts_mulx,@function
aesarmv8_xts_mulx:
	/*
	 * Simultaneously determine
	 * (a) whether the high bit of the low half must be
	 *     shifted into the low bit of the high half, and
	 * (b) whether the high bit of the high half must be
	 *     carried into x^128 = x^7 + x^2 + x + 1.
	 */
	adrl	x0, xtscarry
	cmlt	v1.2d, v31.2d, #0	/* v1.2d[i] := -1 if v31.2d[i] < 0, else 0 */
	ldr	q0, [x0]	/* q0 := xtscarry */
	ext	v1.16b, v1.16b, v1.16b, #8	/* swap halves of q1 */
	shl	v31.2d, v31.2d, #1	/* shift */
	and	v0.16b, v0.16b, v1.16b	/* copy xtscarry according to mask */
	eor	v31.16b, v31.16b, v0.16b	/* incorporate (a) and (b) */
	ret
END(aesarmv8_xts_mulx)

	.section .rodata
	.p2align 4
	.type	xtscarry,@object
xtscarry:
	.byte	0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
END(xtscarry)
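
/*
 * In portable C the same doubling is (a sketch; the vector code above
 * does both 64-bit halves at once, with xtscarry serving as a mask of
 * the two feedback terms):
 *
 *	void mulx(uint64_t t[2])	// t[0] low, t[1] high, little-endian
 *	{
 *		uint64_t carry = t[1] >> 63;	// bit that falls off the top
 *		t[1] = (t[1] << 1) | (t[0] >> 63);		// (a)
 *		t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);	// (b)
 *	}
 *
 * where 0x87 is x^7 + x^2 + x + 1, the reduction of x^128.
 */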

/*
 * aesarmv8_xts_update(const uint8_t in[16] @x0, uint8_t out[16] @x1)
 *
 *	Update an AES-XTS tweak.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_update)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q31, [x0]	/* load tweak */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	str	q31, [x1]	/* store tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_update)

/*
 * aesarmv8_cbcmac_update1(const struct aesenc *enckey@x0,
 *     const uint8_t *in@x1, size_t nbytes@x2, uint8_t auth[16] @x3,
 *     uint32_t nrounds@x4)
 *
 *	Update CBC-MAC.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbcmac_update1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q0, [x3]	/* q0 := initial authenticator */
	mov	x9, x0	/* x9 := enckey */
	mov	x5, x3	/* x5 := &auth (enc1 trashes x3) */
	_ALIGN_TEXT
1:	ldr	q1, [x1], #0x10	/* q1 := plaintext block */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x4	/* x3 := nrounds */
	eor	v0.16b, v0.16b, v1.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc1	/* q0 := auth'; trash x0/x3/q16 */
	subs	x2, x2, #0x10	/* count down nbytes */
	b.ne	1b	/* repeat if x2 is nonzero */
	str	q0, [x5]	/* store updated authenticator */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbcmac_update1)

/*
 * aesarmv8_ccm_enc1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t authctr[32] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Update CCM encryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_ccm_enc1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldp	q0, q2, [x4]	/* q0 := auth, q2 := ctr (be) */
	adrl	x11, ctr32_inc	/* x11 := &ctr32_inc */
	ld1	{v5.4s}, [x11]	/* q5 := (0,0,0,1) (host-endian) */
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v2.16b, v2.16b	/* q2 := ctr (host-endian) */
#endif
	_ALIGN_TEXT
1:	ldr	q3, [x1], #0x10	/* q3 := plaintext block */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v1.16b, v2.16b	/* q1 := ctr (big-endian) */
#else
	mov	v1.16b, v2.16b	/* q1 := ctr (big-endian) */
#endif
	eor	v0.16b, v0.16b, v3.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc2	/* q0 := auth', q1 := pad;
				 * trash x0/x3/q16 */
	eor	v3.16b, v1.16b, v3.16b	/* q3 := ciphertext block */
	subs	x10, x10, #0x10	/* count down bytes */
	str	q3, [x2], #0x10	/* store ciphertext block */
	b.ne	1b	/* repeat if more blocks */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v2.16b, v2.16b	/* q2 := ctr (big-endian) */
#endif
	stp	q0, q2, [x4]	/* store updated auth/ctr */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_ccm_enc1)
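
/*
 * Per block this runs CCM's two halves in lockstep; a C model (E1
 * stands in for one-block AES encryption; names hypothetical):
 *
 *	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *		ctr32_inc(ctr);			// bump low 32 bits (be)
 *		xor16(auth, auth, in);		// CBC-MAC absorbs ptxt
 *		E1(enckey, auth, nrounds);	// auth' = AES(auth ^ ptxt)
 *		memcpy(pad, ctr, 16);
 *		E1(enckey, pad, nrounds);	// CTR pad = AES(ctr)
 *		xor16(out, in, pad);		// ctxt = ptxt ^ pad
 *	}
 *
 * The two AES calls are independent, so the code above fuses them
 * into a single aesarmv8_enc2 call and keeps both blocks in flight
 * through the AESE/AESMC pipeline.
 */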

/*
 * aesarmv8_ccm_dec1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t authctr[32] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Update CCM decryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_ccm_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldp	q1, q2, [x4]	/* q1 := auth, q2 := ctr (be) */
	adrl	x11, ctr32_inc	/* x11 := &ctr32_inc */
	ld1	{v5.4s}, [x11]	/* q5 := (0,0,0,1) (host-endian) */
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v2.16b, v2.16b	/* q2 := ctr (host-endian) */
#endif

	/* Decrypt the first block. */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x3, x5	/* x3 := nrounds */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
#else
	mov	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
#endif
	ldr	q3, [x1], #0x10	/* q3 := ctxt */
	bl	aesarmv8_enc1	/* q0 := pad; trash x0/x3/q16 */
	b	2f

	_ALIGN_TEXT
1:	/*
	 * Authenticate the last block and decrypt the next block
	 * simultaneously.
	 *
	 *	q1 = auth ^ ptxt[-1]
	 *	q2 = ctr[-1] (host-endian)
	 */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
#else
	mov	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
#endif
	ldr	q3, [x1], #0x10	/* q3 := ctxt */
	bl	aesarmv8_enc2	/* q0 := pad, q1 := auth';
				 * trash x0/x3/q16 */
2:	eor	v3.16b, v0.16b, v3.16b	/* q3 := plaintext block */
	subs	x10, x10, #0x10
	str	q3, [x2], #0x10	/* store plaintext */
	eor	v1.16b, v1.16b, v3.16b	/* q1 := auth ^ ptxt */
	b.ne	1b

#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v2.16b, v2.16b	/* q2 := ctr (big-endian) */
#endif

	/* Authenticate the last block. */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
	mov	v0.16b, v1.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc1	/* q0 := auth'; trash x0/x3/q16 */
	stp	q0, q2, [x4]	/* store updated auth/ctr */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_ccm_dec1)

	.section .rodata
	.p2align 4
	.type	ctr32_inc,@object
ctr32_inc:
	.int	0, 0, 0, 1
END(ctr32_inc)

/*
 * aesarmv8_enc1(const struct aesenc *enckey@x0,
 *     uint128_t block@q0, uint32_t nrounds@x3)
 *
 *	Encrypt a single AES block in q0.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc1,@function
aesarmv8_enc1:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q0 := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q0)))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	ldr	q16, [x0], #0x10
	subs	x3, x3, #1
	b.ne	1b
	/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
	aese	v0.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q0 := AddRoundKey_q16(q0) */
	eor	v0.16b, v0.16b, v16.16b
	ret
END(aesarmv8_enc1)
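
/*
 * The AESE/AESMC pairing above mirrors the textbook round structure;
 * in C terms (helpers hypothetical), with rk[0..nrounds] the 16-byte
 * round keys:
 *
 *	for (unsigned i = 0; i < nrounds - 1; i++) {
 *		AddRoundKey(q0, rk[i]);		// one AESE does these
 *		SubBytes(q0);			// three steps together...
 *		ShiftRows(q0);
 *		MixColumns(q0);			// ...and AESMC does this
 *	}
 *	AddRoundKey(q0, rk[nrounds - 1]);	// final round: AESE
 *	SubBytes(q0);
 *	ShiftRows(q0);
 *	AddRoundKey(q0, rk[nrounds]);		// plus a bare EOR
 *
 * Note that AESE takes the round key first (AddRoundKey before
 * SubBytes/ShiftRows), which is why the last AddRoundKey is a plain
 * EOR rather than another AESE.
 */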

/*
 * aesarmv8_enc2(const struct aesenc *enckey@x0,
 *     uint128_t block@q0, uint128_t block@q1, uint32_t nrounds@x3)
 *
 *	Encrypt two AES blocks in q0 and q1.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc2,@function
aesarmv8_enc2:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v16.16b
	aesmc	v1.16b, v1.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
	aese	v0.16b, v16.16b
	aese	v1.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	ret
END(aesarmv8_enc2)

/*
 * aesarmv8_enc8(const struct aesenc *enckey@x0,
 *     uint128_t block0@q0, ..., uint128_t block7@q7,
 *     uint32_t nrounds@x3)
 *
 *	Encrypt eight AES blocks in q0 through q7 in parallel.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc8,@function
aesarmv8_enc8:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v16.16b
	aesmc	v1.16b, v1.16b
	aese	v2.16b, v16.16b
	aesmc	v2.16b, v2.16b
	aese	v3.16b, v16.16b
	aesmc	v3.16b, v3.16b
	aese	v4.16b, v16.16b
	aesmc	v4.16b, v4.16b
	aese	v5.16b, v16.16b
	aesmc	v5.16b, v5.16b
	aese	v6.16b, v16.16b
	aesmc	v6.16b, v6.16b
	aese	v7.16b, v16.16b
	aesmc	v7.16b, v7.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
	aese	v0.16b, v16.16b
	aese	v1.16b, v16.16b
	aese	v2.16b, v16.16b
	aese	v3.16b, v16.16b
	aese	v4.16b, v16.16b
	aese	v5.16b, v16.16b
	aese	v6.16b, v16.16b
	aese	v7.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v3.16b, v3.16b, v16.16b
	eor	v4.16b, v4.16b, v16.16b
	eor	v5.16b, v5.16b, v16.16b
	eor	v6.16b, v6.16b, v16.16b
	eor	v7.16b, v7.16b, v16.16b
	ret
END(aesarmv8_enc8)

/*
 * aesarmv8_dec1(const struct aesdec *deckey@x0,
 *     uint128_t block@q0, uint32_t nrounds@x3)
 *
 *	Decrypt a single AES block in q0.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_dec1,@function
aesarmv8_dec1:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
	aesd	v0.16b, v16.16b
	/* q0 := InMixColumns(q0) */
	aesimc	v0.16b, v0.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
	aesd	v0.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q0 := AddRoundKey_q16(q0) */
	eor	v0.16b, v0.16b, v16.16b
	ret
END(aesarmv8_dec1)

/*
 * aesarmv8_dec8(const struct aesdec *deckey@x0,
 *     uint128_t block0@q0, ..., uint128_t block7@q7,
 *     uint32_t nrounds@x3)
 *
 *	Decrypt eight AES blocks in q0 through q7 in parallel.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_dec8,@function
aesarmv8_dec8:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
	aesd	v0.16b, v16.16b
	/* q[i] := InMixColumns(q[i]) */
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v16.16b
	aesimc	v1.16b, v1.16b
	aesd	v2.16b, v16.16b
	aesimc	v2.16b, v2.16b
	aesd	v3.16b, v16.16b
	aesimc	v3.16b, v3.16b
	aesd	v4.16b, v16.16b
	aesimc	v4.16b, v4.16b
	aesd	v5.16b, v16.16b
	aesimc	v5.16b, v5.16b
	aesd	v6.16b, v16.16b
	aesimc	v6.16b, v6.16b
	aesd	v7.16b, v16.16b
	aesimc	v7.16b, v7.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
	aesd	v0.16b, v16.16b
	aesd	v1.16b, v16.16b
	aesd	v2.16b, v16.16b
	aesd	v3.16b, v16.16b
	aesd	v4.16b, v16.16b
	aesd	v5.16b, v16.16b
	aesd	v6.16b, v16.16b
	aesd	v7.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v3.16b, v3.16b, v16.16b
	eor	v4.16b, v4.16b, v16.16b
	eor	v5.16b, v5.16b, v16.16b
	eor	v6.16b, v6.16b, v16.16b
	eor	v7.16b, v7.16b, v16.16b
	ret
END(aesarmv8_dec8)