/*	$NetBSD: aes_armv8_64.S,v 1.9 2020/07/27 20:53:22 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/endian.h>

#include <aarch64/asm.h>

	.arch_extension	aes

/*
 * uint32_t rcon[10]
 *
 * Table mapping n ---> x^n mod (x^8 + x^4 + x^3 + x + 1) in GF(2).
 * Such elements of GF(2^8) need only eight bits to be represented,
 * but we store them in 4-byte units so we can copy one into all
 * four 4-byte lanes of a vector register with a single LD1R.  The
 * access pattern is fixed, so indices into this table are never
 * secret.
 */
	.section .rodata
	.p2align 2
	.type	rcon,@object
rcon:
	.long	0x01
	.long	0x02
	.long	0x04
	.long	0x08
	.long	0x10
	.long	0x20
	.long	0x40
	.long	0x80
	.long	0x1b
	.long	0x36
END(rcon)
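
/*
 * Illustrative C sketch (not part of the build): the table above can
 * be generated by repeated doubling in GF(2^8) modulo the AES
 * polynomial.  The function name is hypothetical.
 *
 *	#include <stdint.h>
 *
 *	static void
 *	gen_rcon(uint32_t rcon[10])
 *	{
 *		uint8_t x = 1;
 *		unsigned i;
 *
 *		for (i = 0; i < 10; i++) {
 *			rcon[i] = x;	// zero-extended to 32 bits for LD1R
 *			x = (x << 1) ^ (x & 0x80 ? 0x1b : 0); // multiply by x
 *		}
 *	}
 */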

/*
 * uint128_t unshiftrows_rotword_1
 *
 * Table for TBL instruction to undo ShiftRows, and then do
 * RotWord on word 1, and then copy it into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_rotword_1,@object
unshiftrows_rotword_1:
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
END(unshiftrows_rotword_1)

/*
 * uint128_t unshiftrows_3
 *
 * Table for TBL instruction to undo ShiftRows, and then copy word
 * 3 into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_3,@object
unshiftrows_3:
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
END(unshiftrows_3)

/*
 * uint128_t unshiftrows_rotword_3
 *
 * Table for TBL instruction to undo ShiftRows, and then do
 * RotWord on word 3, and then copy it into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_rotword_3,@object
unshiftrows_rotword_3:
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
END(unshiftrows_rotword_3)
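
/*
 * Illustrative C sketch (not part of the build) of how the three TBL
 * index tables above can be derived from the column-major AES state
 * layout.  ShiftRows rotates row r left by r, so the byte of the
 * unshifted state at (row r, column c) lives at byte 4*((c - r) mod 4)
 * + r of the shifted state.  Names here are hypothetical.
 *
 *	#include <stdint.h>
 *
 *	// index into the post-ShiftRows state of unshifted byte (r, c)
 *	static unsigned
 *	unshift_idx(unsigned r, unsigned c)
 *	{
 *		return 4*((c + 4 - r) % 4) + r;
 *	}
 *
 *	// tbl[i]: byte i of a vector whose every word is word w of the
 *	// unshifted state, optionally rotated left a byte (RotWord)
 *	static void
 *	gen_table(unsigned w, int rotword, uint8_t tbl[16])
 *	{
 *		unsigned i, r;
 *
 *		for (i = 0; i < 16; i++) {
 *			r = (i + (rotword ? 1 : 0)) % 4;
 *			tbl[i] = unshift_idx(r, w);
 *		}
 *	}
 *
 * gen_table(1, 1, .) yields unshiftrows_rotword_1; gen_table(3, 0, .)
 * yields unshiftrows_3; gen_table(3, 1, .) yields
 * unshiftrows_rotword_3.
 */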

/*
 * aesarmv8_setenckey128(struct aesenc *enckey@x0, const uint8_t key[16] @x1)
 *
 *	Expand a 16-byte AES-128 key into 10 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey128)
	ldr	q1, [x1]	/* q1 := master key */

	adrl	x4, unshiftrows_rotword_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 table */

	str	q1, [x0], #0x10	/* store master key as first round key */
	mov	x2, #10		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (prk[0], prk[1], prk[2], prk[3])
	 * x0 = pointer to round key to compute
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q1)) */
	mov	v3.16b, v1.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v1.4s := (rk[0], rk[1], rk[2], rk[3]) */
	eor	v1.16b, v1.16b, v3.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #1	/* count down rounds */
	str	q1, [x0], #0x10	/* store round key */
	b.ne	1b

	ret
END(aesarmv8_setenckey128)
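
/*
 * For reference, a C sketch (illustrative only, not in the build) of
 * the FIPS-197 AES-128 key schedule that the loop above implements.
 * subw_rotw() stands for SubWord(RotWord(.)) and is a hypothetical
 * helper; the vector code gets the same effect from AESE plus the TBL
 * table, and computes the four chained XORs at once with the EXT/EOR
 * ladder.
 *
 *	#include <stdint.h>
 *
 *	uint32_t subw_rotw(uint32_t);	// assumed helper
 *	extern const uint32_t rcon[10];
 *
 *	static void
 *	expand128(uint32_t rk[4*11], const uint32_t key[4])
 *	{
 *		unsigned i;
 *
 *		for (i = 0; i < 4; i++)
 *			rk[i] = key[i];
 *		for (i = 4; i < 4*11; i += 4) {
 *			rk[i + 0] = rk[i - 4] ^ subw_rotw(rk[i - 1]) ^
 *			    rcon[i/4 - 1];
 *			rk[i + 1] = rk[i - 3] ^ rk[i + 0];
 *			rk[i + 2] = rk[i - 2] ^ rk[i + 1];
 *			rk[i + 3] = rk[i - 1] ^ rk[i + 2];
 *		}
 *	}
 */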

/*
 * aesarmv8_setenckey192(struct aesenc *enckey@x0, const uint8_t key[24] @x1)
 *
 *	Expand a 24-byte AES-192 key into 12 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey192)
	ldr	q1, [x1], #0x10	/* q1 := master key[0:128) */
	ldr	d2, [x1]	/* d2 := master key[128:192) */

	adrl	x4, unshiftrows_rotword_1
	adrl	x5, unshiftrows_rotword_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_1 */
	ldr	q17, [x5]	/* q17 := unshiftrows_rotword_3 */

	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
	mov	x2, #12		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (prk[0], prk[1], prk[2], prk[3])
	 * v2.4s = (rklo[0], rklo[1], xxx, xxx)
	 * x0 = pointer to three round keys to compute
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q2)) */
	mov	v3.16b, v2.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(rklo[1])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * We need to compute:
	 *
	 * rk[0] := rklo[0]
	 * rk[1] := rklo[1]
	 * rk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0]
	 * rk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1]
	 * nrk[0] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3]
	 * nrk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 * nrk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 *     ^ rklo[1]
	 */

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v5.4s := (rk[2], rk[3], nrk[0], nrk[1]) */
	eor	v5.16b, v5.16b, v1.16b
	eor	v5.16b, v5.16b, v3.16b
	eor	v5.16b, v5.16b, v6.16b
	eor	v5.16b, v5.16b, v7.16b

	/*
	 * At this point, rk is split across v2.4s = (rk[0],rk[1],...)
	 * and v5.4s = (rk[2],rk[3],...); nrk is in v5.4s =
	 * (...,nrk[0],nrk[1]); and we have yet to compute nrk[2] or
	 * nrk[3], which requires rklo[0] and rklo[1] in v2.4s =
	 * (rklo[0],rklo[1],...).
	 */

	/* v1.4s := (nrk[0], nrk[1], nrk[1], nrk[1]) */
	dup	v1.4s, v5.s[3]
	mov	v1.s[0], v5.s[2]

	/*
	 * v6.4s := (0, 0, rklo[0], rklo[1])
	 * v7.4s := (0, 0, 0, rklo[0])
	 */
	ext	v6.16b, v0.16b, v2.16b, #8
	ext	v7.16b, v0.16b, v2.16b, #4

	/* v3.4s := (nrk[0], nrk[1], nrk[2], nrk[3]) */
	eor	v3.16b, v1.16b, v6.16b
	eor	v3.16b, v3.16b, v7.16b

	/*
	 * Recall v2.4s = (rk[0], rk[1], xxx, xxx)
	 * and v5.4s = (rk[2], rk[3], xxx, xxx).  Set
	 * v2.4s := (rk[0], rk[1], rk[2], rk[3])
	 */
	mov	v2.d[1], v5.d[0]

	/* store two round keys */
	stp	q2, q3, [x0], #0x20

	/*
	 * Live vector registers at this point:
	 *
	 * q0 = zero
	 * q2 = rk
	 * q3 = nrk
	 * v5.4s = (rk[2], rk[3], nrk[0], nrk[1])
	 * q16 = unshiftrows_rotword_1
	 * q17 = unshiftrows_rotword_3
	 *
	 * We have to compute, in q1:
	 *
	 * nnrk[0] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2]
	 * nnrk[1] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3]
	 * nnrk[2] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 * nnrk[3] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1]
	 *
	 * And, if there's any more afterward, in q2:
	 *
	 * nnnrklo[0] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1] ^ nrk[2]
	 * nnnrklo[1] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1] ^ nrk[2] ^ nrk[3]
	 */

	/* q1 := ShiftRows(SubBytes(q3)) */
	mov	v1.16b, v3.16b
	aese	v1.16b, v0.16b

	/* v1.4s[i] := RotWords(SubBytes(nrk[3])) ^ RCON' */
	ld1r	{v4.4s}, [x3], #4
	tbl	v1.16b, {v1.16b}, v17.16b
	eor	v1.16b, v1.16b, v4.16b

	/*
	 * v5.4s := (rk[2], rk[3], nrk[0], nrk[1])	[already]
	 * v4.4s := (0, rk[2], rk[3], nrk[0])
	 * v6.4s := (0, 0, rk[2], rk[3])
	 * v7.4s := (0, 0, 0, rk[2])
	 */
	ext	v4.16b, v0.16b, v5.16b, #12
	ext	v6.16b, v0.16b, v5.16b, #8
	ext	v7.16b, v0.16b, v5.16b, #4

	/* v1.4s := (nnrk[0], nnrk[1], nnrk[2], nnrk[3]) */
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v4.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #3	/* count down three rounds */
	str	q1, [x0], #0x10	/* store third round key */
	b.eq	2f

	/*
	 * v4.4s := (nrk[2], nrk[3], xxx, xxx)
	 * v5.4s := (0, nrk[2], xxx, xxx)
	 */
	ext	v4.16b, v3.16b, v0.16b, #8
	ext	v5.16b, v0.16b, v4.16b, #12

	/* v2.4s := (nnrk[3], nnrk[3], xxx, xxx) */
	dup	v2.4s, v1.s[3]

	/*
	 * v2.4s := (nnnrklo[0] = nnrk[3] ^ nrk[2],
	 *     nnnrklo[1] = nnrk[3] ^ nrk[2] ^ nrk[3],
	 *     xxx, xxx)
	 */
	eor	v2.16b, v2.16b, v4.16b
	eor	v2.16b, v2.16b, v5.16b

	b	1b

2:	ret
END(aesarmv8_setenckey192)

/*
 * aesarmv8_setenckey256(struct aesenc *enckey@x0, const uint8_t key[32] @x1)
 *
 *	Expand a 32-byte AES-256 key into 14 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey256)
	/* q1 := key[0:128), q2 := key[128:256) */
	ldp	q1, q2, [x1], #0x20

	adrl	x4, unshiftrows_rotword_3
	adrl	x5, unshiftrows_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ldr	q16, [x4]	/* q16 := unshiftrows_rotword_3 */
	ldr	q17, [x5]	/* q17 := unshiftrows_3 */

	/* store master key as first two round keys */
	stp	q1, q2, [x0], #0x20
	mov	x2, #14		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (pprk[0], pprk[1], pprk[2], pprk[3])
	 * v2.4s = (prk[0], prk[1], prk[2], prk[3])
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q2)) */
	mov	v3.16b, v2.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * v5.4s := (0,pprk[0],pprk[1],pprk[2])
	 * v6.4s := (0,0,pprk[0],pprk[1])
	 * v7.4s := (0,0,0,pprk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v1.4s := (rk[0], rk[1], rk[2], rk[3]) */
	eor	v1.16b, v1.16b, v3.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #2	/* count down two rounds */
	b.eq	2f	/* stop if this is the last one */

	/* q3 := ShiftRows(SubBytes(q1)) */
	mov	v3.16b, v1.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := SubBytes(rk[3]) */
	tbl	v3.16b, {v3.16b}, v17.16b

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v2.16b, #12
	ext	v6.16b, v0.16b, v2.16b, #8
	ext	v7.16b, v0.16b, v2.16b, #4

	/* v2.4s := (nrk[0], nrk[1], nrk[2], nrk[3]) */
	eor	v2.16b, v2.16b, v3.16b
	eor	v2.16b, v2.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	eor	v2.16b, v2.16b, v7.16b

	stp	q1, q2, [x0], #0x20	/* store two round keys */
	b	1b

2:	str	q1, [x0]	/* store last round key */
	ret
END(aesarmv8_setenckey256)

/*
 * aesarmv8_enctodec(const struct aesenc *enckey@x0, struct aesdec *deckey@x1,
 *     uint32_t nrounds@x2)
 *
 *	Convert AES encryption round keys to AES decryption round keys.
 *	`nrounds' must be between 10 and 14.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_enctodec)
	ldr	q0, [x0, x2, lsl #4]	/* load last round key */
	b	2f
	_ALIGN_TEXT
1:	aesimc	v0.16b, v0.16b	/* convert encryption to decryption */
2:	str	q0, [x1], #0x10	/* store round key */
	subs	x2, x2, #1	/* count down round */
	ldr	q0, [x0, x2, lsl #4]	/* load previous round key */
	b.ne	1b	/* repeat if there's more */
	str	q0, [x1]	/* store first round key verbatim */
	ret
END(aesarmv8_enctodec)
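
/*
 * Equivalent C sketch (illustrative only): the decryption schedule is
 * the encryption schedule reversed, with InvMixColumns (AESIMC)
 * applied to every round key except the outermost two.  copy16 and
 * invmixcolumns are hypothetical helpers.
 *
 *	static void
 *	enctodec(const uint8_t enc[][16], uint8_t dec[][16],
 *	    unsigned nrounds)
 *	{
 *		unsigned i;
 *
 *		for (i = 0; i <= nrounds; i++) {
 *			copy16(dec[i], enc[nrounds - i]);
 *			if (i != 0 && i != nrounds)
 *				invmixcolumns(dec[i]);	// AESIMC
 *		}
 *	}
 */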

/*
 * aesarmv8_enc(const struct aesenc *enckey@x0, const uint8_t in[16] @x1,
 *     uint8_t out[16] @x2, uint32_t nrounds@x3)
 *
 *	Encrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_enc)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q0, [x1]	/* q0 := ptxt */
	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
	str	q0, [x2]	/* store ctxt */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_enc)

/*
 * aesarmv8_dec(const struct aesdec *deckey@x0, const uint8_t in[16] @x1,
 *     uint8_t out[16] @x2, uint32_t nrounds@x3)
 *
 *	Decrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_dec)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q0, [x1]	/* q0 := ctxt */
	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
	str	q0, [x2]	/* store ptxt */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_dec)

/*
 * aesarmv8_cbc_enc(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be an integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_enc)
	cbz	x3, 2f	/* stop if nothing to do */
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q0, [x4]	/* q0 := chaining value */
	_ALIGN_TEXT
1:	ldr	q1, [x1], #0x10	/* q1 := plaintext block */
	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
	subs	x10, x10, #0x10	/* count down nbytes */
	str	q0, [x2], #0x10	/* store ciphertext block */
	b.ne	1b	/* repeat if x10 is nonzero */
	str	q0, [x4]	/* store chaining value */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
2:	ret
END(aesarmv8_cbc_enc)
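
/*
 * Equivalent C sketch (illustrative only; xor16 and enc1 are
 * hypothetical helpers standing in for the EOR and aesarmv8_enc1
 * above).  Each ciphertext block is the encryption of the plaintext
 * XORed with the previous ciphertext block, so the loop carries a
 * serial dependency from block to block, which is presumably why
 * there is no 8-block variant of CBC encryption.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void
 *	cbc_enc(const struct aesenc *k, const uint8_t *in, uint8_t *out,
 *	    size_t nbytes, uint8_t iv[16], uint32_t nrounds)
 *	{
 *		uint8_t cv[16];
 *
 *		memcpy(cv, iv, 16);
 *		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *			xor16(cv, in);		// cv ^= ptxt
 *			enc1(k, cv, nrounds);	// cv := E(cv)
 *			memcpy(out, cv, 16);
 *		}
 *		memcpy(iv, cv, 16);	// return the chaining value
 *	}
 */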

/*
 * aesarmv8_cbc_dec1(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, const uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_cbc_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q24, [x4]	/* q24 := iv */
	mov	x9, x0	/* x9 := deckey */
	mov	x10, x3	/* x10 := nbytes */
	add	x1, x1, x3	/* x1 := pointer past end of in */
	add	x2, x2, x3	/* x2 := pointer past end of out */
	ldr	q0, [x1, #-0x10]!	/* q0 := last ciphertext block */
	str	q0, [x4]	/* update iv */
	b	2f
	_ALIGN_TEXT
1:	ldr	q31, [x1, #-0x10]!	/* q31 := chaining value */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := plaintext block */
	str	q0, [x2, #-0x10]!	/* store plaintext block */
	mov	v0.16b, v31.16b	/* move cv = ciphertext block */
2:	mov	x0, x9	/* x0 := deckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_dec1	/* q0 := cv ^ ptxt; trash x0/x3/q16 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b	/* repeat if more blocks */
	eor	v0.16b, v0.16b, v24.16b	/* q0 := first plaintext block */
	str	q0, [x2, #-0x10]!	/* store first plaintext block */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbc_dec1)
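
/*
 * Equivalent C sketch (illustrative only; dec1 and xor16 are
 * hypothetical helpers).  P[i] = Dec(C[i]) ^ C[i-1] with C[-1] = iv;
 * the assembly walks the buffers back to front, so the chaining value
 * is always the next input block to be read, and the new iv (the last
 * ciphertext block) can be stored before the main loop runs.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void
 *	cbc_dec(const struct aesdec *k, const uint8_t *in, uint8_t *out,
 *	    size_t nblk, const uint8_t iv[16], uint32_t nrounds)
 *	{
 *		uint8_t b[16];
 *		size_t i;
 *
 *		for (i = nblk; i-- > 0;) {
 *			memcpy(b, in + 16*i, 16);
 *			dec1(k, b, nrounds);	// b := Dec(C[i])
 *			xor16(b, i ? in + 16*(i - 1) : iv);
 *			memcpy(out + 16*i, b, 16);
 *		}
 *	}
 */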

/*
 * aesarmv8_cbc_dec8(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, const uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of 8-block units with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_dec8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q24, [x4]	/* q24 := iv */
	mov	x9, x0	/* x9 := deckey */
	mov	x10, x3	/* x10 := nbytes */
	add	x1, x1, x3	/* x1 := pointer past end of in */
	add	x2, x2, x3	/* x2 := pointer past end of out */
	ldp	q6, q7, [x1, #-0x20]!	/* q6, q7 := last ciphertext blocks */
	str	q7, [x4]	/* update iv */
	b	2f
	_ALIGN_TEXT
1:	ldp	q6, q7, [x1, #-0x20]!
	eor	v0.16b, v0.16b, v7.16b	/* q0 := pt0 */
	stp	q0, q1, [x2, #-0x20]!
2:	ldp	q4, q5, [x1, #-0x20]!
	ldp	q2, q3, [x1, #-0x20]!
	ldp	q0, q1, [x1, #-0x20]!
	mov	v31.16b, v6.16b	/* q[24+i] := cv[i], 0<i<8 */
	mov	v30.16b, v5.16b
	mov	v29.16b, v4.16b
	mov	v28.16b, v3.16b
	mov	v27.16b, v2.16b
	mov	v26.16b, v1.16b
	mov	v25.16b, v0.16b
	mov	x0, x9	/* x0 := deckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_dec8	/* q[i] := cv[i] ^ pt[i];
				 * trash x0/x3/q16 */
	eor	v7.16b, v7.16b, v31.16b	/* q[i] := pt[i] */
	eor	v6.16b, v6.16b, v30.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v1.16b, v1.16b, v25.16b
	subs	x10, x10, #0x80	/* count down nbytes */
	stp	q6, q7, [x2, #-0x20]!	/* store plaintext blocks */
	stp	q4, q5, [x2, #-0x20]!
	stp	q2, q3, [x2, #-0x20]!
	b.ne	1b	/* repeat if there's more */
	eor	v0.16b, v0.16b, v24.16b	/* q0 := pt0 */
	stp	q0, q1, [x2, #-0x20]!	/* store first two plaintext blocks */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbc_dec8)

/*
 * aesarmv8_xts_enc1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_xts_enc8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_enc1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q31, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	ldr	q0, [x1], #0x10	/* q0 := ptxt */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := ptxt ^ tweak */
	bl	aesarmv8_enc1	/* q0 := AES(...); trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ptxt ^ tweak) ^ tweak */
	str	q0, [x2], #0x10	/* store ciphertext block */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b	/* repeat if more blocks */
	str	q31, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_enc1)
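
/*
 * Equivalent C sketch of the XTS block step (illustrative only; enc1,
 * xor16, and xts_mulx are hypothetical helpers mirroring the calls
 * above): C[i] = E(P[i] ^ T[i]) ^ T[i], where T[0] is the caller's
 * tweak and T[i+1] = T[i] * x in GF(2^128).
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void
 *	xts_enc(const struct aesenc *k, const uint8_t *in, uint8_t *out,
 *	    size_t nbytes, uint8_t tweak[16], uint32_t nrounds)
 *	{
 *		uint8_t b[16];
 *
 *		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *			memcpy(b, in, 16);
 *			xor16(b, tweak);	// b := ptxt ^ tweak
 *			enc1(k, b, nrounds);	// b := E(b)
 *			xor16(b, tweak);	// b := E(...) ^ tweak
 *			memcpy(out, b, 16);
 *			xts_mulx(tweak);	// tweak *= x
 *		}
 *	}
 */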

/*
 * aesarmv8_xts_enc8(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_enc8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q31, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	/* q31 := tweak[7] */
	ldp	q0, q1, [x1], #0x20	/* q[i] := ptxt[i] */
	ldp	q2, q3, [x1], #0x20
	ldp	q4, q5, [x1], #0x20
	ldp	q6, q7, [x1], #0x20
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ptxt[i] ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_enc8	/* encrypt q0-q7; trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	stp	q0, q1, [x2], #0x20	/* store ciphertext blocks */
	stp	q2, q3, [x2], #0x20
	stp	q4, q5, [x2], #0x20
	stp	q6, q7, [x2], #0x20
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x80	/* count down nbytes */
	b.ne	1b	/* repeat if more block groups */
	str	q31, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_enc8)

/*
 * aesarmv8_xts_dec1(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_xts_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := deckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q31, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	ldr	q0, [x1], #0x10	/* q0 := ctxt */
	mov	x0, x9	/* x0 := deckey */
	mov	x3, x5	/* x3 := nrounds */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := ctxt ^ tweak */
	bl	aesarmv8_dec1	/* q0 := AES(...); trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ctxt ^ tweak) ^ tweak */
	str	q0, [x2], #0x10	/* store plaintext block */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b	/* repeat if more blocks */
	str	q31, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_dec1)

/*
 * aesarmv8_xts_dec8(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_dec8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0	/* x9 := deckey */
	mov	x10, x3	/* x10 := nbytes */
	ldr	q31, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	/* q31 := tweak[7] */
	ldp	q0, q1, [x1], #0x20	/* q[i] := ctxt[i] */
	ldp	q2, q3, [x1], #0x20
	ldp	q4, q5, [x1], #0x20
	ldp	q6, q7, [x1], #0x20
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ctxt[i] ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	mov	x0, x9	/* x0 := deckey */
	mov	x3, x5	/* x3 := nrounds */
	bl	aesarmv8_dec8	/* decrypt q0-q7; trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	stp	q0, q1, [x2], #0x20	/* store plaintext blocks */
	stp	q2, q3, [x2], #0x20
	stp	q4, q5, [x2], #0x20
	stp	q6, q7, [x2], #0x20
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x80	/* count down nbytes */
	b.ne	1b	/* repeat if more block groups */
	str	q31, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_dec8)

/*
 * aesarmv8_xts_mulx(tweak@q31)
 *
 *	Multiply q31 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
 *	Uses x0 and q0/q1 as temporaries.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_xts_mulx,@function
aesarmv8_xts_mulx:
	/*
	 * Simultaneously determine
	 * (a) whether the high bit of the low half must be
	 *     shifted into the low bit of the high half, and
	 * (b) whether the high bit of the high half must be
	 *     carried into x^128 = x^7 + x^2 + x + 1.
	 */
	adrl	x0, xtscarry
	cmlt	v1.2d, v31.2d, #0	/* v1.2d[i] := -1 if v31.2d[i] < 0, else 0 */
	ldr	q0, [x0]	/* q0 := xtscarry */
	ext	v1.16b, v1.16b, v1.16b, #8	/* swap halves of q1 */
	shl	v31.2d, v31.2d, #1	/* shift */
	and	v0.16b, v0.16b, v1.16b	/* copy xtscarry according to mask */
	eor	v31.16b, v31.16b, v0.16b	/* incorporate (a) and (b) */
	ret
END(aesarmv8_xts_mulx)

	.section .rodata
	.p2align 4
	.type	xtscarry,@object
xtscarry:
	.byte	0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
END(xtscarry)
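
/*
 * Scalar C equivalent of aesarmv8_xts_mulx (illustrative only),
 * treating the tweak as two little-endian 64-bit halves: shift the
 * whole 128-bit value left by one, and if a bit falls off the top,
 * reduce by x^128 = x^7 + x^2 + x + 1, i.e. XOR 0x87 into the low
 * byte.
 *
 *	#include <stdint.h>
 *
 *	static void
 *	xts_mulx(uint64_t t[2])
 *	{
 *		uint64_t carry = t[1] >> 63;	// bit leaving x^127
 *
 *		t[1] = (t[1] << 1) | (t[0] >> 63);
 *		t[0] = (t[0] << 1) ^ (carry ? 0x87 : 0);
 *	}
 *
 * The vector version computes both carries at once: CMLT turns each
 * half's sign bit into an all-ones mask, EXT swaps the masks so each
 * half selects the other's carry out of xtscarry, and SHL/EOR apply
 * the shift and the conditional XORs.
 */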

/*
 * aesarmv8_xts_update(const uint8_t in[16] @x0, uint8_t out[16] @x1)
 *
 *	Update an AES-XTS tweak.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_update)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q31, [x0]	/* load tweak */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	str	q31, [x1]	/* store tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_update)

/*
 * aesarmv8_cbcmac_update1(const struct aesenc *enckey@x0,
 *     const uint8_t *in@x1, size_t nbytes@x2, uint8_t auth[16] @x3,
 *     uint32_t nrounds@x4)
 *
 *	Update CBC-MAC.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbcmac_update1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldr	q0, [x3]	/* q0 := initial authenticator */
	mov	x9, x0	/* x9 := enckey */
	mov	x5, x3	/* x5 := &auth (enc1 trashes x3) */
	_ALIGN_TEXT
1:	ldr	q1, [x1], #0x10	/* q1 := plaintext block */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x4	/* x3 := nrounds */
	eor	v0.16b, v0.16b, v1.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc1	/* q0 := auth'; trash x0/x3/q16 */
	subs	x2, x2, #0x10	/* count down nbytes */
	b.ne	1b	/* repeat if x2 is nonzero */
	str	q0, [x5]	/* store updated authenticator */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbcmac_update1)
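
/*
 * Equivalent C sketch (illustrative only; xor16 and enc1 are
 * hypothetical helpers): CBC-MAC is just CBC encryption that keeps
 * only the final block as the authenticator.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static void
 *	cbcmac_update(const struct aesenc *k, const uint8_t *in,
 *	    size_t nbytes, uint8_t auth[16], uint32_t nrounds)
 *	{
 *		for (; nbytes; nbytes -= 16, in += 16) {
 *			xor16(auth, in);	// auth ^= block
 *			enc1(k, auth, nrounds);	// auth := E(auth)
 *		}
 *	}
 */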

/*
 * aesarmv8_ccm_enc1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t authctr[32] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Update CCM encryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_ccm_enc1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldp	q0, q2, [x4]	/* q0 := auth, q2 := ctr (be) */
	adrl	x11, ctr32_inc	/* x11 := &ctr32_inc */
	ld1	{v5.4s}, [x11]	/* q5 := (0,0,0,1) (host-endian) */
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v2.16b, v2.16b	/* q2 := ctr (host-endian) */
#endif
	_ALIGN_TEXT
1:	ldr	q3, [x1], #0x10	/* q3 := plaintext block */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v1.16b, v2.16b	/* q1 := ctr (big-endian) */
#else
	mov	v1.16b, v2.16b	/* q1 := ctr (big-endian) */
#endif
	eor	v0.16b, v0.16b, v3.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc2	/* q0 := auth', q1 := pad;
				 * trash x0/x3/q16 */
	eor	v3.16b, v1.16b, v3.16b	/* q3 := ciphertext block */
	subs	x10, x10, #0x10	/* count down bytes */
	str	q3, [x2], #0x10	/* store ciphertext block */
	b.ne	1b	/* repeat if more blocks */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v2.16b, v2.16b	/* q2 := ctr (big-endian) */
#endif
	stp	q0, q2, [x4]	/* store updated auth/ctr */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_ccm_enc1)
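
/*
 * Equivalent C sketch (illustrative only; xor16, inc32be, and enc1
 * are hypothetical helpers): CCM runs a CBC-MAC over the plaintext
 * and a CTR-mode encryption in lockstep.  The assembly hands both
 * AES computations to aesarmv8_enc2 as a two-block batch so they
 * share the round-key loads.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void
 *	ccm_enc(const struct aesenc *k, const uint8_t *in, uint8_t *out,
 *	    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
 *	{
 *		uint8_t *auth = authctr, *ctr = authctr + 16, pad[16];
 *
 *		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *			xor16(auth, in);	// CBC-MAC half
 *			enc1(k, auth, nrounds);
 *			inc32be(ctr);		// 32-bit big-endian counter
 *			memcpy(pad, ctr, 16);
 *			enc1(k, pad, nrounds);	// CTR half
 *			memcpy(out, in, 16);
 *			xor16(out, pad);
 *		}
 *	}
 */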

/*
 * aesarmv8_ccm_dec1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t authctr[32] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Update CCM decryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_ccm_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ldp	q1, q2, [x4]	/* q1 := auth, q2 := ctr (be) */
	adrl	x11, ctr32_inc	/* x11 := &ctr32_inc */
	ld1	{v5.4s}, [x11]	/* q5 := (0,0,0,1) (host-endian) */
	mov	x9, x0	/* x9 := enckey */
	mov	x10, x3	/* x10 := nbytes */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v2.16b, v2.16b	/* q2 := ctr (host-endian) */
#endif

	/* Decrypt the first block. */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x3, x5	/* x3 := nrounds */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
#else
	mov	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
#endif
	ldr	q3, [x1], #0x10	/* q3 := ctxt */
	bl	aesarmv8_enc1	/* q0 := pad; trash x0/x3/q16 */
	b	2f

	_ALIGN_TEXT
1:	/*
	 * Authenticate the last block and decrypt the next block
	 * simultaneously.
	 *
	 * q1 = auth ^ ptxt[-1]
	 * q2 = ctr[-1] (le)
	 */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
#else
	mov	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
#endif
	ldr	q3, [x1], #0x10	/* q3 := ctxt */
	bl	aesarmv8_enc2	/* q0 := pad, q1 := auth';
				 * trash x0/x3/q16 */
2:	eor	v3.16b, v0.16b, v3.16b	/* q3 := plaintext block */
	subs	x10, x10, #0x10
	str	q3, [x2], #0x10	/* store plaintext */
	eor	v1.16b, v1.16b, v3.16b	/* q1 := auth ^ ptxt */
	b.ne	1b

#if _BYTE_ORDER == _LITTLE_ENDIAN
	rev32	v2.16b, v2.16b	/* q2 := ctr (big-endian) */
#endif

	/* Authenticate the last block. */
	mov	x0, x9	/* x0 := enckey */
	mov	x3, x5	/* x3 := nrounds */
	mov	v0.16b, v1.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc1	/* q0 := auth'; trash x0/x3/q16 */
	stp	q0, q2, [x4]	/* store updated auth/ctr */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_ccm_dec1)

	.section .rodata
	.p2align 4
	.type	ctr32_inc,@object
ctr32_inc:
	.int	0, 0, 0, 1
END(ctr32_inc)

/*
 * aesarmv8_enc1(const struct aesenc *enckey@x0,
 *     uint128_t block@q0, uint32_t nrounds@x3)
 *
 *	Encrypt a single AES block in q0.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc1,@function
aesarmv8_enc1:
	ldr	q16, [x0], #0x10	/* load round key */
	b	2f
	_ALIGN_TEXT
1:	/* q0 := MixColumns(q0) */
	aesmc	v0.16b, v0.16b
2:	subs	x3, x3, #1
	/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
	aese	v0.16b, v16.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	b.ne	1b
	eor	v0.16b, v0.16b, v16.16b
	ret
END(aesarmv8_enc1)
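
/*
 * The round structure above, as a C sketch with the ACLE <arm_neon.h>
 * crypto intrinsics (illustrative only).  AESE folds AddRoundKey into
 * the start of a round rather than the end, so the last round key is
 * applied with a plain EOR after the loop, and the final round has no
 * MixColumns.
 *
 *	#include <arm_neon.h>
 *
 *	static uint8x16_t
 *	enc1(const uint8x16_t *rk, uint8x16_t b, unsigned nrounds)
 *	{
 *		unsigned i;
 *
 *		for (i = 0; i < nrounds - 1; i++) {
 *			b = vaeseq_u8(b, rk[i]);	// ARK+SubBytes+ShiftRows
 *			b = vaesmcq_u8(b);		// MixColumns
 *		}
 *		b = vaeseq_u8(b, rk[nrounds - 1]);	// last round: no MC
 *		return veorq_u8(b, rk[nrounds]);	// final AddRoundKey
 *	}
 */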

/*
 * aesarmv8_enc2(const struct aesenc *enckey@x0,
 *     uint128_t block@q0, uint128_t block@q1, uint32_t nrounds@x3)
 *
 *	Encrypt two AES blocks in q0 and q1.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc2,@function
aesarmv8_enc2:
	ldr	q16, [x0], #0x10	/* load round key */
	b	2f
	_ALIGN_TEXT
1:	/* q[i] := MixColumns(q[i]) */
	aesmc	v0.16b, v0.16b
	aesmc	v1.16b, v1.16b
2:	subs	x3, x3, #1
	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
	aese	v0.16b, v16.16b
	aese	v1.16b, v16.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	b.ne	1b
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	ret
END(aesarmv8_enc2)

/*
 * aesarmv8_enc8(const struct aesenc *enckey@x0,
 *     uint128_t block0@q0, ..., uint128_t block7@q7,
 *     uint32_t nrounds@x3)
 *
 *	Encrypt eight AES blocks in q0 through q7 in parallel.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc8,@function
aesarmv8_enc8:
	ldr	q16, [x0], #0x10	/* load round key */
	b	2f
	_ALIGN_TEXT
1:	/* q[i] := MixColumns(q[i]) */
	aesmc	v0.16b, v0.16b
	aesmc	v1.16b, v1.16b
	aesmc	v2.16b, v2.16b
	aesmc	v3.16b, v3.16b
	aesmc	v4.16b, v4.16b
	aesmc	v5.16b, v5.16b
	aesmc	v6.16b, v6.16b
	aesmc	v7.16b, v7.16b
2:	subs	x3, x3, #1
	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
	aese	v0.16b, v16.16b
	aese	v1.16b, v16.16b
	aese	v2.16b, v16.16b
	aese	v3.16b, v16.16b
	aese	v4.16b, v16.16b
	aese	v5.16b, v16.16b
	aese	v6.16b, v16.16b
	aese	v7.16b, v16.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	b.ne	1b
	eor	v0.16b, v0.16b, v16.16b	/* AddRoundKey */
	eor	v1.16b, v1.16b, v16.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v3.16b, v3.16b, v16.16b
	eor	v4.16b, v4.16b, v16.16b
	eor	v5.16b, v5.16b, v16.16b
	eor	v6.16b, v6.16b, v16.16b
	eor	v7.16b, v7.16b, v16.16b
	ret
END(aesarmv8_enc8)

/*
 * aesarmv8_dec1(const struct aesdec *deckey@x0,
 *     uint128_t block@q0, uint32_t nrounds@x3)
 *
 *	Decrypt a single AES block in q0.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_dec1,@function
aesarmv8_dec1:
	ldr	q16, [x0], #0x10	/* load round key */
	b	2f
	_ALIGN_TEXT
1:	/* q0 := InMixColumns(q0) */
	aesimc	v0.16b, v0.16b
2:	subs	x3, x3, #1
	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
	aesd	v0.16b, v16.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	b.ne	1b
	eor	v0.16b, v0.16b, v16.16b
	ret
END(aesarmv8_dec1)

/*
 * aesarmv8_dec8(const struct aesdec *deckey@x0,
 *     uint128_t block0@q0, ..., uint128_t block7@q7,
 *     uint32_t nrounds@x3)
 *
 *	Decrypt eight AES blocks in q0 through q7 in parallel.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_dec8,@function
aesarmv8_dec8:
	ldr	q16, [x0], #0x10	/* load round key */
	b	2f
	_ALIGN_TEXT
1:	/* q[i] := InMixColumns(q[i]) */
	aesimc	v0.16b, v0.16b
	aesimc	v1.16b, v1.16b
	aesimc	v2.16b, v2.16b
	aesimc	v3.16b, v3.16b
	aesimc	v4.16b, v4.16b
	aesimc	v5.16b, v5.16b
	aesimc	v6.16b, v6.16b
	aesimc	v7.16b, v7.16b
2:	subs	x3, x3, #1
	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
	aesd	v0.16b, v16.16b
	aesd	v1.16b, v16.16b
	aesd	v2.16b, v16.16b
	aesd	v3.16b, v16.16b
	aesd	v4.16b, v16.16b
	aesd	v5.16b, v16.16b
	aesd	v6.16b, v16.16b
	aesd	v7.16b, v16.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	b.ne	1b
	eor	v0.16b, v0.16b, v16.16b	/* AddRoundKey */
	eor	v1.16b, v1.16b, v16.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v3.16b, v3.16b, v16.16b
	eor	v4.16b, v4.16b, v16.16b
	eor	v5.16b, v5.16b, v16.16b
	eor	v6.16b, v6.16b, v16.16b
	eor	v7.16b, v7.16b, v16.16b
	ret
END(aesarmv8_dec8)