/*	$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <aarch64/asm.h>

RCSID("$NetBSD: aes_armv8_64.S,v 1.14 2020/09/08 23:57:43 riastradh Exp $")

	.arch_extension	aes

/*
 * uint32_t rcon[10]
 *
 * Table mapping n ---> x^n mod (x^8 + x^4 + x^3 + x + 1) in GF(2).
 * Such elements of GF(2^8) need only eight bits to be represented,
 * but we store them in 4-byte units so we can copy one into all
 * four 4-byte lanes of a vector register with a single LD1R.  The
 * access pattern is fixed, so indices into this table are never
 * secret.
 */
	.section .rodata
	.p2align 2
	.type	rcon,@object
rcon:
	.long	0x01
	.long	0x02
	.long	0x04
	.long	0x08
	.long	0x10
	.long	0x20
	.long	0x40
	.long	0x80
	.long	0x1b
	.long	0x36
END(rcon)
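
/*
 * Editorial note: the table above can be regenerated by repeated
 * doubling in GF(2^8), reducing by the AES polynomial 0x11b whenever
 * the high bit carries out.  A minimal, hypothetical C generator (not
 * part of the build):
 *
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		unsigned x = 1, n;
 *
 *		for (n = 0; n < 10; n++) {
 *			printf("\t.long\t0x%02x\n", x);
 *			x <<= 1;		// multiply by x
 *			if (x & 0x100)		// reduce mod 0x11b
 *				x ^= 0x11b;
 *		}
 *		return 0;
 *	}
 *
 * Running it prints exactly the ten .long entries of rcon.
 */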

/*
 * uint128_t unshiftrows_rotword_1
 *
 * Table for TBL instruction to undo ShiftRows, and then do
 * RotWord on word 1, and then copy it into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_rotword_1,@object
unshiftrows_rotword_1:
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
	.byte	0x01,0x0e,0x0b,0x04
END(unshiftrows_rotword_1)

/*
 * uint128_t unshiftrows_3
 *
 * Table for TBL instruction to undo ShiftRows, and then copy word
 * 3 into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_3,@object
unshiftrows_3:
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
	.byte	0x0c,0x09,0x06,0x03
END(unshiftrows_3)

/*
 * uint128_t unshiftrows_rotword_3
 *
 * Table for TBL instruction to undo ShiftRows, and then do
 * RotWord on word 3, and then copy it into all the other words.
 */
	.section .rodata
	.p2align 4
	.type	unshiftrows_rotword_3,@object
unshiftrows_rotword_3:
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
	.byte	0x09,0x06,0x03,0x0c
END(unshiftrows_rotword_3)
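
/*
 * Editorial note on the three tables above: with the state laid out
 * column-major as in FIPS-197, ShiftRows moves the byte in row r,
 * column c to column (c - r) mod 4, so the byte of original word w in
 * row r lands at index 4*((w - r) & 3) + r.  A hypothetical C
 * generator (not part of the build) reproduces all three tables:
 *
 *	#include <stdio.h>
 *
 *	static void
 *	gen(const char *name, unsigned w, unsigned rot)
 *	{
 *		unsigned copy, r, rr;
 *
 *		printf("%s:\n", name);
 *		for (copy = 0; copy < 4; copy++) {
 *			printf("\t.byte");
 *			for (r = 0; r < 4; r++) {
 *				rr = (r + rot) & 3;	// RotWord source
 *				printf("%c0x%02x", r ? ',' : '\t',
 *				    4*((w - rr) & 3) + rr);
 *			}
 *			printf("\n");
 *		}
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		gen("unshiftrows_rotword_1", 1, 1);
 *		gen("unshiftrows_3", 3, 0);
 *		gen("unshiftrows_rotword_3", 3, 1);
 *		return 0;
 *	}
 */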

/*
 * aesarmv8_setenckey128(struct aesenc *enckey@x0, const uint8_t key[16] @x1)
 *
 *	Expand a 16-byte AES-128 key into 10 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey128)
	ld1	{v1.16b}, [x1]	/* q1 := master key */

	adrl	x4, unshiftrows_rotword_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 table */

	str	q1, [x0], #0x10	/* store master key as first round key */
	mov	x2, #10		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (prk[0], prk[1], prk[2], prk[3])
	 * x0 = pointer to round key to compute
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q1)) */
	mov	v3.16b, v1.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v1.4s := (rk[0], rk[1], rk[2], rk[3]) */
	eor	v1.16b, v1.16b, v3.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #1	/* count down rounds */
	str	q1, [x0], #0x10	/* store round key */
	b.ne	1b

	ret
END(aesarmv8_setenckey128)
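
/*
 * Editorial sketch (not part of the build): one trip around the loop
 * above is the standard AES-128 key-expansion step.  In scalar C,
 * with SubWord/RotWord as in FIPS-197 (helpers assumed, not defined
 * here):
 *
 *	uint32_t t = SubWord(RotWord(prk[3])) ^ rcon;	// q3, all lanes
 *
 *	rk[0] = prk[0] ^ t;
 *	rk[1] = prk[1] ^ prk[0] ^ t;
 *	rk[2] = prk[2] ^ prk[1] ^ prk[0] ^ t;
 *	rk[3] = prk[3] ^ prk[2] ^ prk[1] ^ prk[0] ^ t;
 *
 * The three EXTs materialize the shifted copies of prk so that the
 * four EORs compute all four prefix XORs in one vector pass.
 */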

/*
 * aesarmv8_setenckey192(struct aesenc *enckey@x0, const uint8_t key[24] @x1)
 *
 *	Expand a 24-byte AES-192 key into 12 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey192)
	ld1	{v1.16b}, [x1], #0x10	/* q1 := master key[0:128) */
	ld1	{v2.8b}, [x1]	/* d2 := master key[128:192) */

	adrl	x4, unshiftrows_rotword_1
	adrl	x5, unshiftrows_rotword_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_1 */
	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_rotword_3 */

	str	q1, [x0], #0x10	/* store master key[0:128) as round key */
	mov	x2, #12		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (prk[0], prk[1], prk[2], prk[3])
	 * v2.4s = (rklo[0], rklo[1], xxx, xxx)
	 * x0 = pointer to three round keys to compute
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q2)) */
	mov	v3.16b, v2.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(rklo[1])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * We need to compute:
	 *
	 * rk[0] := rklo[0]
	 * rk[1] := rklo[1]
	 * rk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0]
	 * rk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1]
	 * nrk[0] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3]
	 * nrk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 * nrk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 *     ^ rklo[1]
	 */

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v5.4s := (rk[2], rk[3], nrk[0], nrk[1]) */
	eor	v5.16b, v5.16b, v1.16b
	eor	v5.16b, v5.16b, v3.16b
	eor	v5.16b, v5.16b, v6.16b
	eor	v5.16b, v5.16b, v7.16b

	/*
	 * At this point, rk is split across v2.4s = (rk[0],rk[1],...)
	 * and v5.4s = (rk[2],rk[3],...); nrk is in v5.4s =
	 * (...,nrk[0],nrk[1]); and we have yet to compute nrk[2] or
	 * nrk[3], which requires rklo[0] and rklo[1] in v2.4s =
	 * (rklo[0],rklo[1],...).
	 */

	/* v1.4s := (nrk[0], nrk[1], nrk[1], nrk[1]) */
	dup	v1.4s, v5.s[3]
	mov	v1.s[0], v5.s[2]

	/*
	 * v6.4s := (0, 0, rklo[0], rklo[1])
	 * v7.4s := (0, 0, 0, rklo[0])
	 */
	ext	v6.16b, v0.16b, v2.16b, #8
	ext	v7.16b, v0.16b, v2.16b, #4

	/* v3.4s := (nrk[0], nrk[1], nrk[2], nrk[3]) */
	eor	v3.16b, v1.16b, v6.16b
	eor	v3.16b, v3.16b, v7.16b

	/*
	 * Recall v2.4s = (rk[0], rk[1], xxx, xxx)
	 * and v5.4s = (rk[2], rk[3], xxx, xxx).  Set
	 * v2.4s := (rk[0], rk[1], rk[2], rk[3])
	 */
	mov	v2.d[1], v5.d[0]

	/* store two round keys */
	stp	q2, q3, [x0], #0x20

	/*
	 * Live vector registers at this point:
	 *
	 * q0 = zero
	 * q2 = rk
	 * q3 = nrk
	 * v5.4s = (rk[2], rk[3], nrk[0], nrk[1])
	 * q16 = unshiftrows_rotword_1
	 * q17 = unshiftrows_rotword_3
	 *
	 * We have to compute, in q1:
	 *
	 * nnrk[0] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2]
	 * nnrk[1] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3]
	 * nnrk[2] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 * nnrk[3] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1]
	 *
	 * And, if there's any more afterward, in q2:
	 *
	 * nnnrklo[0] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1] ^ nrk[2]
	 * nnnrklo[1] := Rot(Sub(nrk[3])) ^ RCON' ^ rk[2] ^ rk[3] ^ nrk[0]
	 *     ^ nrk[1] ^ nrk[2] ^ nrk[3]
	 */

	/* q1 := ShiftRows(SubBytes(q3)) */
	mov	v1.16b, v3.16b
	aese	v1.16b, v0.16b

	/* v1.4s[i] := RotWords(SubBytes(nrk[3])) ^ RCON' */
	ld1r	{v4.4s}, [x3], #4
	tbl	v1.16b, {v1.16b}, v17.16b
	eor	v1.16b, v1.16b, v4.16b

	/*
	 * v5.4s := (rk[2], rk[3], nrk[0], nrk[1])	[already]
	 * v4.4s := (0, rk[2], rk[3], nrk[0])
	 * v6.4s := (0, 0, rk[2], rk[3])
	 * v7.4s := (0, 0, 0, rk[2])
	 */
	ext	v4.16b, v0.16b, v5.16b, #12
	ext	v6.16b, v0.16b, v5.16b, #8
	ext	v7.16b, v0.16b, v5.16b, #4

	/* v1.4s := (nnrk[0], nnrk[1], nnrk[2], nnrk[3]) */
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v4.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #3	/* count down three rounds */
	str	q1, [x0], #0x10	/* store third round key */
	b.eq	2f

	/*
	 * v4.4s := (nrk[2], nrk[3], xxx, xxx)
	 * v5.4s := (0, nrk[2], xxx, xxx)
	 */
	ext	v4.16b, v3.16b, v0.16b, #8
	ext	v5.16b, v0.16b, v4.16b, #12

	/* v2.4s := (nnrk[3], nnrk[3], xxx, xxx) */
	dup	v2.4s, v1.s[3]

	/*
	 * v2.4s := (nnnrklo[0] = nnrk[3] ^ nrk[2],
	 *     nnnrklo[1] = nnrk[3] ^ nrk[2] ^ nrk[3],
	 *     xxx, xxx)
	 */
	eor	v2.16b, v2.16b, v4.16b
	eor	v2.16b, v2.16b, v5.16b

	b	1b

2:	ret
END(aesarmv8_setenckey192)

/*
 * aesarmv8_setenckey256(struct aesenc *enckey@x0, const uint8_t key[32] @x1)
 *
 *	Expand a 32-byte AES-256 key into 14 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_setenckey256)
	/* q1 := key[0:128), q2 := key[128:256) */
	ld1	{v1.16b-v2.16b}, [x1], #0x20

	adrl	x4, unshiftrows_rotword_3
	adrl	x5, unshiftrows_3
	eor	v0.16b, v0.16b, v0.16b	/* q0 := 0 */
	ld1	{v16.16b}, [x4]	/* q16 := unshiftrows_rotword_3 */
	ld1	{v17.16b}, [x5]	/* q17 := unshiftrows_3 */

	/* store master key as first two round keys */
	stp	q1, q2, [x0], #0x20
	mov	x2, #14		/* round count */
	adrl	x3, rcon	/* round constant */

1:	/*
	 * q0 = 0
	 * v1.4s = (pprk[0], pprk[1], pprk[2], pprk[3])
	 * v2.4s = (prk[0], prk[1], prk[2], prk[3])
	 * x2 = round count
	 * x3 = rcon pointer
	 */

	/* q3 := ShiftRows(SubBytes(q2)) */
	mov	v3.16b, v2.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := RotWords(SubBytes(prk[3])) ^ RCON */
	ld1r	{v4.4s}, [x3], #4
	tbl	v3.16b, {v3.16b}, v16.16b
	eor	v3.16b, v3.16b, v4.16b

	/*
	 * v5.4s := (0,pprk[0],pprk[1],pprk[2])
	 * v6.4s := (0,0,pprk[0],pprk[1])
	 * v7.4s := (0,0,0,pprk[0])
	 */
	ext	v5.16b, v0.16b, v1.16b, #12
	ext	v6.16b, v0.16b, v1.16b, #8
	ext	v7.16b, v0.16b, v1.16b, #4

	/* v1.4s := (rk[0], rk[1], rk[2], rk[3]) */
	eor	v1.16b, v1.16b, v3.16b
	eor	v1.16b, v1.16b, v5.16b
	eor	v1.16b, v1.16b, v6.16b
	eor	v1.16b, v1.16b, v7.16b

	subs	x2, x2, #2	/* count down two rounds */
	b.eq	2f		/* stop if this is the last one */

	/* q3 := ShiftRows(SubBytes(q1)) */
	mov	v3.16b, v1.16b
	aese	v3.16b, v0.16b

	/* v3.4s[i] := SubBytes(rk[3]) */
	tbl	v3.16b, {v3.16b}, v17.16b

	/*
	 * v5.4s := (0,prk[0],prk[1],prk[2])
	 * v6.4s := (0,0,prk[0],prk[1])
	 * v7.4s := (0,0,0,prk[0])
	 */
	ext	v5.16b, v0.16b, v2.16b, #12
	ext	v6.16b, v0.16b, v2.16b, #8
	ext	v7.16b, v0.16b, v2.16b, #4

	/* v2.4s := (nrk[0], nrk[1], nrk[2], nrk[3]) */
	eor	v2.16b, v2.16b, v3.16b
	eor	v2.16b, v2.16b, v5.16b
	eor	v2.16b, v2.16b, v6.16b
	eor	v2.16b, v2.16b, v7.16b

	stp	q1, q2, [x0], #0x20	/* store two round keys */
	b	1b

2:	str	q1, [x0]	/* store last round key */
	ret
END(aesarmv8_setenckey256)

/*
 * aesarmv8_enctodec(const struct aesenc *enckey@x0, struct aesdec *deckey@x1,
 *     uint32_t nrounds@x2)
 *
 *	Convert AES encryption round keys to AES decryption round keys.
 *	`nrounds' must be between 10 and 14.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_enctodec)
	ldr	q0, [x0, x2, lsl #4]	/* load last round key */
	b	2f
	_ALIGN_TEXT
1:	aesimc	v0.16b, v0.16b	/* convert encryption to decryption */
2:	str	q0, [x1], #0x10	/* store round key */
	subs	x2, x2, #1	/* count down round */
	ldr	q0, [x0, x2, lsl #4]	/* load previous round key */
	b.ne	1b		/* repeat if there's more */
	str	q0, [x1]	/* store first round key verbatim */
	ret
END(aesarmv8_enctodec)
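
/*
 * Editorial sketch (not part of the build): modeling AESIMC as
 * InvMixColumns on a round key, the loop above computes
 *
 *	dk[0] = ek[nrounds];
 *	for (i = 1; i < nrounds; i++)
 *		dk[i] = InvMixColumns(ek[nrounds - i]);
 *	dk[nrounds] = ek[0];
 *
 * i.e., the encryption schedule reversed, with the interior keys run
 * through InvMixColumns for the equivalent inverse cipher (FIPS-197,
 * Sec. 5.3.5).
 */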

/*
 * aesarmv8_enc(const struct aesenc *enckey@x0, const uint8_t in[16] @x1,
 *     uint8_t out[16] @x2, uint32_t nrounds@x3)
 *
 *	Encrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_enc)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v0.16b}, [x1]	/* q0 := ptxt */
	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
	st1	{v0.16b}, [x2]	/* store ctxt */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_enc)

/*
 * aesarmv8_dec(const struct aesdec *deckey@x0, const uint8_t in[16] @x1,
 *     uint8_t out[16] @x2, uint32_t nrounds@x3)
 *
 *	Decrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_dec)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v0.16b}, [x1]	/* q0 := ctxt */
	bl	aesarmv8_dec1	/* q0 := ptxt; trash x0/x3/q16 */
	st1	{v0.16b}, [x2]	/* store ptxt */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_dec)

/*
 * aesarmv8_cbc_enc(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be an integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_enc)
	cbz	x3, 2f		/* stop if nothing to do */
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v0.16b}, [x4]	/* q0 := chaining value */
	_ALIGN_TEXT
1:	ld1	{v1.16b}, [x1], #0x10	/* q1 := plaintext block */
	eor	v0.16b, v0.16b, v1.16b	/* q0 := cv ^ ptxt */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_enc1	/* q0 := ctxt; trash x0/x3/q16 */
	subs	x10, x10, #0x10	/* count down nbytes */
	st1	{v0.16b}, [x2], #0x10	/* store ciphertext block */
	b.ne	1b		/* repeat if x10 is nonzero */
	st1	{v0.16b}, [x4]	/* store chaining value */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
2:	ret
END(aesarmv8_cbc_enc)
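
/*
 * Editorial sketch (not part of the build; xor16/aes_enc are
 * hypothetical helpers): the loop above is the textbook CBC
 * recurrence,
 *
 *	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
 *		xor16(cv, cv, in);		// cv ^= ptxt
 *		aes_enc(enckey, cv, cv, nrounds);	// cv = E_k(cv)
 *		memcpy(out, cv, 16);
 *	}
 *	memcpy(iv, cv, 16);			// chain across calls
 *
 * The serial dependence through cv is why this path processes one
 * block per AES call and cannot be vectorized across blocks the way
 * CBC decryption below can.
 */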

/*
 * aesarmv8_cbc_dec1(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, const uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_cbc_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v24.16b}, [x4]	/* q24 := iv */
	mov	x9, x0		/* x9 := deckey */
	mov	x10, x3		/* x10 := nbytes */
	add	x1, x1, x3	/* x1 := pointer past end of in */
	add	x2, x2, x3	/* x2 := pointer past end of out */
	sub	x1, x1, #0x10
	ld1	{v0.16b}, [x1]	/* q0 := last ciphertext block */
	st1	{v0.16b}, [x4]	/* update iv */
	b	2f
	_ALIGN_TEXT
1:	sub	x1, x1, #0x10
	ld1	{v31.16b}, [x1]	/* q31 := chaining value */
	sub	x2, x2, #0x10
	eor	v0.16b, v0.16b, v31.16b	/* q0 := plaintext block */
	st1	{v0.16b}, [x2]	/* store plaintext block */
	mov	v0.16b, v31.16b	/* move cv = ciphertext block */
2:	mov	x0, x9		/* x0 := deckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_dec1	/* q0 := cv ^ ptxt; trash x0/x3/q16 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b		/* repeat if more blocks */
	eor	v0.16b, v0.16b, v24.16b	/* q0 := first plaintext block */
	sub	x2, x2, #0x10	/* store first plaintext block */
	st1	{v0.16b}, [x2]
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbc_dec1)
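
/*
 * Editorial sketch (not part of the build; aes_dec/xor16 are
 * hypothetical helpers): the routine above walks the buffer
 * backwards, roughly
 *
 *	memcpy(iv0, iv, 16);			// save original iv
 *	memcpy(iv, in + nbytes - 16, 16);	// new iv := last ct block
 *	for (i = nbytes/16 - 1; i >= 0; i--) {
 *		cv = (i == 0 ? iv0 : in + 16*(i - 1));
 *		aes_dec(deckey, in + 16*i, tmp, nrounds);
 *		xor16(out + 16*i, tmp, cv);	// pt[i] = D(ct[i]) ^ ct[i-1]
 *	}
 *
 * Going back to front lets each chaining value be reloaded straight
 * from the input buffer, which still holds ciphertext ahead of the
 * cursor even when out == in.
 */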

/*
 * aesarmv8_cbc_dec8(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, const uint8_t iv[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of 8-block units with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbc_dec8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v24.16b}, [x4]	/* q24 := iv */
	mov	x9, x0		/* x9 := deckey */
	mov	x10, x3		/* x10 := nbytes */
	add	x1, x1, x3	/* x1 := pointer past end of in */
	add	x2, x2, x3	/* x2 := pointer past end of out */
	sub	x1, x1, #0x20
	ld1	{v6.16b, v7.16b}, [x1]	/* q6, q7 := last ciphertext blocks */
	st1	{v7.16b}, [x4]	/* update iv */
	b	2f
	_ALIGN_TEXT
1:	sub	x1, x1, #0x20
	ld1	{v6.16b, v7.16b}, [x1]
	eor	v0.16b, v0.16b, v7.16b	/* q0 := pt0 */
	sub	x2, x2, #0x20
	st1	{v0.16b, v1.16b}, [x2]
2:	sub	x1, x1, #0x20
	ld1	{v4.16b-v5.16b}, [x1]
	sub	x1, x1, #0x40
	ld1	{v0.16b-v3.16b}, [x1]

	mov	v31.16b, v6.16b	/* q[24+i] := cv[i], 0<i<8 */
	mov	v30.16b, v5.16b
	mov	v29.16b, v4.16b
	mov	v28.16b, v3.16b
	mov	v27.16b, v2.16b
	mov	v26.16b, v1.16b
	mov	v25.16b, v0.16b
	mov	x0, x9		/* x0 := deckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_dec8	/* q[i] := cv[i] ^ pt[i];
				 * trash x0/x3/q16 */
	eor	v7.16b, v7.16b, v31.16b	/* q[i] := pt[i] */
	eor	v6.16b, v6.16b, v30.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v1.16b, v1.16b, v25.16b
	subs	x10, x10, #0x80	/* count down nbytes */
	sub	x2, x2, #0x20	/* store plaintext blocks */
	st1	{v6.16b-v7.16b}, [x2]
	sub	x2, x2, #0x40
	st1	{v2.16b-v5.16b}, [x2]
	b.ne	1b		/* repeat if there's more */
	eor	v0.16b, v0.16b, v24.16b	/* q0 := pt0 */
	sub	x2, x2, #0x20
	st1	{v0.16b, v1.16b}, [x2]	/* store first two plaintext blocks */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbc_dec8)

/*
 * aesarmv8_xts_enc1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_xts_enc8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_enc1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v31.16b}, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	ld1	{v0.16b}, [x1], #0x10	/* q0 := ptxt */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := ptxt ^ tweak */
	bl	aesarmv8_enc1	/* q0 := AES(...); trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ptxt ^ tweak) ^ tweak */
	st1	{v0.16b}, [x2], #0x10	/* store ciphertext block */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b		/* repeat if more blocks */
	st1	{v31.16b}, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_enc1)

/*
 * aesarmv8_xts_enc8(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_enc8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v31.16b}, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	/* q31 := tweak[7] */
	ld1	{v0.16b-v3.16b}, [x1], #0x40	/* q[i] := ptxt[i] */
	ld1	{v4.16b-v7.16b}, [x1], #0x40
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ptxt[i] ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_enc8	/* encrypt q0-q7; trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	st1	{v0.16b-v3.16b}, [x2], #0x40	/* store ciphertext blocks */
	st1	{v4.16b-v7.16b}, [x2], #0x40
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x80	/* count down nbytes */
	b.ne	1b		/* repeat if more block groups */
	st1	{v31.16b}, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_enc8)

/*
 * aesarmv8_xts_dec1(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesarmv8_xts_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := deckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v31.16b}, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	ld1	{v0.16b}, [x1], #0x10	/* q0 := ctxt */
	mov	x0, x9		/* x0 := deckey */
	mov	x3, x5		/* x3 := nrounds */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := ctxt ^ tweak */
	bl	aesarmv8_dec1	/* q0 := AES(...); trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v31.16b	/* q0 := AES(ctxt ^ tweak) ^ tweak */
	st1	{v0.16b}, [x2], #0x10	/* store plaintext block */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x10	/* count down nbytes */
	b.ne	1b		/* repeat if more blocks */
	st1	{v31.16b}, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_dec1)

/*
 * aesarmv8_xts_dec8(const struct aesdec *deckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t tweak[16] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_dec8)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	mov	x9, x0		/* x9 := deckey */
	mov	x10, x3		/* x10 := nbytes */
	ld1	{v31.16b}, [x4]	/* q31 := tweak */
	_ALIGN_TEXT
1:	mov	v24.16b, v31.16b	/* q24 := tweak[0] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v25.16b, v31.16b	/* q25 := tweak[1] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v26.16b, v31.16b	/* q26 := tweak[2] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v27.16b, v31.16b	/* q27 := tweak[3] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v28.16b, v31.16b	/* q28 := tweak[4] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v29.16b, v31.16b	/* q29 := tweak[5] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	mov	v30.16b, v31.16b	/* q30 := tweak[6] */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	/* q31 := tweak[7] */
	ld1	{v0.16b-v3.16b}, [x1], #0x40	/* q[i] := ctxt[i] */
	ld1	{v4.16b-v7.16b}, [x1], #0x40
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := ctxt[i] ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	mov	x0, x9		/* x0 := deckey */
	mov	x3, x5		/* x3 := nrounds */
	bl	aesarmv8_dec8	/* decrypt q0-q7; trash x0/x3/q16 */
	eor	v0.16b, v0.16b, v24.16b	/* q[i] := AES(...) ^ tweak[i] */
	eor	v1.16b, v1.16b, v25.16b
	eor	v2.16b, v2.16b, v26.16b
	eor	v3.16b, v3.16b, v27.16b
	eor	v4.16b, v4.16b, v28.16b
	eor	v5.16b, v5.16b, v29.16b
	eor	v6.16b, v6.16b, v30.16b
	eor	v7.16b, v7.16b, v31.16b
	st1	{v0.16b-v3.16b}, [x2], #0x40	/* store plaintext blocks */
	st1	{v4.16b-v7.16b}, [x2], #0x40
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	subs	x10, x10, #0x80	/* count down nbytes */
	b.ne	1b		/* repeat if more block groups */
	st1	{v31.16b}, [x4]	/* update tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_dec8)

/*
 * aesarmv8_xts_mulx(tweak@q31)
 *
 *	Multiply q31 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
 *	Uses x0 and q0/q1 as temporaries.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_xts_mulx,@function
aesarmv8_xts_mulx:
	/*
	 * Simultaneously determine
	 * (a) whether the high bit of the low half must be
	 *     shifted into the low bit of the high half, and
	 * (b) whether the high bit of the high half must be
	 *     carried into x^128 = x^7 + x^2 + x + 1.
	 */
	adrl	x0, xtscarry
	cmlt	v1.2d, v31.2d, #0	/* v1.2d[i] := -1 if v31.2d[i] < 0, else 0 */
	ld1	{v0.16b}, [x0]	/* q0 := xtscarry */
	ext	v1.16b, v1.16b, v1.16b, #8	/* swap halves of q1 */
	shl	v31.2d, v31.2d, #1	/* shift */
	and	v0.16b, v0.16b, v1.16b	/* copy xtscarry according to mask */
	eor	v31.16b, v31.16b, v0.16b	/* incorporate (a) and (b) */
	ret
END(aesarmv8_xts_mulx)

	.section .rodata
	.p2align 4
	.type	xtscarry,@object
xtscarry:
	.byte	0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
END(xtscarry)
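
/*
 * Editorial sketch (not part of the build): in scalar C the doubling
 * above is, for a tweak viewed as two little-endian 64-bit lanes
 * t[0] (low) and t[1] (high),
 *
 *	uint64_t lo = t[0], hi = t[1];
 *
 *	t[1] = (hi << 1) | (lo >> 63);		// (a) carry lo into hi
 *	t[0] = (lo << 1) ^ (0x87 & -(hi >> 63));	// (b) reduce x^128
 *
 * The CMLT/EXT/AND sequence computes both carry masks at once: CMLT
 * turns each lane's sign bit into an all-ones mask, EXT swaps the
 * halves so each lane sees the other's carry, and the masked xtscarry
 * constant then feeds 0x87 into the low lane and 1 into the high one.
 */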

/*
 * aesarmv8_xts_update(const uint8_t in[16] @x0, uint8_t out[16] @x1)
 *
 *	Update an AES-XTS tweak.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_xts_update)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v31.16b}, [x0]	/* load tweak */
	bl	aesarmv8_xts_mulx	/* q31 *= x; trash x0/q0/q1 */
	st1	{v31.16b}, [x1]	/* store tweak */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_xts_update)

/*
 * aesarmv8_cbcmac_update1(const struct aesenc *enckey@x0,
 *     const uint8_t *in@x1, size_t nbytes@x2, uint8_t auth[16] @x3,
 *     uint32_t nrounds@x4)
 *
 *	Update CBC-MAC.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_cbcmac_update1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v0.16b}, [x3]	/* q0 := initial authenticator */
	mov	x9, x0		/* x9 := enckey */
	mov	x5, x3		/* x5 := &auth (enc1 trashes x3) */
	_ALIGN_TEXT
1:	ld1	{v1.16b}, [x1], #0x10	/* q1 := plaintext block */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x4		/* x3 := nrounds */
	eor	v0.16b, v0.16b, v1.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc1	/* q0 := auth'; trash x0/x3/q16 */
	subs	x2, x2, #0x10	/* count down nbytes */
	b.ne	1b		/* repeat if x2 is nonzero */
	st1	{v0.16b}, [x5]	/* store updated authenticator */
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_cbcmac_update1)

/*
 * aesarmv8_ccm_enc1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t authctr[32] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Update CCM encryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_ccm_enc1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v0.16b, v1.16b}, [x4]	/* q0 := auth, q1 := ctr (be) */
	mov	v2.16b, v1.16b	/* q2 := ctr (be) */
	adrl	x11, ctr32_inc	/* x11 := &ctr32_inc */
	ld1	{v5.4s}, [x11]	/* q5 := (0,0,0,1) (host-endian) */
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	rev32	v2.16b, v2.16b	/* q2 := ctr (host-endian) */
	_ALIGN_TEXT
1:	ld1	{v3.16b}, [x1], #0x10	/* q3 := plaintext block */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	rev32	v1.16b, v2.16b	/* q1 := ctr (big-endian) */
	eor	v0.16b, v0.16b, v3.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc2	/* q0 := auth', q1 := pad;
				 * trash x0/x3/q16 */
	eor	v3.16b, v1.16b, v3.16b	/* q3 := ciphertext block */
	subs	x10, x10, #0x10	/* count down bytes */
	st1	{v3.16b}, [x2], #0x10	/* store ciphertext block */
	b.ne	1b		/* repeat if more blocks */
	rev32	v2.16b, v2.16b	/* q2 := ctr (big-endian) */
	mov	v1.16b, v2.16b	/* store updated auth/ctr */
	st1	{v0.16b-v1.16b}, [x4]
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_ccm_enc1)
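
/*
 * Editorial sketch (not part of the build): per 16-byte block the
 * loop above advances CCM by one CBC-MAC step and one CTR step,
 * roughly
 *
 *	ctr++;				// 32-bit big-endian increment
 *	auth = E_k(auth ^ pt);		// CBC-MAC absorb
 *	pad  = E_k(ctr);		// CTR keystream
 *	ct   = pt ^ pad;
 *
 * The two AES invocations are independent, which is why they are run
 * as a pair through aesarmv8_enc2 to keep the AESE/AESMC pipeline
 * full.
 */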

/*
 * aesarmv8_ccm_dec1(const struct aesenc *enckey@x0, const uint8_t *in@x1,
 *     uint8_t *out@x2, size_t nbytes@x3, uint8_t authctr[32] @x4,
 *     uint32_t nrounds@x5)
 *
 *	Update CCM decryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesarmv8_ccm_dec1)
	stp	fp, lr, [sp, #-16]!	/* push stack frame */
	mov	fp, sp
	ld1	{v1.16b, v2.16b}, [x4]	/* q1 := auth, q2 := ctr (be) */
	adrl	x11, ctr32_inc	/* x11 := &ctr32_inc */
	ld1	{v5.4s}, [x11]	/* q5 := (0,0,0,1) (host-endian) */
	mov	x9, x0		/* x9 := enckey */
	mov	x10, x3		/* x10 := nbytes */
	rev32	v2.16b, v2.16b	/* q2 := ctr (host-endian) */

	/* Decrypt the first block. */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x3, x5		/* x3 := nrounds */
	rev32	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
	ld1	{v3.16b}, [x1], #0x10	/* q3 := ctxt */
	bl	aesarmv8_enc1	/* q0 := pad; trash x0/x3/q16 */
	b	2f

	_ALIGN_TEXT
1:	/*
	 * Authenticate the last block and decrypt the next block
	 * simultaneously.
	 *
	 *	q1 = auth ^ ptxt[-1]
	 *	q2 = ctr[-1] (le)
	 */
	add	v2.4s, v2.4s, v5.4s	/* increment ctr (32-bit) */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	rev32	v0.16b, v2.16b	/* q0 := ctr (big-endian) */
	ld1	{v3.16b}, [x1], #0x10	/* q3 := ctxt */
	bl	aesarmv8_enc2	/* q0 := pad, q1 := auth';
				 * trash x0/x3/q16 */
2:	eor	v3.16b, v0.16b, v3.16b	/* q3 := plaintext block */
	subs	x10, x10, #0x10
	st1	{v3.16b}, [x2], #0x10	/* store plaintext */
	eor	v1.16b, v1.16b, v3.16b	/* q1 := auth ^ ptxt */
	b.ne	1b

	rev32	v2.16b, v2.16b	/* q2 := ctr (big-endian) */

	/* Authenticate the last block. */
	mov	x0, x9		/* x0 := enckey */
	mov	x3, x5		/* x3 := nrounds */
	mov	v0.16b, v1.16b	/* q0 := auth ^ ptxt */
	bl	aesarmv8_enc1	/* q0 := auth'; trash x0/x3/q16 */

	mov	v1.16b, v2.16b	/* store updated auth/ctr */
	st1	{v0.16b-v1.16b}, [x4]
	ldp	fp, lr, [sp], #16	/* pop stack frame */
	ret
END(aesarmv8_ccm_dec1)

	.section .rodata
	.p2align 4
	.type	ctr32_inc,@object
ctr32_inc:
	.int	0, 0, 0, 1
END(ctr32_inc)

/*
 * aesarmv8_enc1(const struct aesenc *enckey@x0,
 *     uint128_t block@q0, uint32_t nrounds@x3)
 *
 *	Encrypt a single AES block in q0.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc1,@function
aesarmv8_enc1:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q0 := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q0)))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	ldr	q16, [x0], #0x10
	subs	x3, x3, #1
	b.ne	1b
	/* q0 := ShiftRows(SubBytes(AddRoundKey_q16(q0))) */
	aese	v0.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q0 := AddRoundKey_q16(q0) */
	eor	v0.16b, v0.16b, v16.16b
	ret
END(aesarmv8_enc1)
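
/*
 * Editorial sketch (not part of the build): the register dance above
 * is the standard round structure; with AESE = AddRoundKey + SubBytes
 * + ShiftRows and AESMC = MixColumns, it computes
 *
 *	for (i = 0; i < nrounds - 1; i++)
 *		q0 = MixColumns(ShiftRows(SubBytes(q0 ^ rk[i])));
 *	q0 = ShiftRows(SubBytes(q0 ^ rk[nrounds - 1]));
 *	q0 ^= rk[nrounds];		// final AddRoundKey
 *
 * The same skeleton is replicated 2-wide and 8-wide below, with each
 * round key loaded once into q16 and shared across all blocks.
 */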

/*
 * aesarmv8_enc2(const struct aesenc *enckey@x0,
 *     uint128_t block0@q0, uint128_t block1@q1, uint32_t nrounds@x3)
 *
 *	Encrypt two AES blocks in q0 and q1.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc2,@function
aesarmv8_enc2:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v16.16b
	aesmc	v1.16b, v1.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
	aese	v0.16b, v16.16b
	aese	v1.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	ret
END(aesarmv8_enc2)

/*
 * aesarmv8_enc8(const struct aesenc *enckey@x0,
 *     uint128_t block0@q0, ..., uint128_t block7@q7,
 *     uint32_t nrounds@x3)
 *
 *	Encrypt eight AES blocks in q0 through q7 in parallel.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_enc8,@function
aesarmv8_enc8:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := MixColumns(ShiftRows(SubBytes(AddRoundKey_q16(q[i])))) */
	aese	v0.16b, v16.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v16.16b
	aesmc	v1.16b, v1.16b
	aese	v2.16b, v16.16b
	aesmc	v2.16b, v2.16b
	aese	v3.16b, v16.16b
	aesmc	v3.16b, v3.16b
	aese	v4.16b, v16.16b
	aesmc	v4.16b, v4.16b
	aese	v5.16b, v16.16b
	aesmc	v5.16b, v5.16b
	aese	v6.16b, v16.16b
	aesmc	v6.16b, v6.16b
	aese	v7.16b, v16.16b
	aesmc	v7.16b, v7.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := ShiftRows(SubBytes(AddRoundKey_q16(q[i]))) */
	aese	v0.16b, v16.16b
	aese	v1.16b, v16.16b
	aese	v2.16b, v16.16b
	aese	v3.16b, v16.16b
	aese	v4.16b, v16.16b
	aese	v5.16b, v16.16b
	aese	v6.16b, v16.16b
	aese	v7.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v3.16b, v3.16b, v16.16b
	eor	v4.16b, v4.16b, v16.16b
	eor	v5.16b, v5.16b, v16.16b
	eor	v6.16b, v6.16b, v16.16b
	eor	v7.16b, v7.16b, v16.16b
	ret
END(aesarmv8_enc8)

/*
 * aesarmv8_dec1(const struct aesdec *deckey@x0,
 *     uint128_t block@q0, uint32_t nrounds@x3)
 *
 *	Decrypt a single AES block in q0.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_dec1,@function
aesarmv8_dec1:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
	aesd	v0.16b, v16.16b
	/* q0 := InMixColumns(q0) */
	aesimc	v0.16b, v0.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q0 := InSubBytes(InShiftRows(AddRoundKey_q16(q0))) */
	aesd	v0.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q0 := AddRoundKey_q16(q0) */
	eor	v0.16b, v0.16b, v16.16b
	ret
END(aesarmv8_dec1)

/*
 * aesarmv8_dec8(const struct aesdec *deckey@x0,
 *     uint128_t block0@q0, ..., uint128_t block7@q7,
 *     uint32_t nrounds@x3)
 *
 *	Decrypt eight AES blocks in q0 through q7 in parallel.
 *
 *	Internal ABI.  Uses q16 as temporary.  Destroys x0 and x3.
 */
	.text
	_ALIGN_TEXT
	.type	aesarmv8_dec8,@function
aesarmv8_dec8:
	ldr	q16, [x0], #0x10	/* load round key */
	sub	x3, x3, #1
	_ALIGN_TEXT
1:	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
	aesd	v0.16b, v16.16b
	/* q[i] := InMixColumns(q[i]) */
	aesimc	v0.16b, v0.16b
	aesd	v1.16b, v16.16b
	aesimc	v1.16b, v1.16b
	aesd	v2.16b, v16.16b
	aesimc	v2.16b, v2.16b
	aesd	v3.16b, v16.16b
	aesimc	v3.16b, v3.16b
	aesd	v4.16b, v16.16b
	aesimc	v4.16b, v4.16b
	aesd	v5.16b, v16.16b
	aesimc	v5.16b, v5.16b
	aesd	v6.16b, v16.16b
	aesimc	v6.16b, v6.16b
	aesd	v7.16b, v16.16b
	aesimc	v7.16b, v7.16b
	ldr	q16, [x0], #0x10	/* load next round key */
	subs	x3, x3, #1
	b.ne	1b
	/* q[i] := InSubBytes(InShiftRows(AddRoundKey_q16(q[i]))) */
	aesd	v0.16b, v16.16b
	aesd	v1.16b, v16.16b
	aesd	v2.16b, v16.16b
	aesd	v3.16b, v16.16b
	aesd	v4.16b, v16.16b
	aesd	v5.16b, v16.16b
	aesd	v6.16b, v16.16b
	aesd	v7.16b, v16.16b
	ldr	q16, [x0]	/* load last round key */
	/* q[i] := AddRoundKey_q16(q[i]) */
	eor	v0.16b, v0.16b, v16.16b
	eor	v1.16b, v1.16b, v16.16b
	eor	v2.16b, v2.16b, v16.16b
	eor	v3.16b, v3.16b, v16.16b
	eor	v4.16b, v4.16b, v16.16b
	eor	v5.16b, v5.16b, v16.16b
	eor	v6.16b, v6.16b, v16.16b
	eor	v7.16b, v7.16b, v16.16b
	ret
END(aesarmv8_dec8)