! AES for Fujitsu SPARC64 X/X+ ("FX"), SPARC V9 assembly.
! Uses hardware AES opcodes emitted as .word constants (faesencx/faesdecx/
! faeskeyx etc.) plus fshiftorx for arbitrary input/output alignment.
#include "sparc_arch.h"

! Scratch slots just above the register save area of the current frame;
! used to stage realigned input words in the CBC/CTR unaligned-out paths.
#define LOCALS (STACK_BIAS+STACK_FRAME)

.text
.globl	aes_fx_encrypt
.align	32
! One-block AES encrypt.
! In:  %o0 = input (any alignment), %o1 = output (any alignment),
!      %o2 = key schedule; round count is read from [%o2 + 240].
! Misaligned input is repaired with fshiftorx using the .Linp_align table;
! misaligned output is written with two ASI 0xC0 partial stores.
! Clobbers %o3-%o5, %g1, %f0-%f14.
aes_fx_encrypt:
	and	%o0, 7, %o4		! is input aligned?
	andn	%o0, 7, %o0
	ldd	[%o2 + 0], %f6		! round[0]
	ldd	[%o2 + 8], %f8
	mov	%o7, %g1		! save return address, call below clobbers %o7
	ld	[%o2 + 240], %o3	! number of rounds

1:	call	.+8			! PC-relative handle on .Linp_align
	add	%o7, .Linp_align-1b, %o7

	sll	%o4, 3, %o4		! byte misalignment -> table offset
	ldd	[%o0 + 0], %f0		! load input
	brz,pt	%o4, .Lenc_inp_aligned
	ldd	[%o0 + 8], %f2

	ldd	[%o7 + %o4], %f14	! shift left params
	ldd	[%o0 + 16], %f4
	.word	0x81b81d62 !fshiftorx	%f0,%f2,%f14,%f0
	.word	0x85b89d64 !fshiftorx	%f2,%f4,%f14,%f2

.Lenc_inp_aligned:
	ldd	[%o2 + 16], %f10	! round[1]
	ldd	[%o2 + 24], %f12

	.word	0x81b00d86 !fxor	%f0,%f6,%f0	! ^=round[0]
	.word	0x85b08d88 !fxor	%f2,%f8,%f2
	ldd	[%o2 + 32], %f6		! round[2]
	ldd	[%o2 + 40], %f8
	add	%o2, 32, %o2
	sub	%o3, 4, %o3		! loop counter bias; .Loop_enc does 2 rounds/pass

.Loop_enc:
	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%o2 + 16], %f10
	ldd	[%o2 + 24], %f12
	add	%o2, 32, %o2

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd	[%o2 + 0], %f6
	ldd	[%o2 + 8], %f8

	brnz,a	%o3, .Loop_enc		! annulled delay slot: decrement only if looping
	sub	%o3, 2, %o3

	andcc	%o1, 7, %o4		! is output aligned?
	andn	%o1, 7, %o1
	mov	0xff, %o5
	srl	%o5, %o4, %o5		! byte-enable mask for leading partial store
	add	%o7, 64, %o7		! advance to .Lout_align (64 bytes past .Linp_align)
	sll	%o4, 3, %o4

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%o7 + %o4], %f14	! shift right params

	fmovd	%f0, %f4
	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
	.word	0x85b11248 !faesenclx	%f4,%f8,%f2

	bnz,pn	%icc, .Lenc_out_unaligned
	mov	%g1, %o7		! restore return address

	std	%f0, [%o1 + 0]
	retl
	std	%f2, [%o1 + 8]

.align	16
.Lenc_out_unaligned:
	add	%o1, 16, %o0
	orn	%g0, %o5, %o4		! complementary byte-enable mask for the tail
	.word	0x89b81d60 !fshiftorx	%f0,%f0,%f14,%f4
	.word	0x8db81d62 !fshiftorx	%f0,%f2,%f14,%f6
	.word	0x91b89d62 !fshiftorx	%f2,%f2,%f14,%f8

	stda	%f4, [%o1 + %o5]0xc0	! partial store
	std	%f6, [%o1 + 8]
	stda	%f8, [%o0 + %o4]0xc0	! partial store
	retl
	nop
.type	aes_fx_encrypt,#function
.size	aes_fx_encrypt,.-aes_fx_encrypt
.globl	aes_fx_decrypt
.align	32
! One-block AES decrypt; mirror of aes_fx_encrypt using faesdecx/faesdeclx.
! In:  %o0 = input (any alignment), %o1 = output (any alignment),
!      %o2 = key schedule; round count is read from [%o2 + 240].
! Clobbers %o3-%o5, %g1, %f0-%f14.
aes_fx_decrypt:
	and	%o0, 7, %o4		! is input aligned?
	andn	%o0, 7, %o0
	ldd	[%o2 + 0], %f6		! round[0]
	ldd	[%o2 + 8], %f8
	mov	%o7, %g1		! save return address, call below clobbers %o7
	ld	[%o2 + 240], %o3	! number of rounds

1:	call	.+8			! PC-relative handle on .Linp_align
	add	%o7, .Linp_align-1b, %o7

	sll	%o4, 3, %o4
	ldd	[%o0 + 0], %f0		! load input
	brz,pt	%o4, .Ldec_inp_aligned
	ldd	[%o0 + 8], %f2

	ldd	[%o7 + %o4], %f14	! shift left params
	ldd	[%o0 + 16], %f4
	.word	0x81b81d62 !fshiftorx	%f0,%f2,%f14,%f0
	.word	0x85b89d64 !fshiftorx	%f2,%f4,%f14,%f2

.Ldec_inp_aligned:
	ldd	[%o2 + 16], %f10	! round[1]
	ldd	[%o2 + 24], %f12

	.word	0x81b00d86 !fxor	%f0,%f6,%f0	! ^=round[0]
	.word	0x85b08d88 !fxor	%f2,%f8,%f2
	ldd	[%o2 + 32], %f6		! round[2]
	ldd	[%o2 + 40], %f8
	add	%o2, 32, %o2
	sub	%o3, 4, %o3		! loop counter bias; .Loop_dec does 2 rounds/pass

.Loop_dec:
	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%o2 + 16], %f10
	ldd	[%o2 + 24], %f12
	add	%o2, 32, %o2

	fmovd	%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	ldd	[%o2 + 0], %f6
	ldd	[%o2 + 8], %f8

	brnz,a	%o3, .Loop_dec		! annulled delay slot: decrement only if looping
	sub	%o3, 2, %o3

	andcc	%o1, 7, %o4		! is output aligned?
	andn	%o1, 7, %o1
	mov	0xff, %o5
	srl	%o5, %o4, %o5		! byte-enable mask for leading partial store
	add	%o7, 64, %o7		! advance to .Lout_align
	sll	%o4, 3, %o4

	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%o7 + %o4], %f14	! shift right params

	fmovd	%f0, %f4
	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2

	bnz,pn	%icc, .Ldec_out_unaligned
	mov	%g1, %o7		! restore return address

	std	%f0, [%o1 + 0]
	retl
	std	%f2, [%o1 + 8]

.align	16
.Ldec_out_unaligned:
	add	%o1, 16, %o0
	orn	%g0, %o5, %o4		! complementary byte-enable mask for the tail
	.word	0x89b81d60 !fshiftorx	%f0,%f0,%f14,%f4
	.word	0x8db81d62 !fshiftorx	%f0,%f2,%f14,%f6
	.word	0x91b89d62 !fshiftorx	%f2,%f2,%f14,%f8

	stda	%f4, [%o1 + %o5]0xc0	! partial store
	std	%f6, [%o1 + 8]
	stda	%f8, [%o0 + %o4]0xc0	! partial store
	retl
	nop
.type	aes_fx_decrypt,#function
.size	aes_fx_decrypt,.-aes_fx_decrypt
.globl	aes_fx_set_decrypt_key
.align	32
! Same expansion as aes_fx_set_encrypt_key; with %o4 = -1 the shared
! .Lset_encrypt_key path writes the schedule from the far end backwards,
! i.e. in decryption order.
aes_fx_set_decrypt_key:
	b	.Lset_encrypt_key
	mov	-1, %o4			! delay slot: select "backward" direction
	retl				! not reached (branch above is unconditional)
	nop
.type	aes_fx_set_decrypt_key,#function
.size	aes_fx_set_decrypt_key,.-aes_fx_set_decrypt_key
.globl	aes_fx_set_encrypt_key
.align	32
! Key-schedule expansion via the faeskeyx opcode.
! In:  %o0 = user key (any alignment), %o1 = key bits (compared against 192
!      to pick the 128/192/256 path), %o2 = output schedule; the round count
!      (10/12/14) is stored at [%o2 + 240].
! %o4 = +1 (this entry) lays the schedule out forward; %o4 = -1 (entry via
! aes_fx_set_decrypt_key) starts at the far end and steps by -16.
! Returns 0 in %o0.  Clobbers %o3, %g1, %f0-%f10.
aes_fx_set_encrypt_key:
	mov	1, %o4			! "forward" direction
	nop
.Lset_encrypt_key:
	and	%o0, 7, %o3		! is user key aligned?
	andn	%o0, 7, %o0
	sll	%o3, 3, %o3
	mov	%o7, %g1		! save return address

1:	call	.+8			! PC-relative handle on .Linp_align
	add	%o7, .Linp_align-1b, %o7

	ldd	[%o7 + %o3], %f10	! shift left params
	mov	%g1, %o7		! restore return address

	cmp	%o1, 192
	ldd	[%o0 + 0], %f0
	bl,pt	%icc, .L128
	ldd	[%o0 + 8], %f2

	be,pt	%icc, .L192
	ldd	[%o0 + 16], %f4
	brz,pt	%o3, .L256aligned
	ldd	[%o0 + 24], %f6

	ldd	[%o0 + 32], %f8		! one extra dword to realign 256-bit key
	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2
	.word	0x89b91566 !fshiftorx	%f4,%f6,%f10,%f4
	.word	0x8db99568 !fshiftorx	%f6,%f8,%f10,%f6

.L256aligned:
	mov	14, %o1
	and	%o4, 224, %o3		! 0 (forward) or 224 (backward)
	st	%o1, [%o2 + 240]	! store rounds
	add	%o2, %o3, %o2		! start or end of key schedule
	sllx	%o4, 4, %o4		! 16 or -16
	std	%f0, [%o2 + 0]
	.word	0x81b19290 !faeskeyx	%f6,16,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std	%f6, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std	%f0, [%o2 + 0]
	.word	0x81b19291 !faeskeyx	%f6,17,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std	%f6, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std	%f0, [%o2 + 0]
	.word	0x81b19292 !faeskeyx	%f6,18,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std	%f6, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std	%f0, [%o2 + 0]
	.word	0x81b19293 !faeskeyx	%f6,19,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std	%f6, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std	%f0, [%o2 + 0]
	.word	0x81b19294 !faeskeyx	%f6,20,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std	%f6, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std	%f0, [%o2 + 0]
	.word	0x81b19295 !faeskeyx	%f6,21,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09281 !faeskeyx	%f2,0x01,%f4
	std	%f6, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x8db11280 !faeskeyx	%f4,0x00,%f6
	std	%f0, [%o2 + 0]
	.word	0x81b19296 !faeskeyx	%f6,22,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	std	%f6, [%o2 + 8]
	add	%o2, %o4, %o2
	std	%f0, [%o2 + 0]
	std	%f2, [%o2 + 8]
	retl
	xor	%o0, %o0, %o0		! return 0

.align	16
.L192:
	brz,pt	%o3, .L192aligned
	nop

	ldd	[%o0 + 24], %f6		! one extra dword to realign 192-bit key
	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2
	.word	0x89b91566 !fshiftorx	%f4,%f6,%f10,%f4

.L192aligned:
	mov	12, %o1
	and	%o4, 192, %o3		! 0 (forward) or 192 (backward)
	st	%o1, [%o2 + 240]	! store rounds
	add	%o2, %o3, %o2		! start or end of key schedule
	sllx	%o4, 4, %o4		! 16 or -16
	std	%f0, [%o2 + 0]
	.word	0x81b11290 !faeskeyx	%f4,16,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std	%f0, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x81b11291 !faeskeyx	%f4,17,%f0
	std	%f2, [%o2 + 0]
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std	%f0, [%o2 + 0]
	.word	0x81b11292 !faeskeyx	%f4,18,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std	%f0, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x81b11293 !faeskeyx	%f4,19,%f0
	std	%f2, [%o2 + 0]
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std	%f0, [%o2 + 0]
	.word	0x81b11294 !faeskeyx	%f4,20,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std	%f0, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x81b11295 !faeskeyx	%f4,21,%f0
	std	%f2, [%o2 + 0]
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std	%f0, [%o2 + 0]
	.word	0x81b11296 !faeskeyx	%f4,22,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 0]
	.word	0x89b09280 !faeskeyx	%f2,0x00,%f4
	std	%f0, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x81b11297 !faeskeyx	%f4,23,%f0
	std	%f2, [%o2 + 0]
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f4, [%o2 + 8]
	add	%o2, %o4, %o2
	std	%f0, [%o2 + 0]
	std	%f2, [%o2 + 8]
	retl
	xor	%o0, %o0, %o0		! return 0

.align	16
.L128:
	brz,pt	%o3, .L128aligned
	nop

	ldd	[%o0 + 16], %f4		! one extra dword to realign 128-bit key
	.word	0x81b81562 !fshiftorx	%f0,%f2,%f10,%f0
	.word	0x85b89564 !fshiftorx	%f2,%f4,%f10,%f2

.L128aligned:
	mov	10, %o1
	and	%o4, 160, %o3		! 0 (forward) or 160 (backward)
	st	%o1, [%o2 + 240]	! store rounds
	add	%o2, %o3, %o2		! start or end of key schedule
	sllx	%o4, 4, %o4		! 16 or -16
	std	%f0, [%o2 + 0]
	.word	0x81b09290 !faeskeyx	%f2,16,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09291 !faeskeyx	%f2,17,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09292 !faeskeyx	%f2,18,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09293 !faeskeyx	%f2,19,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09294 !faeskeyx	%f2,20,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09295 !faeskeyx	%f2,21,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09296 !faeskeyx	%f2,22,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09297 !faeskeyx	%f2,23,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09298 !faeskeyx	%f2,24,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	.word	0x81b09299 !faeskeyx	%f2,25,%f0
	std	%f2, [%o2 + 8]
	add	%o2, %o4, %o2
	.word	0x85b01280 !faeskeyx	%f0,0x00,%f2
	std	%f0, [%o2 + 0]
	std	%f2, [%o2 + 8]
	retl
	xor	%o0, %o0, %o0		! return 0
.type	aes_fx_set_encrypt_key,#function
.size	aes_fx_set_encrypt_key,.-aes_fx_set_encrypt_key
.globl	aes_fx_cbc_encrypt
.align	32
! CBC-mode bulk encrypt/decrypt.
! In:  %i0 = inp, %i1 = out (both any alignment), %i2 = length in bytes
!      (converted to whole blocks below), %i3 = key schedule (round count
!      at [%i3 + 240]), %i4 = ivec, %i5 = enc flag (zero selects decrypt).
! Input look-ahead uses a non-faulting ldda (ASI 0x82) so the word past the
! final block is never demand-faulted; unaligned output goes through
! fshiftorx plus ASI 0xC0 partial stores, staging via LOCALS on the stack.
aes_fx_cbc_encrypt:
	save	%sp, -STACK_FRAME-16, %sp
	srln	%i2, 4, %i2		! byte length -> block count
	and	%i0, 7, %l4		! is input aligned?
	andn	%i0, 7, %i0
	brz,pn	%i2, .Lcbc_no_data
	sll	%l4, 3, %l4

1:	call	.+8			! PC-relative handle on .Linp_align
	add	%o7, .Linp_align-1b, %o7

	ld	[%i3 + 240], %l0
	and	%i1, 7, %l5		! is output aligned?
	ld	[%i4 + 0], %f0		! load ivec
	andn	%i1, 7, %i1
	ld	[%i4 + 4], %f1
	sll	%l5, 3, %l6
	ld	[%i4 + 8], %f2
	ld	[%i4 + 12], %f3

	sll	%l0, 4, %l0		! rounds*16 = offset of round[last]
	add	%l0, %i3, %l2
	ldd	[%i3 + 0], %f20		! round[0]
	ldd	[%i3 + 8], %f22

	add	%i0, 16, %i0
	sub	%i2, 1, %i2
	ldd	[%l2 + 0], %f24		! round[last]
	ldd	[%l2 + 8], %f26

	mov	16, %l3
	movrz	%i2, 0, %l3		! stop advancing inp at the final block
	ldd	[%i3 + 16], %f10	! round[1]
	ldd	[%i3 + 24], %f12

	ldd	[%o7 + %l4], %f36	! shift left params
	add	%o7, 64, %o7		! %o7 now points at .Lout_align
	ldd	[%i0 - 16], %f28	! load input
	ldd	[%i0 - 8], %f30
	ldda	[%i0]0x82, %f32		! non-faulting load
	brz	%i5, .Lcbc_decrypt
	add	%i0, %l3, %i0		! inp+=16

	.word	0x81b50d80 !fxor	%f20,%f0,%f0	! ivec^=round[0]
	.word	0x85b58d82 !fxor	%f22,%f2,%f2
	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
	nop

.Loop_cbc_enc:
	.word	0x81b70d80 !fxor	%f28,%f0,%f0	! inp^ivec^round[0]
	.word	0x85b78d82 !fxor	%f30,%f2,%f2
	ldd	[%i3 + 32], %f6		! round[2]
	ldd	[%i3 + 40], %f8
	add	%i3, 32, %l2
	sub	%l0, 16*6, %l1

.Lcbc_enc:
	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10
	ldd	[%l2 + 24], %f12
	add	%l2, 32, %l2

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd	[%l2 + 0], %f6
	ldd	[%l2 + 8], %f8

	brnz,a	%l1, .Lcbc_enc
	sub	%l1, 16*2, %l1

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10	! round[last-1]
	ldd	[%l2 + 24], %f12

	movrz	%i2, 0, %l3
	fmovd	%f32, %f28
	ldd	[%i0 - 8], %f30		! load next input block
	ldda	[%i0]0x82, %f32		! non-faulting load
	add	%i0, %l3, %i0		! inp+=16

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2

	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%i3 + 16], %f10	! round[1]
	ldd	[%i3 + 24], %f12

	.word	0xb9b50d9c !fxor	%f20,%f28,%f28	! inp^=round[0]
	.word	0xbdb58d9e !fxor	%f22,%f30,%f30

	fmovd	%f0, %f4
	.word	0x81b09258 !faesenclx	%f2,%f24,%f0
	.word	0x85b1125a !faesenclx	%f4,%f26,%f2

	brnz,pn	%l5, .Lcbc_enc_unaligned_out
	nop

	std	%f0, [%i1 + 0]
	std	%f2, [%i1 + 8]
	add	%i1, 16, %i1

	brnz,a	%i2, .Loop_cbc_enc
	sub	%i2, 1, %i2

	st	%f0, [%i4 + 0]		! output ivec
	st	%f1, [%i4 + 4]
	st	%f2, [%i4 + 8]
	st	%f3, [%i4 + 12]

.Lcbc_no_data:
	ret
	restore

.align	32
.Lcbc_enc_unaligned_out:
	ldd	[%o7 + %l6], %f36	! shift right params
	mov	0xff, %l6
	srl	%l6, %l5, %l6		! byte-enable mask for leading partial store
	sub	%g0, %l4, %l5		! negated input shift, used by ldx realign

	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8

	stda	%f6, [%i1 + %l6]0xc0	! partial store
	orn	%g0, %l6, %l6		! complementary mask for the final tail
	std	%f8, [%i1 + 8]
	add	%i1, 16, %i1
	brz	%i2, .Lcbc_enc_unaligned_out_done
	sub	%i2, 1, %i2
	b	.Loop_cbc_enc_unaligned_out
	nop

.align	32
.Loop_cbc_enc_unaligned_out:
	fmovd	%f2, %f34		! keep previous output tail for fshiftorx
	.word	0x81b70d80 !fxor	%f28,%f0,%f0	! inp^ivec^round[0]
	.word	0x85b78d82 !fxor	%f30,%f2,%f2
	ldd	[%i3 + 32], %f6		! round[2]
	ldd	[%i3 + 40], %f8

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%i3 + 48], %f10	! round[3]
	ldd	[%i3 + 56], %f12

	ldx	[%i0 - 16], %o0		! realign next block in integer regs
	ldx	[%i0 - 8], %o1
	brz	%l4, .Lcbc_enc_aligned_inp
	movrz	%i2, 0, %l3

	ldx	[%i0], %o2
	sllx	%o0, %l4, %o0
	srlx	%o1, %l5, %g1
	sllx	%o1, %l4, %o1
	or	%g1, %o0, %o0
	srlx	%o2, %l5, %o2
	or	%o2, %o1, %o1

.Lcbc_enc_aligned_inp:
	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd	[%i3 + 64], %f6		! round[4]
	ldd	[%i3 + 72], %f8
	add	%i3, 64, %l2
	sub	%l0, 16*8, %l1

	stx	%o0, [%sp + LOCALS + 0]	! stage realigned input on the stack
	stx	%o1, [%sp + LOCALS + 8]
	add	%i0, %l3, %i0		! inp+=16
	nop

.Lcbc_enc_unaligned:
	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10
	ldd	[%l2 + 24], %f12
	add	%l2, 32, %l2

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd	[%l2 + 0], %f6
	ldd	[%l2 + 8], %f8

	brnz,a	%l1, .Lcbc_enc_unaligned
	sub	%l1, 16*2, %l1

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10	! round[last-1]
	ldd	[%l2 + 24], %f12

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2

	ldd	[%sp + LOCALS + 0], %f28	! reload realigned input
	ldd	[%sp + LOCALS + 8], %f30

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%i3 + 16], %f10	! round[1]
	ldd	[%i3 + 24], %f12

	.word	0xb9b50d9c !fxor	%f20,%f28,%f28	! inp^=round[0]
	.word	0xbdb58d9e !fxor	%f22,%f30,%f30

	fmovd	%f0, %f4
	.word	0x81b09258 !faesenclx	%f2,%f24,%f0
	.word	0x85b1125a !faesenclx	%f4,%f26,%f2

	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
	std	%f6, [%i1 + 0]
	std	%f8, [%i1 + 8]
	add	%i1, 16, %i1

	brnz,a	%i2, .Loop_cbc_enc_unaligned_out
	sub	%i2, 1, %i2

.Lcbc_enc_unaligned_out_done:
	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
	stda	%f8, [%i1 + %l6]0xc0	! partial store

	st	%f0, [%i4 + 0]		! output ivec
	st	%f1, [%i4 + 4]
	st	%f2, [%i4 + 8]
	st	%f3, [%i4 + 12]

	ret
	restore

.align	32
.Lcbc_decrypt:
	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
	fmovd	%f0, %f16		! keep ivec in %f16/%f18
	fmovd	%f2, %f18

.Loop_cbc_dec:
	.word	0x81b70d94 !fxor	%f28,%f20,%f0	! inp^round[0]
	.word	0x85b78d96 !fxor	%f30,%f22,%f2
	ldd	[%i3 + 32], %f6		! round[2]
	ldd	[%i3 + 40], %f8
	add	%i3, 32, %l2
	sub	%l0, 16*6, %l1

.Lcbc_dec:
	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10
	ldd	[%l2 + 24], %f12
	add	%l2, 32, %l2

	fmovd	%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	ldd	[%l2 + 0], %f6
	ldd	[%l2 + 8], %f8

	brnz,a	%l1, .Lcbc_dec
	sub	%l1, 16*2, %l1

	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10	! round[last-1]
	ldd	[%l2 + 24], %f12

	fmovd	%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	.word	0x8db40d98 !fxor	%f16,%f24,%f6	! ivec^round[last]
	.word	0x91b48d9a !fxor	%f18,%f26,%f8
	fmovd	%f28, %f16		! next ivec = this ciphertext block
	fmovd	%f30, %f18

	movrz	%i2, 0, %l3
	fmovd	%f32, %f28
	ldd	[%i0 - 8], %f30		! load next input block
	ldda	[%i0]0x82, %f32		! non-faulting load
	add	%i0, %l3, %i0		! inp+=16

	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%i3 + 16], %f10	! round[1]
	ldd	[%i3 + 24], %f12

	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30

	fmovd	%f0, %f4
	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2

	brnz,pn	%l5, .Lcbc_dec_unaligned_out
	nop

	std	%f0, [%i1 + 0]
	std	%f2, [%i1 + 8]
	add	%i1, 16, %i1

	brnz,a	%i2, .Loop_cbc_dec
	sub	%i2, 1, %i2

	st	%f16, [%i4 + 0]		! output ivec
	st	%f17, [%i4 + 4]
	st	%f18, [%i4 + 8]
	st	%f19, [%i4 + 12]

	ret
	restore

.align	32
.Lcbc_dec_unaligned_out:
	ldd	[%o7 + %l6], %f36	! shift right params
	mov	0xff, %l6
	srl	%l6, %l5, %l6		! byte-enable mask for leading partial store
	sub	%g0, %l4, %l5		! negated input shift, used by ldx realign

	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8

	stda	%f6, [%i1 + %l6]0xc0	! partial store
	orn	%g0, %l6, %l6		! complementary mask for the final tail
	std	%f8, [%i1 + 8]
	add	%i1, 16, %i1
	brz	%i2, .Lcbc_dec_unaligned_out_done
	sub	%i2, 1, %i2
	b	.Loop_cbc_dec_unaligned_out
	nop

.align	32
.Loop_cbc_dec_unaligned_out:
	fmovd	%f2, %f34		! keep previous output tail for fshiftorx
	.word	0x81b70d94 !fxor	%f28,%f20,%f0	! inp^round[0]
	.word	0x85b78d96 !fxor	%f30,%f22,%f2
	ldd	[%i3 + 32], %f6		! round[2]
	ldd	[%i3 + 40], %f8

	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%i3 + 48], %f10	! round[3]
	ldd	[%i3 + 56], %f12

	ldx	[%i0 - 16], %o0		! realign next block in integer regs
	ldx	[%i0 - 8], %o1
	brz	%l4, .Lcbc_dec_aligned_inp
	movrz	%i2, 0, %l3

	ldx	[%i0], %o2
	sllx	%o0, %l4, %o0
	srlx	%o1, %l5, %g1
	sllx	%o1, %l4, %o1
	or	%g1, %o0, %o0
	srlx	%o2, %l5, %o2
	or	%o2, %o1, %o1

.Lcbc_dec_aligned_inp:
	fmovd	%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	ldd	[%i3 + 64], %f6		! round[4]
	ldd	[%i3 + 72], %f8
	add	%i3, 64, %l2
	sub	%l0, 16*8, %l1

	stx	%o0, [%sp + LOCALS + 0]	! stage realigned input on the stack
	stx	%o1, [%sp + LOCALS + 8]
	add	%i0, %l3, %i0		! inp+=16
	nop

.Lcbc_dec_unaligned:
	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10
	ldd	[%l2 + 24], %f12
	add	%l2, 32, %l2

	fmovd	%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2
	ldd	[%l2 + 0], %f6
	ldd	[%l2 + 8], %f8

	brnz,a	%l1, .Lcbc_dec_unaligned
	sub	%l1, 16*2, %l1

	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10	! round[last-1]
	ldd	[%l2 + 24], %f12

	fmovd	%f0, %f4
	.word	0x81b09226 !faesdecx	%f2,%f6,%f0
	.word	0x85b11228 !faesdecx	%f4,%f8,%f2

	.word	0x8db40d98 !fxor	%f16,%f24,%f6	! ivec^round[last]
	.word	0x91b48d9a !fxor	%f18,%f26,%f8
	fmovd	%f28, %f16		! next ivec = this ciphertext block
	fmovd	%f30, %f18
	ldd	[%sp + LOCALS + 0], %f28	! reload realigned input
	ldd	[%sp + LOCALS + 8], %f30

	fmovd	%f0, %f4
	.word	0x81b0922a !faesdecx	%f2,%f10,%f0
	.word	0x85b1122c !faesdecx	%f4,%f12,%f2
	ldd	[%i3 + 16], %f10	! round[1]
	ldd	[%i3 + 24], %f12

	fmovd	%f0, %f4
	.word	0x81b09266 !faesdeclx	%f2,%f6,%f0
	.word	0x85b11268 !faesdeclx	%f4,%f8,%f2

	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
	std	%f6, [%i1 + 0]
	std	%f8, [%i1 + 8]
	add	%i1, 16, %i1

	brnz,a	%i2, .Loop_cbc_dec_unaligned_out
	sub	%i2, 1, %i2

.Lcbc_dec_unaligned_out_done:
	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
	stda	%f8, [%i1 + %l6]0xc0	! partial store

	st	%f16, [%i4 + 0]		! output ivec
	st	%f17, [%i4 + 4]
	st	%f18, [%i4 + 8]
	st	%f19, [%i4 + 12]

	ret
	restore
.type	aes_fx_cbc_encrypt,#function
.size	aes_fx_cbc_encrypt,.-aes_fx_cbc_encrypt
.globl	aes_fx_ctr32_encrypt_blocks
.align	32
! CTR-mode bulk encrypt with a 32-bit big-endian-word counter.
! In:  %i0 = inp, %i1 = out (both any alignment), %i2 = number of 16-byte
!      blocks, %i3 = key schedule (round count at [%i3 + 240]),
!      %i4 = 16-byte counter block; the low word is bumped each block with
!      fpadd32 against .Lone (at .Linp_align + 128).
! Same unaligned-I/O machinery as aes_fx_cbc_encrypt; the counter block
! in %i4 is NOT written back.
aes_fx_ctr32_encrypt_blocks:
	save	%sp, -STACK_FRAME-16, %sp
	srln	%i2, 0, %i2		! canonicalize the block count
	and	%i0, 7, %l4		! is input aligned?
	andn	%i0, 7, %i0
	brz,pn	%i2, .Lctr32_no_data
	sll	%l4, 3, %l4

.Lpic:	call	.+8			! PC-relative handle on .Linp_align
	add	%o7, .Linp_align - .Lpic, %o7

	ld	[%i3 + 240], %l0
	and	%i1, 7, %l5		! is output aligned?
	ld	[%i4 + 0], %f16		! load counter
	andn	%i1, 7, %i1
	ld	[%i4 + 4], %f17
	sll	%l5, 3, %l6
	ld	[%i4 + 8], %f18
	ld	[%i4 + 12], %f19
	ldd	[%o7 + 128], %f14	! .Lone

	sll	%l0, 4, %l0		! rounds*16 = offset of round[last]
	add	%l0, %i3, %l2
	ldd	[%i3 + 0], %f20		! round[0]
	ldd	[%i3 + 8], %f22

	add	%i0, 16, %i0
	sub	%i2, 1, %i2
	ldd	[%i3 + 16], %f10	! round[1]
	ldd	[%i3 + 24], %f12

	mov	16, %l3
	movrz	%i2, 0, %l3		! stop advancing inp at the final block
	ldd	[%l2 + 0], %f24		! round[last]
	ldd	[%l2 + 8], %f26

	ldd	[%o7 + %l4], %f36	! shiftleft params
	add	%o7, 64, %o7		! %o7 now points at .Lout_align
	ldd	[%i0 - 16], %f28	! load input
	ldd	[%i0 - 8], %f30
	ldda	[%i0]0x82, %f32		! non-faulting load
	add	%i0, %l3, %i0		! inp+=16

	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30

.Loop_ctr32:
	.word	0x81b40d94 !fxor	%f16,%f20,%f0	! counter^round[0]
	.word	0x85b48d96 !fxor	%f18,%f22,%f2
	ldd	[%i3 + 32], %f6		! round[2]
	ldd	[%i3 + 40], %f8
	add	%i3, 32, %l2
	sub	%l0, 16*6, %l1

.Lctr32_enc:
	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10
	ldd	[%l2 + 24], %f12
	add	%l2, 32, %l2

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd	[%l2 + 0], %f6
	ldd	[%l2 + 8], %f8

	brnz,a	%l1, .Lctr32_enc
	sub	%l1, 16*2, %l1

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10	! round[last-1]
	ldd	[%l2 + 24], %f12

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	.word	0x8db70d98 !fxor	%f28,%f24,%f6	! inp^round[last]
	.word	0x91b78d9a !fxor	%f30,%f26,%f8

	movrz	%i2, 0, %l3
	fmovd	%f32, %f28
	ldd	[%i0 - 8], %f30		! load next input block
	ldda	[%i0]0x82, %f32		! non-faulting load
	add	%i0, %l3, %i0		! inp+=16

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%i3 + 16], %f10	! round[1]
	ldd	[%i3 + 24], %f12

	.word	0xb9bf0b7e !fshiftorx	%f28,%f30,%f36,%f28
	.word	0xbdbf8b61 !fshiftorx	%f30,%f32,%f36,%f30
	.word	0xa5b48a4e !fpadd32	%f18,%f14,%f18	! increment counter

	fmovd	%f0, %f4
	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
	.word	0x85b11248 !faesenclx	%f4,%f8,%f2

	brnz,pn	%l5, .Lctr32_unaligned_out
	nop

	std	%f0, [%i1 + 0]
	std	%f2, [%i1 + 8]
	add	%i1, 16, %i1

	brnz,a	%i2, .Loop_ctr32
	sub	%i2, 1, %i2

.Lctr32_no_data:
	ret
	restore

.align	32
.Lctr32_unaligned_out:
	ldd	[%o7 + %l6], %f36	! shift right params
	mov	0xff, %l6
	srl	%l6, %l5, %l6		! byte-enable mask for leading partial store
	sub	%g0, %l4, %l5		! negated input shift, used by ldx realign

	.word	0x8db80b60 !fshiftorx	%f0,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8

	stda	%f6, [%i1 + %l6]0xc0	! partial store
	orn	%g0, %l6, %l6		! complementary mask for the final tail
	std	%f8, [%i1 + 8]
	add	%i1, 16, %i1
	brz	%i2, .Lctr32_unaligned_out_done
	sub	%i2, 1, %i2
	b	.Loop_ctr32_unaligned_out
	nop

.align	32
.Loop_ctr32_unaligned_out:
	fmovd	%f2, %f34		! keep previous output tail for fshiftorx
	.word	0x81b40d94 !fxor	%f16,%f20,%f0	! counter^round[0]
	.word	0x85b48d96 !fxor	%f18,%f22,%f2
	ldd	[%i3 + 32], %f6		! round[2]
	ldd	[%i3 + 40], %f8

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%i3 + 48], %f10	! round[3]
	ldd	[%i3 + 56], %f12

	ldx	[%i0 - 16], %o0		! realign next block in integer regs
	ldx	[%i0 - 8], %o1
	brz	%l4, .Lctr32_aligned_inp
	movrz	%i2, 0, %l3

	ldx	[%i0], %o2
	sllx	%o0, %l4, %o0
	srlx	%o1, %l5, %g1
	sllx	%o1, %l4, %o1
	or	%g1, %o0, %o0
	srlx	%o2, %l5, %o2
	or	%o2, %o1, %o1

.Lctr32_aligned_inp:
	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd	[%i3 + 64], %f6		! round[4]
	ldd	[%i3 + 72], %f8
	add	%i3, 64, %l2
	sub	%l0, 16*8, %l1

	stx	%o0, [%sp + LOCALS + 0]	! stage realigned input on the stack
	stx	%o1, [%sp + LOCALS + 8]
	add	%i0, %l3, %i0		! inp+=16
	nop

.Lctr32_enc_unaligned:
	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10
	ldd	[%l2 + 24], %f12
	add	%l2, 32, %l2

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	ldd	[%l2 + 0], %f6
	ldd	[%l2 + 8], %f8

	brnz,a	%l1, .Lctr32_enc_unaligned
	sub	%l1, 16*2, %l1

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%l2 + 16], %f10	! round[last-1]
	ldd	[%l2 + 24], %f12
	.word	0xa5b48a4e !fpadd32	%f18,%f14,%f18	! increment counter

	fmovd	%f0, %f4
	.word	0x81b09206 !faesencx	%f2,%f6,%f0
	.word	0x85b11208 !faesencx	%f4,%f8,%f2
	.word	0x8db70d98 !fxor	%f28,%f24,%f6	! inp^round[last]
	.word	0x91b78d9a !fxor	%f30,%f26,%f8
	ldd	[%sp + LOCALS + 0], %f28	! reload realigned input
	ldd	[%sp + LOCALS + 8], %f30

	fmovd	%f0, %f4
	.word	0x81b0920a !faesencx	%f2,%f10,%f0
	.word	0x85b1120c !faesencx	%f4,%f12,%f2
	ldd	[%i3 + 16], %f10	! round[1]
	ldd	[%i3 + 24], %f12

	fmovd	%f0, %f4
	.word	0x81b09246 !faesenclx	%f2,%f6,%f0
	.word	0x85b11248 !faesenclx	%f4,%f8,%f2

	.word	0x8db8cb60 !fshiftorx	%f34,%f0,%f36,%f6
	.word	0x91b80b62 !fshiftorx	%f0,%f2,%f36,%f8
	std	%f6, [%i1 + 0]
	std	%f8, [%i1 + 8]
	add	%i1, 16, %i1

	brnz,a	%i2, .Loop_ctr32_unaligned_out
	sub	%i2, 1, %i2

.Lctr32_unaligned_out_done:
	.word	0x91b88b62 !fshiftorx	%f2,%f2,%f36,%f8
	stda	%f8, [%i1 + %l6]0xc0	! partial store

	ret
	restore
.type	aes_fx_ctr32_encrypt_blocks,#function
.size	aes_fx_ctr32_encrypt_blocks,.-aes_fx_ctr32_encrypt_blocks
1158 .align 32
1159 .Linp_align: ! fshiftorx parameters for left shift toward %rs1
1160 .byte 0, 0, 64, 0, 0, 64, 0, -64
1161 .byte 0, 0, 56, 8, 0, 56, 8, -56
1162 .byte 0, 0, 48, 16, 0, 48, 16, -48
1163 .byte 0, 0, 40, 24, 0, 40, 24, -40
1164 .byte 0, 0, 32, 32, 0, 32, 32, -32
1165 .byte 0, 0, 24, 40, 0, 24, 40, -24
1166 .byte 0, 0, 16, 48, 0, 16, 48, -16
1167 .byte 0, 0, 8, 56, 0, 8, 56, -8
1168 .Lout_align: ! fshiftorx parameters for right shift toward %rs2
1169 .byte 0, 0, 0, 64, 0, 0, 64, 0
1170 .byte 0, 0, 8, 56, 0, 8, 56, -8
1171 .byte 0, 0, 16, 48, 0, 16, 48, -16
1172 .byte 0, 0, 24, 40, 0, 24, 40, -24
1173 .byte 0, 0, 32, 32, 0, 32, 32, -32
1174 .byte 0, 0, 40, 24, 0, 40, 24, -40
1175 .byte 0, 0, 48, 16, 0, 48, 16, -48
1176 .byte 0, 0, 56, 8, 0, 56, 8, -56
1177 .Lone:
1178 .word 0, 1
1179 .asciz "AES for Fujitsu SPARC64 X, CRYPTOGAMS by <appro (at) openssl.org>"
1180 .align 4
1181