! aest4-sparcv9.S revision 1.4
1 #ifndef __ASSEMBLER__
2 # define __ASSEMBLER__ 1
3 #endif
4 #include "crypto/sparc_arch.h"
5
6 #ifdef __arch64__
7 .register %g2,#scratch
8 .register %g3,#scratch
9 #endif
10
11 .text
12
!----------------------------------------------------------------------
! void aes_t4_encrypt(inp=%o0, out=%o1, key=%o2)
! Encrypt one 16-byte block using the SPARC T4 AES opcodes (emitted
! as .word constants; decoded mnemonics in the trailing comments).
! Input and output may be arbitrarily aligned; the key schedule at
! %o2 is read with 8-byte ldx/ldd and carries the round count at
! offset 240 (as written by aes_t4_set_encrypt_key below).
! Clobbers: %g1, %g4, %g5, %o0-%o5, %f0-%f8, %f12-%f18, flags.
!----------------------------------------------------------------------
13 .globl aes_t4_encrypt
14 .align 32
15 aes_t4_encrypt:
16 andcc %o0, 7, %g1 ! is input aligned?
17 andn %o0, 7, %o0
18
! rk[0] into %g4:%g5 (integer regs, xor-ed into the state below)
19 ldx [%o2 + 0], %g4
20 ldx [%o2 + 8], %g5
21
22 ldx [%o0 + 0], %o4
23 bz,pt %icc, 1f
24 ldx [%o0 + 8], %o5
! Unaligned input: load a third dword and funnel-shift the three
! dwords so %o4:%o5 holds the 16 input bytes.
25 ldx [%o0 + 16], %o0
26 sll %g1, 3, %g1
27 sub %g0, %g1, %o3
28 sllx %o4, %g1, %o4
29 sllx %o5, %g1, %g1
30 srlx %o5, %o3, %o5
31 srlx %o0, %o3, %o3
32 or %o5, %o4, %o4
33 or %o3, %g1, %o5
34 1:
! %o3 = rounds; round keys are streamed from [%o2+16] onward,
! two per loop iteration, so the counter becomes rounds/2 - 1.
35 ld [%o2 + 240], %o3
36 ldd [%o2 + 16], %f12
37 ldd [%o2 + 24], %f14
38 xor %g4, %o4, %o4
39 xor %g5, %o5, %o5
40 .word 0x81b0230c !movxtod %o4,%f0
41 .word 0x85b0230d !movxtod %o5,%f2
42 srl %o3, 1, %o3
43 ldd [%o2 + 32], %f16
44 sub %o3, 1, %o3
45 ldd [%o2 + 40], %f18
46 add %o2, 48, %o2
47
! Main loop: two rounds per iteration, next two round keys loaded
! in the shadow of the AES instructions.
48 .Lenc:
49 .word 0x88cb0400 !aes_eround01 %f12,%f0,%f2,%f4
50 .word 0x84cb8420 !aes_eround23 %f14,%f0,%f2,%f2
51 ldd [%o2 + 0], %f12
52 ldd [%o2 + 8], %f14
53 sub %o3,1,%o3
54 .word 0x80cc0404 !aes_eround01 %f16,%f4,%f2,%f0
55 .word 0x84cc8424 !aes_eround23 %f18,%f4,%f2,%f2
56 ldd [%o2 + 16], %f16
57 ldd [%o2 + 24], %f18
58 brnz,pt %o3, .Lenc
59 add %o2, 32, %o2
60
! Penultimate round + final ("_l") round, then store the result.
61 andcc %o1, 7, %o4 ! is output aligned?
62 .word 0x88cb0400 !aes_eround01 %f12,%f0,%f2,%f4
63 .word 0x84cb8420 !aes_eround23 %f14,%f0,%f2,%f2
64 .word 0x80cc0484 !aes_eround01_l %f16,%f4,%f2,%f0
65 .word 0x84cc84a4 !aes_eround23_l %f18,%f4,%f2,%f2
66
67 bnz,pn %icc, 2f
68 nop
69
70 std %f0, [%o1 + 0]
71 retl
72 std %f2, [%o1 + 8]
73
! Unaligned output: realign with faligndata and write the two edge
! dwords with byte-masked partial stores (ASI 0xc0).
74 2: .word 0x93b24340 !alignaddrl %o1,%g0,%o1
75 mov 0xff, %o5
76 srl %o5, %o4, %o5
77
78 .word 0x89b00900 !faligndata %f0,%f0,%f4
79 .word 0x8db00902 !faligndata %f0,%f2,%f6
80 .word 0x91b08902 !faligndata %f2,%f2,%f8
81
82 stda %f4, [%o1 + %o5]0xc0 ! partial store
83 std %f6, [%o1 + 8]
84 add %o1, 16, %o1
85 orn %g0, %o5, %o5
86 retl
87 stda %f8, [%o1 + %o5]0xc0 ! partial store
88 .type aes_t4_encrypt,#function
89 .size aes_t4_encrypt,.-aes_t4_encrypt
90
!----------------------------------------------------------------------
! void aes_t4_decrypt(inp=%o0, out=%o1, key=%o2)
! Decrypt one 16-byte block; structurally identical to
! aes_t4_encrypt above but using the aes_dround* opcodes and the
! (reversed) decryption key schedule produced by
! aes_t4_set_decrypt_key.  Alignment handling is the same.
! Clobbers: %g1, %g4, %g5, %o0-%o5, %f0-%f8, %f12-%f18, flags.
!----------------------------------------------------------------------
91 .globl aes_t4_decrypt
92 .align 32
93 aes_t4_decrypt:
94 andcc %o0, 7, %g1 ! is input aligned?
95 andn %o0, 7, %o0
96
! rk[0] of the decryption schedule into %g4:%g5
97 ldx [%o2 + 0], %g4
98 ldx [%o2 + 8], %g5
99
100 ldx [%o0 + 0], %o4
101 bz,pt %icc, 1f
102 ldx [%o0 + 8], %o5
! Unaligned input: funnel-shift three dwords into %o4:%o5.
103 ldx [%o0 + 16], %o0
104 sll %g1, 3, %g1
105 sub %g0, %g1, %o3
106 sllx %o4, %g1, %o4
107 sllx %o5, %g1, %g1
108 srlx %o5, %o3, %o5
109 srlx %o0, %o3, %o3
110 or %o5, %o4, %o4
111 or %o3, %g1, %o5
112 1:
! %o3 = rounds/2 - 1 loop counter; preload first round keys.
113 ld [%o2 + 240], %o3
114 ldd [%o2 + 16], %f12
115 ldd [%o2 + 24], %f14
116 xor %g4, %o4, %o4
117 xor %g5, %o5, %o5
118 .word 0x81b0230c !movxtod %o4,%f0
119 .word 0x85b0230d !movxtod %o5,%f2
120 srl %o3, 1, %o3
121 ldd [%o2 + 32], %f16
122 sub %o3, 1, %o3
123 ldd [%o2 + 40], %f18
124 add %o2, 48, %o2
125
! Two decryption rounds per iteration.
126 .Ldec:
127 .word 0x88cb0440 !aes_dround01 %f12,%f0,%f2,%f4
128 .word 0x84cb8460 !aes_dround23 %f14,%f0,%f2,%f2
129 ldd [%o2 + 0], %f12
130 ldd [%o2 + 8], %f14
131 sub %o3,1,%o3
132 .word 0x80cc0444 !aes_dround01 %f16,%f4,%f2,%f0
133 .word 0x84cc8464 !aes_dround23 %f18,%f4,%f2,%f2
134 ldd [%o2 + 16], %f16
135 ldd [%o2 + 24], %f18
136 brnz,pt %o3, .Ldec
137 add %o2, 32, %o2
138
! Last two rounds (final uses the "_l" forms), then store.
139 andcc %o1, 7, %o4 ! is output aligned?
140 .word 0x88cb0440 !aes_dround01 %f12,%f0,%f2,%f4
141 .word 0x84cb8460 !aes_dround23 %f14,%f0,%f2,%f2
142 .word 0x80cc04c4 !aes_dround01_l %f16,%f4,%f2,%f0
143 .word 0x84cc84e4 !aes_dround23_l %f18,%f4,%f2,%f2
144
145 bnz,pn %icc, 2f
146 nop
147
148 std %f0, [%o1 + 0]
149 retl
150 std %f2, [%o1 + 8]
151
! Unaligned output: edge dwords via masked partial stores.
152 2: .word 0x93b24340 !alignaddrl %o1,%g0,%o1
153 mov 0xff, %o5
154 srl %o5, %o4, %o5
155
156 .word 0x89b00900 !faligndata %f0,%f0,%f4
157 .word 0x8db00902 !faligndata %f0,%f2,%f6
158 .word 0x91b08902 !faligndata %f2,%f2,%f8
159
160 stda %f4, [%o1 + %o5]0xc0 ! partial store
161 std %f6, [%o1 + 8]
162 add %o1, 16, %o1
163 orn %g0, %o5, %o5
164 retl
165 stda %f8, [%o1 + %o5]0xc0 ! partial store
166 .type aes_t4_decrypt,#function
167 .size aes_t4_decrypt,.-aes_t4_decrypt
!----------------------------------------------------------------------
! int aes_t4_set_encrypt_key(userKey=%o0, bits=%o1, key=%o2)
! Expand a 128/192/256-bit user key into the round-key schedule at
! %o2 using the T4 kexpand opcodes.  Dispatches on %o1: <192 ->
! .L128 (10 rounds), ==192 -> .L192 (12 rounds), else 256-bit
! (14 rounds).  Unaligned user keys are handled with
! alignaddr/faligndata.  Stores the round count at [%o2+240] and
! returns 0 in %o0.
! Clobbers: %o0-%o3, %f0-%f8, flags.
!----------------------------------------------------------------------
168 .globl aes_t4_set_encrypt_key
169 .align 32
170 aes_t4_set_encrypt_key:
171 .Lset_encrypt_key:
172 and %o0, 7, %o3
173 .word 0x91b20300 !alignaddr %o0,%g0,%o0
174 cmp %o1, 192
175 ldd [%o0 + 0], %f0
176 bl,pt %icc,.L128
177 ldd [%o0 + 8], %f2
178
179 be,pt %icc,.L192
180 ldd [%o0 + 16], %f4
181 brz,pt %o3, .L256aligned
182 ldd [%o0 + 24], %f6
183
! Unaligned 256-bit key: realign the 32 key bytes in %f0-%f6.
184 ldd [%o0 + 32], %f8
185 .word 0x81b00902 !faligndata %f0,%f2,%f0
186 .word 0x85b08904 !faligndata %f2,%f4,%f2
187 .word 0x89b10906 !faligndata %f4,%f6,%f4
188 .word 0x8db18908 !faligndata %f6,%f8,%f6
! AES-256 expansion: 14-round schedule.  Each group stores the
! current four dwords and derives the next via kexpand1 (with
! incrementing rcon index) / kexpand2 / kexpand0.
189 .L256aligned:
190 std %f0, [%o2 + 0]
191 .word 0x80c80106 !aes_kexpand1 %f0,%f6,0,%f0
192 std %f2, [%o2 + 8]
193 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
194 std %f4, [%o2 + 16]
195 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
196 std %f6, [%o2 + 24]
197 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
198 std %f0, [%o2 + 32]
199 .word 0x80c80306 !aes_kexpand1 %f0,%f6,1,%f0
200 std %f2, [%o2 + 40]
201 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
202 std %f4, [%o2 + 48]
203 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
204 std %f6, [%o2 + 56]
205 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
206 std %f0, [%o2 + 64]
207 .word 0x80c80506 !aes_kexpand1 %f0,%f6,2,%f0
208 std %f2, [%o2 + 72]
209 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
210 std %f4, [%o2 + 80]
211 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
212 std %f6, [%o2 + 88]
213 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
214 std %f0, [%o2 + 96]
215 .word 0x80c80706 !aes_kexpand1 %f0,%f6,3,%f0
216 std %f2, [%o2 + 104]
217 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
218 std %f4, [%o2 + 112]
219 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
220 std %f6, [%o2 + 120]
221 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
222 std %f0, [%o2 + 128]
223 .word 0x80c80906 !aes_kexpand1 %f0,%f6,4,%f0
224 std %f2, [%o2 + 136]
225 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
226 std %f4, [%o2 + 144]
227 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
228 std %f6, [%o2 + 152]
229 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
230 std %f0, [%o2 + 160]
231 .word 0x80c80b06 !aes_kexpand1 %f0,%f6,5,%f0
232 std %f2, [%o2 + 168]
233 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
234 std %f4, [%o2 + 176]
235 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
236 std %f6, [%o2 + 184]
237 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
238 std %f0, [%o2 + 192]
239 .word 0x80c80d06 !aes_kexpand1 %f0,%f6,6,%f0
240 std %f2, [%o2 + 200]
241 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
242 std %f4, [%o2 + 208]
243 std %f6, [%o2 + 216]
244 std %f0, [%o2 + 224]
245 std %f2, [%o2 + 232]
246
! 14 rounds for AES-256; return 0.
247 mov 14, %o3
248 st %o3, [%o2 + 240]
249 retl
250 xor %o0, %o0, %o0
251
252 .align 16
253 .L192:
254 brz,pt %o3, .L192aligned
255 nop
256
! Unaligned 192-bit key: realign the 24 key bytes in %f0-%f4.
257 ldd [%o0 + 24], %f6
258 .word 0x81b00902 !faligndata %f0,%f2,%f0
259 .word 0x85b08904 !faligndata %f2,%f4,%f2
260 .word 0x89b10906 !faligndata %f4,%f6,%f4
! AES-192 expansion: 12-round schedule.
261 .L192aligned:
262 std %f0, [%o2 + 0]
263 .word 0x80c80104 !aes_kexpand1 %f0,%f4,0,%f0
264 std %f2, [%o2 + 8]
265 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
266 std %f4, [%o2 + 16]
267 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
268 std %f0, [%o2 + 24]
269 .word 0x80c80304 !aes_kexpand1 %f0,%f4,1,%f0
270 std %f2, [%o2 + 32]
271 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
272 std %f4, [%o2 + 40]
273 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
274 std %f0, [%o2 + 48]
275 .word 0x80c80504 !aes_kexpand1 %f0,%f4,2,%f0
276 std %f2, [%o2 + 56]
277 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
278 std %f4, [%o2 + 64]
279 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
280 std %f0, [%o2 + 72]
281 .word 0x80c80704 !aes_kexpand1 %f0,%f4,3,%f0
282 std %f2, [%o2 + 80]
283 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
284 std %f4, [%o2 + 88]
285 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
286 std %f0, [%o2 + 96]
287 .word 0x80c80904 !aes_kexpand1 %f0,%f4,4,%f0
288 std %f2, [%o2 + 104]
289 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
290 std %f4, [%o2 + 112]
291 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
292 std %f0, [%o2 + 120]
293 .word 0x80c80b04 !aes_kexpand1 %f0,%f4,5,%f0
294 std %f2, [%o2 + 128]
295 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
296 std %f4, [%o2 + 136]
297 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
298 std %f0, [%o2 + 144]
299 .word 0x80c80d04 !aes_kexpand1 %f0,%f4,6,%f0
300 std %f2, [%o2 + 152]
301 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
302 std %f4, [%o2 + 160]
303 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
304 std %f0, [%o2 + 168]
305 .word 0x80c80f04 !aes_kexpand1 %f0,%f4,7,%f0
306 std %f2, [%o2 + 176]
307 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
308 std %f4, [%o2 + 184]
309 std %f0, [%o2 + 192]
310 std %f2, [%o2 + 200]
311
! 12 rounds for AES-192; return 0.
312 mov 12, %o3
313 st %o3, [%o2 + 240]
314 retl
315 xor %o0, %o0, %o0
316
317 .align 16
318 .L128:
319 brz,pt %o3, .L128aligned
320 nop
321
! Unaligned 128-bit key: realign the 16 key bytes in %f0-%f2.
322 ldd [%o0 + 16], %f4
323 .word 0x81b00902 !faligndata %f0,%f2,%f0
324 .word 0x85b08904 !faligndata %f2,%f4,%f2
! AES-128 expansion: 10-round schedule.
325 .L128aligned:
326 std %f0, [%o2 + 0]
327 .word 0x80c80102 !aes_kexpand1 %f0,%f2,0,%f0
328 std %f2, [%o2 + 8]
329 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
330 std %f0, [%o2 + 16]
331 .word 0x80c80302 !aes_kexpand1 %f0,%f2,1,%f0
332 std %f2, [%o2 + 24]
333 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
334 std %f0, [%o2 + 32]
335 .word 0x80c80502 !aes_kexpand1 %f0,%f2,2,%f0
336 std %f2, [%o2 + 40]
337 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
338 std %f0, [%o2 + 48]
339 .word 0x80c80702 !aes_kexpand1 %f0,%f2,3,%f0
340 std %f2, [%o2 + 56]
341 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
342 std %f0, [%o2 + 64]
343 .word 0x80c80902 !aes_kexpand1 %f0,%f2,4,%f0
344 std %f2, [%o2 + 72]
345 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
346 std %f0, [%o2 + 80]
347 .word 0x80c80b02 !aes_kexpand1 %f0,%f2,5,%f0
348 std %f2, [%o2 + 88]
349 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
350 std %f0, [%o2 + 96]
351 .word 0x80c80d02 !aes_kexpand1 %f0,%f2,6,%f0
352 std %f2, [%o2 + 104]
353 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
354 std %f0, [%o2 + 112]
355 .word 0x80c80f02 !aes_kexpand1 %f0,%f2,7,%f0
356 std %f2, [%o2 + 120]
357 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
358 std %f0, [%o2 + 128]
359 .word 0x80c81102 !aes_kexpand1 %f0,%f2,8,%f0
360 std %f2, [%o2 + 136]
361 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
362 std %f0, [%o2 + 144]
363 .word 0x80c81302 !aes_kexpand1 %f0,%f2,9,%f0
364 std %f2, [%o2 + 152]
365 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
366 std %f0, [%o2 + 160]
367 std %f2, [%o2 + 168]
368
! 10 rounds for AES-128; return 0.
369 mov 10, %o3
370 st %o3, [%o2 + 240]
371 retl
372 xor %o0, %o0, %o0
373 .type aes_t4_set_encrypt_key,#function
374 .size aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
375
!----------------------------------------------------------------------
! int aes_t4_set_decrypt_key(userKey=%o0, bits=%o1, key=%o2)
! Build the encryption schedule via .Lset_encrypt_key (return
! address preserved in %o5 across the call), then reverse the round
! keys in place: 32-byte groups are swapped from both ends of the
! schedule so the decrypt round functions can walk it forward.
! Returns 0 in %o0.
! Clobbers: %o0-%o3, %o5, %f0-%f14, plus whatever the key setup uses.
!----------------------------------------------------------------------
376 .globl aes_t4_set_decrypt_key
377 .align 32
378 aes_t4_set_decrypt_key:
379 mov %o7, %o5
380 call .Lset_encrypt_key
381 nop
382
383 mov %o5, %o7
384 sll %o3, 4, %o0 ! %o3 is number of rounds
385 add %o3, 2, %o3
386 add %o2, %o0, %o0 ! %o0=%o2+16*rounds
387 srl %o3, 2, %o3 ! %o3=(rounds+2)/4
388
! Swap 32 bytes from the front (%o2, moving up) with 32 bytes from
! the back (%o0, moving down) per iteration, %o3 iterations total.
389 .Lkey_flip:
390 ldd [%o2 + 0], %f0
391 ldd [%o2 + 8], %f2
392 ldd [%o2 + 16], %f4
393 ldd [%o2 + 24], %f6
394 ldd [%o0 + 0], %f8
395 ldd [%o0 + 8], %f10
396 ldd [%o0 - 16], %f12
397 ldd [%o0 - 8], %f14
398 sub %o3, 1, %o3
399 std %f0, [%o0 + 0]
400 std %f2, [%o0 + 8]
401 std %f4, [%o0 - 16]
402 std %f6, [%o0 - 8]
403 std %f8, [%o2 + 0]
404 std %f10, [%o2 + 8]
405 std %f12, [%o2 + 16]
406 std %f14, [%o2 + 24]
407 add %o2, 32, %o2
408 brnz %o3, .Lkey_flip
409 sub %o0, 32, %o0
410
411 retl
412 xor %o0, %o0, %o0
413 .type aes_t4_set_decrypt_key,#function
414 .size aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
!----------------------------------------------------------------------
! _aes128_encrypt_1x: encrypt one block held in %f0:%f2 with the
! AES-128 schedule pre-loaded by _aes128_loadkey (round keys 1..10
! in %f16-%f54).  The caller has already xor-ed rk[0] (%g4:%g5)
! into the state.  %f4 is scratch; result is left in %f0:%f2.
! Leaf routine: returns via retl, final round in the delay slot.
!----------------------------------------------------------------------
415 .align 32
416 _aes128_encrypt_1x:
417 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
418 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
419 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
420 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
421 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
422 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
423 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
424 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
425 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
426 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
427 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
428 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
429 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
430 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
431 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
432 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
433 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
434 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
! Final round uses the "_l" (last-round) opcode forms.
435 .word 0x80cd4484 !aes_eround01_l %f52,%f4,%f2,%f0
436 retl
437 .word 0x84cdc4a4 !aes_eround23_l %f54,%f4,%f2,%f2
438 .type _aes128_encrypt_1x,#function
439 .size _aes128_encrypt_1x,.-_aes128_encrypt_1x
440
!----------------------------------------------------------------------
! _aes128_encrypt_2x: encrypt two independent blocks, %f0:%f2 and
! %f4:%f6, with the pre-loaded AES-128 schedule (%f16-%f54), the
! rounds of the two blocks interleaved for pipeline overlap.
! Caller has already xor-ed rk[0] into both states.  %f8/%f10 are
! scratch; results are left in %f0:%f2 and %f4:%f6.
! Leaf routine: returns via retl, final round in the delay slot.
!----------------------------------------------------------------------
441 .align 32
442 _aes128_encrypt_2x:
443 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
444 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
445 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
446 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
447 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
448 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
449 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
450 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
451 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
452 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
453 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
454 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
455 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
456 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
457 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
458 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
459 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
460 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
461 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
462 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
463 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
464 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
465 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
466 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
467 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
468 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
469 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
470 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
471 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
472 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
473 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
474 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
475 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
476 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
477 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
478 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
! Final round ("_l" forms) for both blocks.
479 .word 0x80cd4488 !aes_eround01_l %f52,%f8,%f2,%f0
480 .word 0x84cdc4a8 !aes_eround23_l %f54,%f8,%f2,%f2
481 .word 0x88cd4c8a !aes_eround01_l %f52,%f10,%f6,%f4
482 retl
483 .word 0x8ccdccaa !aes_eround23_l %f54,%f10,%f6,%f6
484 .type _aes128_encrypt_2x,#function
485 .size _aes128_encrypt_2x,.-_aes128_encrypt_2x
486
!----------------------------------------------------------------------
! _aes128_loadkey: load a complete AES-128 key schedule from [%i3]
! into registers for the bulk-mode routines: rk[0] into %g4:%g5
! (integer regs, xor-ed into the state before the FP rounds) and
! round keys 1..10 into %f16-%f54.  Leaf routine; no other state
! is modified.  Aliased below as both _aes128_load_enckey and
! _aes128_load_deckey (the flipped decrypt schedule loads the same
! way).
!----------------------------------------------------------------------
487 .align 32
488 _aes128_loadkey:
489 ldx [%i3 + 0], %g4
490 ldx [%i3 + 8], %g5
491 ldd [%i3 + 16], %f16
492 ldd [%i3 + 24], %f18
493 ldd [%i3 + 32], %f20
494 ldd [%i3 + 40], %f22
495 ldd [%i3 + 48], %f24
496 ldd [%i3 + 56], %f26
497 ldd [%i3 + 64], %f28
498 ldd [%i3 + 72], %f30
499 ldd [%i3 + 80], %f32
500 ldd [%i3 + 88], %f34
501 ldd [%i3 + 96], %f36
502 ldd [%i3 + 104], %f38
503 ldd [%i3 + 112], %f40
504 ldd [%i3 + 120], %f42
505 ldd [%i3 + 128], %f44
506 ldd [%i3 + 136], %f46
507 ldd [%i3 + 144], %f48
508 ldd [%i3 + 152], %f50
509 ldd [%i3 + 160], %f52
510 ldd [%i3 + 168], %f54
511 retl
512 nop
513 .type _aes128_loadkey,#function
514 .size _aes128_loadkey,.-_aes128_loadkey
515 _aes128_load_enckey=_aes128_loadkey
516 _aes128_load_deckey=_aes128_loadkey
517
!----------------------------------------------------------------------
! void aes128_t4_cbc_encrypt(inp=%i0, out=%i1, len=%i2,
!                            key=%i3, ivec=%i4)
! AES-128-CBC encryption of %i2 bytes.  Register roles after setup:
!   %l0 = input misalignment in bits, %l1 = 64-%l0,
!   %l2 = output misalignment, %l3 = partial-store byte mask,
!   %l5 = 0 selects the block-store fast path.
! Three paths: the general loop (.L128_cbc_enc_loop) with either
! aligned stores or masked partial stores for unaligned output,
! and a fast path (.L128cbc_enc_blk) using ASI_BLK_INIT stores,
! taken only when out is 8-byte aligned, len >= 128 and inp != out.
! The final ciphertext block is written back to ivec on exit.
!----------------------------------------------------------------------
518 .globl aes128_t4_cbc_encrypt
519 .align 32
520 aes128_t4_cbc_encrypt:
521 save %sp, -STACK_FRAME, %sp
522 cmp %i2, 0
523 be,pn SIZE_T_CC, .L128_cbc_enc_abort
524 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
525 sub %i0, %i1, %l5 ! %i0!=%i1
! Load ivec into %f0:%f2 (word loads: ivec may be only 4-aligned).
526 ld [%i4 + 0], %f0
527 ld [%i4 + 4], %f1
528 ld [%i4 + 8], %f2
529 ld [%i4 + 12], %f3
530 prefetch [%i0], 20
531 prefetch [%i0 + 63], 20
532 call _aes128_load_enckey
533 and %i0, 7, %l0
534 andn %i0, 7, %i0
535 sll %l0, 3, %l0
536 mov 64, %l1
537 mov 0xff, %l3
538 sub %l1, %l0, %l1
539 and %i1, 7, %l2
540 cmp %i2, 127
541 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
542 movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
543 brnz,pn %l5, .L128cbc_enc_blk ! %i0==%i1)
544 srl %l3, %l2, %l3
545
546 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
547 srlx %i2, 4, %i2 ! byte count -> block count
548 prefetch [%i1], 22
549
! General loop: one block per iteration.
550 .L128_cbc_enc_loop:
551 ldx [%i0 + 0], %o0
552 brz,pt %l0, 4f
553 ldx [%i0 + 8], %o1
554
! Unaligned input: funnel-shift three dwords into %o0:%o1.
555 ldx [%i0 + 16], %o2
556 sllx %o0, %l0, %o0
557 srlx %o1, %l1, %g1
558 sllx %o1, %l0, %o1
559 or %g1, %o0, %o0
560 srlx %o2, %l1, %o2
561 or %o2, %o1, %o1
562 4:
563 xor %g4, %o0, %o0 ! ^= rk[0]
564 xor %g5, %o1, %o1
565 .word 0x99b02308 !movxtod %o0,%f12
566 .word 0x9db02309 !movxtod %o1,%f14
567
568 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
569 .word 0x85b38d82 !fxor %f14,%f2,%f2
570 prefetch [%i1 + 63], 22
571 prefetch [%i0 + 16+63], 20
572 call _aes128_encrypt_1x
573 add %i0, 16, %i0
574
575 brnz,pn %l2, 2f
576 sub %i2, 1, %i2
577
! Aligned output store; ciphertext in %f0:%f2 also chains as IV.
578 std %f0, [%i1 + 0]
579 std %f2, [%i1 + 8]
580 brnz,pt %i2, .L128_cbc_enc_loop
581 add %i1, 16, %i1
! Done: write the last ciphertext block back to ivec.
582 st %f0, [%i4 + 0]
583 st %f1, [%i4 + 4]
584 st %f2, [%i4 + 8]
585 st %f3, [%i4 + 12]
586 .L128_cbc_enc_abort:
587 ret
588 restore
589
! Unaligned-output store path for the general loop.
590 .align 16
591 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
592 ! and ~3x deterioration
593 ! in inp==out case
594 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
595 .word 0x8db00902 !faligndata %f0,%f2,%f6
596 .word 0x91b08902 !faligndata %f2,%f2,%f8
597
598 stda %f4, [%i1 + %l3]0xc0 ! partial store
599 std %f6, [%i1 + 8]
600 add %i1, 16, %i1
601 orn %g0, %l3, %l3
602 stda %f8, [%i1 + %l3]0xc0 ! partial store
603
! Re-enter loop past the first ldx (already done above via ldxa).
604 brnz,pt %i2, .L128_cbc_enc_loop+4
605 orn %g0, %l3, %l3
606 st %f0, [%i4 + 0]
607 st %f1, [%i4 + 4]
608 st %f2, [%i4 + 8]
609 st %f3, [%i4 + 12]
610 ret
611 restore
612
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Fast path: bulk of the data via ASI_BLK_INIT stores; any tail that
! does not fill a 64-byte store unit is redone by the general loop.
614 .align 32
615 .L128cbc_enc_blk:
616 add %i1, %i2, %l5
617 and %l5, 63, %l5 ! tail
618 sub %i2, %l5, %i2
619 add %l5, 15, %l5 ! round up to 16n
620 srlx %i2, 4, %i2
621 srl %l5, 4, %l5
622
623 .L128_cbc_enc_blk_loop:
624 ldx [%i0 + 0], %o0
625 brz,pt %l0, 5f
626 ldx [%i0 + 8], %o1
627
628 ldx [%i0 + 16], %o2
629 sllx %o0, %l0, %o0
630 srlx %o1, %l1, %g1
631 sllx %o1, %l0, %o1
632 or %g1, %o0, %o0
633 srlx %o2, %l1, %o2
634 or %o2, %o1, %o1
635 5:
636 xor %g4, %o0, %o0 ! ^= rk[0]
637 xor %g5, %o1, %o1
638 .word 0x99b02308 !movxtod %o0,%f12
639 .word 0x9db02309 !movxtod %o1,%f14
640
641 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
642 .word 0x85b38d82 !fxor %f14,%f2,%f2
643 prefetch [%i0 + 16+63], 20
644 call _aes128_encrypt_1x
645 add %i0, 16, %i0
646 sub %i2, 1, %i2
647
648 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
649 add %i1, 8, %i1
650 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
651 brnz,pt %i2, .L128_cbc_enc_blk_loop
652 add %i1, 8, %i1
653
! Block-init stores bypass ordering: fence before the tail/exit.
654 membar #StoreLoad|#StoreStore
655 brnz,pt %l5, .L128_cbc_enc_loop
656 mov %l5, %i2
657 st %f0, [%i4 + 0]
658 st %f1, [%i4 + 4]
659 st %f2, [%i4 + 8]
660 st %f3, [%i4 + 12]
661 ret
662 restore
663 .type aes128_t4_cbc_encrypt,#function
664 .size aes128_t4_cbc_encrypt,.-aes128_t4_cbc_encrypt
!----------------------------------------------------------------------
! void aes128_t4_ctr32_encrypt(inp=%i0, out=%i1, blocks=%i2,
!                              key=%i3, ivec=%i4)
! AES-128 counter mode with a 32-bit counter: only the low word
! (%l7) is incremented, with srl ...,0 ("clruw") providing the
! 32-bit wrap.  %i2 appears to arrive as a block count and is
! scaled to bytes at entry for the shared path-selection logic —
! TODO confirm against the C caller.
! Paths: 2x-interleaved loop (.L128_ctr32_loop2x) for even block
! counts, a 1x loop (.L128_ctr32_loop) to consume the odd block,
! unaligned-output variants with masked partial stores, and an
! ASI_BLK_INIT fast path (.L128_ctr32_blk) when out is 8-byte
! aligned, len >= 256 and inp != out.
! Keystream trick: rk[0] is pre-folded into the counter words
! (%g4/%g5), so the first round is issued inline on %f14:%f2 and
! _aes128_encrypt_1x/_2x are entered past their first round
! (call ...+8 / ...+16).
!----------------------------------------------------------------------
665 .globl aes128_t4_ctr32_encrypt
666 .align 32
667 aes128_t4_ctr32_encrypt:
668 save %sp, -STACK_FRAME, %sp
669 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
670
671 prefetch [%i0], 20
672 prefetch [%i0 + 63], 20
673 call _aes128_load_enckey
674 sllx %i2, 4, %i2 ! blocks -> bytes (delay slot)
675
! Counter block: %l4:%l5:%l6:%l7 = ivec words; %l7 is the 32-bit
! counter that gets incremented per block.
676 ld [%i4 + 0], %l4 ! counter
677 ld [%i4 + 4], %l5
678 ld [%i4 + 8], %l6
679 ld [%i4 + 12], %l7
680
681 sllx %l4, 32, %o5
682 or %l5, %o5, %o5
683 sllx %l6, 32, %g1
684 xor %o5, %g4, %g4 ! ^= rk[0]
685 xor %g1, %g5, %g5
686 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
687
! Alignment setup, same register roles as the CBC routine above.
688 sub %i0, %i1, %l5 ! %i0!=%i1
689 and %i0, 7, %l0
690 andn %i0, 7, %i0
691 sll %l0, 3, %l0
692 mov 64, %l1
693 mov 0xff, %l3
694 sub %l1, %l0, %l1
695 and %i1, 7, %l2
696 cmp %i2, 255
697 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
698 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
699 brnz,pn %l5, .L128_ctr32_blk ! %i0==%i1)
700 srl %l3, %l2, %l3
701
702 andcc %i2, 16, %g0 ! is number of blocks even?
703 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
704 bz %icc, .L128_ctr32_loop2x
705 srlx %i2, 4, %i2
! 1x loop: consumes a single (odd) block, then falls into the 2x loop.
706 .L128_ctr32_loop:
707 ldx [%i0 + 0], %o0
708 brz,pt %l0, 4f
709 ldx [%i0 + 8], %o1
710
711 ldx [%i0 + 16], %o2
712 sllx %o0, %l0, %o0
713 srlx %o1, %l1, %g1
714 sllx %o1, %l0, %o1
715 or %g1, %o0, %o0
716 srlx %o2, %l1, %o2
717 or %o2, %o1, %o1
718 4:
719 xor %g5, %l7, %g1 ! ^= rk[0]
720 add %l7, 1, %l7
721 .word 0x85b02301 !movxtod %g1,%f2
722 srl %l7, 0, %l7 ! clruw
723 prefetch [%i1 + 63], 22
724 prefetch [%i0 + 16+63], 20
! First round inline, then enter _1x past its first round pair.
725 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
726 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
727 call _aes128_encrypt_1x+8
728 add %i0, 16, %i0
729
730 .word 0x95b02308 !movxtod %o0,%f10
731 .word 0x99b02309 !movxtod %o1,%f12
732 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
733 .word 0x85b30d82 !fxor %f12,%f2,%f2
734
735 brnz,pn %l2, 2f
736 sub %i2, 1, %i2
737
738 std %f0, [%i1 + 0]
739 std %f2, [%i1 + 8]
740 brnz,pt %i2, .L128_ctr32_loop2x
741 add %i1, 16, %i1
742
743 ret
744 restore
745
! Unaligned-output tail of the 1x loop.
746 .align 16
747 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
748 ! and ~3x deterioration
749 ! in inp==out case
750 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
751 .word 0x8db00902 !faligndata %f0,%f2,%f6
752 .word 0x91b08902 !faligndata %f2,%f2,%f8
753 stda %f4, [%i1 + %l3]0xc0 ! partial store
754 std %f6, [%i1 + 8]
755 add %i1, 16, %i1
756 orn %g0, %l3, %l3
757 stda %f8, [%i1 + %l3]0xc0 ! partial store
758
759 brnz,pt %i2, .L128_ctr32_loop2x+4
760 orn %g0, %l3, %l3
761
762 ret
763 restore
764
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! 2x loop: two blocks per iteration, states interleaved in _2x.
766 .align 32
767 .L128_ctr32_loop2x:
768 ldx [%i0 + 0], %o0
769 ldx [%i0 + 8], %o1
770 ldx [%i0 + 16], %o2
771 brz,pt %l0, 4f
772 ldx [%i0 + 24], %o3
773
! Unaligned input: funnel-shift five dwords into %o0-%o3.
774 ldx [%i0 + 32], %o4
775 sllx %o0, %l0, %o0
776 srlx %o1, %l1, %g1
777 or %g1, %o0, %o0
778 sllx %o1, %l0, %o1
779 srlx %o2, %l1, %g1
780 or %g1, %o1, %o1
781 sllx %o2, %l0, %o2
782 srlx %o3, %l1, %g1
783 or %g1, %o2, %o2
784 sllx %o3, %l0, %o3
785 srlx %o4, %l1, %o4
786 or %o4, %o3, %o3
787 4:
! Build two consecutive counter blocks (%f2, %f6).
788 xor %g5, %l7, %g1 ! ^= rk[0]
789 add %l7, 1, %l7
790 .word 0x85b02301 !movxtod %g1,%f2
791 srl %l7, 0, %l7 ! clruw
792 xor %g5, %l7, %g1
793 add %l7, 1, %l7
794 .word 0x8db02301 !movxtod %g1,%f6
795 srl %l7, 0, %l7 ! clruw
796 prefetch [%i1 + 63], 22
797 prefetch [%i0 + 32+63], 20
! First round inline for both blocks, enter _2x past its round 1.
798 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
799 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
800 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
801 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
802 call _aes128_encrypt_2x+16
803 add %i0, 32, %i0
804
! XOR the keystream with the plaintext.
805 .word 0x91b02308 !movxtod %o0,%f8
806 .word 0x95b02309 !movxtod %o1,%f10
807 .word 0x99b0230a !movxtod %o2,%f12
808 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
809 .word 0x91b0230b !movxtod %o3,%f8
810 .word 0x85b28d82 !fxor %f10,%f2,%f2
811 .word 0x89b30d84 !fxor %f12,%f4,%f4
812 .word 0x8db20d86 !fxor %f8,%f6,%f6
813
814 brnz,pn %l2, 2f
815 sub %i2, 2, %i2
816
817 std %f0, [%i1 + 0]
818 std %f2, [%i1 + 8]
819 std %f4, [%i1 + 16]
820 std %f6, [%i1 + 24]
821 brnz,pt %i2, .L128_ctr32_loop2x
822 add %i1, 32, %i1
823
824 ret
825 restore
826
! Unaligned-output tail of the 2x loop.
827 .align 16
828 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
829 ! and ~3x deterioration
830 ! in inp==out case
831 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
832 .word 0x81b00902 !faligndata %f0,%f2,%f0
833 .word 0x85b08904 !faligndata %f2,%f4,%f2
834 .word 0x89b10906 !faligndata %f4,%f6,%f4
835 .word 0x8db18906 !faligndata %f6,%f6,%f6
836
837 stda %f8, [%i1 + %l3]0xc0 ! partial store
838 std %f0, [%i1 + 8]
839 std %f2, [%i1 + 16]
840 std %f4, [%i1 + 24]
841 add %i1, 32, %i1
842 orn %g0, %l3, %l3
843 stda %f6, [%i1 + %l3]0xc0 ! partial store
844
845 brnz,pt %i2, .L128_ctr32_loop2x+4
846 orn %g0, %l3, %l3
847
848 ret
849 restore
850
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Fast path: ASI_BLK_INIT stores, 2 blocks per iteration; any tail
! is finished by the ordinary loops above.
852 .align 32
853 .L128_ctr32_blk:
854 add %i1, %i2, %l5
855 and %l5, 63, %l5 ! tail
856 sub %i2, %l5, %i2
857 add %l5, 15, %l5 ! round up to 16n
858 srlx %i2, 4, %i2
859 srl %l5, 4, %l5
860 sub %i2, 1, %i2
861 add %l5, 1, %l5
862
863 .L128_ctr32_blk_loop2x:
864 ldx [%i0 + 0], %o0
865 ldx [%i0 + 8], %o1
866 ldx [%i0 + 16], %o2
867 brz,pt %l0, 5f
868 ldx [%i0 + 24], %o3
869
870 ldx [%i0 + 32], %o4
871 sllx %o0, %l0, %o0
872 srlx %o1, %l1, %g1
873 or %g1, %o0, %o0
874 sllx %o1, %l0, %o1
875 srlx %o2, %l1, %g1
876 or %g1, %o1, %o1
877 sllx %o2, %l0, %o2
878 srlx %o3, %l1, %g1
879 or %g1, %o2, %o2
880 sllx %o3, %l0, %o3
881 srlx %o4, %l1, %o4
882 or %o4, %o3, %o3
883 5:
884 xor %g5, %l7, %g1 ! ^= rk[0]
885 add %l7, 1, %l7
886 .word 0x85b02301 !movxtod %g1,%f2
887 srl %l7, 0, %l7 ! clruw
888 xor %g5, %l7, %g1
889 add %l7, 1, %l7
890 .word 0x8db02301 !movxtod %g1,%f6
891 srl %l7, 0, %l7 ! clruw
892 prefetch [%i0 + 32+63], 20
893 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
894 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
895 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
896 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
897 call _aes128_encrypt_2x+16
898 add %i0, 32, %i0
899 subcc %i2, 2, %i2
900
901 .word 0x91b02308 !movxtod %o0,%f8
902 .word 0x95b02309 !movxtod %o1,%f10
903 .word 0x99b0230a !movxtod %o2,%f12
904 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
905 .word 0x91b0230b !movxtod %o3,%f8
906 .word 0x85b28d82 !fxor %f10,%f2,%f2
907 .word 0x89b30d84 !fxor %f12,%f4,%f4
908 .word 0x8db20d86 !fxor %f8,%f6,%f6
909
910 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
911 add %i1, 8, %i1
912 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
913 add %i1, 8, %i1
914 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
915 add %i1, 8, %i1
916 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
917 bgu,pt SIZE_T_CC, .L128_ctr32_blk_loop2x
918 add %i1, 8, %i1
919
! Finish the tail (in %l5 blocks) with the ordinary loops; fence
! the block-init stores first.
920 add %l5, %i2, %i2
921 andcc %i2, 1, %g0 ! is number of blocks even?
922 membar #StoreLoad|#StoreStore
923 bnz,pt %icc, .L128_ctr32_loop
924 srl %i2, 0, %i2
925 brnz,pn %i2, .L128_ctr32_loop2x
926 nop
927
928 ret
929 restore
930 .type aes128_t4_ctr32_encrypt,#function
931 .size aes128_t4_ctr32_encrypt,.-aes128_t4_ctr32_encrypt
932 .globl aes128_t4_xts_encrypt
933 .align 32
934 aes128_t4_xts_encrypt:
935 save %sp, -STACK_FRAME-16, %sp
936 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
937
938 mov %i5, %o0
939 add %fp, STACK_BIAS-16, %o1
940 call aes_t4_encrypt
941 mov %i4, %o2
942
943 add %fp, STACK_BIAS-16, %l7
944 ldxa [%l7]0x88, %g2
945 add %fp, STACK_BIAS-8, %l7
946 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
947
948 sethi %hi(0x76543210), %l7
949 or %l7, %lo(0x76543210), %l7
950 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
951
952 prefetch [%i0], 20
953 prefetch [%i0 + 63], 20
954 call _aes128_load_enckey
955 and %i2, 15, %i5
956 and %i2, -16, %i2
957
958 sub %i0, %i1, %l5 ! %i0!=%i1
959 and %i0, 7, %l0
960 andn %i0, 7, %i0
961 sll %l0, 3, %l0
962 mov 64, %l1
963 mov 0xff, %l3
964 sub %l1, %l0, %l1
965 and %i1, 7, %l2
966 cmp %i2, 255
967 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
968 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
969 brnz,pn %l5, .L128_xts_enblk ! %i0==%i1)
970 srl %l3, %l2, %l3
971
972 andcc %i2, 16, %g0 ! is number of blocks even?
973 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
974 bz %icc, .L128_xts_enloop2x
975 srlx %i2, 4, %i2
976 .L128_xts_enloop:
977 ldx [%i0 + 0], %o0
978 brz,pt %l0, 4f
979 ldx [%i0 + 8], %o1
980
981 ldx [%i0 + 16], %o2
982 sllx %o0, %l0, %o0
983 srlx %o1, %l1, %g1
984 sllx %o1, %l0, %o1
985 or %g1, %o0, %o0
986 srlx %o2, %l1, %o2
987 or %o2, %o1, %o1
988 4:
989 .word 0x99b02302 !movxtod %g2,%f12
990 .word 0x9db02303 !movxtod %g3,%f14
991 .word 0x99b3098c !bshuffle %f12,%f12,%f12
992 .word 0x9db3898e !bshuffle %f14,%f14,%f14
993
994 xor %g4, %o0, %o0 ! ^= rk[0]
995 xor %g5, %o1, %o1
996 .word 0x81b02308 !movxtod %o0,%f0
997 .word 0x85b02309 !movxtod %o1,%f2
998
999 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1000 .word 0x85b38d82 !fxor %f14,%f2,%f2
1001
1002 prefetch [%i1 + 63], 22
1003 prefetch [%i0 + 16+63], 20
1004 call _aes128_encrypt_1x
1005 add %i0, 16, %i0
1006
1007 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1008 .word 0x85b38d82 !fxor %f14,%f2,%f2
1009
1010 srax %g3, 63, %l7 ! next tweak value
1011 addcc %g2, %g2, %g2
1012 and %l7, 0x87, %l7
1013 .word 0x87b0c223 !addxc %g3,%g3,%g3
1014 xor %l7, %g2, %g2
1015
1016 brnz,pn %l2, 2f
1017 sub %i2, 1, %i2
1018
1019 std %f0, [%i1 + 0]
1020 std %f2, [%i1 + 8]
1021 brnz,pt %i2, .L128_xts_enloop2x
1022 add %i1, 16, %i1
1023
1024 brnz,pn %i5, .L128_xts_ensteal
1025 nop
1026
1027 ret
1028 restore
1029
1030 .align 16
1031 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1032 ! and ~3x deterioration
1033 ! in inp==out case
1034 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1035 .word 0x8db00902 !faligndata %f0,%f2,%f6
1036 .word 0x91b08902 !faligndata %f2,%f2,%f8
1037 stda %f4, [%i1 + %l3]0xc0 ! partial store
1038 std %f6, [%i1 + 8]
1039 add %i1, 16, %i1
1040 orn %g0, %l3, %l3
1041 stda %f8, [%i1 + %l3]0xc0 ! partial store
1042
1043 brnz,pt %i2, .L128_xts_enloop2x+4
1044 orn %g0, %l3, %l3
1045
1046 brnz,pn %i5, .L128_xts_ensteal
1047 nop
1048
1049 ret
1050 restore
1051
1052 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1053 .align 32
1054 .L128_xts_enloop2x:
1055 ldx [%i0 + 0], %o0
1056 ldx [%i0 + 8], %o1
1057 ldx [%i0 + 16], %o2
1058 brz,pt %l0, 4f
1059 ldx [%i0 + 24], %o3
1060
1061 ldx [%i0 + 32], %o4
1062 sllx %o0, %l0, %o0
1063 srlx %o1, %l1, %g1
1064 or %g1, %o0, %o0
1065 sllx %o1, %l0, %o1
1066 srlx %o2, %l1, %g1
1067 or %g1, %o1, %o1
1068 sllx %o2, %l0, %o2
1069 srlx %o3, %l1, %g1
1070 or %g1, %o2, %o2
1071 sllx %o3, %l0, %o3
1072 srlx %o4, %l1, %o4
1073 or %o4, %o3, %o3
1074 4:
1075 .word 0x99b02302 !movxtod %g2,%f12
1076 .word 0x9db02303 !movxtod %g3,%f14
1077 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1078 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1079
1080 srax %g3, 63, %l7 ! next tweak value
1081 addcc %g2, %g2, %g2
1082 and %l7, 0x87, %l7
1083 .word 0x87b0c223 !addxc %g3,%g3,%g3
1084 xor %l7, %g2, %g2
1085
1086 .word 0x91b02302 !movxtod %g2,%f8
1087 .word 0x95b02303 !movxtod %g3,%f10
1088 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1089 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1090
1091 xor %g4, %o0, %o0 ! ^= rk[0]
1092 xor %g5, %o1, %o1
1093 xor %g4, %o2, %o2 ! ^= rk[0]
1094 xor %g5, %o3, %o3
1095 .word 0x81b02308 !movxtod %o0,%f0
1096 .word 0x85b02309 !movxtod %o1,%f2
1097 .word 0x89b0230a !movxtod %o2,%f4
1098 .word 0x8db0230b !movxtod %o3,%f6
1099
1100 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1101 .word 0x85b38d82 !fxor %f14,%f2,%f2
1102 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1103 .word 0x8db28d86 !fxor %f10,%f6,%f6
1104
1105 prefetch [%i1 + 63], 22
1106 prefetch [%i0 + 32+63], 20
1107 call _aes128_encrypt_2x
1108 add %i0, 32, %i0
1109
1110 .word 0x91b02302 !movxtod %g2,%f8
1111 .word 0x95b02303 !movxtod %g3,%f10
1112
1113 srax %g3, 63, %l7 ! next tweak value
1114 addcc %g2, %g2, %g2
1115 and %l7, 0x87, %l7
1116 .word 0x87b0c223 !addxc %g3,%g3,%g3
1117 xor %l7, %g2, %g2
1118
1119 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1120 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1121
1122 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1123 .word 0x85b38d82 !fxor %f14,%f2,%f2
1124 .word 0x89b20d84 !fxor %f8,%f4,%f4
1125 .word 0x8db28d86 !fxor %f10,%f6,%f6
1126
1127 brnz,pn %l2, 2f
1128 sub %i2, 2, %i2
1129
1130 std %f0, [%i1 + 0]
1131 std %f2, [%i1 + 8]
1132 std %f4, [%i1 + 16]
1133 std %f6, [%i1 + 24]
1134 brnz,pt %i2, .L128_xts_enloop2x
1135 add %i1, 32, %i1
1136
1137 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1138 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1139 brnz,pn %i5, .L128_xts_ensteal
1140 nop
1141
1142 ret
1143 restore
1144
1145 .align 16
1146 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1147 ! and ~3x deterioration
1148 ! in inp==out case
1149 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1150 .word 0x95b00902 !faligndata %f0,%f2,%f10
1151 .word 0x99b08904 !faligndata %f2,%f4,%f12
1152 .word 0x9db10906 !faligndata %f4,%f6,%f14
1153 .word 0x81b18906 !faligndata %f6,%f6,%f0
1154
1155 stda %f8, [%i1 + %l3]0xc0 ! partial store
1156 std %f10, [%i1 + 8]
1157 std %f12, [%i1 + 16]
1158 std %f14, [%i1 + 24]
1159 add %i1, 32, %i1
1160 orn %g0, %l3, %l3
1161 stda %f0, [%i1 + %l3]0xc0 ! partial store
1162
1163 brnz,pt %i2, .L128_xts_enloop2x+4
1164 orn %g0, %l3, %l3
1165
1166 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1167 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1168 brnz,pn %i5, .L128_xts_ensteal
1169 nop
1170
1171 ret
1172 restore
1173
1174 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1175 .align 32
1176 .L128_xts_enblk:
1177 add %i1, %i2, %l5
1178 and %l5, 63, %l5 ! tail
1179 sub %i2, %l5, %i2
1180 add %l5, 15, %l5 ! round up to 16n
1181 srlx %i2, 4, %i2
1182 srl %l5, 4, %l5
1183 sub %i2, 1, %i2
1184 add %l5, 1, %l5
1185
1186 .L128_xts_enblk2x:
1187 ldx [%i0 + 0], %o0
1188 ldx [%i0 + 8], %o1
1189 ldx [%i0 + 16], %o2
1190 brz,pt %l0, 5f
1191 ldx [%i0 + 24], %o3
1192
1193 ldx [%i0 + 32], %o4
1194 sllx %o0, %l0, %o0
1195 srlx %o1, %l1, %g1
1196 or %g1, %o0, %o0
1197 sllx %o1, %l0, %o1
1198 srlx %o2, %l1, %g1
1199 or %g1, %o1, %o1
1200 sllx %o2, %l0, %o2
1201 srlx %o3, %l1, %g1
1202 or %g1, %o2, %o2
1203 sllx %o3, %l0, %o3
1204 srlx %o4, %l1, %o4
1205 or %o4, %o3, %o3
1206 5:
1207 .word 0x99b02302 !movxtod %g2,%f12
1208 .word 0x9db02303 !movxtod %g3,%f14
1209 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1210 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1211
1212 srax %g3, 63, %l7 ! next tweak value
1213 addcc %g2, %g2, %g2
1214 and %l7, 0x87, %l7
1215 .word 0x87b0c223 !addxc %g3,%g3,%g3
1216 xor %l7, %g2, %g2
1217
1218 .word 0x91b02302 !movxtod %g2,%f8
1219 .word 0x95b02303 !movxtod %g3,%f10
1220 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1221 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1222
1223 xor %g4, %o0, %o0 ! ^= rk[0]
1224 xor %g5, %o1, %o1
1225 xor %g4, %o2, %o2 ! ^= rk[0]
1226 xor %g5, %o3, %o3
1227 .word 0x81b02308 !movxtod %o0,%f0
1228 .word 0x85b02309 !movxtod %o1,%f2
1229 .word 0x89b0230a !movxtod %o2,%f4
1230 .word 0x8db0230b !movxtod %o3,%f6
1231
1232 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1233 .word 0x85b38d82 !fxor %f14,%f2,%f2
1234 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1235 .word 0x8db28d86 !fxor %f10,%f6,%f6
1236
1237 prefetch [%i0 + 32+63], 20
1238 call _aes128_encrypt_2x
1239 add %i0, 32, %i0
1240
1241 .word 0x91b02302 !movxtod %g2,%f8
1242 .word 0x95b02303 !movxtod %g3,%f10
1243
1244 srax %g3, 63, %l7 ! next tweak value
1245 addcc %g2, %g2, %g2
1246 and %l7, 0x87, %l7
1247 .word 0x87b0c223 !addxc %g3,%g3,%g3
1248 xor %l7, %g2, %g2
1249
1250 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1251 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1252
1253 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1254 .word 0x85b38d82 !fxor %f14,%f2,%f2
1255 .word 0x89b20d84 !fxor %f8,%f4,%f4
1256 .word 0x8db28d86 !fxor %f10,%f6,%f6
1257
1258 subcc %i2, 2, %i2
1259 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1260 add %i1, 8, %i1
1261 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1262 add %i1, 8, %i1
1263 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1264 add %i1, 8, %i1
1265 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1266 bgu,pt SIZE_T_CC, .L128_xts_enblk2x
1267 add %i1, 8, %i1
1268
1269 add %l5, %i2, %i2
1270 andcc %i2, 1, %g0 ! is number of blocks even?
1271 membar #StoreLoad|#StoreStore
1272 bnz,pt %icc, .L128_xts_enloop
1273 srl %i2, 0, %i2
1274 brnz,pn %i2, .L128_xts_enloop2x
1275 nop
1276
1277 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1278 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1279 brnz,pn %i5, .L128_xts_ensteal
1280 nop
1281
1282 ret
1283 restore
1284 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1285 .align 32
1286 .L128_xts_ensteal:
1287 std %f0, [%fp + STACK_BIAS-16] ! copy of output
1288 std %f2, [%fp + STACK_BIAS-8]
1289
1290 srl %l0, 3, %l0
1291 add %fp, STACK_BIAS-16, %l7
1292 add %i0, %l0, %i0 ! original %i0+%i2&-15
1293 add %i1, %l2, %i1 ! original %i1+%i2&-15
1294 mov 0, %l0
1295 nop ! align
1296
1297 .L128_xts_enstealing:
1298 ldub [%i0 + %l0], %o0
1299 ldub [%l7 + %l0], %o1
1300 dec %i5
1301 stb %o0, [%l7 + %l0]
1302 stb %o1, [%i1 + %l0]
1303 brnz %i5, .L128_xts_enstealing
1304 inc %l0
1305
1306 mov %l7, %i0
1307 sub %i1, 16, %i1
1308 mov 0, %l0
1309 sub %i1, %l2, %i1
1310 ba .L128_xts_enloop ! one more time
1311 mov 1, %i2 ! %i5 is 0
1312 ret
1313 restore
1314 .type aes128_t4_xts_encrypt,#function
1315 .size aes128_t4_xts_encrypt,.-aes128_t4_xts_encrypt
!----------------------------------------------------------------------
! aes128_t4_xts_decrypt -- AES-128-XTS decryption using SPARC T4 AES
! opcodes (emitted as .word since older assemblers lack mnemonics).
!
! In:  %i0 = input  (ciphertext), %i1 = output (plaintext)
!      %i2 = length in bytes
!      %i4 = tweak-key schedule: used by aes_t4_encrypt to turn the
!            16-byte block at %i5 into the initial tweak
!      %i5 = pointer to 16-byte tweak/sector-number block
!      NOTE(review): data round keys are loaded by _aes128_load_deckey;
!      the key pointer is presumably %i3 -- confirm against generator.
!
! The tweak lives in %g3:%g2 (byte-swapped via bshuffle with the bmask
! set up from 0x76543210) and is multiplied by x in GF(2^128)
! (reduction polynomial 0x87) between blocks.  Unaligned input is
! merged with sllx/srlx/or; unaligned output uses faligndata plus
! partial stores (ASI 0xc0); long aligned runs use the T4 block-init
! store ASI 0xe2; a trailing partial block is handled by ciphertext
! stealing (.L128_xts_desteal).
!----------------------------------------------------------------------
1316 .globl aes128_t4_xts_decrypt
1317 .align 32
1318 aes128_t4_xts_decrypt:
1319 save %sp, -STACK_FRAME-16, %sp
1320 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
1321
! Encrypt the sector-number block with the tweak key into the stack
! scratch slot, then pull it back as the 128-bit tweak %g3:%g2.
1322 mov %i5, %o0
1323 add %fp, STACK_BIAS-16, %o1
1324 call aes_t4_encrypt
1325 mov %i4, %o2
1326
1327 add %fp, STACK_BIAS-16, %l7
1328 ldxa [%l7]0x88, %g2
1329 add %fp, STACK_BIAS-8, %l7
1330 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
1331
1332 sethi %hi(0x76543210), %l7
1333 or %l7, %lo(0x76543210), %l7
1334 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
1335
1336 prefetch [%i0], 20
1337 prefetch [%i0 + 63], 20
1338 call _aes128_load_deckey
1339 and %i2, 15, %i5 ! %i5 = tail bytes (triggers stealing)
! Round the processed length down to 16; reserve one extra full block
! when a tail exists, so stealing has a block to borrow from.
1340 and %i2, -16, %i2
1341 mov 0, %l7
1342 movrnz %i5, 16, %l7
1343 sub %i2, %l7, %i2
1344
! %l0/%l1 = input misalignment shift counts, %l2 = output misalignment,
! %l3 = partial-store byte mask, %l5 = 0 selects the block-store path.
1345 sub %i0, %i1, %l5 ! %i0!=%i1
1346 and %i0, 7, %l0
1347 andn %i0, 7, %i0
1348 sll %l0, 3, %l0
1349 mov 64, %l1
1350 mov 0xff, %l3
1351 sub %l1, %l0, %l1
1352 and %i1, 7, %l2
1353 cmp %i2, 255
1354 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
1355 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
1356 brnz,pn %l5, .L128_xts_deblk ! %i0==%i1)
1357 srl %l3, %l2, %l3
1358
1359 andcc %i2, 16, %g0 ! is number of blocks even?
1360 brz,pn %i2, .L128_xts_desteal
1361 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
1362 bz %icc, .L128_xts_deloop2x
1363 srlx %i2, 4, %i2
! ---- single-block loop (odd leading block) ----
1364 .L128_xts_deloop:
1365 ldx [%i0 + 0], %o0
1366 brz,pt %l0, 4f
1367 ldx [%i0 + 8], %o1
1368
! Unaligned input: merge three doublewords into an aligned pair.
1369 ldx [%i0 + 16], %o2
1370 sllx %o0, %l0, %o0
1371 srlx %o1, %l1, %g1
1372 sllx %o1, %l0, %o1
1373 or %g1, %o0, %o0
1374 srlx %o2, %l1, %o2
1375 or %o2, %o1, %o1
1376 4:
1377 .word 0x99b02302 !movxtod %g2,%f12
1378 .word 0x9db02303 !movxtod %g3,%f14
1379 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1380 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1381
1382 xor %g4, %o0, %o0 ! ^= rk[0]
1383 xor %g5, %o1, %o1
1384 .word 0x81b02308 !movxtod %o0,%f0
1385 .word 0x85b02309 !movxtod %o1,%f2
1386
1387 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1388 .word 0x85b38d82 !fxor %f14,%f2,%f2
1389
1390 prefetch [%i1 + 63], 22
1391 prefetch [%i0 + 16+63], 20
1392 call _aes128_decrypt_1x
1393 add %i0, 16, %i0
1394
1395 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1396 .word 0x85b38d82 !fxor %f14,%f2,%f2
1397
! Tweak *= x in GF(2^128): shift left 1, conditionally xor 0x87.
1398 srax %g3, 63, %l7 ! next tweak value
1399 addcc %g2, %g2, %g2
1400 and %l7, 0x87, %l7
1401 .word 0x87b0c223 !addxc %g3,%g3,%g3
1402 xor %l7, %g2, %g2
1403
1404 brnz,pn %l2, 2f
1405 sub %i2, 1, %i2
1406
1407 std %f0, [%i1 + 0]
1408 std %f2, [%i1 + 8]
1409 brnz,pt %i2, .L128_xts_deloop2x
1410 add %i1, 16, %i1
1411
1412 brnz,pn %i5, .L128_xts_desteal
1413 nop
1414
1415 ret
1416 restore
1417
! ---- unaligned-output epilogue of the 1x loop ----
1418 .align 16
1419 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1420 ! and ~3x deterioration
1421 ! in inp==out case
1422 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1423 .word 0x8db00902 !faligndata %f0,%f2,%f6
1424 .word 0x91b08902 !faligndata %f2,%f2,%f8
1425 stda %f4, [%i1 + %l3]0xc0 ! partial store
1426 std %f6, [%i1 + 8]
1427 add %i1, 16, %i1
1428 orn %g0, %l3, %l3
1429 stda %f8, [%i1 + %l3]0xc0 ! partial store
1430
1431 brnz,pt %i2, .L128_xts_deloop2x+4
1432 orn %g0, %l3, %l3
1433
1434 brnz,pn %i5, .L128_xts_desteal
1435 nop
1436
1437 ret
1438 restore
1439
1440 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ---- two-blocks-per-iteration main loop ----
1441 .align 32
1442 .L128_xts_deloop2x:
1443 ldx [%i0 + 0], %o0
1444 ldx [%i0 + 8], %o1
1445 ldx [%i0 + 16], %o2
1446 brz,pt %l0, 4f
1447 ldx [%i0 + 24], %o3
1448
! Unaligned input: merge five doublewords into two aligned blocks.
1449 ldx [%i0 + 32], %o4
1450 sllx %o0, %l0, %o0
1451 srlx %o1, %l1, %g1
1452 or %g1, %o0, %o0
1453 sllx %o1, %l0, %o1
1454 srlx %o2, %l1, %g1
1455 or %g1, %o1, %o1
1456 sllx %o2, %l0, %o2
1457 srlx %o3, %l1, %g1
1458 or %g1, %o2, %o2
1459 sllx %o3, %l0, %o3
1460 srlx %o4, %l1, %o4
1461 or %o4, %o3, %o3
1462 4:
! tweak[0] -> %f12:%f14, then advance and form tweak[1] -> %f8:%f10.
1463 .word 0x99b02302 !movxtod %g2,%f12
1464 .word 0x9db02303 !movxtod %g3,%f14
1465 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1466 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1467
1468 srax %g3, 63, %l7 ! next tweak value
1469 addcc %g2, %g2, %g2
1470 and %l7, 0x87, %l7
1471 .word 0x87b0c223 !addxc %g3,%g3,%g3
1472 xor %l7, %g2, %g2
1473
1474 .word 0x91b02302 !movxtod %g2,%f8
1475 .word 0x95b02303 !movxtod %g3,%f10
1476 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1477 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1478
1479 xor %g4, %o0, %o0 ! ^= rk[0]
1480 xor %g5, %o1, %o1
1481 xor %g4, %o2, %o2 ! ^= rk[0]
1482 xor %g5, %o3, %o3
1483 .word 0x81b02308 !movxtod %o0,%f0
1484 .word 0x85b02309 !movxtod %o1,%f2
1485 .word 0x89b0230a !movxtod %o2,%f4
1486 .word 0x8db0230b !movxtod %o3,%f6
1487
1488 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1489 .word 0x85b38d82 !fxor %f14,%f2,%f2
1490 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[1]
1491 .word 0x8db28d86 !fxor %f10,%f6,%f6
1492
1493 prefetch [%i1 + 63], 22
1494 prefetch [%i0 + 32+63], 20
1495 call _aes128_decrypt_2x
1496 add %i0, 32, %i0
1497
! Rebuild tweak[1] (clobbered during decryption) and advance the
! running tweak once more for the next iteration.
1498 .word 0x91b02302 !movxtod %g2,%f8
1499 .word 0x95b02303 !movxtod %g3,%f10
1500
1501 srax %g3, 63, %l7 ! next tweak value
1502 addcc %g2, %g2, %g2
1503 and %l7, 0x87, %l7
1504 .word 0x87b0c223 !addxc %g3,%g3,%g3
1505 xor %l7, %g2, %g2
1506
1507 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1508 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1509
1510 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1511 .word 0x85b38d82 !fxor %f14,%f2,%f2
1512 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[1]
1513 .word 0x8db28d86 !fxor %f10,%f6,%f6
1514
1515 brnz,pn %l2, 2f
1516 sub %i2, 2, %i2
1517
1518 std %f0, [%i1 + 0]
1519 std %f2, [%i1 + 8]
1520 std %f4, [%i1 + 16]
1521 std %f6, [%i1 + 24]
1522 brnz,pt %i2, .L128_xts_deloop2x
1523 add %i1, 32, %i1
1524
! Done: keep last block in %f0:%f2 in case stealing still follows.
1525 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1526 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1527 brnz,pn %i5, .L128_xts_desteal
1528 nop
1529
1530 ret
1531 restore
1532
! ---- unaligned-output epilogue of the 2x loop ----
1533 .align 16
1534 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1535 ! and ~3x deterioration
1536 ! in inp==out case
1537 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1538 .word 0x95b00902 !faligndata %f0,%f2,%f10
1539 .word 0x99b08904 !faligndata %f2,%f4,%f12
1540 .word 0x9db10906 !faligndata %f4,%f6,%f14
1541 .word 0x81b18906 !faligndata %f6,%f6,%f0
1542
1543 stda %f8, [%i1 + %l3]0xc0 ! partial store
1544 std %f10, [%i1 + 8]
1545 std %f12, [%i1 + 16]
1546 std %f14, [%i1 + 24]
1547 add %i1, 32, %i1
1548 orn %g0, %l3, %l3
1549 stda %f0, [%i1 + %l3]0xc0 ! partial store
1550
1551 brnz,pt %i2, .L128_xts_deloop2x+4
1552 orn %g0, %l3, %l3
1553
1554 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1555 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1556 brnz,pn %i5, .L128_xts_desteal
1557 nop
1558
1559 ret
1560 restore
1561
1562 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ---- bulk path: aligned output, >=256 bytes, in!=out; uses the
! T4 block-init store ASI (0xe2) and keeps a 64-byte tail (%l5,
! counted in blocks) to be reprocessed by the ordinary loops so the
! final stores are normally visible before return.
1563 .align 32
1564 .L128_xts_deblk:
1565 add %i1, %i2, %l5
1566 and %l5, 63, %l5 ! tail
1567 sub %i2, %l5, %i2
1568 add %l5, 15, %l5 ! round up to 16n
1569 srlx %i2, 4, %i2
1570 srl %l5, 4, %l5
1571 sub %i2, 1, %i2
1572 add %l5, 1, %l5
1573
1574 .L128_xts_deblk2x:
1575 ldx [%i0 + 0], %o0
1576 ldx [%i0 + 8], %o1
1577 ldx [%i0 + 16], %o2
1578 brz,pt %l0, 5f
1579 ldx [%i0 + 24], %o3
1580
1581 ldx [%i0 + 32], %o4
1582 sllx %o0, %l0, %o0
1583 srlx %o1, %l1, %g1
1584 or %g1, %o0, %o0
1585 sllx %o1, %l0, %o1
1586 srlx %o2, %l1, %g1
1587 or %g1, %o1, %o1
1588 sllx %o2, %l0, %o2
1589 srlx %o3, %l1, %g1
1590 or %g1, %o2, %o2
1591 sllx %o3, %l0, %o3
1592 srlx %o4, %l1, %o4
1593 or %o4, %o3, %o3
1594 5:
1595 .word 0x99b02302 !movxtod %g2,%f12
1596 .word 0x9db02303 !movxtod %g3,%f14
1597 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1598 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1599
1600 srax %g3, 63, %l7 ! next tweak value
1601 addcc %g2, %g2, %g2
1602 and %l7, 0x87, %l7
1603 .word 0x87b0c223 !addxc %g3,%g3,%g3
1604 xor %l7, %g2, %g2
1605
1606 .word 0x91b02302 !movxtod %g2,%f8
1607 .word 0x95b02303 !movxtod %g3,%f10
1608 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1609 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1610
1611 xor %g4, %o0, %o0 ! ^= rk[0]
1612 xor %g5, %o1, %o1
1613 xor %g4, %o2, %o2 ! ^= rk[0]
1614 xor %g5, %o3, %o3
1615 .word 0x81b02308 !movxtod %o0,%f0
1616 .word 0x85b02309 !movxtod %o1,%f2
1617 .word 0x89b0230a !movxtod %o2,%f4
1618 .word 0x8db0230b !movxtod %o3,%f6
1619
1620 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1621 .word 0x85b38d82 !fxor %f14,%f2,%f2
1622 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[1]
1623 .word 0x8db28d86 !fxor %f10,%f6,%f6
1624
1625 prefetch [%i0 + 32+63], 20
1626 call _aes128_decrypt_2x
1627 add %i0, 32, %i0
1628
1629 .word 0x91b02302 !movxtod %g2,%f8
1630 .word 0x95b02303 !movxtod %g3,%f10
1631
1632 srax %g3, 63, %l7 ! next tweak value
1633 addcc %g2, %g2, %g2
1634 and %l7, 0x87, %l7
1635 .word 0x87b0c223 !addxc %g3,%g3,%g3
1636 xor %l7, %g2, %g2
1637
1638 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1639 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1640
1641 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1642 .word 0x85b38d82 !fxor %f14,%f2,%f2
1643 .word 0x89b20d84 !fxor %f8,%f4,%f4
1644 .word 0x8db28d86 !fxor %f10,%f6,%f6
1645
1646 subcc %i2, 2, %i2
1647 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1648 add %i1, 8, %i1
1649 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1650 add %i1, 8, %i1
1651 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1652 add %i1, 8, %i1
1653 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1654 bgu,pt SIZE_T_CC, .L128_xts_deblk2x
1655 add %i1, 8, %i1
1656
! Hand the tail back to the normal loops (1x if odd, else 2x).
1657 add %l5, %i2, %i2
1658 andcc %i2, 1, %g0 ! is number of blocks even?
1659 membar #StoreLoad|#StoreStore
1660 bnz,pt %icc, .L128_xts_deloop
1661 srl %i2, 0, %i2
1662 brnz,pn %i2, .L128_xts_deloop2x
1663 nop
1664
1665 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1666 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1667 brnz,pn %i5, .L128_xts_desteal
1668 nop
1669
1670 ret
1671 restore
1672 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ---- ciphertext stealing: decrypt the last full block with the
! *next* tweak (computed into %o3:%o2 without advancing %g3:%g2),
! then splice %i5 tail bytes and loop back once more for the final
! block with the unadvanced tweak.
1673 .align 32
1674 .L128_xts_desteal:
1675 ldx [%i0 + 0], %o0
1676 brz,pt %l0, 8f
1677 ldx [%i0 + 8], %o1
1678
1679 ldx [%i0 + 16], %o2
1680 sllx %o0, %l0, %o0
1681 srlx %o1, %l1, %g1
1682 sllx %o1, %l0, %o1
1683 or %g1, %o0, %o0
1684 srlx %o2, %l1, %o2
1685 or %o2, %o1, %o1
1686 8:
1687 srax %g3, 63, %l7 ! next tweak value
1688 addcc %g2, %g2, %o2
1689 and %l7, 0x87, %l7
1690 .word 0x97b0c223 !addxc %g3,%g3,%o3
1691 xor %l7, %o2, %o2
1692
1693 .word 0x99b0230a !movxtod %o2,%f12
1694 .word 0x9db0230b !movxtod %o3,%f14
1695 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1696 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1697
1698 xor %g4, %o0, %o0 ! ^= rk[0]
1699 xor %g5, %o1, %o1
1700 .word 0x81b02308 !movxtod %o0,%f0
1701 .word 0x85b02309 !movxtod %o1,%f2
1702
1703 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1704 .word 0x85b38d82 !fxor %f14,%f2,%f2
1705
1706 call _aes128_decrypt_1x
1707 add %i0, 16, %i0
1708
1709 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1710 .word 0x85b38d82 !fxor %f14,%f2,%f2
1711
! Stash result on the stack; byte-swap input tail with output head.
1712 std %f0, [%fp + STACK_BIAS-16]
1713 std %f2, [%fp + STACK_BIAS-8]
1714
1715 srl %l0, 3, %l0
1716 add %fp, STACK_BIAS-16, %l7
1717 add %i0, %l0, %i0 ! original %i0+%i2&-15
1718 add %i1, %l2, %i1 ! original %i1+%i2&-15
1719 mov 0, %l0
1720 add %i1, 16, %i1
1721 nop ! align
1722
1723 .L128_xts_destealing:
1724 ldub [%i0 + %l0], %o0
1725 ldub [%l7 + %l0], %o1
1726 dec %i5
1727 stb %o0, [%l7 + %l0]
1728 stb %o1, [%i1 + %l0]
1729 brnz %i5, .L128_xts_destealing
1730 inc %l0
1731
! Re-enter the 1x loop once with the spliced block on the stack.
1732 mov %l7, %i0
1733 sub %i1, 16, %i1
1734 mov 0, %l0
1735 sub %i1, %l2, %i1
1736 ba .L128_xts_deloop ! one more time
1737 mov 1, %i2 ! %i5 is 0
1738 ret
1739 restore
1740 .type aes128_t4_xts_decrypt,#function
1741 .size aes128_t4_xts_decrypt,.-aes128_t4_xts_decrypt
!----------------------------------------------------------------------
! aes128_t4_cbc_decrypt -- AES-128-CBC decryption using T4 AES opcodes.
!
! In:  %i0 = input (ciphertext), %i1 = output (plaintext)
!      %i2 = length in bytes, %i4 = ivec (16 bytes, read and updated)
!      NOTE(review): decryption round keys are loaded into %f16-%f54 by
!      _aes128_load_deckey; key pointer presumably %i3 -- confirm.
!
! The running IV (previous ciphertext block) is kept in %f12:%f14 and
! xor-ed into each decrypted block.  Same structural layout as the
! XTS routines: 1x loop for an odd leading block, 2x main loop,
! unaligned-output epilogues with partial stores, and a block-init
! (ASI 0xe2) bulk path for long aligned in!=out runs.
!----------------------------------------------------------------------
1742 .globl aes128_t4_cbc_decrypt
1743 .align 32
1744 aes128_t4_cbc_decrypt:
1745 save %sp, -STACK_FRAME, %sp
1746 cmp %i2, 0
1747 be,pn SIZE_T_CC, .L128_cbc_dec_abort
1748 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
1749 sub %i0, %i1, %l5 ! %i0!=%i1
1750 ld [%i4 + 0], %f12 ! load ivec
1751 ld [%i4 + 4], %f13
1752 ld [%i4 + 8], %f14
1753 ld [%i4 + 12], %f15
1754 prefetch [%i0], 20
1755 prefetch [%i0 + 63], 20
1756 call _aes128_load_deckey
1757 and %i0, 7, %l0
! %l0/%l1 = input misalignment shift counts, %l2 = output misalignment,
! %l3 = partial-store byte mask, %l5 = 0 selects the block-store path.
1758 andn %i0, 7, %i0
1759 sll %l0, 3, %l0
1760 mov 64, %l1
1761 mov 0xff, %l3
1762 sub %l1, %l0, %l1
1763 and %i1, 7, %l2
1764 cmp %i2, 255
1765 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
1766 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
1767 brnz,pn %l5, .L128cbc_dec_blk ! %i0==%i1)
1768 srl %l3, %l2, %l3
1769
1770 andcc %i2, 16, %g0 ! is number of blocks even?
1771 srlx %i2, 4, %i2
1772 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
1773 bz %icc, .L128_cbc_dec_loop2x
1774 prefetch [%i1], 22
! ---- single-block loop (odd leading block) ----
1775 .L128_cbc_dec_loop:
1776 ldx [%i0 + 0], %o0
1777 brz,pt %l0, 4f
1778 ldx [%i0 + 8], %o1
1779
! Unaligned input: merge three doublewords into an aligned pair.
1780 ldx [%i0 + 16], %o2
1781 sllx %o0, %l0, %o0
1782 srlx %o1, %l1, %g1
1783 sllx %o1, %l0, %o1
1784 or %g1, %o0, %o0
1785 srlx %o2, %l1, %o2
1786 or %o2, %o1, %o1
1787 4:
! %o0:%o1 keep the raw ciphertext -- it becomes the next IV below.
1788 xor %g4, %o0, %o2 ! ^= rk[0]
1789 xor %g5, %o1, %o3
1790 .word 0x81b0230a !movxtod %o2,%f0
1791 .word 0x85b0230b !movxtod %o3,%f2
1792
1793 prefetch [%i1 + 63], 22
1794 prefetch [%i0 + 16+63], 20
1795 call _aes128_decrypt_1x
1796 add %i0, 16, %i0
1797
1798 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1799 .word 0x85b38d82 !fxor %f14,%f2,%f2
1800 .word 0x99b02308 !movxtod %o0,%f12 ! next ivec = this ciphertext
1801 .word 0x9db02309 !movxtod %o1,%f14
1802
1803 brnz,pn %l2, 2f
1804 sub %i2, 1, %i2
1805
1806 std %f0, [%i1 + 0]
1807 std %f2, [%i1 + 8]
1808 brnz,pt %i2, .L128_cbc_dec_loop2x
1809 add %i1, 16, %i1
! All done: write back the final IV and return.
1810 st %f12, [%i4 + 0]
1811 st %f13, [%i4 + 4]
1812 st %f14, [%i4 + 8]
1813 st %f15, [%i4 + 12]
1814 .L128_cbc_dec_abort:
1815 ret
1816 restore
1817
! ---- unaligned-output epilogue of the 1x loop ----
1818 .align 16
1819 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1820 ! and ~3x deterioration
1821 ! in inp==out case
1822 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1823 .word 0x8db00902 !faligndata %f0,%f2,%f6
1824 .word 0x91b08902 !faligndata %f2,%f2,%f8
1825
1826 stda %f4, [%i1 + %l3]0xc0 ! partial store
1827 std %f6, [%i1 + 8]
1828 add %i1, 16, %i1
1829 orn %g0, %l3, %l3
1830 stda %f8, [%i1 + %l3]0xc0 ! partial store
1831
1832 brnz,pt %i2, .L128_cbc_dec_loop2x+4
1833 orn %g0, %l3, %l3
1834 st %f12, [%i4 + 0]
1835 st %f13, [%i4 + 4]
1836 st %f14, [%i4 + 8]
1837 st %f15, [%i4 + 12]
1838 ret
1839 restore
1840
1841 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ---- two-blocks-per-iteration main loop ----
1842 .align 32
1843 .L128_cbc_dec_loop2x:
1844 ldx [%i0 + 0], %o0
1845 ldx [%i0 + 8], %o1
1846 ldx [%i0 + 16], %o2
1847 brz,pt %l0, 4f
1848 ldx [%i0 + 24], %o3
1849
! Unaligned input: merge five doublewords into two aligned blocks.
1850 ldx [%i0 + 32], %o4
1851 sllx %o0, %l0, %o0
1852 srlx %o1, %l1, %g1
1853 or %g1, %o0, %o0
1854 sllx %o1, %l0, %o1
1855 srlx %o2, %l1, %g1
1856 or %g1, %o1, %o1
1857 sllx %o2, %l0, %o2
1858 srlx %o3, %l1, %g1
1859 or %g1, %o2, %o2
1860 sllx %o3, %l0, %o3
1861 srlx %o4, %l1, %o4
1862 or %o4, %o3, %o3
1863 4:
! %o0..%o3 keep the raw ciphertext pair for chaining after decryption.
1864 xor %g4, %o0, %o4 ! ^= rk[0]
1865 xor %g5, %o1, %o5
1866 .word 0x81b0230c !movxtod %o4,%f0
1867 .word 0x85b0230d !movxtod %o5,%f2
1868 xor %g4, %o2, %o4
1869 xor %g5, %o3, %o5
1870 .word 0x89b0230c !movxtod %o4,%f4
1871 .word 0x8db0230d !movxtod %o5,%f6
1872
1873 prefetch [%i1 + 63], 22
1874 prefetch [%i0 + 32+63], 20
1875 call _aes128_decrypt_2x
1876 add %i0, 32, %i0
1877
! Chain: block0 ^= ivec, block1 ^= ciphertext0; ivec = ciphertext1.
1878 .word 0x91b02308 !movxtod %o0,%f8
1879 .word 0x95b02309 !movxtod %o1,%f10
1880 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1881 .word 0x85b38d82 !fxor %f14,%f2,%f2
1882 .word 0x99b0230a !movxtod %o2,%f12
1883 .word 0x9db0230b !movxtod %o3,%f14
1884 .word 0x89b20d84 !fxor %f8,%f4,%f4
1885 .word 0x8db28d86 !fxor %f10,%f6,%f6
1886
1887 brnz,pn %l2, 2f
1888 sub %i2, 2, %i2
1889
1890 std %f0, [%i1 + 0]
1891 std %f2, [%i1 + 8]
1892 std %f4, [%i1 + 16]
1893 std %f6, [%i1 + 24]
1894 brnz,pt %i2, .L128_cbc_dec_loop2x
1895 add %i1, 32, %i1
1896 st %f12, [%i4 + 0]
1897 st %f13, [%i4 + 4]
1898 st %f14, [%i4 + 8]
1899 st %f15, [%i4 + 12]
1900 ret
1901 restore
1902
! ---- unaligned-output epilogue of the 2x loop ----
1903 .align 16
1904 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1905 ! and ~3x deterioration
1906 ! in inp==out case
1907 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1908 .word 0x81b00902 !faligndata %f0,%f2,%f0
1909 .word 0x85b08904 !faligndata %f2,%f4,%f2
1910 .word 0x89b10906 !faligndata %f4,%f6,%f4
1911 .word 0x8db18906 !faligndata %f6,%f6,%f6
1912 stda %f8, [%i1 + %l3]0xc0 ! partial store
1913 std %f0, [%i1 + 8]
1914 std %f2, [%i1 + 16]
1915 std %f4, [%i1 + 24]
1916 add %i1, 32, %i1
1917 orn %g0, %l3, %l3
1918 stda %f6, [%i1 + %l3]0xc0 ! partial store
1919
1920 brnz,pt %i2, .L128_cbc_dec_loop2x+4
1921 orn %g0, %l3, %l3
1922 st %f12, [%i4 + 0]
1923 st %f13, [%i4 + 4]
1924 st %f14, [%i4 + 8]
1925 st %f15, [%i4 + 12]
1926 ret
1927 restore
1928
1929 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ---- bulk path: aligned output, >=256 bytes, in!=out; block-init
! stores (ASI 0xe2), with a 64-byte tail left for the normal loops.
1930 .align 32
1931 .L128cbc_dec_blk:
1932 add %i1, %i2, %l5
1933 and %l5, 63, %l5 ! tail
1934 sub %i2, %l5, %i2
1935 add %l5, 15, %l5 ! round up to 16n
1936 srlx %i2, 4, %i2
1937 srl %l5, 4, %l5
1938 sub %i2, 1, %i2
1939 add %l5, 1, %l5
1940
1941 .L128_cbc_dec_blk_loop2x:
1942 ldx [%i0 + 0], %o0
1943 ldx [%i0 + 8], %o1
1944 ldx [%i0 + 16], %o2
1945 brz,pt %l0, 5f
1946 ldx [%i0 + 24], %o3
1947
1948 ldx [%i0 + 32], %o4
1949 sllx %o0, %l0, %o0
1950 srlx %o1, %l1, %g1
1951 or %g1, %o0, %o0
1952 sllx %o1, %l0, %o1
1953 srlx %o2, %l1, %g1
1954 or %g1, %o1, %o1
1955 sllx %o2, %l0, %o2
1956 srlx %o3, %l1, %g1
1957 or %g1, %o2, %o2
1958 sllx %o3, %l0, %o3
1959 srlx %o4, %l1, %o4
1960 or %o4, %o3, %o3
1961 5:
1962 xor %g4, %o0, %o4 ! ^= rk[0]
1963 xor %g5, %o1, %o5
1964 .word 0x81b0230c !movxtod %o4,%f0
1965 .word 0x85b0230d !movxtod %o5,%f2
1966 xor %g4, %o2, %o4
1967 xor %g5, %o3, %o5
1968 .word 0x89b0230c !movxtod %o4,%f4
1969 .word 0x8db0230d !movxtod %o5,%f6
1970
1971 prefetch [%i0 + 32+63], 20
1972 call _aes128_decrypt_2x
1973 add %i0, 32, %i0
1974 subcc %i2, 2, %i2
1975
1976 .word 0x91b02308 !movxtod %o0,%f8
1977 .word 0x95b02309 !movxtod %o1,%f10
1978 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1979 .word 0x85b38d82 !fxor %f14,%f2,%f2
1980 .word 0x99b0230a !movxtod %o2,%f12
1981 .word 0x9db0230b !movxtod %o3,%f14
1982 .word 0x89b20d84 !fxor %f8,%f4,%f4
1983 .word 0x8db28d86 !fxor %f10,%f6,%f6
1984
1985 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1986 add %i1, 8, %i1
1987 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1988 add %i1, 8, %i1
1989 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1990 add %i1, 8, %i1
1991 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1992 bgu,pt SIZE_T_CC, .L128_cbc_dec_blk_loop2x
1993 add %i1, 8, %i1
1994
! Hand the tail back to the normal loops (1x if odd, else 2x).
1995 add %l5, %i2, %i2
1996 andcc %i2, 1, %g0 ! is number of blocks even?
1997 membar #StoreLoad|#StoreStore
1998 bnz,pt %icc, .L128_cbc_dec_loop
1999 srl %i2, 0, %i2
2000 brnz,pn %i2, .L128_cbc_dec_loop2x
2001 nop
2002 st %f12, [%i4 + 0] ! write out ivec
2003 st %f13, [%i4 + 4]
2004 st %f14, [%i4 + 8]
2005 st %f15, [%i4 + 12]
2006 ret
2007 restore
2008 .type aes128_t4_cbc_decrypt,#function
2009 .size aes128_t4_cbc_decrypt,.-aes128_t4_cbc_decrypt
!----------------------------------------------------------------------
! _aes128_decrypt_1x -- 10-round AES-128 decryption of one block.
! In:  %f0:%f2 = input state already xor-ed with rk[0] by the caller;
!      round keys pre-loaded in %f16-%f54 (by _aes128_load_deckey).
! Out: %f0:%f2 = decrypted block.  Clobbers %f4.
! Leaf routine (retl); final round uses the *_l opcode variants.
!----------------------------------------------------------------------
2010 .align 32
2011 _aes128_decrypt_1x:
2012 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
2013 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
2014 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
2015 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
2016 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
2017 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
2018 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
2019 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
2020 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
2021 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
2022 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
2023 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
2024 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
2025 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
2026 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
2027 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
2028 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
2029 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
2030 .word 0x80cd44c4 !aes_dround01_l %f52,%f4,%f2,%f0
2031 retl
2032 .word 0x84cdc4e4 !aes_dround23_l %f54,%f4,%f2,%f2 ! in delay slot
2033 .type _aes128_decrypt_1x,#function
2034 .size _aes128_decrypt_1x,.-_aes128_decrypt_1x
2035
!----------------------------------------------------------------------
! _aes128_decrypt_2x -- 10-round AES-128 decryption of two blocks,
! interleaved for instruction-level parallelism.
! In:  %f0:%f2 and %f4:%f6 = two input states already xor-ed with
!      rk[0]; round keys pre-loaded in %f16-%f54.
! Out: %f0:%f2 and %f4:%f6 = decrypted blocks.  Clobbers %f8,%f10.
! Leaf routine (retl); final round uses the *_l opcode variants.
!----------------------------------------------------------------------
2036 .align 32
2037 _aes128_decrypt_2x:
2038 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
2039 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
2040 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
2041 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
2042 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
2043 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
2044 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
2045 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
2046 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
2047 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
2048 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
2049 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
2050 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
2051 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
2052 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
2053 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
2054 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
2055 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
2056 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
2057 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
2058 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
2059 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
2060 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
2061 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
2062 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
2063 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
2064 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
2065 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
2066 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
2067 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
2068 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
2069 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
2070 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
2071 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
2072 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
2073 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
2074 .word 0x80cd44c8 !aes_dround01_l %f52,%f8,%f2,%f0
2075 .word 0x84cdc4e8 !aes_dround23_l %f54,%f8,%f2,%f2
2076 .word 0x88cd4cca !aes_dround01_l %f52,%f10,%f6,%f4
2077 retl
2078 .word 0x8ccdccea !aes_dround23_l %f54,%f10,%f6,%f6 ! in delay slot
2079 .type _aes128_decrypt_2x,#function
2080 .size _aes128_decrypt_2x,.-_aes128_decrypt_2x
!----------------------------------------------------------------------
! _aes192_encrypt_1x -- 12-round AES-192 encryption of one block.
! In:  %f0:%f2 = input state already xor-ed with rk[0] by the caller;
!      round keys pre-loaded in %f16-%f62.
! Out: %f0:%f2 = encrypted block.  Clobbers %f4.
! Leaf routine (retl); final round uses the *_l opcode variants.
!----------------------------------------------------------------------
2081 .align 32
2082 _aes192_encrypt_1x:
2083 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2084 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2085 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
2086 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
2087 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
2088 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2089 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
2090 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
2091 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
2092 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2093 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
2094 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
2095 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
2096 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2097 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
2098 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
2099 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
2100 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2101 .word 0x80cd4404 !aes_eround01 %f52,%f4,%f2,%f0
2102 .word 0x84cdc424 !aes_eround23 %f54,%f4,%f2,%f2
2103 .word 0x88ce4400 !aes_eround01 %f56,%f0,%f2,%f4
2104 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2105 .word 0x80cf4484 !aes_eround01_l %f60,%f4,%f2,%f0
2106 retl
2107 .word 0x84cfc4a4 !aes_eround23_l %f62,%f4,%f2,%f2 ! in delay slot
2108 .type _aes192_encrypt_1x,#function
2109 .size _aes192_encrypt_1x,.-_aes192_encrypt_1x
2110
!----------------------------------------------------------------------
! _aes192_encrypt_2x -- 12-round AES-192 encryption of two blocks,
! interleaved for instruction-level parallelism.
! In:  %f0:%f2 and %f4:%f6 = two input states already xor-ed with
!      rk[0]; round keys pre-loaded in %f16-%f62.
! Out: %f0:%f2 and %f4:%f6 = encrypted blocks.  Clobbers %f8,%f10.
! Leaf routine (retl); final round uses the *_l opcode variants.
!----------------------------------------------------------------------
2111 .align 32
2112 _aes192_encrypt_2x:
2113 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2114 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2115 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2116 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
2117 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
2118 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
2119 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
2120 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
2121 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
2122 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2123 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
2124 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
2125 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
2126 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
2127 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
2128 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
2129 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
2130 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2131 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
2132 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
2133 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
2134 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
2135 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
2136 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
2137 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
2138 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2139 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
2140 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
2141 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
2142 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
2143 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
2144 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
2145 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
2146 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2147 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
2148 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
2149 .word 0x80cd4408 !aes_eround01 %f52,%f8,%f2,%f0
2150 .word 0x84cdc428 !aes_eround23 %f54,%f8,%f2,%f2
2151 .word 0x88cd4c0a !aes_eround01 %f52,%f10,%f6,%f4
2152 .word 0x8ccdcc2a !aes_eround23 %f54,%f10,%f6,%f6
2153 .word 0x90ce4400 !aes_eround01 %f56,%f0,%f2,%f8
2154 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2155 .word 0x94ce4c04 !aes_eround01 %f56,%f4,%f6,%f10
2156 .word 0x8ccecc24 !aes_eround23 %f58,%f4,%f6,%f6
2157 .word 0x80cf4488 !aes_eround01_l %f60,%f8,%f2,%f0
2158 .word 0x84cfc4a8 !aes_eround23_l %f62,%f8,%f2,%f2
2159 .word 0x88cf4c8a !aes_eround01_l %f60,%f10,%f6,%f4
2160 retl
2161 .word 0x8ccfccaa !aes_eround23_l %f62,%f10,%f6,%f6 ! in delay slot
2162 .type _aes192_encrypt_2x,#function
2163 .size _aes192_encrypt_2x,.-_aes192_encrypt_2x
2164
!----------------------------------------------------------------------
! _aes256_encrypt_1x -- 14-round AES-256 encryption of one block.
! AES-256's 15-entry key schedule does not fit in %f16-%f62, so the
! last two round-key pairs are streamed from memory: after %f16-%f22
! are consumed by the first two rounds they are reloaded with
! rk[13]/rk[14] from [%i3+208..232], used for the final rounds, and
! then restored to rk[1]/rk[2] from [%i3+16..40] before returning so
! callers can loop without reloading the schedule.
! In:  %f0:%f2 = input state already xor-ed with rk[0]; %i3 = key
!      schedule pointer; remaining round keys in %f16-%f62.
! Out: %f0:%f2 = encrypted block.  Clobbers %f4 and (transiently)
!      %f16-%f22.  Leaf routine (retl).
!----------------------------------------------------------------------
2165 .align 32
2166 _aes256_encrypt_1x:
2167 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2168 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2169 ldd [%i3 + 208], %f16 ! reload rk[13]
2170 ldd [%i3 + 216], %f18
2171 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
2172 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
2173 ldd [%i3 + 224], %f20 ! reload rk[14]
2174 ldd [%i3 + 232], %f22
2175 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
2176 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2177 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
2178 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
2179 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
2180 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2181 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
2182 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
2183 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
2184 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2185 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
2186 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
2187 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
2188 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2189 .word 0x80cd4404 !aes_eround01 %f52,%f4,%f2,%f0
2190 .word 0x84cdc424 !aes_eround23 %f54,%f4,%f2,%f2
2191 .word 0x88ce4400 !aes_eround01 %f56,%f0,%f2,%f4
2192 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2193 .word 0x80cf4404 !aes_eround01 %f60,%f4,%f2,%f0
2194 .word 0x84cfc424 !aes_eround23 %f62,%f4,%f2,%f2
2195 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4 ! %f16-%f22 = rk[13],rk[14] here
2196 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2197 ldd [%i3 + 16], %f16 ! restore rk[1]
2198 ldd [%i3 + 24], %f18
2199 .word 0x80cd0484 !aes_eround01_l %f20,%f4,%f2,%f0
2200 .word 0x84cd84a4 !aes_eround23_l %f22,%f4,%f2,%f2
2201 ldd [%i3 + 32], %f20 ! restore rk[2]
2202 retl
2203 ldd [%i3 + 40], %f22 ! in delay slot
2204 .type _aes256_encrypt_1x,#function
2205 .size _aes256_encrypt_1x,.-_aes256_encrypt_1x
2206
! _aes256_encrypt_2x: like _aes256_encrypt_1x but encrypts two
! independent 128-bit blocks, interleaving the round instructions to
! hide their latency.
! In:  %f0:%f2 and %f4:%f6 = blocks, already XORed with round key 0
!      %f16-%f62 = round-key material; %i3 = key schedule pointer
! Out: %f0:%f2 and %f4:%f6 = encrypted blocks
! %f8/%f10 are scratch.  %f16-%f22 are clobbered with the tail round
! keys and restored from [%i3 + 16..40] on exit.
! Leaf routine: retl with the last ldd in the delay slot.
2207 .align 32
2208 _aes256_encrypt_2x:
2209 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2210 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2211 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2212 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
! reload %f16/%f18 with the round-13 key
2213 ldd [%i3 + 208], %f16
2214 ldd [%i3 + 216], %f18
2215 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
2216 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
2217 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
2218 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
! reload %f20/%f22 with the round-14 (final) key
2219 ldd [%i3 + 224], %f20
2220 ldd [%i3 + 232], %f22
2221 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
2222 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2223 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
2224 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
2225 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
2226 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
2227 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
2228 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
2229 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
2230 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2231 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
2232 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
2233 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
2234 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
2235 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
2236 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
2237 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
2238 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2239 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
2240 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
2241 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
2242 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
2243 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
2244 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
2245 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
2246 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2247 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
2248 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
2249 .word 0x80cd4408 !aes_eround01 %f52,%f8,%f2,%f0
2250 .word 0x84cdc428 !aes_eround23 %f54,%f8,%f2,%f2
2251 .word 0x88cd4c0a !aes_eround01 %f52,%f10,%f6,%f4
2252 .word 0x8ccdcc2a !aes_eround23 %f54,%f10,%f6,%f6
2253 .word 0x90ce4400 !aes_eround01 %f56,%f0,%f2,%f8
2254 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2255 .word 0x94ce4c04 !aes_eround01 %f56,%f4,%f6,%f10
2256 .word 0x8ccecc24 !aes_eround23 %f58,%f4,%f6,%f6
2257 .word 0x80cf4408 !aes_eround01 %f60,%f8,%f2,%f0
2258 .word 0x84cfc428 !aes_eround23 %f62,%f8,%f2,%f2
2259 .word 0x88cf4c0a !aes_eround01 %f60,%f10,%f6,%f4
2260 .word 0x8ccfcc2a !aes_eround23 %f62,%f10,%f6,%f6
! rounds 13/14 use the keys loaded into %f16-%f22 above;
! the final round uses the *_l ("last") opcode variants
2261 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2262 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2263 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2264 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
! restore %f16/%f18 for the next invocation
2265 ldd [%i3 + 16], %f16
2266 ldd [%i3 + 24], %f18
2267 .word 0x80cd0488 !aes_eround01_l %f20,%f8,%f2,%f0
2268 .word 0x84cd84a8 !aes_eround23_l %f22,%f8,%f2,%f2
2269 .word 0x88cd0c8a !aes_eround01_l %f20,%f10,%f6,%f4
2270 .word 0x8ccd8caa !aes_eround23_l %f22,%f10,%f6,%f6
! restore %f20/%f22; second ldd executes in the retl delay slot
2271 ldd [%i3 + 32], %f20
2272 retl
2273 ldd [%i3 + 40], %f22
2274 .type _aes256_encrypt_2x,#function
2275 .size _aes256_encrypt_2x,.-_aes256_encrypt_2x
2276
! _aes192_loadkey: pull a key schedule into registers for the round
! subroutines above.
! In:  %i3 = key schedule pointer
! Out: %g4:%g5  = round key 0 (from [%i3 + 0..8])
!      %f16-%f62 = round-key material from [%i3 + 16..200]
! Leaf routine (retl); clobbers nothing else.
! The aliases below reuse this loader for both 192- and 256-bit and
! both en-/de-cryption key schedules, which share this register layout
! (the 256-bit rounds fetch their extra keys from [%i3+208..232]
! themselves — see _aes256_encrypt_1x/_2x).
2277 .align 32
2278 _aes192_loadkey:
2279 ldx [%i3 + 0], %g4
2280 ldx [%i3 + 8], %g5
2281 ldd [%i3 + 16], %f16
2282 ldd [%i3 + 24], %f18
2283 ldd [%i3 + 32], %f20
2284 ldd [%i3 + 40], %f22
2285 ldd [%i3 + 48], %f24
2286 ldd [%i3 + 56], %f26
2287 ldd [%i3 + 64], %f28
2288 ldd [%i3 + 72], %f30
2289 ldd [%i3 + 80], %f32
2290 ldd [%i3 + 88], %f34
2291 ldd [%i3 + 96], %f36
2292 ldd [%i3 + 104], %f38
2293 ldd [%i3 + 112], %f40
2294 ldd [%i3 + 120], %f42
2295 ldd [%i3 + 128], %f44
2296 ldd [%i3 + 136], %f46
2297 ldd [%i3 + 144], %f48
2298 ldd [%i3 + 152], %f50
2299 ldd [%i3 + 160], %f52
2300 ldd [%i3 + 168], %f54
2301 ldd [%i3 + 176], %f56
2302 ldd [%i3 + 184], %f58
2303 ldd [%i3 + 192], %f60
2304 ldd [%i3 + 200], %f62
2305 retl
2306 nop
2307 .type _aes192_loadkey,#function
2308 .size _aes192_loadkey,.-_aes192_loadkey
2309 _aes256_loadkey=_aes192_loadkey
2310 _aes192_load_enckey=_aes192_loadkey
2311 _aes192_load_deckey=_aes192_loadkey
2312 _aes256_load_enckey=_aes192_loadkey
2313 _aes256_load_deckey=_aes192_loadkey
! aes256_t4_cbc_encrypt: AES-256 CBC encryption using T4 AES opcodes.
! Register arguments (as used below):
!   %i0 = inp, %i1 = out, %i2 = len in bytes, %i3 = expanded key
!   (consumed by _aes256_load_enckey), %i4 = ivec (16 bytes; loaded
!   into %f0-%f3, and the last ciphertext block is stored back there).
! Path selection: the bulk ASI_BLK_INIT path (.L256cbc_enc_blk) is
! taken only when the output is 8-byte aligned AND len >= 128 AND
! inp != out; otherwise the general per-block loop runs, with a
! partial-store tail ("2:") for byte-misaligned output.
! Scratch: %l0-%l3,%l5 (alignment state), %o0-%o2,%g1 (input words).
2314 .globl aes256_t4_cbc_encrypt
2315 .align 32
2316 aes256_t4_cbc_encrypt:
2317 save %sp, -STACK_FRAME, %sp
2318 cmp %i2, 0
2319 be,pn SIZE_T_CC, .L256_cbc_enc_abort
2320 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2321 sub %i0, %i1, %l5 ! %i0!=%i1
! load the IV into %f0-%f3 (the running CBC chaining value)
2322 ld [%i4 + 0], %f0
2323 ld [%i4 + 4], %f1
2324 ld [%i4 + 8], %f2
2325 ld [%i4 + 12], %f3
2326 prefetch [%i0], 20
2327 prefetch [%i0 + 63], 20
2328 call _aes256_load_enckey
2329 and %i0, 7, %l0 ! delay slot: input misalignment in bytes
! %l0 = input shift (bits), %l1 = 64-%l0, %l2 = output misalignment,
! %l3 = partial-store byte mask
2330 andn %i0, 7, %i0
2331 sll %l0, 3, %l0
2332 mov 64, %l1
2333 mov 0xff, %l3
2334 sub %l1, %l0, %l1
2335 and %i1, 7, %l2
2336 cmp %i2, 127
2337 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2338 movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
2339 brnz,pn %l5, .L256cbc_enc_blk ! %i0==%i1)
2340 srl %l3, %l2, %l3
2341
2342 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2343 srlx %i2, 4, %i2 ! byte count -> block count
2344 prefetch [%i1], 22
2345
! main loop: one 16-byte block per iteration
2346 .L256_cbc_enc_loop:
2347 ldx [%i0 + 0], %o0
2348 brz,pt %l0, 4f ! input 8-byte aligned? skip realign
2349 ldx [%i0 + 8], %o1
2350
! realign a misaligned input block into %o0:%o1
2351 ldx [%i0 + 16], %o2
2352 sllx %o0, %l0, %o0
2353 srlx %o1, %l1, %g1
2354 sllx %o1, %l0, %o1
2355 or %g1, %o0, %o0
2356 srlx %o2, %l1, %o2
2357 or %o2, %o1, %o1
2358 4:
2359 xor %g4, %o0, %o0 ! ^= rk[0]
2360 xor %g5, %o1, %o1
2361 .word 0x99b02308 !movxtod %o0,%f12
2362 .word 0x9db02309 !movxtod %o1,%f14
2363
2364 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2365 .word 0x85b38d82 !fxor %f14,%f2,%f2
2366 prefetch [%i1 + 63], 22
2367 prefetch [%i0 + 16+63], 20
2368 call _aes256_encrypt_1x
2369 add %i0, 16, %i0 ! delay slot: advance input
2370
2371 brnz,pn %l2, 2f ! misaligned output -> partial stores
2372 sub %i2, 1, %i2
2373
2374 std %f0, [%i1 + 0]
2375 std %f2, [%i1 + 8]
2376 brnz,pt %i2, .L256_cbc_enc_loop
2377 add %i1, 16, %i1
! done: write the final ciphertext block back as the new IV
2378 st %f0, [%i4 + 0]
2379 st %f1, [%i4 + 4]
2380 st %f2, [%i4 + 8]
2381 st %f3, [%i4 + 12]
2382 .L256_cbc_enc_abort:
2383 ret
2384 restore
2385
2386 .align 16
2387 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2388 ! and ~3x deterioration
2389 ! in inp==out case
2390 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2391 .word 0x8db00902 !faligndata %f0,%f2,%f6
2392 .word 0x91b08902 !faligndata %f2,%f2,%f8
2393
2394 stda %f4, [%i1 + %l3]0xc0 ! partial store
2395 std %f6, [%i1 + 8]
2396 add %i1, 16, %i1
2397 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2398 stda %f8, [%i1 + %l3]0xc0 ! partial store
2399
! re-enter the loop past its first ldx (already done via ldxa above)
2400 brnz,pt %i2, .L256_cbc_enc_loop+4
2401 orn %g0, %l3, %l3 ! restore the mask
2402 st %f0, [%i4 + 0]
2403 st %f1, [%i4 + 4]
2404 st %f2, [%i4 + 8]
2405 st %f3, [%i4 + 12]
2406 ret
2407 restore
2408
2409 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned output, len >= 128, inp != out; uses T4
! ASI_BLK_INIT stores, then reruns the <64-byte tail via the main loop
2410 .align 32
2411 .L256cbc_enc_blk:
2412 add %i1, %i2, %l5
2413 and %l5, 63, %l5 ! tail
2414 sub %i2, %l5, %i2
2415 add %l5, 15, %l5 ! round up to 16n
2416 srlx %i2, 4, %i2
2417 srl %l5, 4, %l5 ! %l5 = tail block count
2418
2419 .L256_cbc_enc_blk_loop:
2420 ldx [%i0 + 0], %o0
2421 brz,pt %l0, 5f
2422 ldx [%i0 + 8], %o1
2423
! realign a misaligned input block into %o0:%o1
2424 ldx [%i0 + 16], %o2
2425 sllx %o0, %l0, %o0
2426 srlx %o1, %l1, %g1
2427 sllx %o1, %l0, %o1
2428 or %g1, %o0, %o0
2429 srlx %o2, %l1, %o2
2430 or %o2, %o1, %o1
2431 5:
2432 xor %g4, %o0, %o0 ! ^= rk[0]
2433 xor %g5, %o1, %o1
2434 .word 0x99b02308 !movxtod %o0,%f12
2435 .word 0x9db02309 !movxtod %o1,%f14
2436
2437 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2438 .word 0x85b38d82 !fxor %f14,%f2,%f2
2439 prefetch [%i0 + 16+63], 20
2440 call _aes256_encrypt_1x
2441 add %i0, 16, %i0
2442 sub %i2, 1, %i2
2443
2444 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2445 add %i1, 8, %i1
2446 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2447 brnz,pt %i2, .L256_cbc_enc_blk_loop
2448 add %i1, 8, %i1
2449
! ASI_BLK_INIT stores bypass ordering; fence before the normal-store
! tail, then run the remaining blocks through the main loop
2450 membar #StoreLoad|#StoreStore
2451 brnz,pt %l5, .L256_cbc_enc_loop
2452 mov %l5, %i2
2453 st %f0, [%i4 + 0]
2454 st %f1, [%i4 + 4]
2455 st %f2, [%i4 + 8]
2456 st %f3, [%i4 + 12]
2457 ret
2458 restore
2459 .type aes256_t4_cbc_encrypt,#function
2460 .size aes256_t4_cbc_encrypt,.-aes256_t4_cbc_encrypt
! aes192_t4_cbc_encrypt: AES-192 CBC encryption; structurally identical
! to aes256_t4_cbc_encrypt above, but calls the 12-round
! _aes192_encrypt_1x and the 192-bit key loader.
! Register arguments (as used below):
!   %i0 = inp, %i1 = out, %i2 = len in bytes, %i3 = expanded key,
!   %i4 = ivec (16 bytes; loaded into %f0-%f3 and updated on exit).
! Bulk ASI_BLK_INIT path only when output 8-byte aligned AND
! len >= 128 AND inp != out.
2461 .globl aes192_t4_cbc_encrypt
2462 .align 32
2463 aes192_t4_cbc_encrypt:
2464 save %sp, -STACK_FRAME, %sp
2465 cmp %i2, 0
2466 be,pn SIZE_T_CC, .L192_cbc_enc_abort
2467 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2468 sub %i0, %i1, %l5 ! %i0!=%i1
! load the IV into %f0-%f3 (the running CBC chaining value)
2469 ld [%i4 + 0], %f0
2470 ld [%i4 + 4], %f1
2471 ld [%i4 + 8], %f2
2472 ld [%i4 + 12], %f3
2473 prefetch [%i0], 20
2474 prefetch [%i0 + 63], 20
2475 call _aes192_load_enckey
2476 and %i0, 7, %l0 ! delay slot: input misalignment in bytes
! %l0 = input shift (bits), %l1 = 64-%l0, %l2 = output misalignment,
! %l3 = partial-store byte mask
2477 andn %i0, 7, %i0
2478 sll %l0, 3, %l0
2479 mov 64, %l1
2480 mov 0xff, %l3
2481 sub %l1, %l0, %l1
2482 and %i1, 7, %l2
2483 cmp %i2, 127
2484 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2485 movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
2486 brnz,pn %l5, .L192cbc_enc_blk ! %i0==%i1)
2487 srl %l3, %l2, %l3
2488
2489 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2490 srlx %i2, 4, %i2 ! byte count -> block count
2491 prefetch [%i1], 22
2492
! main loop: one 16-byte block per iteration
2493 .L192_cbc_enc_loop:
2494 ldx [%i0 + 0], %o0
2495 brz,pt %l0, 4f ! input 8-byte aligned? skip realign
2496 ldx [%i0 + 8], %o1
2497
! realign a misaligned input block into %o0:%o1
2498 ldx [%i0 + 16], %o2
2499 sllx %o0, %l0, %o0
2500 srlx %o1, %l1, %g1
2501 sllx %o1, %l0, %o1
2502 or %g1, %o0, %o0
2503 srlx %o2, %l1, %o2
2504 or %o2, %o1, %o1
2505 4:
2506 xor %g4, %o0, %o0 ! ^= rk[0]
2507 xor %g5, %o1, %o1
2508 .word 0x99b02308 !movxtod %o0,%f12
2509 .word 0x9db02309 !movxtod %o1,%f14
2510
2511 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2512 .word 0x85b38d82 !fxor %f14,%f2,%f2
2513 prefetch [%i1 + 63], 22
2514 prefetch [%i0 + 16+63], 20
2515 call _aes192_encrypt_1x
2516 add %i0, 16, %i0 ! delay slot: advance input
2517
2518 brnz,pn %l2, 2f ! misaligned output -> partial stores
2519 sub %i2, 1, %i2
2520
2521 std %f0, [%i1 + 0]
2522 std %f2, [%i1 + 8]
2523 brnz,pt %i2, .L192_cbc_enc_loop
2524 add %i1, 16, %i1
! done: write the final ciphertext block back as the new IV
2525 st %f0, [%i4 + 0]
2526 st %f1, [%i4 + 4]
2527 st %f2, [%i4 + 8]
2528 st %f3, [%i4 + 12]
2529 .L192_cbc_enc_abort:
2530 ret
2531 restore
2532
2533 .align 16
2534 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2535 ! and ~3x deterioration
2536 ! in inp==out case
2537 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2538 .word 0x8db00902 !faligndata %f0,%f2,%f6
2539 .word 0x91b08902 !faligndata %f2,%f2,%f8
2540
2541 stda %f4, [%i1 + %l3]0xc0 ! partial store
2542 std %f6, [%i1 + 8]
2543 add %i1, 16, %i1
2544 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2545 stda %f8, [%i1 + %l3]0xc0 ! partial store
2546
! re-enter the loop past its first ldx (already done via ldxa above)
2547 brnz,pt %i2, .L192_cbc_enc_loop+4
2548 orn %g0, %l3, %l3 ! restore the mask
2549 st %f0, [%i4 + 0]
2550 st %f1, [%i4 + 4]
2551 st %f2, [%i4 + 8]
2552 st %f3, [%i4 + 12]
2553 ret
2554 restore
2555
2556 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned output, len >= 128, inp != out; uses T4
! ASI_BLK_INIT stores, then reruns the <64-byte tail via the main loop
2557 .align 32
2558 .L192cbc_enc_blk:
2559 add %i1, %i2, %l5
2560 and %l5, 63, %l5 ! tail
2561 sub %i2, %l5, %i2
2562 add %l5, 15, %l5 ! round up to 16n
2563 srlx %i2, 4, %i2
2564 srl %l5, 4, %l5 ! %l5 = tail block count
2565
2566 .L192_cbc_enc_blk_loop:
2567 ldx [%i0 + 0], %o0
2568 brz,pt %l0, 5f
2569 ldx [%i0 + 8], %o1
2570
! realign a misaligned input block into %o0:%o1
2571 ldx [%i0 + 16], %o2
2572 sllx %o0, %l0, %o0
2573 srlx %o1, %l1, %g1
2574 sllx %o1, %l0, %o1
2575 or %g1, %o0, %o0
2576 srlx %o2, %l1, %o2
2577 or %o2, %o1, %o1
2578 5:
2579 xor %g4, %o0, %o0 ! ^= rk[0]
2580 xor %g5, %o1, %o1
2581 .word 0x99b02308 !movxtod %o0,%f12
2582 .word 0x9db02309 !movxtod %o1,%f14
2583
2584 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2585 .word 0x85b38d82 !fxor %f14,%f2,%f2
2586 prefetch [%i0 + 16+63], 20
2587 call _aes192_encrypt_1x
2588 add %i0, 16, %i0
2589 sub %i2, 1, %i2
2590
2591 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2592 add %i1, 8, %i1
2593 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2594 brnz,pt %i2, .L192_cbc_enc_blk_loop
2595 add %i1, 8, %i1
2596
! fence the ASI_BLK_INIT stores, then run the tail via the main loop
2597 membar #StoreLoad|#StoreStore
2598 brnz,pt %l5, .L192_cbc_enc_loop
2599 mov %l5, %i2
2600 st %f0, [%i4 + 0]
2601 st %f1, [%i4 + 4]
2602 st %f2, [%i4 + 8]
2603 st %f3, [%i4 + 12]
2604 ret
2605 restore
2606 .type aes192_t4_cbc_encrypt,#function
2607 .size aes192_t4_cbc_encrypt,.-aes192_t4_cbc_encrypt
! aes256_t4_ctr32_encrypt: AES-256 CTR mode with a 32-bit big-endian
! counter in the last word of the IV.
! Register arguments (as used below):
!   %i0 = inp, %i1 = out, %i2 = number of 16-byte blocks,
!   %i3 = expanded key (via _aes256_load_enckey), %i4 = 16-byte
!   counter block (read only — the updated counter is NOT stored back).
! Counter handling: %g4 = rk[0].hi ^ iv[0..7] (constant, kept in %f14),
! %g5 = rk[0].lo with its high word XORed with iv[8..11]; per block the
! low word %l7 is incremented and truncated to 32 bits ("clruw").
! Paths: single-block loop (odd block count), 2x-interleaved loop, and
! an ASI_BLK_INIT bulk path (aligned output, >=16 blocks, inp != out).
2608 .globl aes256_t4_ctr32_encrypt
2609 .align 32
2610 aes256_t4_ctr32_encrypt:
2611 save %sp, -STACK_FRAME, %sp
2612 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2613
2614 prefetch [%i0], 20
2615 prefetch [%i0 + 63], 20
2616 call _aes256_load_enckey
2617 sllx %i2, 4, %i2 ! delay slot: blocks -> bytes for path test
2618
2619 ld [%i4 + 0], %l4 ! counter
2620 ld [%i4 + 4], %l5
2621 ld [%i4 + 8], %l6
2622 ld [%i4 + 12], %l7 ! %l7 = 32-bit running counter word
2623
! pre-fold rk[0] into the fixed 96 bits of the counter block
2624 sllx %l4, 32, %o5
2625 or %l5, %o5, %o5
2626 sllx %l6, 32, %g1
2627 xor %o5, %g4, %g4 ! ^= rk[0]
2628 xor %g1, %g5, %g5
2629 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
2630
2631 sub %i0, %i1, %l5 ! %i0!=%i1
! %l0 = input shift (bits), %l1 = 64-%l0, %l2 = output misalignment,
! %l3 = partial-store byte mask
2632 and %i0, 7, %l0
2633 andn %i0, 7, %i0
2634 sll %l0, 3, %l0
2635 mov 64, %l1
2636 mov 0xff, %l3
2637 sub %l1, %l0, %l1
2638 and %i1, 7, %l2
2639 cmp %i2, 255
2640 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2641 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
2642 brnz,pn %l5, .L256_ctr32_blk ! %i0==%i1)
2643 srl %l3, %l2, %l3
2644
2645 andcc %i2, 16, %g0 ! is number of blocks even?
2646 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2647 bz %icc, .L256_ctr32_loop2x
2648 srlx %i2, 4, %i2 ! bytes back to blocks
! single-block loop: used once when the block count is odd
2649 .L256_ctr32_loop:
2650 ldx [%i0 + 0], %o0
2651 brz,pt %l0, 4f
2652 ldx [%i0 + 8], %o1
2653
! realign a misaligned input block into %o0:%o1
2654 ldx [%i0 + 16], %o2
2655 sllx %o0, %l0, %o0
2656 srlx %o1, %l1, %g1
2657 sllx %o1, %l0, %o1
2658 or %g1, %o0, %o0
2659 srlx %o2, %l1, %o2
2660 or %o2, %o1, %o1
2661 4:
2662 xor %g5, %l7, %g1 ! ^= rk[0]
2663 add %l7, 1, %l7
2664 .word 0x85b02301 !movxtod %g1,%f2
2665 srl %l7, 0, %l7 ! clruw
2666 prefetch [%i1 + 63], 22
2667 prefetch [%i0 + 16+63], 20
! issue round 1 here, then enter _aes256_encrypt_1x 8 bytes in
! (2 instructions), skipping its first round pair
2668 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
2669 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2670 call _aes256_encrypt_1x+8
2671 add %i0, 16, %i0
2672
2673 .word 0x95b02308 !movxtod %o0,%f10
2674 .word 0x99b02309 !movxtod %o1,%f12
2675 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
2676 .word 0x85b30d82 !fxor %f12,%f2,%f2
2677
2678 brnz,pn %l2, 2f ! misaligned output -> partial stores
2679 sub %i2, 1, %i2
2680
2681 std %f0, [%i1 + 0]
2682 std %f2, [%i1 + 8]
2683 brnz,pt %i2, .L256_ctr32_loop2x ! remaining count is now even
2684 add %i1, 16, %i1
2685
2686 ret
2687 restore
2688
2689 .align 16
2690 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2691 ! and ~3x deterioration
2692 ! in inp==out case
2693 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2694 .word 0x8db00902 !faligndata %f0,%f2,%f6
2695 .word 0x91b08902 !faligndata %f2,%f2,%f8
2696 stda %f4, [%i1 + %l3]0xc0 ! partial store
2697 std %f6, [%i1 + 8]
2698 add %i1, 16, %i1
2699 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2700 stda %f8, [%i1 + %l3]0xc0 ! partial store
2701
! re-enter the 2x loop past its first ldx (already done via ldxa)
2702 brnz,pt %i2, .L256_ctr32_loop2x+4
2703 orn %g0, %l3, %l3 ! restore the mask
2704
2705 ret
2706 restore
2707
2708 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! interleaved loop: two counter blocks per iteration
2709 .align 32
2710 .L256_ctr32_loop2x:
2711 ldx [%i0 + 0], %o0
2712 ldx [%i0 + 8], %o1
2713 ldx [%i0 + 16], %o2
2714 brz,pt %l0, 4f
2715 ldx [%i0 + 24], %o3
2716
! realign two misaligned input blocks into %o0-%o3
2717 ldx [%i0 + 32], %o4
2718 sllx %o0, %l0, %o0
2719 srlx %o1, %l1, %g1
2720 or %g1, %o0, %o0
2721 sllx %o1, %l0, %o1
2722 srlx %o2, %l1, %g1
2723 or %g1, %o1, %o1
2724 sllx %o2, %l0, %o2
2725 srlx %o3, %l1, %g1
2726 or %g1, %o2, %o2
2727 sllx %o3, %l0, %o3
2728 srlx %o4, %l1, %o4
2729 or %o4, %o3, %o3
2730 4:
! build two consecutive counter values in %f2 and %f6
2731 xor %g5, %l7, %g1 ! ^= rk[0]
2732 add %l7, 1, %l7
2733 .word 0x85b02301 !movxtod %g1,%f2
2734 srl %l7, 0, %l7 ! clruw
2735 xor %g5, %l7, %g1
2736 add %l7, 1, %l7
2737 .word 0x8db02301 !movxtod %g1,%f6
2738 srl %l7, 0, %l7 ! clruw
2739 prefetch [%i1 + 63], 22
2740 prefetch [%i0 + 32+63], 20
! issue round 1 for both blocks, then enter _aes256_encrypt_2x
! 16 bytes in (4 instructions), skipping its first round pairs
2741 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
2742 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2743 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
2744 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
2745 call _aes256_encrypt_2x+16
2746 add %i0, 32, %i0
2747
! XOR the two keystream blocks with the input
2748 .word 0x91b02308 !movxtod %o0,%f8
2749 .word 0x95b02309 !movxtod %o1,%f10
2750 .word 0x99b0230a !movxtod %o2,%f12
2751 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
2752 .word 0x91b0230b !movxtod %o3,%f8
2753 .word 0x85b28d82 !fxor %f10,%f2,%f2
2754 .word 0x89b30d84 !fxor %f12,%f4,%f4
2755 .word 0x8db20d86 !fxor %f8,%f6,%f6
2756
2757 brnz,pn %l2, 2f ! misaligned output -> partial stores
2758 sub %i2, 2, %i2
2759
2760 std %f0, [%i1 + 0]
2761 std %f2, [%i1 + 8]
2762 std %f4, [%i1 + 16]
2763 std %f6, [%i1 + 24]
2764 brnz,pt %i2, .L256_ctr32_loop2x
2765 add %i1, 32, %i1
2766
2767 ret
2768 restore
2769
2770 .align 16
2771 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2772 ! and ~3x deterioration
2773 ! in inp==out case
2774 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
2775 .word 0x81b00902 !faligndata %f0,%f2,%f0
2776 .word 0x85b08904 !faligndata %f2,%f4,%f2
2777 .word 0x89b10906 !faligndata %f4,%f6,%f4
2778 .word 0x8db18906 !faligndata %f6,%f6,%f6
2779
2780 stda %f8, [%i1 + %l3]0xc0 ! partial store
2781 std %f0, [%i1 + 8]
2782 std %f2, [%i1 + 16]
2783 std %f4, [%i1 + 24]
2784 add %i1, 32, %i1
2785 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2786 stda %f6, [%i1 + %l3]0xc0 ! partial store
2787
2788 brnz,pt %i2, .L256_ctr32_loop2x+4
2789 orn %g0, %l3, %l3 ! restore the mask
2790
2791 ret
2792 restore
2793
2794 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned output, >=256 bytes, inp != out; ASI_BLK_INIT
! stores, then the <64-byte tail reruns through the loops above
2795 .align 32
2796 .L256_ctr32_blk:
2797 add %i1, %i2, %l5
2798 and %l5, 63, %l5 ! tail
2799 sub %i2, %l5, %i2
2800 add %l5, 15, %l5 ! round up to 16n
2801 srlx %i2, 4, %i2
2802 srl %l5, 4, %l5 ! %l5 = tail block count
2803 sub %i2, 1, %i2
2804 add %l5, 1, %l5 ! shift one block into the tail
2805
2806 .L256_ctr32_blk_loop2x:
2807 ldx [%i0 + 0], %o0
2808 ldx [%i0 + 8], %o1
2809 ldx [%i0 + 16], %o2
2810 brz,pt %l0, 5f
2811 ldx [%i0 + 24], %o3
2812
! realign two misaligned input blocks into %o0-%o3
2813 ldx [%i0 + 32], %o4
2814 sllx %o0, %l0, %o0
2815 srlx %o1, %l1, %g1
2816 or %g1, %o0, %o0
2817 sllx %o1, %l0, %o1
2818 srlx %o2, %l1, %g1
2819 or %g1, %o1, %o1
2820 sllx %o2, %l0, %o2
2821 srlx %o3, %l1, %g1
2822 or %g1, %o2, %o2
2823 sllx %o3, %l0, %o3
2824 srlx %o4, %l1, %o4
2825 or %o4, %o3, %o3
2826 5:
! build two consecutive counter values in %f2 and %f6
2827 xor %g5, %l7, %g1 ! ^= rk[0]
2828 add %l7, 1, %l7
2829 .word 0x85b02301 !movxtod %g1,%f2
2830 srl %l7, 0, %l7 ! clruw
2831 xor %g5, %l7, %g1
2832 add %l7, 1, %l7
2833 .word 0x8db02301 !movxtod %g1,%f6
2834 srl %l7, 0, %l7 ! clruw
2835 prefetch [%i0 + 32+63], 20
! round 1 issued here; enter _aes256_encrypt_2x 16 bytes in
2836 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
2837 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2838 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
2839 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
2840 call _aes256_encrypt_2x+16
2841 add %i0, 32, %i0
2842 subcc %i2, 2, %i2
2843
2844 .word 0x91b02308 !movxtod %o0,%f8
2845 .word 0x95b02309 !movxtod %o1,%f10
2846 .word 0x99b0230a !movxtod %o2,%f12
2847 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
2848 .word 0x91b0230b !movxtod %o3,%f8
2849 .word 0x85b28d82 !fxor %f10,%f2,%f2
2850 .word 0x89b30d84 !fxor %f12,%f4,%f4
2851 .word 0x8db20d86 !fxor %f8,%f6,%f6
2852
2853 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2854 add %i1, 8, %i1
2855 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2856 add %i1, 8, %i1
2857 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2858 add %i1, 8, %i1
2859 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2860 bgu,pt SIZE_T_CC, .L256_ctr32_blk_loop2x
2861 add %i1, 8, %i1
2862
! fence the ASI_BLK_INIT stores and dispatch the tail: odd remainder
! goes through the 1x loop once, then (or directly) the 2x loop
2863 add %l5, %i2, %i2
2864 andcc %i2, 1, %g0 ! is number of blocks even?
2865 membar #StoreLoad|#StoreStore
2866 bnz,pt %icc, .L256_ctr32_loop
2867 srl %i2, 0, %i2
2868 brnz,pn %i2, .L256_ctr32_loop2x
2869 nop
2870
2871 ret
2872 restore
2873 .type aes256_t4_ctr32_encrypt,#function
2874 .size aes256_t4_ctr32_encrypt,.-aes256_t4_ctr32_encrypt
! aes256_t4_xts_encrypt: XTS-AES-256 encryption.
! Register arguments (as used below):
!   %i0 = inp, %i1 = out, %i2 = len in bytes, %i3 = data ("key1")
!   schedule, %i4 = tweak ("key2") schedule, %i5 = 16-byte tweak
!   input block (encrypted with key2 via aes_t4_encrypt into a stack
!   scratch slot to form the initial tweak in %g3:%g2).
! After the tweak is derived, %i5 is REUSED as len & 15 — the number
! of trailing bytes handled by ciphertext stealing (.L256_xts_ensteal).
! Per block the tweak is multiplied by x in GF(2^128): srax extracts
! the sign of the top bit, addcc/addxc shift %g3:%g2 left by one with
! carry, and (sign & 0x87) is XORed into the low word.  bshuffle with
! the bmask built from 0x76543210 byte-swaps the tweak for FP use.
! Same three store paths as the CBC/CTR routines above.
2875 .globl aes256_t4_xts_encrypt
2876 .align 32
2877 aes256_t4_xts_encrypt:
2878 save %sp, -STACK_FRAME-16, %sp ! extra 16 bytes: tweak scratch
2879 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2880
! encrypt the tweak input [%i5] with key2 (%i4) into [%fp+BIAS-16]
2881 mov %i5, %o0
2882 add %fp, STACK_BIAS-16, %o1
2883 call aes_t4_encrypt
2884 mov %i4, %o2
2885
! pick up the encrypted tweak as little-endian (ASI 0x88) %g3:%g2
2886 add %fp, STACK_BIAS-16, %l7
2887 ldxa [%l7]0x88, %g2
2888 add %fp, STACK_BIAS-8, %l7
2889 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
2890
2891 sethi %hi(0x76543210), %l7
2892 or %l7, %lo(0x76543210), %l7
2893 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
2894
2895 prefetch [%i0], 20
2896 prefetch [%i0 + 63], 20
2897 call _aes256_load_enckey
2898 and %i2, 15, %i5 ! delay slot: %i5 = stealing byte count
2899 and %i2, -16, %i2
2900
2901 sub %i0, %i1, %l5 ! %i0!=%i1
! %l0 = input shift (bits), %l1 = 64-%l0, %l2 = output misalignment,
! %l3 = partial-store byte mask
2902 and %i0, 7, %l0
2903 andn %i0, 7, %i0
2904 sll %l0, 3, %l0
2905 mov 64, %l1
2906 mov 0xff, %l3
2907 sub %l1, %l0, %l1
2908 and %i1, 7, %l2
2909 cmp %i2, 255
2910 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2911 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
2912 brnz,pn %l5, .L256_xts_enblk ! %i0==%i1)
2913 srl %l3, %l2, %l3
2914
2915 andcc %i2, 16, %g0 ! is number of blocks even?
2916 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2917 bz %icc, .L256_xts_enloop2x
2918 srlx %i2, 4, %i2 ! bytes -> blocks
! single-block loop: used when the block count is odd, and once more
! for the extra pass scheduled by the stealing code
2919 .L256_xts_enloop:
2920 ldx [%i0 + 0], %o0
2921 brz,pt %l0, 4f
2922 ldx [%i0 + 8], %o1
2923
! realign a misaligned input block into %o0:%o1
2924 ldx [%i0 + 16], %o2
2925 sllx %o0, %l0, %o0
2926 srlx %o1, %l1, %g1
2927 sllx %o1, %l0, %o1
2928 or %g1, %o0, %o0
2929 srlx %o2, %l1, %o2
2930 or %o2, %o1, %o1
2931 4:
! byte-swap the current tweak into %f12:%f14
2932 .word 0x99b02302 !movxtod %g2,%f12
2933 .word 0x9db02303 !movxtod %g3,%f14
2934 .word 0x99b3098c !bshuffle %f12,%f12,%f12
2935 .word 0x9db3898e !bshuffle %f14,%f14,%f14
2936
2937 xor %g4, %o0, %o0 ! ^= rk[0]
2938 xor %g5, %o1, %o1
2939 .word 0x81b02308 !movxtod %o0,%f0
2940 .word 0x85b02309 !movxtod %o1,%f2
2941
2942 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
2943 .word 0x85b38d82 !fxor %f14,%f2,%f2
2944
2945 prefetch [%i1 + 63], 22
2946 prefetch [%i0 + 16+63], 20
2947 call _aes256_encrypt_1x
2948 add %i0, 16, %i0
2949
! post-whitening with the same tweak
2950 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
2951 .word 0x85b38d82 !fxor %f14,%f2,%f2
2952
! tweak *= x in GF(2^128)
2953 srax %g3, 63, %l7 ! next tweak value
2954 addcc %g2, %g2, %g2
2955 and %l7, 0x87, %l7
2956 .word 0x87b0c223 !addxc %g3,%g3,%g3
2957 xor %l7, %g2, %g2
2958
2959 brnz,pn %l2, 2f ! misaligned output -> partial stores
2960 sub %i2, 1, %i2
2961
2962 std %f0, [%i1 + 0]
2963 std %f2, [%i1 + 8]
2964 brnz,pt %i2, .L256_xts_enloop2x
2965 add %i1, 16, %i1
2966
2967 brnz,pn %i5, .L256_xts_ensteal ! trailing bytes to steal?
2968 nop
2969
2970 ret
2971 restore
2972
2973 .align 16
2974 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2975 ! and ~3x deterioration
2976 ! in inp==out case
2977 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2978 .word 0x8db00902 !faligndata %f0,%f2,%f6
2979 .word 0x91b08902 !faligndata %f2,%f2,%f8
2980 stda %f4, [%i1 + %l3]0xc0 ! partial store
2981 std %f6, [%i1 + 8]
2982 add %i1, 16, %i1
2983 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2984 stda %f8, [%i1 + %l3]0xc0 ! partial store
2985
! re-enter the 2x loop past its first ldx (already done via ldxa)
2986 brnz,pt %i2, .L256_xts_enloop2x+4
2987 orn %g0, %l3, %l3 ! restore the mask
2988
2989 brnz,pn %i5, .L256_xts_ensteal
2990 nop
2991
2992 ret
2993 restore
2994
2995 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! interleaved loop: two blocks per iteration, two tweak values
2996 .align 32
2997 .L256_xts_enloop2x:
2998 ldx [%i0 + 0], %o0
2999 ldx [%i0 + 8], %o1
3000 ldx [%i0 + 16], %o2
3001 brz,pt %l0, 4f
3002 ldx [%i0 + 24], %o3
3003
! realign two misaligned input blocks into %o0-%o3
3004 ldx [%i0 + 32], %o4
3005 sllx %o0, %l0, %o0
3006 srlx %o1, %l1, %g1
3007 or %g1, %o0, %o0
3008 sllx %o1, %l0, %o1
3009 srlx %o2, %l1, %g1
3010 or %g1, %o1, %o1
3011 sllx %o2, %l0, %o2
3012 srlx %o3, %l1, %g1
3013 or %g1, %o2, %o2
3014 sllx %o3, %l0, %o3
3015 srlx %o4, %l1, %o4
3016 or %o4, %o3, %o3
3017 4:
! tweak for block 1 -> %f12:%f14 (byte-swapped)
3018 .word 0x99b02302 !movxtod %g2,%f12
3019 .word 0x9db02303 !movxtod %g3,%f14
3020 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3021 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3022
! tweak *= x, giving the tweak for block 2
3023 srax %g3, 63, %l7 ! next tweak value
3024 addcc %g2, %g2, %g2
3025 and %l7, 0x87, %l7
3026 .word 0x87b0c223 !addxc %g3,%g3,%g3
3027 xor %l7, %g2, %g2
3028
! tweak for block 2 -> %f8:%f10 (byte-swapped)
3029 .word 0x91b02302 !movxtod %g2,%f8
3030 .word 0x95b02303 !movxtod %g3,%f10
3031 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3032 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3033
3034 xor %g4, %o0, %o0 ! ^= rk[0]
3035 xor %g5, %o1, %o1
3036 xor %g4, %o2, %o2 ! ^= rk[0]
3037 xor %g5, %o3, %o3
3038 .word 0x81b02308 !movxtod %o0,%f0
3039 .word 0x85b02309 !movxtod %o1,%f2
3040 .word 0x89b0230a !movxtod %o2,%f4
3041 .word 0x8db0230b !movxtod %o3,%f6
3042
3043 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3044 .word 0x85b38d82 !fxor %f14,%f2,%f2
3045 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3046 .word 0x8db28d86 !fxor %f10,%f6,%f6
3047
3048 prefetch [%i1 + 63], 22
3049 prefetch [%i0 + 32+63], 20
3050 call _aes256_encrypt_2x
3051 add %i0, 32, %i0
3052
! recompute block-2 tweak for post-whitening, then advance the tweak
3053 .word 0x91b02302 !movxtod %g2,%f8
3054 .word 0x95b02303 !movxtod %g3,%f10
3055
3056 srax %g3, 63, %l7 ! next tweak value
3057 addcc %g2, %g2, %g2
3058 and %l7, 0x87, %l7
3059 .word 0x87b0c223 !addxc %g3,%g3,%g3
3060 xor %l7, %g2, %g2
3061
3062 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3063 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3064
3065 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3066 .word 0x85b38d82 !fxor %f14,%f2,%f2
3067 .word 0x89b20d84 !fxor %f8,%f4,%f4
3068 .word 0x8db28d86 !fxor %f10,%f6,%f6
3069
3070 brnz,pn %l2, 2f ! misaligned output -> partial stores
3071 sub %i2, 2, %i2
3072
3073 std %f0, [%i1 + 0]
3074 std %f2, [%i1 + 8]
3075 std %f4, [%i1 + 16]
3076 std %f6, [%i1 + 24]
3077 brnz,pt %i2, .L256_xts_enloop2x
3078 add %i1, 32, %i1
3079
! keep the last ciphertext block in %f0:%f2 for stealing
3080 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3081 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3082 brnz,pn %i5, .L256_xts_ensteal
3083 nop
3084
3085 ret
3086 restore
3087
3088 .align 16
3089 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3090 ! and ~3x deterioration
3091 ! in inp==out case
3092 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3093 .word 0x95b00902 !faligndata %f0,%f2,%f10
3094 .word 0x99b08904 !faligndata %f2,%f4,%f12
3095 .word 0x9db10906 !faligndata %f4,%f6,%f14
3096 .word 0x81b18906 !faligndata %f6,%f6,%f0
3097
3098 stda %f8, [%i1 + %l3]0xc0 ! partial store
3099 std %f10, [%i1 + 8]
3100 std %f12, [%i1 + 16]
3101 std %f14, [%i1 + 24]
3102 add %i1, 32, %i1
3103 orn %g0, %l3, %l3 ! invert mask for trailing partial store
3104 stda %f0, [%i1 + %l3]0xc0 ! partial store
3105
3106 brnz,pt %i2, .L256_xts_enloop2x+4
3107 orn %g0, %l3, %l3 ! restore the mask
3108
! keep the last ciphertext block in %f0:%f2 for stealing
3109 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3110 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3111 brnz,pn %i5, .L256_xts_ensteal
3112 nop
3113
3114 ret
3115 restore
3116
3117 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned output, >=256 bytes, inp != out; ASI_BLK_INIT
! stores, tail reruns through the loops above
3118 .align 32
3119 .L256_xts_enblk:
3120 add %i1, %i2, %l5
3121 and %l5, 63, %l5 ! tail
3122 sub %i2, %l5, %i2
3123 add %l5, 15, %l5 ! round up to 16n
3124 srlx %i2, 4, %i2
3125 srl %l5, 4, %l5 ! %l5 = tail block count
3126 sub %i2, 1, %i2
3127 add %l5, 1, %l5 ! shift one block into the tail
3128
3129 .L256_xts_enblk2x:
3130 ldx [%i0 + 0], %o0
3131 ldx [%i0 + 8], %o1
3132 ldx [%i0 + 16], %o2
3133 brz,pt %l0, 5f
3134 ldx [%i0 + 24], %o3
3135
! realign two misaligned input blocks into %o0-%o3
3136 ldx [%i0 + 32], %o4
3137 sllx %o0, %l0, %o0
3138 srlx %o1, %l1, %g1
3139 or %g1, %o0, %o0
3140 sllx %o1, %l0, %o1
3141 srlx %o2, %l1, %g1
3142 or %g1, %o1, %o1
3143 sllx %o2, %l0, %o2
3144 srlx %o3, %l1, %g1
3145 or %g1, %o2, %o2
3146 sllx %o3, %l0, %o3
3147 srlx %o4, %l1, %o4
3148 or %o4, %o3, %o3
3149 5:
! tweak for block 1 -> %f12:%f14 (byte-swapped)
3150 .word 0x99b02302 !movxtod %g2,%f12
3151 .word 0x9db02303 !movxtod %g3,%f14
3152 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3153 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3154
! tweak *= x, giving the tweak for block 2
3155 srax %g3, 63, %l7 ! next tweak value
3156 addcc %g2, %g2, %g2
3157 and %l7, 0x87, %l7
3158 .word 0x87b0c223 !addxc %g3,%g3,%g3
3159 xor %l7, %g2, %g2
3160
3161 .word 0x91b02302 !movxtod %g2,%f8
3162 .word 0x95b02303 !movxtod %g3,%f10
3163 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3164 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3165
3166 xor %g4, %o0, %o0 ! ^= rk[0]
3167 xor %g5, %o1, %o1
3168 xor %g4, %o2, %o2 ! ^= rk[0]
3169 xor %g5, %o3, %o3
3170 .word 0x81b02308 !movxtod %o0,%f0
3171 .word 0x85b02309 !movxtod %o1,%f2
3172 .word 0x89b0230a !movxtod %o2,%f4
3173 .word 0x8db0230b !movxtod %o3,%f6
3174
3175 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3176 .word 0x85b38d82 !fxor %f14,%f2,%f2
3177 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3178 .word 0x8db28d86 !fxor %f10,%f6,%f6
3179
3180 prefetch [%i0 + 32+63], 20
3181 call _aes256_encrypt_2x
3182 add %i0, 32, %i0
3183
! recompute block-2 tweak for post-whitening, then advance the tweak
3184 .word 0x91b02302 !movxtod %g2,%f8
3185 .word 0x95b02303 !movxtod %g3,%f10
3186
3187 srax %g3, 63, %l7 ! next tweak value
3188 addcc %g2, %g2, %g2
3189 and %l7, 0x87, %l7
3190 .word 0x87b0c223 !addxc %g3,%g3,%g3
3191 xor %l7, %g2, %g2
3192
3193 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3194 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3195
3196 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3197 .word 0x85b38d82 !fxor %f14,%f2,%f2
3198 .word 0x89b20d84 !fxor %f8,%f4,%f4
3199 .word 0x8db28d86 !fxor %f10,%f6,%f6
3200
3201 subcc %i2, 2, %i2
3202 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3203 add %i1, 8, %i1
3204 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3205 add %i1, 8, %i1
3206 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3207 add %i1, 8, %i1
3208 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3209 bgu,pt SIZE_T_CC, .L256_xts_enblk2x
3210 add %i1, 8, %i1
3211
! fence the ASI_BLK_INIT stores and dispatch the tail
3212 add %l5, %i2, %i2
3213 andcc %i2, 1, %g0 ! is number of blocks even?
3214 membar #StoreLoad|#StoreStore
3215 bnz,pt %icc, .L256_xts_enloop
3216 srl %i2, 0, %i2
3217 brnz,pn %i2, .L256_xts_enloop2x
3218 nop
3219
3220 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3221 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3222 brnz,pn %i5, .L256_xts_ensteal
3223 nop
3224
3225 ret
3226 restore
3227 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ciphertext stealing: the last full ciphertext block sits in %f0:%f2
! and in the stack scratch slot; swap its leading %i5 bytes with the
! trailing partial input, then rerun the single-block loop once to
! encrypt the stitched block in place.
3228 .align 32
3229 .L256_xts_ensteal:
3230 std %f0, [%fp + STACK_BIAS-16] ! copy of output
3231 std %f2, [%fp + STACK_BIAS-8]
3232
3233 srl %l0, 3, %l0 ! bit shift back to byte offset
3234 add %fp, STACK_BIAS-16, %l7
3235 add %i0, %l0, %i0 ! original %i0+%i2&-15
3236 add %i1, %l2, %i1 ! original %i1+%i2&-15
3237 mov 0, %l0
3238 nop ! align
3239
! byte loop: out[i] = saved_cipher[i]; saved[i] = in[i]
3240 .L256_xts_enstealing:
3241 ldub [%i0 + %l0], %o0
3242 ldub [%l7 + %l0], %o1
3243 dec %i5
3244 stb %o0, [%l7 + %l0]
3245 stb %o1, [%i1 + %l0]
3246 brnz %i5, .L256_xts_enstealing
3247 inc %l0
3248
! point inp at the stitched block and out back at the previous block,
! then encrypt it via one more pass through the main loop
3249 mov %l7, %i0
3250 sub %i1, 16, %i1
3251 mov 0, %l0
3252 sub %i1, %l2, %i1
3253 ba .L256_xts_enloop ! one more time
3254 mov 1, %i2 ! %i5 is 0
! not reached: ba above is unconditional
3255 ret
3256 restore
3257 .type aes256_t4_xts_encrypt,#function
3258 .size aes256_t4_xts_encrypt,.-aes256_t4_xts_encrypt
3259 .globl aes256_t4_xts_decrypt
3260 .align 32
3261 aes256_t4_xts_decrypt:
3262 save %sp, -STACK_FRAME-16, %sp
3263 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3264
3265 mov %i5, %o0
3266 add %fp, STACK_BIAS-16, %o1
3267 call aes_t4_encrypt
3268 mov %i4, %o2
3269
3270 add %fp, STACK_BIAS-16, %l7
3271 ldxa [%l7]0x88, %g2
3272 add %fp, STACK_BIAS-8, %l7
3273 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
3274
3275 sethi %hi(0x76543210), %l7
3276 or %l7, %lo(0x76543210), %l7
3277 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
3278
3279 prefetch [%i0], 20
3280 prefetch [%i0 + 63], 20
3281 call _aes256_load_deckey
3282 and %i2, 15, %i5
3283 and %i2, -16, %i2
3284 mov 0, %l7
3285 movrnz %i5, 16, %l7
3286 sub %i2, %l7, %i2
3287
3288 sub %i0, %i1, %l5 ! %i0!=%i1
3289 and %i0, 7, %l0
3290 andn %i0, 7, %i0
3291 sll %l0, 3, %l0
3292 mov 64, %l1
3293 mov 0xff, %l3
3294 sub %l1, %l0, %l1
3295 and %i1, 7, %l2
3296 cmp %i2, 255
3297 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3298 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
3299 brnz,pn %l5, .L256_xts_deblk ! %i0==%i1)
3300 srl %l3, %l2, %l3
3301
3302 andcc %i2, 16, %g0 ! is number of blocks even?
3303 brz,pn %i2, .L256_xts_desteal
3304 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3305 bz %icc, .L256_xts_deloop2x
3306 srlx %i2, 4, %i2
3307 .L256_xts_deloop:
3308 ldx [%i0 + 0], %o0
3309 brz,pt %l0, 4f
3310 ldx [%i0 + 8], %o1
3311
3312 ldx [%i0 + 16], %o2
3313 sllx %o0, %l0, %o0
3314 srlx %o1, %l1, %g1
3315 sllx %o1, %l0, %o1
3316 or %g1, %o0, %o0
3317 srlx %o2, %l1, %o2
3318 or %o2, %o1, %o1
3319 4:
3320 .word 0x99b02302 !movxtod %g2,%f12
3321 .word 0x9db02303 !movxtod %g3,%f14
3322 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3323 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3324
3325 xor %g4, %o0, %o0 ! ^= rk[0]
3326 xor %g5, %o1, %o1
3327 .word 0x81b02308 !movxtod %o0,%f0
3328 .word 0x85b02309 !movxtod %o1,%f2
3329
3330 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3331 .word 0x85b38d82 !fxor %f14,%f2,%f2
3332
3333 prefetch [%i1 + 63], 22
3334 prefetch [%i0 + 16+63], 20
3335 call _aes256_decrypt_1x
3336 add %i0, 16, %i0
3337
3338 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3339 .word 0x85b38d82 !fxor %f14,%f2,%f2
3340
3341 srax %g3, 63, %l7 ! next tweak value
3342 addcc %g2, %g2, %g2
3343 and %l7, 0x87, %l7
3344 .word 0x87b0c223 !addxc %g3,%g3,%g3
3345 xor %l7, %g2, %g2
3346
3347 brnz,pn %l2, 2f
3348 sub %i2, 1, %i2
3349
3350 std %f0, [%i1 + 0]
3351 std %f2, [%i1 + 8]
3352 brnz,pt %i2, .L256_xts_deloop2x
3353 add %i1, 16, %i1
3354
3355 brnz,pn %i5, .L256_xts_desteal
3356 nop
3357
3358 ret
3359 restore
3360
3361 .align 16
3362 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3363 ! and ~3x deterioration
3364 ! in inp==out case
3365 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
3366 .word 0x8db00902 !faligndata %f0,%f2,%f6
3367 .word 0x91b08902 !faligndata %f2,%f2,%f8
3368 stda %f4, [%i1 + %l3]0xc0 ! partial store
3369 std %f6, [%i1 + 8]
3370 add %i1, 16, %i1
3371 orn %g0, %l3, %l3
3372 stda %f8, [%i1 + %l3]0xc0 ! partial store
3373
3374 brnz,pt %i2, .L256_xts_deloop2x+4
3375 orn %g0, %l3, %l3
3376
3377 brnz,pn %i5, .L256_xts_desteal
3378 nop
3379
3380 ret
3381 restore
3382
3383 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3384 .align 32
3385 .L256_xts_deloop2x:
3386 ldx [%i0 + 0], %o0
3387 ldx [%i0 + 8], %o1
3388 ldx [%i0 + 16], %o2
3389 brz,pt %l0, 4f
3390 ldx [%i0 + 24], %o3
3391
3392 ldx [%i0 + 32], %o4
3393 sllx %o0, %l0, %o0
3394 srlx %o1, %l1, %g1
3395 or %g1, %o0, %o0
3396 sllx %o1, %l0, %o1
3397 srlx %o2, %l1, %g1
3398 or %g1, %o1, %o1
3399 sllx %o2, %l0, %o2
3400 srlx %o3, %l1, %g1
3401 or %g1, %o2, %o2
3402 sllx %o3, %l0, %o3
3403 srlx %o4, %l1, %o4
3404 or %o4, %o3, %o3
3405 4:
3406 .word 0x99b02302 !movxtod %g2,%f12
3407 .word 0x9db02303 !movxtod %g3,%f14
3408 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3409 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3410
3411 srax %g3, 63, %l7 ! next tweak value
3412 addcc %g2, %g2, %g2
3413 and %l7, 0x87, %l7
3414 .word 0x87b0c223 !addxc %g3,%g3,%g3
3415 xor %l7, %g2, %g2
3416
3417 .word 0x91b02302 !movxtod %g2,%f8
3418 .word 0x95b02303 !movxtod %g3,%f10
3419 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3420 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3421
3422 xor %g4, %o0, %o0 ! ^= rk[0]
3423 xor %g5, %o1, %o1
3424 xor %g4, %o2, %o2 ! ^= rk[0]
3425 xor %g5, %o3, %o3
3426 .word 0x81b02308 !movxtod %o0,%f0
3427 .word 0x85b02309 !movxtod %o1,%f2
3428 .word 0x89b0230a !movxtod %o2,%f4
3429 .word 0x8db0230b !movxtod %o3,%f6
3430
3431 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3432 .word 0x85b38d82 !fxor %f14,%f2,%f2
3433 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3434 .word 0x8db28d86 !fxor %f10,%f6,%f6
3435
3436 prefetch [%i1 + 63], 22
3437 prefetch [%i0 + 32+63], 20
3438 call _aes256_decrypt_2x
3439 add %i0, 32, %i0
3440
3441 .word 0x91b02302 !movxtod %g2,%f8
3442 .word 0x95b02303 !movxtod %g3,%f10
3443
3444 srax %g3, 63, %l7 ! next tweak value
3445 addcc %g2, %g2, %g2
3446 and %l7, 0x87, %l7
3447 .word 0x87b0c223 !addxc %g3,%g3,%g3
3448 xor %l7, %g2, %g2
3449
3450 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3451 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3452
3453 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3454 .word 0x85b38d82 !fxor %f14,%f2,%f2
3455 .word 0x89b20d84 !fxor %f8,%f4,%f4
3456 .word 0x8db28d86 !fxor %f10,%f6,%f6
3457
3458 brnz,pn %l2, 2f
3459 sub %i2, 2, %i2
3460
3461 std %f0, [%i1 + 0]
3462 std %f2, [%i1 + 8]
3463 std %f4, [%i1 + 16]
3464 std %f6, [%i1 + 24]
3465 brnz,pt %i2, .L256_xts_deloop2x
3466 add %i1, 32, %i1
3467
3468 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3469 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3470 brnz,pn %i5, .L256_xts_desteal
3471 nop
3472
3473 ret
3474 restore
3475
3476 .align 16
3477 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3478 ! and ~3x deterioration
3479 ! in inp==out case
3480 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3481 .word 0x95b00902 !faligndata %f0,%f2,%f10
3482 .word 0x99b08904 !faligndata %f2,%f4,%f12
3483 .word 0x9db10906 !faligndata %f4,%f6,%f14
3484 .word 0x81b18906 !faligndata %f6,%f6,%f0
3485
3486 stda %f8, [%i1 + %l3]0xc0 ! partial store
3487 std %f10, [%i1 + 8]
3488 std %f12, [%i1 + 16]
3489 std %f14, [%i1 + 24]
3490 add %i1, 32, %i1
3491 orn %g0, %l3, %l3
3492 stda %f0, [%i1 + %l3]0xc0 ! partial store
3493
3494 brnz,pt %i2, .L256_xts_deloop2x+4
3495 orn %g0, %l3, %l3
3496
3497 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3498 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3499 brnz,pn %i5, .L256_xts_desteal
3500 nop
3501
3502 ret
3503 restore
3504
3505 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3506 .align 32
3507 .L256_xts_deblk:
3508 add %i1, %i2, %l5
3509 and %l5, 63, %l5 ! tail
3510 sub %i2, %l5, %i2
3511 add %l5, 15, %l5 ! round up to 16n
3512 srlx %i2, 4, %i2
3513 srl %l5, 4, %l5
3514 sub %i2, 1, %i2
3515 add %l5, 1, %l5
3516
3517 .L256_xts_deblk2x:
3518 ldx [%i0 + 0], %o0
3519 ldx [%i0 + 8], %o1
3520 ldx [%i0 + 16], %o2
3521 brz,pt %l0, 5f
3522 ldx [%i0 + 24], %o3
3523
3524 ldx [%i0 + 32], %o4
3525 sllx %o0, %l0, %o0
3526 srlx %o1, %l1, %g1
3527 or %g1, %o0, %o0
3528 sllx %o1, %l0, %o1
3529 srlx %o2, %l1, %g1
3530 or %g1, %o1, %o1
3531 sllx %o2, %l0, %o2
3532 srlx %o3, %l1, %g1
3533 or %g1, %o2, %o2
3534 sllx %o3, %l0, %o3
3535 srlx %o4, %l1, %o4
3536 or %o4, %o3, %o3
3537 5:
3538 .word 0x99b02302 !movxtod %g2,%f12
3539 .word 0x9db02303 !movxtod %g3,%f14
3540 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3541 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3542
3543 srax %g3, 63, %l7 ! next tweak value
3544 addcc %g2, %g2, %g2
3545 and %l7, 0x87, %l7
3546 .word 0x87b0c223 !addxc %g3,%g3,%g3
3547 xor %l7, %g2, %g2
3548
3549 .word 0x91b02302 !movxtod %g2,%f8
3550 .word 0x95b02303 !movxtod %g3,%f10
3551 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3552 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3553
3554 xor %g4, %o0, %o0 ! ^= rk[0]
3555 xor %g5, %o1, %o1
3556 xor %g4, %o2, %o2 ! ^= rk[0]
3557 xor %g5, %o3, %o3
3558 .word 0x81b02308 !movxtod %o0,%f0
3559 .word 0x85b02309 !movxtod %o1,%f2
3560 .word 0x89b0230a !movxtod %o2,%f4
3561 .word 0x8db0230b !movxtod %o3,%f6
3562
3563 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3564 .word 0x85b38d82 !fxor %f14,%f2,%f2
3565 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3566 .word 0x8db28d86 !fxor %f10,%f6,%f6
3567
3568 prefetch [%i0 + 32+63], 20
3569 call _aes256_decrypt_2x
3570 add %i0, 32, %i0
3571
3572 .word 0x91b02302 !movxtod %g2,%f8
3573 .word 0x95b02303 !movxtod %g3,%f10
3574
3575 srax %g3, 63, %l7 ! next tweak value
3576 addcc %g2, %g2, %g2
3577 and %l7, 0x87, %l7
3578 .word 0x87b0c223 !addxc %g3,%g3,%g3
3579 xor %l7, %g2, %g2
3580
3581 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3582 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3583
3584 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3585 .word 0x85b38d82 !fxor %f14,%f2,%f2
3586 .word 0x89b20d84 !fxor %f8,%f4,%f4
3587 .word 0x8db28d86 !fxor %f10,%f6,%f6
3588
3589 subcc %i2, 2, %i2
3590 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3591 add %i1, 8, %i1
3592 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3593 add %i1, 8, %i1
3594 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3595 add %i1, 8, %i1
3596 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3597 bgu,pt SIZE_T_CC, .L256_xts_deblk2x
3598 add %i1, 8, %i1
3599
3600 add %l5, %i2, %i2
3601 andcc %i2, 1, %g0 ! is number of blocks even?
3602 membar #StoreLoad|#StoreStore
3603 bnz,pt %icc, .L256_xts_deloop
3604 srl %i2, 0, %i2
3605 brnz,pn %i2, .L256_xts_deloop2x
3606 nop
3607
3608 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3609 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3610 brnz,pn %i5, .L256_xts_desteal
3611 nop
3612
3613 ret
3614 restore
3615 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3616 .align 32
3617 .L256_xts_desteal:
3618 ldx [%i0 + 0], %o0
3619 brz,pt %l0, 8f
3620 ldx [%i0 + 8], %o1
3621
3622 ldx [%i0 + 16], %o2
3623 sllx %o0, %l0, %o0
3624 srlx %o1, %l1, %g1
3625 sllx %o1, %l0, %o1
3626 or %g1, %o0, %o0
3627 srlx %o2, %l1, %o2
3628 or %o2, %o1, %o1
3629 8:
3630 srax %g3, 63, %l7 ! next tweak value
3631 addcc %g2, %g2, %o2
3632 and %l7, 0x87, %l7
3633 .word 0x97b0c223 !addxc %g3,%g3,%o3
3634 xor %l7, %o2, %o2
3635
3636 .word 0x99b0230a !movxtod %o2,%f12
3637 .word 0x9db0230b !movxtod %o3,%f14
3638 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3639 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3640
3641 xor %g4, %o0, %o0 ! ^= rk[0]
3642 xor %g5, %o1, %o1
3643 .word 0x81b02308 !movxtod %o0,%f0
3644 .word 0x85b02309 !movxtod %o1,%f2
3645
3646 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3647 .word 0x85b38d82 !fxor %f14,%f2,%f2
3648
3649 call _aes256_decrypt_1x
3650 add %i0, 16, %i0
3651
3652 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3653 .word 0x85b38d82 !fxor %f14,%f2,%f2
3654
3655 std %f0, [%fp + STACK_BIAS-16]
3656 std %f2, [%fp + STACK_BIAS-8]
3657
3658 srl %l0, 3, %l0
3659 add %fp, STACK_BIAS-16, %l7
3660 add %i0, %l0, %i0 ! original %i0+%i2&-15
3661 add %i1, %l2, %i1 ! original %i1+%i2&-15
3662 mov 0, %l0
3663 add %i1, 16, %i1
3664 nop ! align
3665
3666 .L256_xts_destealing:
3667 ldub [%i0 + %l0], %o0
3668 ldub [%l7 + %l0], %o1
3669 dec %i5
3670 stb %o0, [%l7 + %l0]
3671 stb %o1, [%i1 + %l0]
3672 brnz %i5, .L256_xts_destealing
3673 inc %l0
3674
3675 mov %l7, %i0
3676 sub %i1, 16, %i1
3677 mov 0, %l0
3678 sub %i1, %l2, %i1
3679 ba .L256_xts_deloop ! one more time
3680 mov 1, %i2 ! %i5 is 0
3681 ret
3682 restore
3683 .type aes256_t4_xts_decrypt,#function
3684 .size aes256_t4_xts_decrypt,.-aes256_t4_xts_decrypt
!----------------------------------------------------------------------
! void aes192_t4_ctr32_encrypt(const u8 *inp, u8 *out, size_t blocks,
!                              const AES_KEY *key, const u8 ivec[16]);
! AES-192 counter mode for SPARC T4.  %i2 arrives as a count of
! 16-byte blocks and is converted to bytes below (sllx %i2,4).
! The 128-bit counter block is read from %i4; only its low 32-bit
! word (%l7) is incremented per block, modulo 2^32 ("ctr32"
! semantics) -- the upper 96 bits are XORed into the round-0 key
! once, up front (%g4/%g5/%f14).
!----------------------------------------------------------------------
3685 .globl	aes192_t4_ctr32_encrypt
3686 .align	32
3687 aes192_t4_ctr32_encrypt:
3688 	save	%sp, -STACK_FRAME, %sp
3689 	srln	%i2, 0, %i2		! needed on v8+, "nop" on v9
3690
3691 	prefetch	[%i0], 20
3692 	prefetch	[%i0 + 63], 20
3693 	call	_aes192_load_enckey
3694 	sllx	%i2, 4, %i2
3695
3696 	ld	[%i4 + 0], %l4	! counter
3697 	ld	[%i4 + 4], %l5
3698 	ld	[%i4 + 8], %l6
3699 	ld	[%i4 + 12], %l7
3700
! fold the constant 96 counter bits into rk[0]; per block only
! %g5^%l7 remains to be computed
3701 	sllx	%l4, 32, %o5
3702 	or	%l5, %o5, %o5
3703 	sllx	%l6, 32, %g1
3704 	xor	%o5, %g4, %g4		! ^= rk[0]
3705 	xor	%g1, %g5, %g5
3706 	.word	0x9db02304 !movxtod	%g4,%f14		! most significant 64 bits
3707
! alignment bookkeeping (same scheme as the other entry points):
! %l0 = inp misalignment in bits, %l1 = 64-%l0, %l2 = out&7,
! %l3 = partial-store mask; block path only if out aligned,
! len >= 256 and inp != out
3708 	sub	%i0, %i1, %l5	! %i0!=%i1
3709 	and	%i0, 7, %l0
3710 	andn	%i0, 7, %i0
3711 	sll	%l0, 3, %l0
3712 	mov	64, %l1
3713 	mov	0xff, %l3
3714 	sub	%l1, %l0, %l1
3715 	and	%i1, 7, %l2
3716 	cmp	%i2, 255
3717 	movrnz	%l2, 0, %l5	! if (	%i1&7 ||
3718 	movleu	SIZE_T_CC, 0, %l5	!	%i2<256 ||
3719 	brnz,pn	%l5, .L192_ctr32_blk	!	%i0==%i1)
3720 	srl	%l3, %l2, %l3
3721
3722 	andcc	%i2, 16, %g0		! is number of blocks even?
3723 	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
3724 	bz	%icc, .L192_ctr32_loop2x
3725 	srlx	%i2, 4, %i2
! single-block loop; input re-aligned into %o0:%o1 when inp&7 != 0
3726 .L192_ctr32_loop:
3727 	ldx	[%i0 + 0], %o0
3728 	brz,pt	%l0, 4f
3729 	ldx	[%i0 + 8], %o1
3730
3731 	ldx	[%i0 + 16], %o2
3732 	sllx	%o0, %l0, %o0
3733 	srlx	%o1, %l1, %g1
3734 	sllx	%o1, %l0, %o1
3735 	or	%g1, %o0, %o0
3736 	srlx	%o2, %l1, %o2
3737 	or	%o2, %o1, %o1
3738 4:
! low half of the keyed counter block, then ctr_lo++ (srl ...,0
! keeps it a 32-bit quantity)
3739 	xor	%g5, %l7, %g1		! ^= rk[0]
3740 	add	%l7, 1, %l7
3741 	.word	0x85b02301 !movxtod	%g1,%f2
3742 	srl	%l7, 0, %l7		! clruw
3743 	prefetch	[%i1 + 63], 22
3744 	prefetch	[%i0 + 16+63], 20
! round 1 is executed inline here, so the 1x subroutine is entered
! 8 bytes (two instructions) past its entry point
3745 	.word	0x88cc040e !aes_eround01	%f16,%f14,%f2,%f4
3746 	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
3747 	call	_aes192_encrypt_1x+8
3748 	add	%i0, 16, %i0
3749
3750 	.word	0x95b02308 !movxtod	%o0,%f10
3751 	.word	0x99b02309 !movxtod	%o1,%f12
3752 	.word	0x81b28d80 !fxor	%f10,%f0,%f0		! ^= inp
3753 	.word	0x85b30d82 !fxor	%f12,%f2,%f2
3754
3755 	brnz,pn	%l2, 2f
3756 	sub	%i2, 1, %i2
3757
3758 	std	%f0, [%i1 + 0]
3759 	std	%f2, [%i1 + 8]
3760 	brnz,pt	%i2, .L192_ctr32_loop2x
3761 	add	%i1, 16, %i1
3762
3763 	ret
3764 	restore
3765
3766 .align	16
3767 2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
3768 					! and ~3x deterioration
3769 					! in inp==out case
3770 	.word	0x89b00900 !faligndata	%f0,%f0,%f4		! handle unaligned output
3771 	.word	0x8db00902 !faligndata	%f0,%f2,%f6
3772 	.word	0x91b08902 !faligndata	%f2,%f2,%f8
3773 	stda	%f4, [%i1 + %l3]0xc0	! partial store
3774 	std	%f6, [%i1 + 8]
3775 	add	%i1, 16, %i1
3776 	orn	%g0, %l3, %l3
3777 	stda	%f8, [%i1 + %l3]0xc0	! partial store
3778
3779 	brnz,pt	%i2, .L192_ctr32_loop2x+4
3780 	orn	%g0, %l3, %l3
3781
3782 	ret
3783 	restore
3784
3785 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3786 .align	32
! two-blocks-per-iteration main loop
3787 .L192_ctr32_loop2x:
3788 	ldx	[%i0 + 0], %o0
3789 	ldx	[%i0 + 8], %o1
3790 	ldx	[%i0 + 16], %o2
3791 	brz,pt	%l0, 4f
3792 	ldx	[%i0 + 24], %o3
3793
3794 	ldx	[%i0 + 32], %o4
3795 	sllx	%o0, %l0, %o0
3796 	srlx	%o1, %l1, %g1
3797 	or	%g1, %o0, %o0
3798 	sllx	%o1, %l0, %o1
3799 	srlx	%o2, %l1, %g1
3800 	or	%g1, %o1, %o1
3801 	sllx	%o2, %l0, %o2
3802 	srlx	%o3, %l1, %g1
3803 	or	%g1, %o2, %o2
3804 	sllx	%o3, %l0, %o3
3805 	srlx	%o4, %l1, %o4
3806 	or	%o4, %o3, %o3
3807 4:
3808 	xor	%g5, %l7, %g1		! ^= rk[0]
3809 	add	%l7, 1, %l7
3810 	.word	0x85b02301 !movxtod	%g1,%f2
3811 	srl	%l7, 0, %l7		! clruw
3812 	xor	%g5, %l7, %g1
3813 	add	%l7, 1, %l7
3814 	.word	0x8db02301 !movxtod	%g1,%f6
3815 	srl	%l7, 0, %l7		! clruw
3816 	prefetch	[%i1 + 63], 22
3817 	prefetch	[%i0 + 32+63], 20
! first round for both streams done inline; enter the 2x subroutine
! 16 bytes (four instructions) past its entry point
3818 	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
3819 	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
3820 	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
3821 	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
3822 	call	_aes192_encrypt_2x+16
3823 	add	%i0, 32, %i0
3824
3825 	.word	0x91b02308 !movxtod	%o0,%f8
3826 	.word	0x95b02309 !movxtod	%o1,%f10
3827 	.word	0x99b0230a !movxtod	%o2,%f12
3828 	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
3829 	.word	0x91b0230b !movxtod	%o3,%f8
3830 	.word	0x85b28d82 !fxor	%f10,%f2,%f2
3831 	.word	0x89b30d84 !fxor	%f12,%f4,%f4
3832 	.word	0x8db20d86 !fxor	%f8,%f6,%f6
3833
3834 	brnz,pn	%l2, 2f
3835 	sub	%i2, 2, %i2
3836
3837 	std	%f0, [%i1 + 0]
3838 	std	%f2, [%i1 + 8]
3839 	std	%f4, [%i1 + 16]
3840 	std	%f6, [%i1 + 24]
3841 	brnz,pt	%i2, .L192_ctr32_loop2x
3842 	add	%i1, 32, %i1
3843
3844 	ret
3845 	restore
3846
3847 .align	16
3848 2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
3849 					! and ~3x deterioration
3850 					! in inp==out case
3851 	.word	0x91b00900 !faligndata	%f0,%f0,%f8		! handle unaligned output
3852 	.word	0x81b00902 !faligndata	%f0,%f2,%f0
3853 	.word	0x85b08904 !faligndata	%f2,%f4,%f2
3854 	.word	0x89b10906 !faligndata	%f4,%f6,%f4
3855 	.word	0x8db18906 !faligndata	%f6,%f6,%f6
3856
3857 	stda	%f8, [%i1 + %l3]0xc0	! partial store
3858 	std	%f0, [%i1 + 8]
3859 	std	%f2, [%i1 + 16]
3860 	std	%f4, [%i1 + 24]
3861 	add	%i1, 32, %i1
3862 	orn	%g0, %l3, %l3
3863 	stda	%f6, [%i1 + %l3]0xc0	! partial store
3864
3865 	brnz,pt	%i2, .L192_ctr32_loop2x+4
3866 	orn	%g0, %l3, %l3
3867
3868 	ret
3869 	restore
3870
3871 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3872 .align	32
! bulk path: output written through ASI 0xe2 (ASI_BLK_INIT,
! T4-specific); the rounded-up tail is redone by the scalar loops
! after the membar below
3873 .L192_ctr32_blk:
3874 	add	%i1, %i2, %l5
3875 	and	%l5, 63, %l5	! tail
3876 	sub	%i2, %l5, %i2
3877 	add	%l5, 15, %l5	! round up to 16n
3878 	srlx	%i2, 4, %i2
3879 	srl	%l5, 4, %l5
3880 	sub	%i2, 1, %i2
3881 	add	%l5, 1, %l5
3882
3883 .L192_ctr32_blk_loop2x:
3884 	ldx	[%i0 + 0], %o0
3885 	ldx	[%i0 + 8], %o1
3886 	ldx	[%i0 + 16], %o2
3887 	brz,pt	%l0, 5f
3888 	ldx	[%i0 + 24], %o3
3889
3890 	ldx	[%i0 + 32], %o4
3891 	sllx	%o0, %l0, %o0
3892 	srlx	%o1, %l1, %g1
3893 	or	%g1, %o0, %o0
3894 	sllx	%o1, %l0, %o1
3895 	srlx	%o2, %l1, %g1
3896 	or	%g1, %o1, %o1
3897 	sllx	%o2, %l0, %o2
3898 	srlx	%o3, %l1, %g1
3899 	or	%g1, %o2, %o2
3900 	sllx	%o3, %l0, %o3
3901 	srlx	%o4, %l1, %o4
3902 	or	%o4, %o3, %o3
3903 5:
3904 	xor	%g5, %l7, %g1		! ^= rk[0]
3905 	add	%l7, 1, %l7
3906 	.word	0x85b02301 !movxtod	%g1,%f2
3907 	srl	%l7, 0, %l7		! clruw
3908 	xor	%g5, %l7, %g1
3909 	add	%l7, 1, %l7
3910 	.word	0x8db02301 !movxtod	%g1,%f6
3911 	srl	%l7, 0, %l7		! clruw
3912 	prefetch	[%i0 + 32+63], 20
3913 	.word	0x90cc040e !aes_eround01	%f16,%f14,%f2,%f8
3914 	.word	0x84cc842e !aes_eround23	%f18,%f14,%f2,%f2
3915 	.word	0x94cc0c0e !aes_eround01	%f16,%f14,%f6,%f10
3916 	.word	0x8ccc8c2e !aes_eround23	%f18,%f14,%f6,%f6
3917 	call	_aes192_encrypt_2x+16
3918 	add	%i0, 32, %i0
3919 	subcc	%i2, 2, %i2
3920
3921 	.word	0x91b02308 !movxtod	%o0,%f8
3922 	.word	0x95b02309 !movxtod	%o1,%f10
3923 	.word	0x99b0230a !movxtod	%o2,%f12
3924 	.word	0x81b20d80 !fxor	%f8,%f0,%f0		! ^= inp
3925 	.word	0x91b0230b !movxtod	%o3,%f8
3926 	.word	0x85b28d82 !fxor	%f10,%f2,%f2
3927 	.word	0x89b30d84 !fxor	%f12,%f4,%f4
3928 	.word	0x8db20d86 !fxor	%f8,%f6,%f6
3929
3930 	stda	%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3931 	add	%i1, 8, %i1
3932 	stda	%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3933 	add	%i1, 8, %i1
3934 	stda	%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3935 	add	%i1, 8, %i1
3936 	stda	%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
3937 	bgu,pt	SIZE_T_CC, .L192_ctr32_blk_loop2x
3938 	add	%i1, 8, %i1
3939
3940 	add	%l5, %i2, %i2
3941 	andcc	%i2, 1, %g0		! is number of blocks even?
3942 	membar	#StoreLoad|#StoreStore
3943 	bnz,pt	%icc, .L192_ctr32_loop
3944 	srl	%i2, 0, %i2
3945 	brnz,pn	%i2, .L192_ctr32_loop2x
3946 	nop
3947
3948 	ret
3949 	restore
3950 .type	aes192_t4_ctr32_encrypt,#function
3951 .size	aes192_t4_ctr32_encrypt,.-aes192_t4_ctr32_encrypt
!----------------------------------------------------------------------
! void aes192_t4_cbc_decrypt(const u8 *inp, u8 *out, size_t len,
!                            const AES_KEY *key, u8 ivec[16]);
! AES-192 CBC decryption for SPARC T4.  len is in bytes; len==0
! returns immediately.  The IV lives in %f12/%f14: each decrypted
! block is XORed with it, and the just-consumed ciphertext becomes
! the next IV.  The final IV is written back to ivec for chaining.
!----------------------------------------------------------------------
3952 .globl	aes192_t4_cbc_decrypt
3953 .align	32
3954 aes192_t4_cbc_decrypt:
3955 	save	%sp, -STACK_FRAME, %sp
3956 	cmp	%i2, 0
3957 	be,pn	SIZE_T_CC, .L192_cbc_dec_abort
3958 	srln	%i2, 0, %i2		! needed on v8+, "nop" on v9
3959 	sub	%i0, %i1, %l5	! %i0!=%i1
3960 	ld	[%i4 + 0], %f12	! load ivec
3961 	ld	[%i4 + 4], %f13
3962 	ld	[%i4 + 8], %f14
3963 	ld	[%i4 + 12], %f15
3964 	prefetch	[%i0], 20
3965 	prefetch	[%i0 + 63], 20
3966 	call	_aes192_load_deckey
3967 	and	%i0, 7, %l0
! alignment bookkeeping: %l0 = inp misalignment in bits, %l1 = 64-%l0,
! %l2 = out&7, %l3 = partial-store mask; block path only if out is
! 8-byte aligned, len >= 256 and inp != out
3968 	andn	%i0, 7, %i0
3969 	sll	%l0, 3, %l0
3970 	mov	64, %l1
3971 	mov	0xff, %l3
3972 	sub	%l1, %l0, %l1
3973 	and	%i1, 7, %l2
3974 	cmp	%i2, 255
3975 	movrnz	%l2, 0, %l5	! if (	%i1&7 ||
3976 	movleu	SIZE_T_CC, 0, %l5	!	%i2<256 ||
3977 	brnz,pn	%l5, .L192cbc_dec_blk	!	%i0==%i1)
3978 	srl	%l3, %l2, %l3
3979
3980 	andcc	%i2, 16, %g0		! is number of blocks even?
3981 	srlx	%i2, 4, %i2
3982 	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
3983 	bz	%icc, .L192_cbc_dec_loop2x
3984 	prefetch	[%i1], 22
! single-block loop; input re-aligned into %o0:%o1 when inp&7 != 0
3985 .L192_cbc_dec_loop:
3986 	ldx	[%i0 + 0], %o0
3987 	brz,pt	%l0, 4f
3988 	ldx	[%i0 + 8], %o1
3989
3990 	ldx	[%i0 + 16], %o2
3991 	sllx	%o0, %l0, %o0
3992 	srlx	%o1, %l1, %g1
3993 	sllx	%o1, %l0, %o1
3994 	or	%g1, %o0, %o0
3995 	srlx	%o2, %l1, %o2
3996 	or	%o2, %o1, %o1
3997 4:
3998 	xor	%g4, %o0, %o2		! ^= rk[0]
3999 	xor	%g5, %o1, %o3
4000 	.word	0x81b0230a !movxtod	%o2,%f0
4001 	.word	0x85b0230b !movxtod	%o3,%f2
4002
4003 	prefetch	[%i1 + 63], 22
4004 	prefetch	[%i0 + 16+63], 20
4005 	call	_aes192_decrypt_1x
4006 	add	%i0, 16, %i0
4007
! plaintext = D(ciphertext) ^ IV; the raw ciphertext (still in
! %o0/%o1) becomes the next IV
4008 	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4009 	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4010 	.word	0x99b02308 !movxtod	%o0,%f12
4011 	.word	0x9db02309 !movxtod	%o1,%f14
4012
4013 	brnz,pn	%l2, 2f
4014 	sub	%i2, 1, %i2
4015
4016 	std	%f0, [%i1 + 0]
4017 	std	%f2, [%i1 + 8]
4018 	brnz,pt	%i2, .L192_cbc_dec_loop2x
4019 	add	%i1, 16, %i1
! done: write the final IV back to ivec
4020 	st	%f12, [%i4 + 0]
4021 	st	%f13, [%i4 + 4]
4022 	st	%f14, [%i4 + 8]
4023 	st	%f15, [%i4 + 12]
4024 .L192_cbc_dec_abort:
4025 	ret
4026 	restore
4027
4028 .align	16
4029 2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
4030 					! and ~3x deterioration
4031 					! in inp==out case
4032 	.word	0x89b00900 !faligndata	%f0,%f0,%f4	! handle unaligned output
4033 	.word	0x8db00902 !faligndata	%f0,%f2,%f6
4034 	.word	0x91b08902 !faligndata	%f2,%f2,%f8
4035
4036 	stda	%f4, [%i1 + %l3]0xc0	! partial store
4037 	std	%f6, [%i1 + 8]
4038 	add	%i1, 16, %i1
4039 	orn	%g0, %l3, %l3
4040 	stda	%f8, [%i1 + %l3]0xc0	! partial store
4041
4042 	brnz,pt	%i2, .L192_cbc_dec_loop2x+4
4043 	orn	%g0, %l3, %l3
4044 	st	%f12, [%i4 + 0]
4045 	st	%f13, [%i4 + 4]
4046 	st	%f14, [%i4 + 8]
4047 	st	%f15, [%i4 + 12]
4048 	ret
4049 	restore
4050
4051 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4052 .align	32
! two-blocks-per-iteration main loop
4053 .L192_cbc_dec_loop2x:
4054 	ldx	[%i0 + 0], %o0
4055 	ldx	[%i0 + 8], %o1
4056 	ldx	[%i0 + 16], %o2
4057 	brz,pt	%l0, 4f
4058 	ldx	[%i0 + 24], %o3
4059
4060 	ldx	[%i0 + 32], %o4
4061 	sllx	%o0, %l0, %o0
4062 	srlx	%o1, %l1, %g1
4063 	or	%g1, %o0, %o0
4064 	sllx	%o1, %l0, %o1
4065 	srlx	%o2, %l1, %g1
4066 	or	%g1, %o1, %o1
4067 	sllx	%o2, %l0, %o2
4068 	srlx	%o3, %l1, %g1
4069 	or	%g1, %o2, %o2
4070 	sllx	%o3, %l0, %o3
4071 	srlx	%o4, %l1, %o4
4072 	or	%o4, %o3, %o3
4073 4:
4074 	xor	%g4, %o0, %o4		! ^= rk[0]
4075 	xor	%g5, %o1, %o5
4076 	.word	0x81b0230c !movxtod	%o4,%f0
4077 	.word	0x85b0230d !movxtod	%o5,%f2
4078 	xor	%g4, %o2, %o4
4079 	xor	%g5, %o3, %o5
4080 	.word	0x89b0230c !movxtod	%o4,%f4
4081 	.word	0x8db0230d !movxtod	%o5,%f6
4082
4083 	prefetch	[%i1 + 63], 22
4084 	prefetch	[%i0 + 32+63], 20
4085 	call	_aes192_decrypt_2x
4086 	add	%i0, 32, %i0
4087
! block0 ^= old IV, block1 ^= ciphertext0; ciphertext1 (in %o2/%o3)
! becomes the next IV
4088 	.word	0x91b02308 !movxtod	%o0,%f8
4089 	.word	0x95b02309 !movxtod	%o1,%f10
4090 	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4091 	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4092 	.word	0x99b0230a !movxtod	%o2,%f12
4093 	.word	0x9db0230b !movxtod	%o3,%f14
4094 	.word	0x89b20d84 !fxor	%f8,%f4,%f4
4095 	.word	0x8db28d86 !fxor	%f10,%f6,%f6
4096
4097 	brnz,pn	%l2, 2f
4098 	sub	%i2, 2, %i2
4099
4100 	std	%f0, [%i1 + 0]
4101 	std	%f2, [%i1 + 8]
4102 	std	%f4, [%i1 + 16]
4103 	std	%f6, [%i1 + 24]
4104 	brnz,pt	%i2, .L192_cbc_dec_loop2x
4105 	add	%i1, 32, %i1
4106 	st	%f12, [%i4 + 0]
4107 	st	%f13, [%i4 + 4]
4108 	st	%f14, [%i4 + 8]
4109 	st	%f15, [%i4 + 12]
4110 	ret
4111 	restore
4112
4113 .align	16
4114 2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
4115 					! and ~3x deterioration
4116 					! in inp==out case
4117 	.word	0x91b00900 !faligndata	%f0,%f0,%f8	! handle unaligned output
4118 	.word	0x81b00902 !faligndata	%f0,%f2,%f0
4119 	.word	0x85b08904 !faligndata	%f2,%f4,%f2
4120 	.word	0x89b10906 !faligndata	%f4,%f6,%f4
4121 	.word	0x8db18906 !faligndata	%f6,%f6,%f6
4122 	stda	%f8, [%i1 + %l3]0xc0	! partial store
4123 	std	%f0, [%i1 + 8]
4124 	std	%f2, [%i1 + 16]
4125 	std	%f4, [%i1 + 24]
4126 	add	%i1, 32, %i1
4127 	orn	%g0, %l3, %l3
4128 	stda	%f6, [%i1 + %l3]0xc0	! partial store
4129
4130 	brnz,pt	%i2, .L192_cbc_dec_loop2x+4
4131 	orn	%g0, %l3, %l3
4132 	st	%f12, [%i4 + 0]
4133 	st	%f13, [%i4 + 4]
4134 	st	%f14, [%i4 + 8]
4135 	st	%f15, [%i4 + 12]
4136 	ret
4137 	restore
4138
4139 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4140 .align	32
! bulk path: output via ASI 0xe2 (ASI_BLK_INIT, T4-specific); the
! rounded-up tail is redone by the scalar loops after the membar
4141 .L192cbc_dec_blk:
4142 	add	%i1, %i2, %l5
4143 	and	%l5, 63, %l5	! tail
4144 	sub	%i2, %l5, %i2
4145 	add	%l5, 15, %l5	! round up to 16n
4146 	srlx	%i2, 4, %i2
4147 	srl	%l5, 4, %l5
4148 	sub	%i2, 1, %i2
4149 	add	%l5, 1, %l5
4150
4151 .L192_cbc_dec_blk_loop2x:
4152 	ldx	[%i0 + 0], %o0
4153 	ldx	[%i0 + 8], %o1
4154 	ldx	[%i0 + 16], %o2
4155 	brz,pt	%l0, 5f
4156 	ldx	[%i0 + 24], %o3
4157
4158 	ldx	[%i0 + 32], %o4
4159 	sllx	%o0, %l0, %o0
4160 	srlx	%o1, %l1, %g1
4161 	or	%g1, %o0, %o0
4162 	sllx	%o1, %l0, %o1
4163 	srlx	%o2, %l1, %g1
4164 	or	%g1, %o1, %o1
4165 	sllx	%o2, %l0, %o2
4166 	srlx	%o3, %l1, %g1
4167 	or	%g1, %o2, %o2
4168 	sllx	%o3, %l0, %o3
4169 	srlx	%o4, %l1, %o4
4170 	or	%o4, %o3, %o3
4171 5:
4172 	xor	%g4, %o0, %o4		! ^= rk[0]
4173 	xor	%g5, %o1, %o5
4174 	.word	0x81b0230c !movxtod	%o4,%f0
4175 	.word	0x85b0230d !movxtod	%o5,%f2
4176 	xor	%g4, %o2, %o4
4177 	xor	%g5, %o3, %o5
4178 	.word	0x89b0230c !movxtod	%o4,%f4
4179 	.word	0x8db0230d !movxtod	%o5,%f6
4180
4181 	prefetch	[%i0 + 32+63], 20
4182 	call	_aes192_decrypt_2x
4183 	add	%i0, 32, %i0
4184 	subcc	%i2, 2, %i2
4185
4186 	.word	0x91b02308 !movxtod	%o0,%f8
4187 	.word	0x95b02309 !movxtod	%o1,%f10
4188 	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4189 	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4190 	.word	0x99b0230a !movxtod	%o2,%f12
4191 	.word	0x9db0230b !movxtod	%o3,%f14
4192 	.word	0x89b20d84 !fxor	%f8,%f4,%f4
4193 	.word	0x8db28d86 !fxor	%f10,%f6,%f6
4194
4195 	stda	%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4196 	add	%i1, 8, %i1
4197 	stda	%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4198 	add	%i1, 8, %i1
4199 	stda	%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4200 	add	%i1, 8, %i1
4201 	stda	%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4202 	bgu,pt	SIZE_T_CC, .L192_cbc_dec_blk_loop2x
4203 	add	%i1, 8, %i1
4204
4205 	add	%l5, %i2, %i2
4206 	andcc	%i2, 1, %g0		! is number of blocks even?
4207 	membar	#StoreLoad|#StoreStore
4208 	bnz,pt	%icc, .L192_cbc_dec_loop
4209 	srl	%i2, 0, %i2
4210 	brnz,pn	%i2, .L192_cbc_dec_loop2x
4211 	nop
4212 	st	%f12, [%i4 + 0]	! write out ivec
4213 	st	%f13, [%i4 + 4]
4214 	st	%f14, [%i4 + 8]
4215 	st	%f15, [%i4 + 12]
4216 	ret
4217 	restore
4218 .type	aes192_t4_cbc_decrypt,#function
4219 .size	aes192_t4_cbc_decrypt,.-aes192_t4_cbc_decrypt
!----------------------------------------------------------------------
! void aes256_t4_cbc_decrypt(const u8 *inp, u8 *out, size_t len,
!                            const AES_KEY *key, u8 ivec[16]);
! AES-256 CBC decryption for SPARC T4; identical structure to the
! AES-192 variant above, but round keys come from _aes256_load_deckey
! and the block routines are _aes256_decrypt_1x/_aes256_decrypt_2x.
! len is in bytes; len==0 returns immediately.  IV in %f12/%f14;
! the consumed ciphertext becomes the next IV and the final IV is
! written back to ivec.
!----------------------------------------------------------------------
4220 .globl	aes256_t4_cbc_decrypt
4221 .align	32
4222 aes256_t4_cbc_decrypt:
4223 	save	%sp, -STACK_FRAME, %sp
4224 	cmp	%i2, 0
4225 	be,pn	SIZE_T_CC, .L256_cbc_dec_abort
4226 	srln	%i2, 0, %i2		! needed on v8+, "nop" on v9
4227 	sub	%i0, %i1, %l5	! %i0!=%i1
4228 	ld	[%i4 + 0], %f12	! load ivec
4229 	ld	[%i4 + 4], %f13
4230 	ld	[%i4 + 8], %f14
4231 	ld	[%i4 + 12], %f15
4232 	prefetch	[%i0], 20
4233 	prefetch	[%i0 + 63], 20
4234 	call	_aes256_load_deckey
4235 	and	%i0, 7, %l0
! alignment bookkeeping: %l0 = inp misalignment in bits, %l1 = 64-%l0,
! %l2 = out&7, %l3 = partial-store mask; block path only if out is
! 8-byte aligned, len >= 256 and inp != out
4236 	andn	%i0, 7, %i0
4237 	sll	%l0, 3, %l0
4238 	mov	64, %l1
4239 	mov	0xff, %l3
4240 	sub	%l1, %l0, %l1
4241 	and	%i1, 7, %l2
4242 	cmp	%i2, 255
4243 	movrnz	%l2, 0, %l5	! if (	%i1&7 ||
4244 	movleu	SIZE_T_CC, 0, %l5	!	%i2<256 ||
4245 	brnz,pn	%l5, .L256cbc_dec_blk	!	%i0==%i1)
4246 	srl	%l3, %l2, %l3
4247
4248 	andcc	%i2, 16, %g0		! is number of blocks even?
4249 	srlx	%i2, 4, %i2
4250 	.word	0xb3b64340 !alignaddrl	%i1,%g0,%i1
4251 	bz	%icc, .L256_cbc_dec_loop2x
4252 	prefetch	[%i1], 22
! single-block loop; input re-aligned into %o0:%o1 when inp&7 != 0
4253 .L256_cbc_dec_loop:
4254 	ldx	[%i0 + 0], %o0
4255 	brz,pt	%l0, 4f
4256 	ldx	[%i0 + 8], %o1
4257
4258 	ldx	[%i0 + 16], %o2
4259 	sllx	%o0, %l0, %o0
4260 	srlx	%o1, %l1, %g1
4261 	sllx	%o1, %l0, %o1
4262 	or	%g1, %o0, %o0
4263 	srlx	%o2, %l1, %o2
4264 	or	%o2, %o1, %o1
4265 4:
4266 	xor	%g4, %o0, %o2		! ^= rk[0]
4267 	xor	%g5, %o1, %o3
4268 	.word	0x81b0230a !movxtod	%o2,%f0
4269 	.word	0x85b0230b !movxtod	%o3,%f2
4270
4271 	prefetch	[%i1 + 63], 22
4272 	prefetch	[%i0 + 16+63], 20
4273 	call	_aes256_decrypt_1x
4274 	add	%i0, 16, %i0
4275
! plaintext = D(ciphertext) ^ IV; raw ciphertext becomes the next IV
4276 	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4277 	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4278 	.word	0x99b02308 !movxtod	%o0,%f12
4279 	.word	0x9db02309 !movxtod	%o1,%f14
4280
4281 	brnz,pn	%l2, 2f
4282 	sub	%i2, 1, %i2
4283
4284 	std	%f0, [%i1 + 0]
4285 	std	%f2, [%i1 + 8]
4286 	brnz,pt	%i2, .L256_cbc_dec_loop2x
4287 	add	%i1, 16, %i1
! done: write the final IV back to ivec
4288 	st	%f12, [%i4 + 0]
4289 	st	%f13, [%i4 + 4]
4290 	st	%f14, [%i4 + 8]
4291 	st	%f15, [%i4 + 12]
4292 .L256_cbc_dec_abort:
4293 	ret
4294 	restore
4295
4296 .align	16
4297 2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
4298 					! and ~3x deterioration
4299 					! in inp==out case
4300 	.word	0x89b00900 !faligndata	%f0,%f0,%f4	! handle unaligned output
4301 	.word	0x8db00902 !faligndata	%f0,%f2,%f6
4302 	.word	0x91b08902 !faligndata	%f2,%f2,%f8
4303
4304 	stda	%f4, [%i1 + %l3]0xc0	! partial store
4305 	std	%f6, [%i1 + 8]
4306 	add	%i1, 16, %i1
4307 	orn	%g0, %l3, %l3
4308 	stda	%f8, [%i1 + %l3]0xc0	! partial store
4309
4310 	brnz,pt	%i2, .L256_cbc_dec_loop2x+4
4311 	orn	%g0, %l3, %l3
4312 	st	%f12, [%i4 + 0]
4313 	st	%f13, [%i4 + 4]
4314 	st	%f14, [%i4 + 8]
4315 	st	%f15, [%i4 + 12]
4316 	ret
4317 	restore
4318
4319 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4320 .align	32
! two-blocks-per-iteration main loop
4321 .L256_cbc_dec_loop2x:
4322 	ldx	[%i0 + 0], %o0
4323 	ldx	[%i0 + 8], %o1
4324 	ldx	[%i0 + 16], %o2
4325 	brz,pt	%l0, 4f
4326 	ldx	[%i0 + 24], %o3
4327
4328 	ldx	[%i0 + 32], %o4
4329 	sllx	%o0, %l0, %o0
4330 	srlx	%o1, %l1, %g1
4331 	or	%g1, %o0, %o0
4332 	sllx	%o1, %l0, %o1
4333 	srlx	%o2, %l1, %g1
4334 	or	%g1, %o1, %o1
4335 	sllx	%o2, %l0, %o2
4336 	srlx	%o3, %l1, %g1
4337 	or	%g1, %o2, %o2
4338 	sllx	%o3, %l0, %o3
4339 	srlx	%o4, %l1, %o4
4340 	or	%o4, %o3, %o3
4341 4:
4342 	xor	%g4, %o0, %o4		! ^= rk[0]
4343 	xor	%g5, %o1, %o5
4344 	.word	0x81b0230c !movxtod	%o4,%f0
4345 	.word	0x85b0230d !movxtod	%o5,%f2
4346 	xor	%g4, %o2, %o4
4347 	xor	%g5, %o3, %o5
4348 	.word	0x89b0230c !movxtod	%o4,%f4
4349 	.word	0x8db0230d !movxtod	%o5,%f6
4350
4351 	prefetch	[%i1 + 63], 22
4352 	prefetch	[%i0 + 32+63], 20
4353 	call	_aes256_decrypt_2x
4354 	add	%i0, 32, %i0
4355
! block0 ^= old IV, block1 ^= ciphertext0; ciphertext1 (in %o2/%o3)
! becomes the next IV
4356 	.word	0x91b02308 !movxtod	%o0,%f8
4357 	.word	0x95b02309 !movxtod	%o1,%f10
4358 	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4359 	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4360 	.word	0x99b0230a !movxtod	%o2,%f12
4361 	.word	0x9db0230b !movxtod	%o3,%f14
4362 	.word	0x89b20d84 !fxor	%f8,%f4,%f4
4363 	.word	0x8db28d86 !fxor	%f10,%f6,%f6
4364
4365 	brnz,pn	%l2, 2f
4366 	sub	%i2, 2, %i2
4367
4368 	std	%f0, [%i1 + 0]
4369 	std	%f2, [%i1 + 8]
4370 	std	%f4, [%i1 + 16]
4371 	std	%f6, [%i1 + 24]
4372 	brnz,pt	%i2, .L256_cbc_dec_loop2x
4373 	add	%i1, 32, %i1
4374 	st	%f12, [%i4 + 0]
4375 	st	%f13, [%i4 + 4]
4376 	st	%f14, [%i4 + 8]
4377 	st	%f15, [%i4 + 12]
4378 	ret
4379 	restore
4380
4381 .align	16
4382 2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
4383 					! and ~3x deterioration
4384 					! in inp==out case
4385 	.word	0x91b00900 !faligndata	%f0,%f0,%f8	! handle unaligned output
4386 	.word	0x81b00902 !faligndata	%f0,%f2,%f0
4387 	.word	0x85b08904 !faligndata	%f2,%f4,%f2
4388 	.word	0x89b10906 !faligndata	%f4,%f6,%f4
4389 	.word	0x8db18906 !faligndata	%f6,%f6,%f6
4390 	stda	%f8, [%i1 + %l3]0xc0	! partial store
4391 	std	%f0, [%i1 + 8]
4392 	std	%f2, [%i1 + 16]
4393 	std	%f4, [%i1 + 24]
4394 	add	%i1, 32, %i1
4395 	orn	%g0, %l3, %l3
4396 	stda	%f6, [%i1 + %l3]0xc0	! partial store
4397
4398 	brnz,pt	%i2, .L256_cbc_dec_loop2x+4
4399 	orn	%g0, %l3, %l3
4400 	st	%f12, [%i4 + 0]
4401 	st	%f13, [%i4 + 4]
4402 	st	%f14, [%i4 + 8]
4403 	st	%f15, [%i4 + 12]
4404 	ret
4405 	restore
4406
4407 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
4408 .align	32
! bulk path: output via ASI 0xe2 (ASI_BLK_INIT, T4-specific); the
! rounded-up tail is redone by the scalar loops after the membar
4409 .L256cbc_dec_blk:
4410 	add	%i1, %i2, %l5
4411 	and	%l5, 63, %l5	! tail
4412 	sub	%i2, %l5, %i2
4413 	add	%l5, 15, %l5	! round up to 16n
4414 	srlx	%i2, 4, %i2
4415 	srl	%l5, 4, %l5
4416 	sub	%i2, 1, %i2
4417 	add	%l5, 1, %l5
4418
4419 .L256_cbc_dec_blk_loop2x:
4420 	ldx	[%i0 + 0], %o0
4421 	ldx	[%i0 + 8], %o1
4422 	ldx	[%i0 + 16], %o2
4423 	brz,pt	%l0, 5f
4424 	ldx	[%i0 + 24], %o3
4425
4426 	ldx	[%i0 + 32], %o4
4427 	sllx	%o0, %l0, %o0
4428 	srlx	%o1, %l1, %g1
4429 	or	%g1, %o0, %o0
4430 	sllx	%o1, %l0, %o1
4431 	srlx	%o2, %l1, %g1
4432 	or	%g1, %o1, %o1
4433 	sllx	%o2, %l0, %o2
4434 	srlx	%o3, %l1, %g1
4435 	or	%g1, %o2, %o2
4436 	sllx	%o3, %l0, %o3
4437 	srlx	%o4, %l1, %o4
4438 	or	%o4, %o3, %o3
4439 5:
4440 	xor	%g4, %o0, %o4		! ^= rk[0]
4441 	xor	%g5, %o1, %o5
4442 	.word	0x81b0230c !movxtod	%o4,%f0
4443 	.word	0x85b0230d !movxtod	%o5,%f2
4444 	xor	%g4, %o2, %o4
4445 	xor	%g5, %o3, %o5
4446 	.word	0x89b0230c !movxtod	%o4,%f4
4447 	.word	0x8db0230d !movxtod	%o5,%f6
4448
4449 	prefetch	[%i0 + 32+63], 20
4450 	call	_aes256_decrypt_2x
4451 	add	%i0, 32, %i0
4452 	subcc	%i2, 2, %i2
4453
4454 	.word	0x91b02308 !movxtod	%o0,%f8
4455 	.word	0x95b02309 !movxtod	%o1,%f10
4456 	.word	0x81b30d80 !fxor	%f12,%f0,%f0		! ^= ivec
4457 	.word	0x85b38d82 !fxor	%f14,%f2,%f2
4458 	.word	0x99b0230a !movxtod	%o2,%f12
4459 	.word	0x9db0230b !movxtod	%o3,%f14
4460 	.word	0x89b20d84 !fxor	%f8,%f4,%f4
4461 	.word	0x8db28d86 !fxor	%f10,%f6,%f6
4462
4463 	stda	%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4464 	add	%i1, 8, %i1
4465 	stda	%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4466 	add	%i1, 8, %i1
4467 	stda	%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4468 	add	%i1, 8, %i1
4469 	stda	%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
4470 	bgu,pt	SIZE_T_CC, .L256_cbc_dec_blk_loop2x
4471 	add	%i1, 8, %i1
4472
4473 	add	%l5, %i2, %i2
4474 	andcc	%i2, 1, %g0		! is number of blocks even?
4475 	membar	#StoreLoad|#StoreStore
4476 	bnz,pt	%icc, .L256_cbc_dec_loop
4477 	srl	%i2, 0, %i2
4478 	brnz,pn	%i2, .L256_cbc_dec_loop2x
4479 	nop
4480 	st	%f12, [%i4 + 0]	! write out ivec
4481 	st	%f13, [%i4 + 4]
4482 	st	%f14, [%i4 + 8]
4483 	st	%f15, [%i4 + 12]
4484 	ret
4485 	restore
4486 .type	aes256_t4_cbc_decrypt,#function
4487 .size	aes256_t4_cbc_decrypt,.-aes256_t4_cbc_decrypt
4488 .align 32
4489 _aes256_decrypt_1x:
!
! Decrypt one 128-bit block with AES-256 (14 rounds: 13 aes_dround01/23
! pairs plus a final aes_dround01_l/aes_dround23_l pair).
!
! In:   %f0:%f2   = 128-bit state (NOTE(review): the visible _2x call
!                   site XORs rk[0] into the block before calling;
!                   presumably the same holds for this 1x helper —
!                   confirm against its callers)
!       %i3       = key-schedule base pointer
!       %f16-%f62 = preloaded round keys; the keys for the last two
!                   rounds are fetched from [%i3 + 208..232] below
! Out:  %f0:%f2   = decrypted state; %f4 is used as scratch
! Note: %f16/%f18/%f20/%f22 are reloaded from [%i3 + 16..40] on the way
!       out, restoring the key registers clobbered above — presumably so
!       the caller's preloaded schedule stays intact for the next block
!       (TODO confirm against callers).
! Leaf routine: returns with retl; the delay slot finishes the reload.
!
4490 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4491 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4492 ldd [%i3 + 208], %f16 ! fetch a late-round key pair
4493 ldd [%i3 + 216], %f18
4494 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
4495 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
4496 ldd [%i3 + 224], %f20 ! fetch the final-round key pair
4497 ldd [%i3 + 232], %f22
4498 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
4499 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4500 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
4501 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
4502 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
4503 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4504 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
4505 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
4506 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
4507 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4508 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
4509 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
4510 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
4511 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4512 .word 0x80cd4444 !aes_dround01 %f52,%f4,%f2,%f0
4513 .word 0x84cdc464 !aes_dround23 %f54,%f4,%f2,%f2
4514 .word 0x88ce4440 !aes_dround01 %f56,%f0,%f2,%f4
4515 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4516 .word 0x80cf4444 !aes_dround01 %f60,%f4,%f2,%f0
4517 .word 0x84cfc464 !aes_dround23 %f62,%f4,%f2,%f2
4518 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4519 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4520 ldd [%i3 + 16], %f16 ! restore clobbered key regs from schedule start
4521 ldd [%i3 + 24], %f18
4522 .word 0x80cd04c4 !aes_dround01_l %f20,%f4,%f2,%f0
4523 .word 0x84cd84e4 !aes_dround23_l %f22,%f4,%f2,%f2
4524 ldd [%i3 + 32], %f20
4525 retl
4526 ldd [%i3 + 40], %f22 ! delay slot: last restore
4527 .type _aes256_decrypt_1x,#function
4528 .size _aes256_decrypt_1x,.-_aes256_decrypt_1x
4529
4530 .align 32
4531 _aes256_decrypt_2x:
!
! Decrypt two independent 128-bit blocks with AES-256 (14 rounds),
! interleaved two-wide to hide the aes_dround latency.
!
! In:   %f0:%f2 and %f4:%f6 = the two states (the CBC call site above
!                   XORs rk[0] into both blocks before calling)
!       %i3       = key-schedule base pointer
!       %f16-%f62 = preloaded round keys; the keys for the last two
!                   rounds are fetched from [%i3 + 208..232] below
! Out:  %f0:%f2 and %f4:%f6 = decrypted states
! Scratch: %f8/%f10 hold intermediate round results.
! Note: %f16/%f18/%f20/%f22 are reloaded from [%i3 + 16..40] on the way
!       out, restoring the key registers clobbered above — presumably so
!       the caller's preloaded schedule stays intact for the next pair
!       (TODO confirm against callers).
! Leaf routine: returns with retl; the delay slot finishes the reload.
!
4532 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4533 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4534 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4535 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
4536 ldd [%i3 + 208], %f16 ! fetch a late-round key pair
4537 ldd [%i3 + 216], %f18
4538 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
4539 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
4540 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
4541 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
4542 ldd [%i3 + 224], %f20 ! fetch the final-round key pair
4543 ldd [%i3 + 232], %f22
4544 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
4545 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4546 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
4547 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
4548 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
4549 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
4550 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
4551 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
4552 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
4553 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4554 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
4555 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
4556 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
4557 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
4558 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
4559 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
4560 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
4561 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4562 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
4563 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
4564 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
4565 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
4566 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
4567 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
4568 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
4569 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4570 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
4571 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
4572 .word 0x80cd4448 !aes_dround01 %f52,%f8,%f2,%f0
4573 .word 0x84cdc468 !aes_dround23 %f54,%f8,%f2,%f2
4574 .word 0x88cd4c4a !aes_dround01 %f52,%f10,%f6,%f4
4575 .word 0x8ccdcc6a !aes_dround23 %f54,%f10,%f6,%f6
4576 .word 0x90ce4440 !aes_dround01 %f56,%f0,%f2,%f8
4577 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4578 .word 0x94ce4c44 !aes_dround01 %f56,%f4,%f6,%f10
4579 .word 0x8ccecc64 !aes_dround23 %f58,%f4,%f6,%f6
4580 .word 0x80cf4448 !aes_dround01 %f60,%f8,%f2,%f0
4581 .word 0x84cfc468 !aes_dround23 %f62,%f8,%f2,%f2
4582 .word 0x88cf4c4a !aes_dround01 %f60,%f10,%f6,%f4
4583 .word 0x8ccfcc6a !aes_dround23 %f62,%f10,%f6,%f6
4584 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4585 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4586 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4587 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
4588 ldd [%i3 + 16], %f16 ! restore clobbered key regs from schedule start
4589 ldd [%i3 + 24], %f18
4590 .word 0x80cd04c8 !aes_dround01_l %f20,%f8,%f2,%f0
4591 .word 0x84cd84e8 !aes_dround23_l %f22,%f8,%f2,%f2
4592 .word 0x88cd0cca !aes_dround01_l %f20,%f10,%f6,%f4
4593 .word 0x8ccd8cea !aes_dround23_l %f22,%f10,%f6,%f6
4594 ldd [%i3 + 32], %f20
4595 retl
4596 ldd [%i3 + 40], %f22 ! delay slot: last restore
4597 .type _aes256_decrypt_2x,#function
4598 .size _aes256_decrypt_2x,.-_aes256_decrypt_2x
4599
4600 .align 32
4601 _aes192_decrypt_1x:
!
! Decrypt one 128-bit block with AES-192 (12 rounds: 11 aes_dround01/23
! pairs plus a final aes_dround01_l/aes_dround23_l pair).
!
! In:   %f0:%f2   = 128-bit state (NOTE(review): callers presumably XOR
!                   rk[0] in before the call, as the AES-256 CBC call
!                   site above does — confirm)
!       %f16-%f62 = preloaded round keys; unlike the AES-256 variant the
!                   whole 12-round schedule fits, so there are no
!                   mid-stream key reloads and %i3 is not touched here
! Out:  %f0:%f2   = decrypted state; %f4 is used as scratch
! Leaf routine: returns with retl; the final aes_dround23_l executes in
! the delay slot.
!
4602 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4603 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4604 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
4605 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
4606 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
4607 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4608 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
4609 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
4610 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
4611 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4612 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
4613 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
4614 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
4615 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4616 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
4617 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
4618 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
4619 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4620 .word 0x80cd4444 !aes_dround01 %f52,%f4,%f2,%f0
4621 .word 0x84cdc464 !aes_dround23 %f54,%f4,%f2,%f2
4622 .word 0x88ce4440 !aes_dround01 %f56,%f0,%f2,%f4
4623 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4624 .word 0x80cf44c4 !aes_dround01_l %f60,%f4,%f2,%f0
4625 retl
4626 .word 0x84cfc4e4 !aes_dround23_l %f62,%f4,%f2,%f2 (delay slot)
4627 .type _aes192_decrypt_1x,#function
4628 .size _aes192_decrypt_1x,.-_aes192_decrypt_1x
4629
4630 .align 32
4631 _aes192_decrypt_2x:
!
! Decrypt two independent 128-bit blocks with AES-192 (12 rounds),
! interleaved two-wide to hide the aes_dround latency.
!
! In:   %f0:%f2 and %f4:%f6 = the two states (NOTE(review): callers
!                   presumably XOR rk[0] in first, as the AES-256 CBC
!                   call site above does — confirm)
!       %f16-%f62 = preloaded round keys; the whole 12-round schedule
!                   fits, so there are no mid-stream key reloads and
!                   %i3 is not touched here
! Out:  %f0:%f2 and %f4:%f6 = decrypted states
! Scratch: %f8/%f10 hold intermediate round results.
! Leaf routine: returns with retl; the final aes_dround23_l executes in
! the delay slot.
!
4632 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4633 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4634 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4635 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
4636 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
4637 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
4638 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
4639 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
4640 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
4641 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4642 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
4643 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
4644 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
4645 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
4646 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
4647 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
4648 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
4649 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4650 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
4651 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
4652 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
4653 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
4654 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
4655 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
4656 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
4657 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4658 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
4659 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
4660 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
4661 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
4662 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
4663 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
4664 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
4665 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4666 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
4667 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
4668 .word 0x80cd4448 !aes_dround01 %f52,%f8,%f2,%f0
4669 .word 0x84cdc468 !aes_dround23 %f54,%f8,%f2,%f2
4670 .word 0x88cd4c4a !aes_dround01 %f52,%f10,%f6,%f4
4671 .word 0x8ccdcc6a !aes_dround23 %f54,%f10,%f6,%f6
4672 .word 0x90ce4440 !aes_dround01 %f56,%f0,%f2,%f8
4673 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4674 .word 0x94ce4c44 !aes_dround01 %f56,%f4,%f6,%f10
4675 .word 0x8ccecc64 !aes_dround23 %f58,%f4,%f6,%f6
4676 .word 0x80cf44c8 !aes_dround01_l %f60,%f8,%f2,%f0
4677 .word 0x84cfc4e8 !aes_dround23_l %f62,%f8,%f2,%f2
4678 .word 0x88cf4cca !aes_dround01_l %f60,%f10,%f6,%f4
4679 retl
4680 .word 0x8ccfccea !aes_dround23_l %f62,%f10,%f6,%f6 (delay slot)
4681 .type _aes192_decrypt_2x,#function
4682 .size _aes192_decrypt_2x,.-_aes192_decrypt_2x
4683 .asciz "AES for SPARC T4, David S. Miller, Andy Polyakov"
4684 .align 4
4685