aest4-sparcv9.S revision 1.1.1.2 1 #include "sparc_arch.h"
2
3 #ifdef __arch64__
4 .register %g2,#scratch
5 .register %g3,#scratch
6 #endif
7
8 .text
9
!----------------------------------------------------------------------
! aes_t4_encrypt(in, out, key)
!   %o0 = in   (one 16-byte block; any alignment)
!   %o1 = out  (any alignment)
!   %o2 = key schedule: rk[0] at +0/+8, round count (32-bit) at +240
! Single-block AES encryption using the SPARC T4 AES instructions.
! The AES/VIS opcodes are hand-assembled as .word constants; the
! intended mnemonic is in each trailing comment.
! Clobbers: %o3-%o5, %g1, %g4, %g5, %f0-%f18, %gsr (via alignaddrl).
!----------------------------------------------------------------------
10 .globl aes_t4_encrypt
11 .align 32
12 aes_t4_encrypt:
13 andcc %o0, 7, %g1 ! is input aligned?
14 andn %o0, 7, %o0
15
! %g4:%g5 = rk[0] (first 128-bit round key)
16 ldx [%o2 + 0], %g4
17 ldx [%o2 + 8], %g5
18
19 ldx [%o0 + 0], %o4
20 bz,pt %icc, 1f
21 ldx [%o0 + 8], %o5
! unaligned input: read a third doubleword and shift/OR the three
! reads so %o4:%o5 end up holding the 16 input bytes
22 ldx [%o0 + 16], %o0
23 sll %g1, 3, %g1
24 sub %g0, %g1, %o3
25 sllx %o4, %g1, %o4
26 sllx %o5, %g1, %g1
27 srlx %o5, %o3, %o5
28 srlx %o0, %o3, %o3
29 or %o5, %o4, %o4
30 or %o3, %g1, %o5
31 1:
! %o3 = rounds; pre-load round keys 1-2 and fold rk[0] into the input
32 ld [%o2 + 240], %o3
33 ldd [%o2 + 16], %f12
34 ldd [%o2 + 24], %f14
35 xor %g4, %o4, %o4
36 xor %g5, %o5, %o5
37 .word 0x81b0230c !movxtod %o4,%f0
38 .word 0x85b0230d !movxtod %o5,%f2
! the .Lenc loop runs rounds/2 - 1 times, two AES rounds per pass
39 srl %o3, 1, %o3
40 ldd [%o2 + 32], %f16
41 sub %o3, 1, %o3
42 ldd [%o2 + 40], %f18
43 add %o2, 48, %o2
44
45 .Lenc:
46 .word 0x88cb0400 !aes_eround01 %f12,%f0,%f2,%f4
47 .word 0x84cb8420 !aes_eround23 %f14,%f0,%f2,%f2
48 ldd [%o2 + 0], %f12
49 ldd [%o2 + 8], %f14
50 sub %o3,1,%o3
51 .word 0x80cc0404 !aes_eround01 %f16,%f4,%f2,%f0
52 .word 0x84cc8424 !aes_eround23 %f18,%f4,%f2,%f2
53 ldd [%o2 + 16], %f16
54 ldd [%o2 + 24], %f18
55 brnz,pt %o3, .Lenc
56 add %o2, 32, %o2
57
! final two rounds; the *_l forms apply the last-round transform
58 andcc %o1, 7, %o4 ! is output aligned?
59 .word 0x88cb0400 !aes_eround01 %f12,%f0,%f2,%f4
60 .word 0x84cb8420 !aes_eround23 %f14,%f0,%f2,%f2
61 .word 0x80cc0484 !aes_eround01_l %f16,%f4,%f2,%f0
62 .word 0x84cc84a4 !aes_eround23_l %f18,%f4,%f2,%f2
63
64 bnz,pn %icc, 2f
65 nop
66
! aligned output: ciphertext is in %f0:%f2, store it directly
67 std %f0, [%o1 + 0]
68 retl
69 std %f2, [%o1 + 8]
70
! unaligned output: realign through faligndata and write the ragged
! first/last doublewords with edge-masked partial stores (ASI 0xc0);
! %o5 holds the byte-enable mask, inverted for the trailing edge
71 2: .word 0x93b24340 !alignaddrl %o1,%g0,%o1
72 mov 0xff, %o5
73 srl %o5, %o4, %o5
74
75 .word 0x89b00900 !faligndata %f0,%f0,%f4
76 .word 0x8db00902 !faligndata %f0,%f2,%f6
77 .word 0x91b08902 !faligndata %f2,%f2,%f8
78
79 stda %f4, [%o1 + %o5]0xc0 ! partial store
80 std %f6, [%o1 + 8]
81 add %o1, 16, %o1
82 orn %g0, %o5, %o5
83 retl
84 stda %f8, [%o1 + %o5]0xc0 ! partial store
85 .type aes_t4_encrypt,#function
86 .size aes_t4_encrypt,.-aes_t4_encrypt
87
!----------------------------------------------------------------------
! aes_t4_decrypt(in, out, key)
!   %o0 = in   (one 16-byte block; any alignment)
!   %o1 = out  (any alignment)
!   %o2 = key schedule: rk[0] at +0/+8, round count (32-bit) at +240
! Single-block AES decryption; structurally identical to
! aes_t4_encrypt but using the aes_dround* opcodes.
! Clobbers: %o3-%o5, %g1, %g4, %g5, %f0-%f18, %gsr (via alignaddrl).
!----------------------------------------------------------------------
88 .globl aes_t4_decrypt
89 .align 32
90 aes_t4_decrypt:
91 andcc %o0, 7, %g1 ! is input aligned?
92 andn %o0, 7, %o0
93
! %g4:%g5 = rk[0]
94 ldx [%o2 + 0], %g4
95 ldx [%o2 + 8], %g5
96
97 ldx [%o0 + 0], %o4
98 bz,pt %icc, 1f
99 ldx [%o0 + 8], %o5
! unaligned input: merge three doubleword reads into %o4:%o5
100 ldx [%o0 + 16], %o0
101 sll %g1, 3, %g1
102 sub %g0, %g1, %o3
103 sllx %o4, %g1, %o4
104 sllx %o5, %g1, %g1
105 srlx %o5, %o3, %o5
106 srlx %o0, %o3, %o3
107 or %o5, %o4, %o4
108 or %o3, %g1, %o5
109 1:
! %o3 = rounds; fold rk[0] into the input, pre-load next round keys
110 ld [%o2 + 240], %o3
111 ldd [%o2 + 16], %f12
112 ldd [%o2 + 24], %f14
113 xor %g4, %o4, %o4
114 xor %g5, %o5, %o5
115 .word 0x81b0230c !movxtod %o4,%f0
116 .word 0x85b0230d !movxtod %o5,%f2
! the .Ldec loop runs rounds/2 - 1 times, two rounds per pass
117 srl %o3, 1, %o3
118 ldd [%o2 + 32], %f16
119 sub %o3, 1, %o3
120 ldd [%o2 + 40], %f18
121 add %o2, 48, %o2
122
123 .Ldec:
124 .word 0x88cb0440 !aes_dround01 %f12,%f0,%f2,%f4
125 .word 0x84cb8460 !aes_dround23 %f14,%f0,%f2,%f2
126 ldd [%o2 + 0], %f12
127 ldd [%o2 + 8], %f14
128 sub %o3,1,%o3
129 .word 0x80cc0444 !aes_dround01 %f16,%f4,%f2,%f0
130 .word 0x84cc8464 !aes_dround23 %f18,%f4,%f2,%f2
131 ldd [%o2 + 16], %f16
132 ldd [%o2 + 24], %f18
133 brnz,pt %o3, .Ldec
134 add %o2, 32, %o2
135
! final two rounds with the last-round (*_l) forms
136 andcc %o1, 7, %o4 ! is output aligned?
137 .word 0x88cb0440 !aes_dround01 %f12,%f0,%f2,%f4
138 .word 0x84cb8460 !aes_dround23 %f14,%f0,%f2,%f2
139 .word 0x80cc04c4 !aes_dround01_l %f16,%f4,%f2,%f0
140 .word 0x84cc84e4 !aes_dround23_l %f18,%f4,%f2,%f2
141
142 bnz,pn %icc, 2f
143 nop
144
! aligned output: store plaintext from %f0:%f2
145 std %f0, [%o1 + 0]
146 retl
147 std %f2, [%o1 + 8]
148
! unaligned output: faligndata + edge-masked partial stores (ASI 0xc0)
149 2: .word 0x93b24340 !alignaddrl %o1,%g0,%o1
150 mov 0xff, %o5
151 srl %o5, %o4, %o5
152
153 .word 0x89b00900 !faligndata %f0,%f0,%f4
154 .word 0x8db00902 !faligndata %f0,%f2,%f6
155 .word 0x91b08902 !faligndata %f2,%f2,%f8
156
157 stda %f4, [%o1 + %o5]0xc0 ! partial store
158 std %f6, [%o1 + 8]
159 add %o1, 16, %o1
160 orn %g0, %o5, %o5
161 retl
162 stda %f8, [%o1 + %o5]0xc0 ! partial store
163 .type aes_t4_decrypt,#function
164 .size aes_t4_decrypt,.-aes_t4_decrypt
!----------------------------------------------------------------------
! aes_t4_set_encrypt_key(userKey, bits, key)
!   %o0 = userKey (any alignment), %o1 = bits (128/192/256),
!   %o2 = output key schedule
! Expands the user key with the T4 aes_kexpand* instructions,
! stores the round count at key+240 (10/12/14) and returns 0 in %o0.
! Key-length dispatch: bits < 192 -> .L128, == 192 -> .L192,
! otherwise fall through to the 256-bit path.
! Unaligned user keys are realigned with alignaddr/faligndata
! (one extra doubleword is read past the key for the shift-in).
!----------------------------------------------------------------------
165 .globl aes_t4_set_encrypt_key
166 .align 32
167 aes_t4_set_encrypt_key:
168 .Lset_encrypt_key:
169 and %o0, 7, %o3
170 .word 0x91b20300 !alignaddr %o0,%g0,%o0
171 cmp %o1, 192
172 ldd [%o0 + 0], %f0
173 bl,pt %icc,.L128
174 ldd [%o0 + 8], %f2
175
176 be,pt %icc,.L192
177 ldd [%o0 + 16], %f4
178 brz,pt %o3, .L256aligned
179 ldd [%o0 + 24], %f6
180
! 256-bit key was unaligned: shift the 5 loaded doublewords into place
181 ldd [%o0 + 32], %f8
182 .word 0x81b00902 !faligndata %f0,%f2,%f0
183 .word 0x85b08904 !faligndata %f2,%f4,%f2
184 .word 0x89b10906 !faligndata %f4,%f6,%f4
185 .word 0x8db18908 !faligndata %f6,%f8,%f6
! AES-256 expansion: 32-byte key in %f0-%f6; each group below stores
! the current four round-key halves then derives the next via
! kexpand1 (with incrementing rcon index) / kexpand0 / kexpand2
186 .L256aligned:
187 std %f0, [%o2 + 0]
188 .word 0x80c80106 !aes_kexpand1 %f0,%f6,0,%f0
189 std %f2, [%o2 + 8]
190 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
191 std %f4, [%o2 + 16]
192 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
193 std %f6, [%o2 + 24]
194 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
195 std %f0, [%o2 + 32]
196 .word 0x80c80306 !aes_kexpand1 %f0,%f6,1,%f0
197 std %f2, [%o2 + 40]
198 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
199 std %f4, [%o2 + 48]
200 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
201 std %f6, [%o2 + 56]
202 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
203 std %f0, [%o2 + 64]
204 .word 0x80c80506 !aes_kexpand1 %f0,%f6,2,%f0
205 std %f2, [%o2 + 72]
206 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
207 std %f4, [%o2 + 80]
208 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
209 std %f6, [%o2 + 88]
210 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
211 std %f0, [%o2 + 96]
212 .word 0x80c80706 !aes_kexpand1 %f0,%f6,3,%f0
213 std %f2, [%o2 + 104]
214 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
215 std %f4, [%o2 + 112]
216 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
217 std %f6, [%o2 + 120]
218 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
219 std %f0, [%o2 + 128]
220 .word 0x80c80906 !aes_kexpand1 %f0,%f6,4,%f0
221 std %f2, [%o2 + 136]
222 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
223 std %f4, [%o2 + 144]
224 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
225 std %f6, [%o2 + 152]
226 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
227 std %f0, [%o2 + 160]
228 .word 0x80c80b06 !aes_kexpand1 %f0,%f6,5,%f0
229 std %f2, [%o2 + 168]
230 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
231 std %f4, [%o2 + 176]
232 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
233 std %f6, [%o2 + 184]
234 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
235 std %f0, [%o2 + 192]
236 .word 0x80c80d06 !aes_kexpand1 %f0,%f6,6,%f0
237 std %f2, [%o2 + 200]
238 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
239 std %f4, [%o2 + 208]
240 std %f6, [%o2 + 216]
241 std %f0, [%o2 + 224]
242 std %f2, [%o2 + 232]
243
! 14 rounds for AES-256; return 0 (success)
244 mov 14, %o3
245 st %o3, [%o2 + 240]
246 retl
247 xor %o0, %o0, %o0
248
249 .align 16
! AES-192 path: 24-byte key in %f0-%f4
250 .L192:
251 brz,pt %o3, .L192aligned
252 nop
253
! unaligned 192-bit key: shift the 4 loaded doublewords into place
254 ldd [%o0 + 24], %f6
255 .word 0x81b00902 !faligndata %f0,%f2,%f0
256 .word 0x85b08904 !faligndata %f2,%f4,%f2
257 .word 0x89b10906 !faligndata %f4,%f6,%f4
258 .L192aligned:
259 std %f0, [%o2 + 0]
260 .word 0x80c80104 !aes_kexpand1 %f0,%f4,0,%f0
261 std %f2, [%o2 + 8]
262 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
263 std %f4, [%o2 + 16]
264 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
265 std %f0, [%o2 + 24]
266 .word 0x80c80304 !aes_kexpand1 %f0,%f4,1,%f0
267 std %f2, [%o2 + 32]
268 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
269 std %f4, [%o2 + 40]
270 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
271 std %f0, [%o2 + 48]
272 .word 0x80c80504 !aes_kexpand1 %f0,%f4,2,%f0
273 std %f2, [%o2 + 56]
274 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
275 std %f4, [%o2 + 64]
276 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
277 std %f0, [%o2 + 72]
278 .word 0x80c80704 !aes_kexpand1 %f0,%f4,3,%f0
279 std %f2, [%o2 + 80]
280 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
281 std %f4, [%o2 + 88]
282 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
283 std %f0, [%o2 + 96]
284 .word 0x80c80904 !aes_kexpand1 %f0,%f4,4,%f0
285 std %f2, [%o2 + 104]
286 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
287 std %f4, [%o2 + 112]
288 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
289 std %f0, [%o2 + 120]
290 .word 0x80c80b04 !aes_kexpand1 %f0,%f4,5,%f0
291 std %f2, [%o2 + 128]
292 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
293 std %f4, [%o2 + 136]
294 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
295 std %f0, [%o2 + 144]
296 .word 0x80c80d04 !aes_kexpand1 %f0,%f4,6,%f0
297 std %f2, [%o2 + 152]
298 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
299 std %f4, [%o2 + 160]
300 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
301 std %f0, [%o2 + 168]
302 .word 0x80c80f04 !aes_kexpand1 %f0,%f4,7,%f0
303 std %f2, [%o2 + 176]
304 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
305 std %f4, [%o2 + 184]
306 std %f0, [%o2 + 192]
307 std %f2, [%o2 + 200]
308
! 12 rounds for AES-192; return 0 (success)
309 mov 12, %o3
310 st %o3, [%o2 + 240]
311 retl
312 xor %o0, %o0, %o0
313
314 .align 16
! AES-128 path: 16-byte key in %f0-%f2
315 .L128:
316 brz,pt %o3, .L128aligned
317 nop
318
! unaligned 128-bit key: shift the 3 loaded doublewords into place
319 ldd [%o0 + 16], %f4
320 .word 0x81b00902 !faligndata %f0,%f2,%f0
321 .word 0x85b08904 !faligndata %f2,%f4,%f2
322 .L128aligned:
323 std %f0, [%o2 + 0]
324 .word 0x80c80102 !aes_kexpand1 %f0,%f2,0,%f0
325 std %f2, [%o2 + 8]
326 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
327 std %f0, [%o2 + 16]
328 .word 0x80c80302 !aes_kexpand1 %f0,%f2,1,%f0
329 std %f2, [%o2 + 24]
330 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
331 std %f0, [%o2 + 32]
332 .word 0x80c80502 !aes_kexpand1 %f0,%f2,2,%f0
333 std %f2, [%o2 + 40]
334 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
335 std %f0, [%o2 + 48]
336 .word 0x80c80702 !aes_kexpand1 %f0,%f2,3,%f0
337 std %f2, [%o2 + 56]
338 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
339 std %f0, [%o2 + 64]
340 .word 0x80c80902 !aes_kexpand1 %f0,%f2,4,%f0
341 std %f2, [%o2 + 72]
342 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
343 std %f0, [%o2 + 80]
344 .word 0x80c80b02 !aes_kexpand1 %f0,%f2,5,%f0
345 std %f2, [%o2 + 88]
346 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
347 std %f0, [%o2 + 96]
348 .word 0x80c80d02 !aes_kexpand1 %f0,%f2,6,%f0
349 std %f2, [%o2 + 104]
350 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
351 std %f0, [%o2 + 112]
352 .word 0x80c80f02 !aes_kexpand1 %f0,%f2,7,%f0
353 std %f2, [%o2 + 120]
354 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
355 std %f0, [%o2 + 128]
356 .word 0x80c81102 !aes_kexpand1 %f0,%f2,8,%f0
357 std %f2, [%o2 + 136]
358 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
359 std %f0, [%o2 + 144]
360 .word 0x80c81302 !aes_kexpand1 %f0,%f2,9,%f0
361 std %f2, [%o2 + 152]
362 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
363 std %f0, [%o2 + 160]
364 std %f2, [%o2 + 168]
365
! 10 rounds for AES-128; return 0 (success)
366 mov 10, %o3
367 st %o3, [%o2 + 240]
368 retl
369 xor %o0, %o0, %o0
370 .type aes_t4_set_encrypt_key,#function
371 .size aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
372
!----------------------------------------------------------------------
! aes_t4_set_decrypt_key(userKey, bits, key)
! Builds the encryption schedule via .Lset_encrypt_key (return
! address preserved in %o5 across the call, since this is a leaf-style
! call with no register window), then reverses the round-key order
! in place: .Lkey_flip swaps 32-byte chunks from both ends of the
! schedule, working inward.  Returns 0 in %o0.
!----------------------------------------------------------------------
373 .globl aes_t4_set_decrypt_key
374 .align 32
375 aes_t4_set_decrypt_key:
376 mov %o7, %o5
377 call .Lset_encrypt_key
378 nop
379
380 mov %o5, %o7
381 sll %o3, 4, %o0 ! %o3 is number of rounds
382 add %o3, 2, %o3
383 add %o2, %o0, %o0 ! %o0=%o2+16*rounds
384 srl %o3, 2, %o3 ! %o3=(rounds+2)/4
385
! swap the 32 bytes at %o2 (front cursor) with the 32 bytes that
! mirror them around the end cursor %o0; note the halves at %o0 are
! themselves exchanged (+0/+8 <-> -16/-8) to keep 16-byte key order
386 .Lkey_flip:
387 ldd [%o2 + 0], %f0
388 ldd [%o2 + 8], %f2
389 ldd [%o2 + 16], %f4
390 ldd [%o2 + 24], %f6
391 ldd [%o0 + 0], %f8
392 ldd [%o0 + 8], %f10
393 ldd [%o0 - 16], %f12
394 ldd [%o0 - 8], %f14
395 sub %o3, 1, %o3
396 std %f0, [%o0 + 0]
397 std %f2, [%o0 + 8]
398 std %f4, [%o0 - 16]
399 std %f6, [%o0 - 8]
400 std %f8, [%o2 + 0]
401 std %f10, [%o2 + 8]
402 std %f12, [%o2 + 16]
403 std %f14, [%o2 + 24]
404 add %o2, 32, %o2
405 brnz %o3, .Lkey_flip
406 sub %o0, 32, %o0
407
! return 0 (success)
408 retl
409 xor %o0, %o0, %o0
410 .type aes_t4_set_decrypt_key,#function
411 .size aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
!----------------------------------------------------------------------
! _aes128_encrypt_1x: encrypt one block held in %f0:%f2.
! Assumes the 10-round AES-128 schedule was preloaded by
! _aes128_loadkey: round keys 1..10 in %f16-%f54 (rk[0] must have
! been xor-ed into the block by the caller, who holds it in %g4:%g5).
! Result replaces %f0:%f2; %f4 is scratch.  Last round uses the
! *_l (final-round) opcodes.  Callers may enter at +8 to skip the
! first round pair (see ctr32 path).
!----------------------------------------------------------------------
412 .align 32
413 _aes128_encrypt_1x:
414 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
415 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
416 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
417 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
418 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
419 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
420 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
421 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
422 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
423 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
424 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
425 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
426 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
427 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
428 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
429 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
430 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
431 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
432 .word 0x80cd4484 !aes_eround01_l %f52,%f4,%f2,%f0
433 retl
434 .word 0x84cdc4a4 !aes_eround23_l %f54,%f4,%f2,%f2
435 .type _aes128_encrypt_1x,#function
436 .size _aes128_encrypt_1x,.-_aes128_encrypt_1x
437
!----------------------------------------------------------------------
! _aes128_encrypt_2x: encrypt two independent blocks in parallel,
! block A in %f0:%f2 and block B in %f4:%f6, interleaving the round
! instructions for throughput.  Same preconditions as
! _aes128_encrypt_1x (schedule in %f16-%f54, rk[0] already applied).
! Results replace %f0:%f2 / %f4:%f6; %f8,%f10 are scratch.
! Callers may enter at +16 to skip the first round pair.
!----------------------------------------------------------------------
438 .align 32
439 _aes128_encrypt_2x:
440 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
441 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
442 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
443 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
444 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
445 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
446 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
447 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
448 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
449 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
450 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
451 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
452 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
453 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
454 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
455 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
456 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
457 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
458 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
459 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
460 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
461 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
462 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
463 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
464 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
465 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
466 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
467 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
468 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
469 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
470 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
471 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
472 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
473 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
474 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
475 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
476 .word 0x80cd4488 !aes_eround01_l %f52,%f8,%f2,%f0
477 .word 0x84cdc4a8 !aes_eround23_l %f54,%f8,%f2,%f2
478 .word 0x88cd4c8a !aes_eround01_l %f52,%f10,%f6,%f4
479 retl
480 .word 0x8ccdccaa !aes_eround23_l %f54,%f10,%f6,%f6
481 .type _aes128_encrypt_2x,#function
482 .size _aes128_encrypt_2x,.-_aes128_encrypt_2x
483
!----------------------------------------------------------------------
! _aes128_loadkey: preload the whole AES-128 schedule from the key
! at %i3 into registers for the bulk-mode routines:
!   %g4:%g5   = rk[0]   (applied by callers via xor on integer side)
!   %f16-%f54 = rk[1..10]
! Used for both directions, hence the _load_enckey/_load_deckey
! aliases below.  Called from inside a register window (args in %i*).
!----------------------------------------------------------------------
484 .align 32
485 _aes128_loadkey:
486 ldx [%i3 + 0], %g4
487 ldx [%i3 + 8], %g5
488 ldd [%i3 + 16], %f16
489 ldd [%i3 + 24], %f18
490 ldd [%i3 + 32], %f20
491 ldd [%i3 + 40], %f22
492 ldd [%i3 + 48], %f24
493 ldd [%i3 + 56], %f26
494 ldd [%i3 + 64], %f28
495 ldd [%i3 + 72], %f30
496 ldd [%i3 + 80], %f32
497 ldd [%i3 + 88], %f34
498 ldd [%i3 + 96], %f36
499 ldd [%i3 + 104], %f38
500 ldd [%i3 + 112], %f40
501 ldd [%i3 + 120], %f42
502 ldd [%i3 + 128], %f44
503 ldd [%i3 + 136], %f46
504 ldd [%i3 + 144], %f48
505 ldd [%i3 + 152], %f50
506 ldd [%i3 + 160], %f52
507 ldd [%i3 + 168], %f54
508 retl
509 nop
510 .type _aes128_loadkey,#function
511 .size _aes128_loadkey,.-_aes128_loadkey
512 _aes128_load_enckey=_aes128_loadkey
513 _aes128_load_deckey=_aes128_loadkey
514
!----------------------------------------------------------------------
! aes128_t4_cbc_encrypt(inp, out, len, key, ivec)
!   %i0 = inp, %i1 = out, %i2 = len in bytes, %i3 = key schedule,
!   %i4 = ivec (updated on return)
! AES-128 CBC encryption.  Three paths:
!   - .L128cbc_enc_blk       : out 8-byte aligned AND len >= 128 AND
!                              inp != out -> ASI_BLK_INIT store loop
!   - .L128_cbc_enc_loop     : general loop, aligned-output stores
!   - the "2:" tail of the loop handles unaligned output with
!     faligndata + edge-masked partial stores (ASI 0xc0)
! Running IV is kept in %f0:%f2 across iterations; the final IV is
! written back to %i4.  %l0/%l1 hold input realignment shift counts,
! %l3 the partial-store mask, %l5 the inp-out distance / path flag.
! NOTE(review): srln/SIZE_T_CC/STACK_FRAME are macros from
! sparc_arch.h selecting v8+/v9 behavior — defined outside this view.
!----------------------------------------------------------------------
515 .globl aes128_t4_cbc_encrypt
516 .align 32
517 aes128_t4_cbc_encrypt:
518 save %sp, -STACK_FRAME, %sp
519 cmp %i2, 0
520 be,pn SIZE_T_CC, .L128_cbc_enc_abort
521 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
522 sub %i0, %i1, %l5 ! %i0!=%i1
! load ivec into %f0-%f3 (two 64-bit halves as four 32-bit words)
523 ld [%i4 + 0], %f0
524 ld [%i4 + 4], %f1
525 ld [%i4 + 8], %f2
526 ld [%i4 + 12], %f3
527 prefetch [%i0], 20
528 prefetch [%i0 + 63], 20
529 call _aes128_load_enckey
530 and %i0, 7, %l0
531 andn %i0, 7, %i0
532 sll %l0, 3, %l0 ! %l0 = input left-shift (bits), %l1 = 64-%l0
533 mov 64, %l1
534 mov 0xff, %l3
535 sub %l1, %l0, %l1
536 and %i1, 7, %l2
537 cmp %i2, 127
! %l5 is zeroed if any condition below holds; nonzero -> block path
538 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
539 movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
540 brnz,pn %l5, .L128cbc_enc_blk ! %i0==%i1)
541 srl %l3, %l2, %l3
542
543 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
544 srlx %i2, 4, %i2 ! %i2 = block count from here on
545 prefetch [%i1], 22
546
547 .L128_cbc_enc_loop:
548 ldx [%i0 + 0], %o0
549 brz,pt %l0, 4f
550 ldx [%i0 + 8], %o1
551
! unaligned input: merge three doubleword reads into %o0:%o1
552 ldx [%i0 + 16], %o2
553 sllx %o0, %l0, %o0
554 srlx %o1, %l1, %g1
555 sllx %o1, %l0, %o1
556 or %g1, %o0, %o0
557 srlx %o2, %l1, %o2
558 or %o2, %o1, %o1
559 4:
560 xor %g4, %o0, %o0 ! ^= rk[0]
561 xor %g5, %o1, %o1
562 .word 0x99b02308 !movxtod %o0,%f12
563 .word 0x9db02309 !movxtod %o1,%f14
564
565 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
566 .word 0x85b38d82 !fxor %f14,%f2,%f2
567 prefetch [%i1 + 63], 22
568 prefetch [%i0 + 16+63], 20
569 call _aes128_encrypt_1x
570 add %i0, 16, %i0
571
! %l2 != 0 -> output unaligned, take the partial-store tail at 2:
572 brnz,pn %l2, 2f
573 sub %i2, 1, %i2
574
575 std %f0, [%i1 + 0]
576 std %f2, [%i1 + 8]
577 brnz,pt %i2, .L128_cbc_enc_loop
578 add %i1, 16, %i1
! done: write the last ciphertext block back as the new ivec
579 st %f0, [%i4 + 0]
580 st %f1, [%i4 + 4]
581 st %f2, [%i4 + 8]
582 st %f3, [%i4 + 12]
583 .L128_cbc_enc_abort:
584 ret
585 restore
586
587 .align 16
588 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
589 ! and ~3x deterioration
590 ! in inp==out case
591 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
592 .word 0x8db00902 !faligndata %f0,%f2,%f6
593 .word 0x91b08902 !faligndata %f2,%f2,%f8
594
595 stda %f4, [%i1 + %l3]0xc0 ! partial store
596 std %f6, [%i1 + 8]
597 add %i1, 16, %i1
598 orn %g0, %l3, %l3
599 stda %f8, [%i1 + %l3]0xc0 ! partial store
600
! re-enter the loop past its first ldx (already done above at 2:)
601 brnz,pt %i2, .L128_cbc_enc_loop+4
602 orn %g0, %l3, %l3
603 st %f0, [%i4 + 0]
604 st %f1, [%i4 + 4]
605 st %f2, [%i4 + 8]
606 st %f3, [%i4 + 12]
607 ret
608 restore
609
610 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! fast path: bulk of the data via ASI 0xe2 (ASI_BLK_INIT) stores,
! which avoid cache-line read-for-ownership; the tail (< 64 bytes,
! rounded to whole blocks in %l5) is finished by the normal loop
611 .align 32
612 .L128cbc_enc_blk:
613 add %i1, %i2, %l5
614 and %l5, 63, %l5 ! tail
615 sub %i2, %l5, %i2
616 add %l5, 15, %l5 ! round up to 16n
617 srlx %i2, 4, %i2
618 srl %l5, 4, %l5
619
620 .L128_cbc_enc_blk_loop:
621 ldx [%i0 + 0], %o0
622 brz,pt %l0, 5f
623 ldx [%i0 + 8], %o1
624
! unaligned input: merge three doubleword reads into %o0:%o1
625 ldx [%i0 + 16], %o2
626 sllx %o0, %l0, %o0
627 srlx %o1, %l1, %g1
628 sllx %o1, %l0, %o1
629 or %g1, %o0, %o0
630 srlx %o2, %l1, %o2
631 or %o2, %o1, %o1
632 5:
633 xor %g4, %o0, %o0 ! ^= rk[0]
634 xor %g5, %o1, %o1
635 .word 0x99b02308 !movxtod %o0,%f12
636 .word 0x9db02309 !movxtod %o1,%f14
637
638 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
639 .word 0x85b38d82 !fxor %f14,%f2,%f2
640 prefetch [%i0 + 16+63], 20
641 call _aes128_encrypt_1x
642 add %i0, 16, %i0
643 sub %i2, 1, %i2
644
645 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
646 add %i1, 8, %i1
647 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
648 brnz,pt %i2, .L128_cbc_enc_blk_loop
649 add %i1, 8, %i1
650
! drain block-init stores, then process the tail via the normal loop
651 membar #StoreLoad|#StoreStore
652 brnz,pt %l5, .L128_cbc_enc_loop
653 mov %l5, %i2
654 st %f0, [%i4 + 0]
655 st %f1, [%i4 + 4]
656 st %f2, [%i4 + 8]
657 st %f3, [%i4 + 12]
658 ret
659 restore
660 .type aes128_t4_cbc_encrypt,#function
661 .size aes128_t4_cbc_encrypt,.-aes128_t4_cbc_encrypt
!----------------------------------------------------------------------
! aes128_t4_ctr32_encrypt(inp, out, blocks, key, ivec)
!   %i0 = inp, %i1 = out, %i2 = number of 16-byte blocks
!   (converted to bytes via sllx 4 for the path decision),
!   %i3 = key schedule, %i4 = 16-byte counter block
! AES-128 CTR mode with a 32-bit counter.  The three high counter
! words are folded into rk[0] once (%g4 -> %f14 holds the upper half
! already keyed; %g5 has word 3 of the counter xor-ed in), and only
! the low 32-bit word %l7 is incremented per block ("srl %l7,0"
! keeps it wrapped to 32 bits).
! Paths: .L128_ctr32_blk (block-init stores) when out is 8-aligned,
! len >= 256 bytes and inp != out; otherwise 2x-unrolled
! .L128_ctr32_loop2x with a 1x .L128_ctr32_loop for an odd leading
! block, plus partial-store tails at the "2:" labels for unaligned
! output.  The counter in memory at %i4 is NOT written back here.
!----------------------------------------------------------------------
662 .globl aes128_t4_ctr32_encrypt
663 .align 32
664 aes128_t4_ctr32_encrypt:
665 save %sp, -STACK_FRAME, %sp
666 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
667
668 prefetch [%i0], 20
669 prefetch [%i0 + 63], 20
670 call _aes128_load_enckey
671 sllx %i2, 4, %i2 ! %i2 = length in bytes
672
673 ld [%i4 + 0], %l4 ! counter
674 ld [%i4 + 4], %l5
675 ld [%i4 + 8], %l6
676 ld [%i4 + 12], %l7
677
! pre-xor rk[0] into the fixed 96 counter bits; %l7 stays separate
678 sllx %l4, 32, %o5
679 or %l5, %o5, %o5
680 sllx %l6, 32, %g1
681 xor %o5, %g4, %g4 ! ^= rk[0]
682 xor %g1, %g5, %g5
683 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
684
685 sub %i0, %i1, %l5 ! %i0!=%i1
686 and %i0, 7, %l0
687 andn %i0, 7, %i0
688 sll %l0, 3, %l0 ! input realignment shifts, as in cbc
689 mov 64, %l1
690 mov 0xff, %l3
691 sub %l1, %l0, %l1
692 and %i1, 7, %l2
693 cmp %i2, 255
694 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
695 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
696 brnz,pn %l5, .L128_ctr32_blk ! %i0==%i1)
697 srl %l3, %l2, %l3
698
! odd number of blocks -> do one single block first, then 2x loop
699 andcc %i2, 16, %g0 ! is number of blocks even?
700 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
701 bz %icc, .L128_ctr32_loop2x
702 srlx %i2, 4, %i2 ! %i2 = block count from here on
703 .L128_ctr32_loop:
704 ldx [%i0 + 0], %o0
705 brz,pt %l0, 4f
706 ldx [%i0 + 8], %o1
707
! unaligned input: merge three doubleword reads into %o0:%o1
708 ldx [%i0 + 16], %o2
709 sllx %o0, %l0, %o0
710 srlx %o1, %l1, %g1
711 sllx %o1, %l0, %o1
712 or %g1, %o0, %o0
713 srlx %o2, %l1, %o2
714 or %o2, %o1, %o1
715 4:
! build keyed counter block low half; bump 32-bit counter %l7
716 xor %g5, %l7, %g1 ! ^= rk[0]
717 add %l7, 1, %l7
718 .word 0x85b02301 !movxtod %g1,%f2
719 srl %l7, 0, %l7 ! clruw
720 prefetch [%i1 + 63], 22
721 prefetch [%i0 + 16+63], 20
! round 1 done inline on %f14:%f2, then enter _1x past its first pair
722 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
723 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
724 call _aes128_encrypt_1x+8
725 add %i0, 16, %i0
726
! xor keystream with input
727 .word 0x95b02308 !movxtod %o0,%f10
728 .word 0x99b02309 !movxtod %o1,%f12
729 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
730 .word 0x85b30d82 !fxor %f12,%f2,%f2
731
732 brnz,pn %l2, 2f
733 sub %i2, 1, %i2
734
735 std %f0, [%i1 + 0]
736 std %f2, [%i1 + 8]
737 brnz,pt %i2, .L128_ctr32_loop2x
738 add %i1, 16, %i1
739
740 ret
741 restore
742
743 .align 16
744 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
745 ! and ~3x deterioration
746 ! in inp==out case
747 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
748 .word 0x8db00902 !faligndata %f0,%f2,%f6
749 .word 0x91b08902 !faligndata %f2,%f2,%f8
750 stda %f4, [%i1 + %l3]0xc0 ! partial store
751 std %f6, [%i1 + 8]
752 add %i1, 16, %i1
753 orn %g0, %l3, %l3
754 stda %f8, [%i1 + %l3]0xc0 ! partial store
755
! continue with the 2x loop, entering past its first ldx
756 brnz,pt %i2, .L128_ctr32_loop2x+4
757 orn %g0, %l3, %l3
758
759 ret
760 restore
761
762 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! main loop: two counter blocks per iteration
763 .align 32
764 .L128_ctr32_loop2x:
765 ldx [%i0 + 0], %o0
766 ldx [%i0 + 8], %o1
767 ldx [%i0 + 16], %o2
768 brz,pt %l0, 4f
769 ldx [%i0 + 24], %o3
770
! unaligned input: merge five doubleword reads into %o0-%o3
771 ldx [%i0 + 32], %o4
772 sllx %o0, %l0, %o0
773 srlx %o1, %l1, %g1
774 or %g1, %o0, %o0
775 sllx %o1, %l0, %o1
776 srlx %o2, %l1, %g1
777 or %g1, %o1, %o1
778 sllx %o2, %l0, %o2
779 srlx %o3, %l1, %g1
780 or %g1, %o2, %o2
781 sllx %o3, %l0, %o3
782 srlx %o4, %l1, %o4
783 or %o4, %o3, %o3
784 4:
! two consecutive counter values -> %f2 and %f6
785 xor %g5, %l7, %g1 ! ^= rk[0]
786 add %l7, 1, %l7
787 .word 0x85b02301 !movxtod %g1,%f2
788 srl %l7, 0, %l7 ! clruw
789 xor %g5, %l7, %g1
790 add %l7, 1, %l7
791 .word 0x8db02301 !movxtod %g1,%f6
792 srl %l7, 0, %l7 ! clruw
793 prefetch [%i1 + 63], 22
794 prefetch [%i0 + 32+63], 20
! first round pair inline, then enter _2x past its first four ops
795 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
796 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
797 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
798 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
799 call _aes128_encrypt_2x+16
800 add %i0, 32, %i0
801
! xor the two keystream blocks with 32 bytes of input
802 .word 0x91b02308 !movxtod %o0,%f8
803 .word 0x95b02309 !movxtod %o1,%f10
804 .word 0x99b0230a !movxtod %o2,%f12
805 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
806 .word 0x91b0230b !movxtod %o3,%f8
807 .word 0x85b28d82 !fxor %f10,%f2,%f2
808 .word 0x89b30d84 !fxor %f12,%f4,%f4
809 .word 0x8db20d86 !fxor %f8,%f6,%f6
810
811 brnz,pn %l2, 2f
812 sub %i2, 2, %i2
813
814 std %f0, [%i1 + 0]
815 std %f2, [%i1 + 8]
816 std %f4, [%i1 + 16]
817 std %f6, [%i1 + 24]
818 brnz,pt %i2, .L128_ctr32_loop2x
819 add %i1, 32, %i1
820
821 ret
822 restore
823
824 .align 16
825 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
826 ! and ~3x deterioration
827 ! in inp==out case
828 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
829 .word 0x81b00902 !faligndata %f0,%f2,%f0
830 .word 0x85b08904 !faligndata %f2,%f4,%f2
831 .word 0x89b10906 !faligndata %f4,%f6,%f4
832 .word 0x8db18906 !faligndata %f6,%f6,%f6
833
834 stda %f8, [%i1 + %l3]0xc0 ! partial store
835 std %f0, [%i1 + 8]
836 std %f2, [%i1 + 16]
837 std %f4, [%i1 + 24]
838 add %i1, 32, %i1
839 orn %g0, %l3, %l3
840 stda %f6, [%i1 + %l3]0xc0 ! partial store
841
842 brnz,pt %i2, .L128_ctr32_loop2x+4
843 orn %g0, %l3, %l3
844
845 ret
846 restore
847
848 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! fast path: ASI_BLK_INIT stores for the bulk; the < 64-byte tail
! (block count in %l5) is finished by the normal loops above
849 .align 32
850 .L128_ctr32_blk:
851 add %i1, %i2, %l5
852 and %l5, 63, %l5 ! tail
853 sub %i2, %l5, %i2
854 add %l5, 15, %l5 ! round up to 16n
855 srlx %i2, 4, %i2
856 srl %l5, 4, %l5
857 sub %i2, 1, %i2
858 add %l5, 1, %l5
859
860 .L128_ctr32_blk_loop2x:
861 ldx [%i0 + 0], %o0
862 ldx [%i0 + 8], %o1
863 ldx [%i0 + 16], %o2
864 brz,pt %l0, 5f
865 ldx [%i0 + 24], %o3
866
! unaligned input: merge five doubleword reads into %o0-%o3
867 ldx [%i0 + 32], %o4
868 sllx %o0, %l0, %o0
869 srlx %o1, %l1, %g1
870 or %g1, %o0, %o0
871 sllx %o1, %l0, %o1
872 srlx %o2, %l1, %g1
873 or %g1, %o1, %o1
874 sllx %o2, %l0, %o2
875 srlx %o3, %l1, %g1
876 or %g1, %o2, %o2
877 sllx %o3, %l0, %o3
878 srlx %o4, %l1, %o4
879 or %o4, %o3, %o3
880 5:
881 xor %g5, %l7, %g1 ! ^= rk[0]
882 add %l7, 1, %l7
883 .word 0x85b02301 !movxtod %g1,%f2
884 srl %l7, 0, %l7 ! clruw
885 xor %g5, %l7, %g1
886 add %l7, 1, %l7
887 .word 0x8db02301 !movxtod %g1,%f6
888 srl %l7, 0, %l7 ! clruw
889 prefetch [%i0 + 32+63], 20
890 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
891 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
892 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
893 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
894 call _aes128_encrypt_2x+16
895 add %i0, 32, %i0
896 subcc %i2, 2, %i2
897
898 .word 0x91b02308 !movxtod %o0,%f8
899 .word 0x95b02309 !movxtod %o1,%f10
900 .word 0x99b0230a !movxtod %o2,%f12
901 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
902 .word 0x91b0230b !movxtod %o3,%f8
903 .word 0x85b28d82 !fxor %f10,%f2,%f2
904 .word 0x89b30d84 !fxor %f12,%f4,%f4
905 .word 0x8db20d86 !fxor %f8,%f6,%f6
906
907 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
908 add %i1, 8, %i1
909 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
910 add %i1, 8, %i1
911 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
912 add %i1, 8, %i1
913 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
914 bgu,pt SIZE_T_CC, .L128_ctr32_blk_loop2x
915 add %i1, 8, %i1
916
! drain block-init stores; route the tail to the 1x or 2x loop
! depending on whether an odd block remains
917 add %l5, %i2, %i2
918 andcc %i2, 1, %g0 ! is number of blocks even?
919 membar #StoreLoad|#StoreStore
920 bnz,pt %icc, .L128_ctr32_loop
921 srl %i2, 0, %i2
922 brnz,pn %i2, .L128_ctr32_loop2x
923 nop
924
925 ret
926 restore
927 .type aes128_t4_ctr32_encrypt,#function
928 .size aes128_t4_ctr32_encrypt,.-aes128_t4_ctr32_encrypt
929 .globl aes128_t4_xts_encrypt
930 .align 32
931 aes128_t4_xts_encrypt:
932 save %sp, -STACK_FRAME-16, %sp
933 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
934
935 mov %i5, %o0
936 add %fp, STACK_BIAS-16, %o1
937 call aes_t4_encrypt
938 mov %i4, %o2
939
940 add %fp, STACK_BIAS-16, %l7
941 ldxa [%l7]0x88, %g2
942 add %fp, STACK_BIAS-8, %l7
943 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
944
945 sethi %hi(0x76543210), %l7
946 or %l7, %lo(0x76543210), %l7
947 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
948
949 prefetch [%i0], 20
950 prefetch [%i0 + 63], 20
951 call _aes128_load_enckey
952 and %i2, 15, %i5
953 and %i2, -16, %i2
954
955 sub %i0, %i1, %l5 ! %i0!=%i1
956 and %i0, 7, %l0
957 andn %i0, 7, %i0
958 sll %l0, 3, %l0
959 mov 64, %l1
960 mov 0xff, %l3
961 sub %l1, %l0, %l1
962 and %i1, 7, %l2
963 cmp %i2, 255
964 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
965 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
966 brnz,pn %l5, .L128_xts_enblk ! %i0==%i1)
967 srl %l3, %l2, %l3
968
969 andcc %i2, 16, %g0 ! is number of blocks even?
970 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
971 bz %icc, .L128_xts_enloop2x
972 srlx %i2, 4, %i2
973 .L128_xts_enloop:
974 ldx [%i0 + 0], %o0
975 brz,pt %l0, 4f
976 ldx [%i0 + 8], %o1
977
978 ldx [%i0 + 16], %o2
979 sllx %o0, %l0, %o0
980 srlx %o1, %l1, %g1
981 sllx %o1, %l0, %o1
982 or %g1, %o0, %o0
983 srlx %o2, %l1, %o2
984 or %o2, %o1, %o1
985 4:
986 .word 0x99b02302 !movxtod %g2,%f12
987 .word 0x9db02303 !movxtod %g3,%f14
988 .word 0x99b3098c !bshuffle %f12,%f12,%f12
989 .word 0x9db3898e !bshuffle %f14,%f14,%f14
990
991 xor %g4, %o0, %o0 ! ^= rk[0]
992 xor %g5, %o1, %o1
993 .word 0x81b02308 !movxtod %o0,%f0
994 .word 0x85b02309 !movxtod %o1,%f2
995
996 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
997 .word 0x85b38d82 !fxor %f14,%f2,%f2
998
999 prefetch [%i1 + 63], 22
1000 prefetch [%i0 + 16+63], 20
1001 call _aes128_encrypt_1x
1002 add %i0, 16, %i0
1003
1004 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1005 .word 0x85b38d82 !fxor %f14,%f2,%f2
1006
1007 srax %g3, 63, %l7 ! next tweak value
1008 addcc %g2, %g2, %g2
1009 and %l7, 0x87, %l7
1010 .word 0x87b0c223 !addxc %g3,%g3,%g3
1011 xor %l7, %g2, %g2
1012
1013 brnz,pn %l2, 2f
1014 sub %i2, 1, %i2
1015
1016 std %f0, [%i1 + 0]
1017 std %f2, [%i1 + 8]
1018 brnz,pt %i2, .L128_xts_enloop2x
1019 add %i1, 16, %i1
1020
1021 brnz,pn %i5, .L128_xts_ensteal
1022 nop
1023
1024 ret
1025 restore
1026
1027 .align 16
1028 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1029 ! and ~3x deterioration
1030 ! in inp==out case
1031 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1032 .word 0x8db00902 !faligndata %f0,%f2,%f6
1033 .word 0x91b08902 !faligndata %f2,%f2,%f8
1034 stda %f4, [%i1 + %l3]0xc0 ! partial store
1035 std %f6, [%i1 + 8]
1036 add %i1, 16, %i1
1037 orn %g0, %l3, %l3
1038 stda %f8, [%i1 + %l3]0xc0 ! partial store
1039
1040 brnz,pt %i2, .L128_xts_enloop2x+4
1041 orn %g0, %l3, %l3
1042
1043 brnz,pn %i5, .L128_xts_ensteal
1044 nop
1045
1046 ret
1047 restore
1048
1049 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1050 .align 32
1051 .L128_xts_enloop2x:
1052 ldx [%i0 + 0], %o0
1053 ldx [%i0 + 8], %o1
1054 ldx [%i0 + 16], %o2
1055 brz,pt %l0, 4f
1056 ldx [%i0 + 24], %o3
1057
1058 ldx [%i0 + 32], %o4
1059 sllx %o0, %l0, %o0
1060 srlx %o1, %l1, %g1
1061 or %g1, %o0, %o0
1062 sllx %o1, %l0, %o1
1063 srlx %o2, %l1, %g1
1064 or %g1, %o1, %o1
1065 sllx %o2, %l0, %o2
1066 srlx %o3, %l1, %g1
1067 or %g1, %o2, %o2
1068 sllx %o3, %l0, %o3
1069 srlx %o4, %l1, %o4
1070 or %o4, %o3, %o3
1071 4:
1072 .word 0x99b02302 !movxtod %g2,%f12
1073 .word 0x9db02303 !movxtod %g3,%f14
1074 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1075 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1076
1077 srax %g3, 63, %l7 ! next tweak value
1078 addcc %g2, %g2, %g2
1079 and %l7, 0x87, %l7
1080 .word 0x87b0c223 !addxc %g3,%g3,%g3
1081 xor %l7, %g2, %g2
1082
1083 .word 0x91b02302 !movxtod %g2,%f8
1084 .word 0x95b02303 !movxtod %g3,%f10
1085 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1086 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1087
1088 xor %g4, %o0, %o0 ! ^= rk[0]
1089 xor %g5, %o1, %o1
1090 xor %g4, %o2, %o2 ! ^= rk[0]
1091 xor %g5, %o3, %o3
1092 .word 0x81b02308 !movxtod %o0,%f0
1093 .word 0x85b02309 !movxtod %o1,%f2
1094 .word 0x89b0230a !movxtod %o2,%f4
1095 .word 0x8db0230b !movxtod %o3,%f6
1096
1097 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1098 .word 0x85b38d82 !fxor %f14,%f2,%f2
1099 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1100 .word 0x8db28d86 !fxor %f10,%f6,%f6
1101
1102 prefetch [%i1 + 63], 22
1103 prefetch [%i0 + 32+63], 20
1104 call _aes128_encrypt_2x
1105 add %i0, 32, %i0
1106
1107 .word 0x91b02302 !movxtod %g2,%f8
1108 .word 0x95b02303 !movxtod %g3,%f10
1109
1110 srax %g3, 63, %l7 ! next tweak value
1111 addcc %g2, %g2, %g2
1112 and %l7, 0x87, %l7
1113 .word 0x87b0c223 !addxc %g3,%g3,%g3
1114 xor %l7, %g2, %g2
1115
1116 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1117 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1118
1119 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1120 .word 0x85b38d82 !fxor %f14,%f2,%f2
1121 .word 0x89b20d84 !fxor %f8,%f4,%f4
1122 .word 0x8db28d86 !fxor %f10,%f6,%f6
1123
1124 brnz,pn %l2, 2f
1125 sub %i2, 2, %i2
1126
1127 std %f0, [%i1 + 0]
1128 std %f2, [%i1 + 8]
1129 std %f4, [%i1 + 16]
1130 std %f6, [%i1 + 24]
1131 brnz,pt %i2, .L128_xts_enloop2x
1132 add %i1, 32, %i1
1133
1134 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1135 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1136 brnz,pn %i5, .L128_xts_ensteal
1137 nop
1138
1139 ret
1140 restore
1141
1142 .align 16
1143 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1144 ! and ~3x deterioration
1145 ! in inp==out case
1146 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1147 .word 0x95b00902 !faligndata %f0,%f2,%f10
1148 .word 0x99b08904 !faligndata %f2,%f4,%f12
1149 .word 0x9db10906 !faligndata %f4,%f6,%f14
1150 .word 0x81b18906 !faligndata %f6,%f6,%f0
1151
1152 stda %f8, [%i1 + %l3]0xc0 ! partial store
1153 std %f10, [%i1 + 8]
1154 std %f12, [%i1 + 16]
1155 std %f14, [%i1 + 24]
1156 add %i1, 32, %i1
1157 orn %g0, %l3, %l3
1158 stda %f0, [%i1 + %l3]0xc0 ! partial store
1159
1160 brnz,pt %i2, .L128_xts_enloop2x+4
1161 orn %g0, %l3, %l3
1162
1163 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1164 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1165 brnz,pn %i5, .L128_xts_ensteal
1166 nop
1167
1168 ret
1169 restore
1170
1171 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1172 .align 32
1173 .L128_xts_enblk:
1174 add %i1, %i2, %l5
1175 and %l5, 63, %l5 ! tail
1176 sub %i2, %l5, %i2
1177 add %l5, 15, %l5 ! round up to 16n
1178 srlx %i2, 4, %i2
1179 srl %l5, 4, %l5
1180 sub %i2, 1, %i2
1181 add %l5, 1, %l5
1182
1183 .L128_xts_enblk2x:
1184 ldx [%i0 + 0], %o0
1185 ldx [%i0 + 8], %o1
1186 ldx [%i0 + 16], %o2
1187 brz,pt %l0, 5f
1188 ldx [%i0 + 24], %o3
1189
1190 ldx [%i0 + 32], %o4
1191 sllx %o0, %l0, %o0
1192 srlx %o1, %l1, %g1
1193 or %g1, %o0, %o0
1194 sllx %o1, %l0, %o1
1195 srlx %o2, %l1, %g1
1196 or %g1, %o1, %o1
1197 sllx %o2, %l0, %o2
1198 srlx %o3, %l1, %g1
1199 or %g1, %o2, %o2
1200 sllx %o3, %l0, %o3
1201 srlx %o4, %l1, %o4
1202 or %o4, %o3, %o3
1203 5:
1204 .word 0x99b02302 !movxtod %g2,%f12
1205 .word 0x9db02303 !movxtod %g3,%f14
1206 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1207 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1208
1209 srax %g3, 63, %l7 ! next tweak value
1210 addcc %g2, %g2, %g2
1211 and %l7, 0x87, %l7
1212 .word 0x87b0c223 !addxc %g3,%g3,%g3
1213 xor %l7, %g2, %g2
1214
1215 .word 0x91b02302 !movxtod %g2,%f8
1216 .word 0x95b02303 !movxtod %g3,%f10
1217 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1218 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1219
1220 xor %g4, %o0, %o0 ! ^= rk[0]
1221 xor %g5, %o1, %o1
1222 xor %g4, %o2, %o2 ! ^= rk[0]
1223 xor %g5, %o3, %o3
1224 .word 0x81b02308 !movxtod %o0,%f0
1225 .word 0x85b02309 !movxtod %o1,%f2
1226 .word 0x89b0230a !movxtod %o2,%f4
1227 .word 0x8db0230b !movxtod %o3,%f6
1228
1229 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1230 .word 0x85b38d82 !fxor %f14,%f2,%f2
1231 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1232 .word 0x8db28d86 !fxor %f10,%f6,%f6
1233
1234 prefetch [%i0 + 32+63], 20
1235 call _aes128_encrypt_2x
1236 add %i0, 32, %i0
1237
1238 .word 0x91b02302 !movxtod %g2,%f8
1239 .word 0x95b02303 !movxtod %g3,%f10
1240
1241 srax %g3, 63, %l7 ! next tweak value
1242 addcc %g2, %g2, %g2
1243 and %l7, 0x87, %l7
1244 .word 0x87b0c223 !addxc %g3,%g3,%g3
1245 xor %l7, %g2, %g2
1246
1247 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1248 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1249
1250 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1251 .word 0x85b38d82 !fxor %f14,%f2,%f2
1252 .word 0x89b20d84 !fxor %f8,%f4,%f4
1253 .word 0x8db28d86 !fxor %f10,%f6,%f6
1254
1255 subcc %i2, 2, %i2
1256 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1257 add %i1, 8, %i1
1258 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1259 add %i1, 8, %i1
1260 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1261 add %i1, 8, %i1
1262 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1263 bgu,pt SIZE_T_CC, .L128_xts_enblk2x
1264 add %i1, 8, %i1
1265
1266 add %l5, %i2, %i2
1267 andcc %i2, 1, %g0 ! is number of blocks even?
1268 membar #StoreLoad|#StoreStore
1269 bnz,pt %icc, .L128_xts_enloop
1270 srl %i2, 0, %i2
1271 brnz,pn %i2, .L128_xts_enloop2x
1272 nop
1273
1274 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1275 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1276 brnz,pn %i5, .L128_xts_ensteal
1277 nop
1278
1279 ret
1280 restore
1281 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1282 .align 32
1283 .L128_xts_ensteal:
1284 std %f0, [%fp + STACK_BIAS-16] ! copy of output
1285 std %f2, [%fp + STACK_BIAS-8]
1286
1287 srl %l0, 3, %l0
1288 add %fp, STACK_BIAS-16, %l7
1289 add %i0, %l0, %i0 ! original %i0+%i2&-15
1290 add %i1, %l2, %i1 ! original %i1+%i2&-15
1291 mov 0, %l0
1292 nop ! align
1293
1294 .L128_xts_enstealing:
1295 ldub [%i0 + %l0], %o0
1296 ldub [%l7 + %l0], %o1
1297 dec %i5
1298 stb %o0, [%l7 + %l0]
1299 stb %o1, [%i1 + %l0]
1300 brnz %i5, .L128_xts_enstealing
1301 inc %l0
1302
1303 mov %l7, %i0
1304 sub %i1, 16, %i1
1305 mov 0, %l0
1306 sub %i1, %l2, %i1
1307 ba .L128_xts_enloop ! one more time
1308 mov 1, %i2 ! %i5 is 0
1309 ret
1310 restore
1311 .type aes128_t4_xts_encrypt,#function
1312 .size aes128_t4_xts_encrypt,.-aes128_t4_xts_encrypt
!----------------------------------------------------------------------
! aes128_t4_xts_decrypt — AES-128-XTS decryption using SPARC T4 AES
! opcodes (emitted as .word since older assemblers lack the mnemonics).
! Register roles (after save; inferred from the code below):
!   %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes
!   %i4 = tweak-key schedule (passed to aes_t4_encrypt to make the tweak)
!   %i5 = on entry: pointer to the sector/IV block; after setup it is
!         reused as the residual byte count (len & 15) for stealing
!   %g3:%g2 = current 128-bit tweak; %g4:%g5 = rk[0] pair
! NOTE(review): the data round keys appear to be preloaded into
! %f16..%f54 by _aes128_load_deckey (defined elsewhere in this file) —
! confirm against that helper.
!----------------------------------------------------------------------
1313 .globl aes128_t4_xts_decrypt
1314 .align 32
1315 aes128_t4_xts_decrypt:
1316 save %sp, -STACK_FRAME-16, %sp
1317 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
1318
! Encrypt the IV block with the tweak key to obtain the initial tweak,
! writing it to a 16-byte scratch slot in this frame.
1319 mov %i5, %o0
1320 add %fp, STACK_BIAS-16, %o1
1321 call aes_t4_encrypt
1322 mov %i4, %o2
1323
! Load the tweak back as two little-endian 64-bit halves (ASI 0x88).
1324 add %fp, STACK_BIAS-16, %l7
1325 ldxa [%l7]0x88, %g2
1326 add %fp, STACK_BIAS-8, %l7
1327 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
1328
1329 sethi %hi(0x76543210), %l7
1330 or %l7, %lo(0x76543210), %l7
1331 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
1332
1333 prefetch [%i0], 20
1334 prefetch [%i0 + 63], 20
1335 call _aes128_load_deckey
1336 and %i2, 15, %i5
! Round len down to 16; if there is a residue, hold back one extra full
! block for the ciphertext-stealing tail.
1337 and %i2, -16, %i2
1338 mov 0, %l7
1339 movrnz %i5, 16, %l7
1340 sub %i2, %l7, %i2
1341
! Compute alignment parameters: %l0/%l1 = input bit-shift pair,
! %l2 = output misalignment, %l3 = partial-store mask, and pick the
! block-store path (.L128_xts_deblk) for long, aligned, non-overlapping
! buffers.
1342 sub %i0, %i1, %l5 ! %i0!=%i1
1343 and %i0, 7, %l0
1344 andn %i0, 7, %i0
1345 sll %l0, 3, %l0
1346 mov 64, %l1
1347 mov 0xff, %l3
1348 sub %l1, %l0, %l1
1349 and %i1, 7, %l2
1350 cmp %i2, 255
1351 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
1352 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
1353 brnz,pn %l5, .L128_xts_deblk ! %i0==%i1)
1354 srl %l3, %l2, %l3
1355
1356 andcc %i2, 16, %g0 ! is number of blocks even?
1357 brz,pn %i2, .L128_xts_desteal
1358 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
1359 bz %icc, .L128_xts_deloop2x
1360 srlx %i2, 4, %i2
! Scalar path: decrypt one 16-byte block per iteration (used to make
! the remaining block count even, and for the final stolen block).
1361 .L128_xts_deloop:
1362 ldx [%i0 + 0], %o0
1363 brz,pt %l0, 4f
1364 ldx [%i0 + 8], %o1
1365
! Unaligned input: merge three doublewords into one aligned block.
1366 ldx [%i0 + 16], %o2
1367 sllx %o0, %l0, %o0
1368 srlx %o1, %l1, %g1
1369 sllx %o1, %l0, %o1
1370 or %g1, %o0, %o0
1371 srlx %o2, %l1, %o2
1372 or %o2, %o1, %o1
1373 4:
1374 .word 0x99b02302 !movxtod %g2,%f12
1375 .word 0x9db02303 !movxtod %g3,%f14
1376 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1377 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1378
1379 xor %g4, %o0, %o0 ! ^= rk[0]
1380 xor %g5, %o1, %o1
1381 .word 0x81b02308 !movxtod %o0,%f0
1382 .word 0x85b02309 !movxtod %o1,%f2
1383
1384 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1385 .word 0x85b38d82 !fxor %f14,%f2,%f2
1386
1387 prefetch [%i1 + 63], 22
1388 prefetch [%i0 + 16+63], 20
1389 call _aes128_decrypt_1x
1390 add %i0, 16, %i0
1391
1392 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1393 .word 0x85b38d82 !fxor %f14,%f2,%f2
1394
! Multiply the tweak by x in GF(2^128): 128-bit left shift via
! addcc/addxc, conditionally folding in the 0x87 reduction polynomial.
1395 srax %g3, 63, %l7 ! next tweak value
1396 addcc %g2, %g2, %g2
1397 and %l7, 0x87, %l7
1398 .word 0x87b0c223 !addxc %g3,%g3,%g3
1399 xor %l7, %g2, %g2
1400
1401 brnz,pn %l2, 2f
1402 sub %i2, 1, %i2
1403
1404 std %f0, [%i1 + 0]
1405 std %f2, [%i1 + 8]
1406 brnz,pt %i2, .L128_xts_deloop2x
1407 add %i1, 16, %i1
1408
1409 brnz,pn %i5, .L128_xts_desteal
1410 nop
1411
1412 ret
1413 restore
1414
1415 .align 16
1416 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1417 ! and ~3x deterioration
1418 ! in inp==out case
1419 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1420 .word 0x8db00902 !faligndata %f0,%f2,%f6
1421 .word 0x91b08902 !faligndata %f2,%f2,%f8
1422 stda %f4, [%i1 + %l3]0xc0 ! partial store
1423 std %f6, [%i1 + 8]
1424 add %i1, 16, %i1
1425 orn %g0, %l3, %l3
1426 stda %f8, [%i1 + %l3]0xc0 ! partial store
1427
1428 brnz,pt %i2, .L128_xts_deloop2x+4
1429 orn %g0, %l3, %l3
1430
1431 brnz,pn %i5, .L128_xts_desteal
1432 nop
1433
1434 ret
1435 restore
1436
1437 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main path: two blocks per iteration, tweaks in %f12:%f14 and %f8:%f10.
1438 .align 32
1439 .L128_xts_deloop2x:
1440 ldx [%i0 + 0], %o0
1441 ldx [%i0 + 8], %o1
1442 ldx [%i0 + 16], %o2
1443 brz,pt %l0, 4f
1444 ldx [%i0 + 24], %o3
1445
! Unaligned input: shift/merge five doublewords into two blocks.
1446 ldx [%i0 + 32], %o4
1447 sllx %o0, %l0, %o0
1448 srlx %o1, %l1, %g1
1449 or %g1, %o0, %o0
1450 sllx %o1, %l0, %o1
1451 srlx %o2, %l1, %g1
1452 or %g1, %o1, %o1
1453 sllx %o2, %l0, %o2
1454 srlx %o3, %l1, %g1
1455 or %g1, %o2, %o2
1456 sllx %o3, %l0, %o3
1457 srlx %o4, %l1, %o4
1458 or %o4, %o3, %o3
1459 4:
1460 .word 0x99b02302 !movxtod %g2,%f12
1461 .word 0x9db02303 !movxtod %g3,%f14
1462 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1463 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1464
1465 srax %g3, 63, %l7 ! next tweak value
1466 addcc %g2, %g2, %g2
1467 and %l7, 0x87, %l7
1468 .word 0x87b0c223 !addxc %g3,%g3,%g3
1469 xor %l7, %g2, %g2
1470
1471 .word 0x91b02302 !movxtod %g2,%f8
1472 .word 0x95b02303 !movxtod %g3,%f10
1473 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1474 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1475
1476 xor %g4, %o0, %o0 ! ^= rk[0]
1477 xor %g5, %o1, %o1
1478 xor %g4, %o2, %o2 ! ^= rk[0]
1479 xor %g5, %o3, %o3
1480 .word 0x81b02308 !movxtod %o0,%f0
1481 .word 0x85b02309 !movxtod %o1,%f2
1482 .word 0x89b0230a !movxtod %o2,%f4
1483 .word 0x8db0230b !movxtod %o3,%f6
1484
1485 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1486 .word 0x85b38d82 !fxor %f14,%f2,%f2
1487 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1488 .word 0x8db28d86 !fxor %f10,%f6,%f6
1489
1490 prefetch [%i1 + 63], 22
1491 prefetch [%i0 + 32+63], 20
1492 call _aes128_decrypt_2x
1493 add %i0, 32, %i0
1494
1495 .word 0x91b02302 !movxtod %g2,%f8
1496 .word 0x95b02303 !movxtod %g3,%f10
1497
1498 srax %g3, 63, %l7 ! next tweak value
1499 addcc %g2, %g2, %g2
1500 and %l7, 0x87, %l7
1501 .word 0x87b0c223 !addxc %g3,%g3,%g3
1502 xor %l7, %g2, %g2
1503
1504 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1505 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1506
1507 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1508 .word 0x85b38d82 !fxor %f14,%f2,%f2
1509 .word 0x89b20d84 !fxor %f8,%f4,%f4
1510 .word 0x8db28d86 !fxor %f10,%f6,%f6
1511
1512 brnz,pn %l2, 2f
1513 sub %i2, 2, %i2
1514
1515 std %f0, [%i1 + 0]
1516 std %f2, [%i1 + 8]
1517 std %f4, [%i1 + 16]
1518 std %f6, [%i1 + 24]
1519 brnz,pt %i2, .L128_xts_deloop2x
1520 add %i1, 32, %i1
1521
1522 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1523 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1524 brnz,pn %i5, .L128_xts_desteal
1525 nop
1526
1527 ret
1528 restore
1529
1530 .align 16
1531 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1532 ! and ~3x deterioration
1533 ! in inp==out case
1534 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1535 .word 0x95b00902 !faligndata %f0,%f2,%f10
1536 .word 0x99b08904 !faligndata %f2,%f4,%f12
1537 .word 0x9db10906 !faligndata %f4,%f6,%f14
1538 .word 0x81b18906 !faligndata %f6,%f6,%f0
1539
1540 stda %f8, [%i1 + %l3]0xc0 ! partial store
1541 std %f10, [%i1 + 8]
1542 std %f12, [%i1 + 16]
1543 std %f14, [%i1 + 24]
1544 add %i1, 32, %i1
1545 orn %g0, %l3, %l3
1546 stda %f0, [%i1 + %l3]0xc0 ! partial store
1547
1548 brnz,pt %i2, .L128_xts_deloop2x+4
1549 orn %g0, %l3, %l3
1550
1551 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1552 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1553 brnz,pn %i5, .L128_xts_desteal
1554 nop
1555
1556 ret
1557 restore
1558
1559 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: uses ASI_BLK_INIT stores (T4-specific) for long, aligned,
! non-overlapping buffers; %l5 carries the rounded-up tail block count.
1560 .align 32
1561 .L128_xts_deblk:
1562 add %i1, %i2, %l5
1563 and %l5, 63, %l5 ! tail
1564 sub %i2, %l5, %i2
1565 add %l5, 15, %l5 ! round up to 16n
1566 srlx %i2, 4, %i2
1567 srl %l5, 4, %l5
1568 sub %i2, 1, %i2
1569 add %l5, 1, %l5
1570
1571 .L128_xts_deblk2x:
1572 ldx [%i0 + 0], %o0
1573 ldx [%i0 + 8], %o1
1574 ldx [%i0 + 16], %o2
1575 brz,pt %l0, 5f
1576 ldx [%i0 + 24], %o3
1577
1578 ldx [%i0 + 32], %o4
1579 sllx %o0, %l0, %o0
1580 srlx %o1, %l1, %g1
1581 or %g1, %o0, %o0
1582 sllx %o1, %l0, %o1
1583 srlx %o2, %l1, %g1
1584 or %g1, %o1, %o1
1585 sllx %o2, %l0, %o2
1586 srlx %o3, %l1, %g1
1587 or %g1, %o2, %o2
1588 sllx %o3, %l0, %o3
1589 srlx %o4, %l1, %o4
1590 or %o4, %o3, %o3
1591 5:
1592 .word 0x99b02302 !movxtod %g2,%f12
1593 .word 0x9db02303 !movxtod %g3,%f14
1594 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1595 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1596
1597 srax %g3, 63, %l7 ! next tweak value
1598 addcc %g2, %g2, %g2
1599 and %l7, 0x87, %l7
1600 .word 0x87b0c223 !addxc %g3,%g3,%g3
1601 xor %l7, %g2, %g2
1602
1603 .word 0x91b02302 !movxtod %g2,%f8
1604 .word 0x95b02303 !movxtod %g3,%f10
1605 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1606 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1607
1608 xor %g4, %o0, %o0 ! ^= rk[0]
1609 xor %g5, %o1, %o1
1610 xor %g4, %o2, %o2 ! ^= rk[0]
1611 xor %g5, %o3, %o3
1612 .word 0x81b02308 !movxtod %o0,%f0
1613 .word 0x85b02309 !movxtod %o1,%f2
1614 .word 0x89b0230a !movxtod %o2,%f4
1615 .word 0x8db0230b !movxtod %o3,%f6
1616
1617 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1618 .word 0x85b38d82 !fxor %f14,%f2,%f2
1619 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1620 .word 0x8db28d86 !fxor %f10,%f6,%f6
1621
1622 prefetch [%i0 + 32+63], 20
1623 call _aes128_decrypt_2x
1624 add %i0, 32, %i0
1625
1626 .word 0x91b02302 !movxtod %g2,%f8
1627 .word 0x95b02303 !movxtod %g3,%f10
1628
1629 srax %g3, 63, %l7 ! next tweak value
1630 addcc %g2, %g2, %g2
1631 and %l7, 0x87, %l7
1632 .word 0x87b0c223 !addxc %g3,%g3,%g3
1633 xor %l7, %g2, %g2
1634
1635 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1636 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1637
1638 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1639 .word 0x85b38d82 !fxor %f14,%f2,%f2
1640 .word 0x89b20d84 !fxor %f8,%f4,%f4
1641 .word 0x8db28d86 !fxor %f10,%f6,%f6
1642
1643 subcc %i2, 2, %i2
1644 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1645 add %i1, 8, %i1
1646 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1647 add %i1, 8, %i1
1648 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1649 add %i1, 8, %i1
1650 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1651 bgu,pt SIZE_T_CC, .L128_xts_deblk2x
1652 add %i1, 8, %i1
1653
! Finish the tail with the ordinary loops; membar orders the block-init
! stores before any subsequent loads from the destination.
1654 add %l5, %i2, %i2
1655 andcc %i2, 1, %g0 ! is number of blocks even?
1656 membar #StoreLoad|#StoreStore
1657 bnz,pt %icc, .L128_xts_deloop
1658 srl %i2, 0, %i2
1659 brnz,pn %i2, .L128_xts_deloop2x
1660 nop
1661
1662 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1663 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1664 brnz,pn %i5, .L128_xts_desteal
1665 nop
1666
1667 ret
1668 restore
1669 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext-stealing tail: decrypt the last full block with the NEXT
! tweak, swap the trailing %i5 bytes with the input through a stack
! staging buffer, then loop once more for the final block.
1670 .align 32
1671 .L128_xts_desteal:
1672 ldx [%i0 + 0], %o0
1673 brz,pt %l0, 8f
1674 ldx [%i0 + 8], %o1
1675
1676 ldx [%i0 + 16], %o2
1677 sllx %o0, %l0, %o0
1678 srlx %o1, %l1, %g1
1679 sllx %o1, %l0, %o1
1680 or %g1, %o0, %o0
1681 srlx %o2, %l1, %o2
1682 or %o2, %o1, %o1
1683 8:
1684 srax %g3, 63, %l7 ! next tweak value
1685 addcc %g2, %g2, %o2
1686 and %l7, 0x87, %l7
1687 .word 0x97b0c223 !addxc %g3,%g3,%o3
1688 xor %l7, %o2, %o2
1689
1690 .word 0x99b0230a !movxtod %o2,%f12
1691 .word 0x9db0230b !movxtod %o3,%f14
1692 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1693 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1694
1695 xor %g4, %o0, %o0 ! ^= rk[0]
1696 xor %g5, %o1, %o1
1697 .word 0x81b02308 !movxtod %o0,%f0
1698 .word 0x85b02309 !movxtod %o1,%f2
1699
1700 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1701 .word 0x85b38d82 !fxor %f14,%f2,%f2
1702
1703 call _aes128_decrypt_1x
1704 add %i0, 16, %i0
1705
1706 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1707 .word 0x85b38d82 !fxor %f14,%f2,%f2
1708
1709 std %f0, [%fp + STACK_BIAS-16]
1710 std %f2, [%fp + STACK_BIAS-8]
1711
1712 srl %l0, 3, %l0
1713 add %fp, STACK_BIAS-16, %l7
1714 add %i0, %l0, %i0 ! original %i0+%i2&-15
1715 add %i1, %l2, %i1 ! original %i1+%i2&-15
1716 mov 0, %l0
1717 add %i1, 16, %i1
1718 nop ! align
1719
! Byte loop: copy %i5 input bytes into the staging buffer while emitting
! the displaced plaintext bytes as the partial final output block.
1720 .L128_xts_destealing:
1721 ldub [%i0 + %l0], %o0
1722 ldub [%l7 + %l0], %o1
1723 dec %i5
1724 stb %o0, [%l7 + %l0]
1725 stb %o1, [%i1 + %l0]
1726 brnz %i5, .L128_xts_destealing
1727 inc %l0
1728
1729 mov %l7, %i0
1730 sub %i1, 16, %i1
1731 mov 0, %l0
1732 sub %i1, %l2, %i1
1733 ba .L128_xts_deloop ! one more time
1734 mov 1, %i2 ! %i5 is 0
1735 ret
1736 restore
1737 .type aes128_t4_xts_decrypt,#function
1738 .size aes128_t4_xts_decrypt,.-aes128_t4_xts_decrypt
!----------------------------------------------------------------------
! aes128_t4_cbc_decrypt — AES-128-CBC decryption using SPARC T4 AES
! opcodes, two blocks per iteration on the main path.
! Register roles (after save; inferred from the code below):
!   %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!   %i4 = ivec ptr (read at entry, written back at exit)
! The running IV / previous ciphertext block is kept in %f12:%f14;
! round keys are loaded by _aes128_load_deckey (defined elsewhere in
! this file — presumably into %f16..%f54; confirm against that helper).
!----------------------------------------------------------------------
1739 .globl aes128_t4_cbc_decrypt
1740 .align 32
1741 aes128_t4_cbc_decrypt:
1742 save %sp, -STACK_FRAME, %sp
1743 cmp %i2, 0
1744 be,pn SIZE_T_CC, .L128_cbc_dec_abort
1745 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
1746 sub %i0, %i1, %l5 ! %i0!=%i1
1747 ld [%i4 + 0], %f12 ! load ivec
1748 ld [%i4 + 4], %f13
1749 ld [%i4 + 8], %f14
1750 ld [%i4 + 12], %f15
1751 prefetch [%i0], 20
1752 prefetch [%i0 + 63], 20
1753 call _aes128_load_deckey
1754 and %i0, 7, %l0
! Alignment setup: %l0/%l1 = input bit-shift pair, %l2 = output
! misalignment, %l3 = partial-store mask; long aligned non-overlapping
! buffers take the block-store path.
1755 andn %i0, 7, %i0
1756 sll %l0, 3, %l0
1757 mov 64, %l1
1758 mov 0xff, %l3
1759 sub %l1, %l0, %l1
1760 and %i1, 7, %l2
1761 cmp %i2, 255
1762 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
1763 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
1764 brnz,pn %l5, .L128cbc_dec_blk ! %i0==%i1)
1765 srl %l3, %l2, %l3
1766
1767 andcc %i2, 16, %g0 ! is number of blocks even?
1768 srlx %i2, 4, %i2
1769 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
1770 bz %icc, .L128_cbc_dec_loop2x
1771 prefetch [%i1], 22
! Scalar path: one 16-byte block per iteration (odd-block leader).
1772 .L128_cbc_dec_loop:
1773 ldx [%i0 + 0], %o0
1774 brz,pt %l0, 4f
1775 ldx [%i0 + 8], %o1
1776
1777 ldx [%i0 + 16], %o2
1778 sllx %o0, %l0, %o0
1779 srlx %o1, %l1, %g1
1780 sllx %o1, %l0, %o1
1781 or %g1, %o0, %o0
1782 srlx %o2, %l1, %o2
1783 or %o2, %o1, %o1
1784 4:
1785 xor %g4, %o0, %o2 ! ^= rk[0]
1786 xor %g5, %o1, %o3
1787 .word 0x81b0230a !movxtod %o2,%f0
1788 .word 0x85b0230b !movxtod %o3,%f2
1789
1790 prefetch [%i1 + 63], 22
1791 prefetch [%i0 + 16+63], 20
1792 call _aes128_decrypt_1x
1793 add %i0, 16, %i0
1794
! Chain: xor with previous ciphertext (IV), then this block's raw
! ciphertext (still in %o0/%o1) becomes the next IV.
1795 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1796 .word 0x85b38d82 !fxor %f14,%f2,%f2
1797 .word 0x99b02308 !movxtod %o0,%f12
1798 .word 0x9db02309 !movxtod %o1,%f14
1799
1800 brnz,pn %l2, 2f
1801 sub %i2, 1, %i2
1802
1803 std %f0, [%i1 + 0]
1804 std %f2, [%i1 + 8]
1805 brnz,pt %i2, .L128_cbc_dec_loop2x
1806 add %i1, 16, %i1
! Done: write the final IV back to the caller's ivec buffer.
1807 st %f12, [%i4 + 0]
1808 st %f13, [%i4 + 4]
1809 st %f14, [%i4 + 8]
1810 st %f15, [%i4 + 12]
1811 .L128_cbc_dec_abort:
1812 ret
1813 restore
1814
1815 .align 16
1816 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1817 ! and ~3x deterioration
1818 ! in inp==out case
1819 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1820 .word 0x8db00902 !faligndata %f0,%f2,%f6
1821 .word 0x91b08902 !faligndata %f2,%f2,%f8
1822
1823 stda %f4, [%i1 + %l3]0xc0 ! partial store
1824 std %f6, [%i1 + 8]
1825 add %i1, 16, %i1
1826 orn %g0, %l3, %l3
1827 stda %f8, [%i1 + %l3]0xc0 ! partial store
1828
1829 brnz,pt %i2, .L128_cbc_dec_loop2x+4
1830 orn %g0, %l3, %l3
1831 st %f12, [%i4 + 0]
1832 st %f13, [%i4 + 4]
1833 st %f14, [%i4 + 8]
1834 st %f15, [%i4 + 12]
1835 ret
1836 restore
1837
1838 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main path: two blocks per iteration.
1839 .align 32
1840 .L128_cbc_dec_loop2x:
1841 ldx [%i0 + 0], %o0
1842 ldx [%i0 + 8], %o1
1843 ldx [%i0 + 16], %o2
1844 brz,pt %l0, 4f
1845 ldx [%i0 + 24], %o3
1846
1847 ldx [%i0 + 32], %o4
1848 sllx %o0, %l0, %o0
1849 srlx %o1, %l1, %g1
1850 or %g1, %o0, %o0
1851 sllx %o1, %l0, %o1
1852 srlx %o2, %l1, %g1
1853 or %g1, %o1, %o1
1854 sllx %o2, %l0, %o2
1855 srlx %o3, %l1, %g1
1856 or %g1, %o2, %o2
1857 sllx %o3, %l0, %o3
1858 srlx %o4, %l1, %o4
1859 or %o4, %o3, %o3
1860 4:
1861 xor %g4, %o0, %o4 ! ^= rk[0]
1862 xor %g5, %o1, %o5
1863 .word 0x81b0230c !movxtod %o4,%f0
1864 .word 0x85b0230d !movxtod %o5,%f2
1865 xor %g4, %o2, %o4
1866 xor %g5, %o3, %o5
1867 .word 0x89b0230c !movxtod %o4,%f4
1868 .word 0x8db0230d !movxtod %o5,%f6
1869
1870 prefetch [%i1 + 63], 22
1871 prefetch [%i0 + 32+63], 20
1872 call _aes128_decrypt_2x
1873 add %i0, 32, %i0
1874
! Chain both blocks: block0 ^= IV, block1 ^= ciphertext0; then
! ciphertext1 (%o2/%o3) becomes the next IV in %f12:%f14.
1875 .word 0x91b02308 !movxtod %o0,%f8
1876 .word 0x95b02309 !movxtod %o1,%f10
1877 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1878 .word 0x85b38d82 !fxor %f14,%f2,%f2
1879 .word 0x99b0230a !movxtod %o2,%f12
1880 .word 0x9db0230b !movxtod %o3,%f14
1881 .word 0x89b20d84 !fxor %f8,%f4,%f4
1882 .word 0x8db28d86 !fxor %f10,%f6,%f6
1883
1884 brnz,pn %l2, 2f
1885 sub %i2, 2, %i2
1886
1887 std %f0, [%i1 + 0]
1888 std %f2, [%i1 + 8]
1889 std %f4, [%i1 + 16]
1890 std %f6, [%i1 + 24]
1891 brnz,pt %i2, .L128_cbc_dec_loop2x
1892 add %i1, 32, %i1
1893 st %f12, [%i4 + 0]
1894 st %f13, [%i4 + 4]
1895 st %f14, [%i4 + 8]
1896 st %f15, [%i4 + 12]
1897 ret
1898 restore
1899
1900 .align 16
1901 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1902 ! and ~3x deterioration
1903 ! in inp==out case
1904 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1905 .word 0x81b00902 !faligndata %f0,%f2,%f0
1906 .word 0x85b08904 !faligndata %f2,%f4,%f2
1907 .word 0x89b10906 !faligndata %f4,%f6,%f4
1908 .word 0x8db18906 !faligndata %f6,%f6,%f6
1909 stda %f8, [%i1 + %l3]0xc0 ! partial store
1910 std %f0, [%i1 + 8]
1911 std %f2, [%i1 + 16]
1912 std %f4, [%i1 + 24]
1913 add %i1, 32, %i1
1914 orn %g0, %l3, %l3
1915 stda %f6, [%i1 + %l3]0xc0 ! partial store
1916
1917 brnz,pt %i2, .L128_cbc_dec_loop2x+4
1918 orn %g0, %l3, %l3
1919 st %f12, [%i4 + 0]
1920 st %f13, [%i4 + 4]
1921 st %f14, [%i4 + 8]
1922 st %f15, [%i4 + 12]
1923 ret
1924 restore
1925
1926 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: ASI_BLK_INIT stores for long, aligned, non-overlapping
! buffers; %l5 carries the rounded-up tail block count.
1927 .align 32
1928 .L128cbc_dec_blk:
1929 add %i1, %i2, %l5
1930 and %l5, 63, %l5 ! tail
1931 sub %i2, %l5, %i2
1932 add %l5, 15, %l5 ! round up to 16n
1933 srlx %i2, 4, %i2
1934 srl %l5, 4, %l5
1935 sub %i2, 1, %i2
1936 add %l5, 1, %l5
1937
1938 .L128_cbc_dec_blk_loop2x:
1939 ldx [%i0 + 0], %o0
1940 ldx [%i0 + 8], %o1
1941 ldx [%i0 + 16], %o2
1942 brz,pt %l0, 5f
1943 ldx [%i0 + 24], %o3
1944
1945 ldx [%i0 + 32], %o4
1946 sllx %o0, %l0, %o0
1947 srlx %o1, %l1, %g1
1948 or %g1, %o0, %o0
1949 sllx %o1, %l0, %o1
1950 srlx %o2, %l1, %g1
1951 or %g1, %o1, %o1
1952 sllx %o2, %l0, %o2
1953 srlx %o3, %l1, %g1
1954 or %g1, %o2, %o2
1955 sllx %o3, %l0, %o3
1956 srlx %o4, %l1, %o4
1957 or %o4, %o3, %o3
1958 5:
1959 xor %g4, %o0, %o4 ! ^= rk[0]
1960 xor %g5, %o1, %o5
1961 .word 0x81b0230c !movxtod %o4,%f0
1962 .word 0x85b0230d !movxtod %o5,%f2
1963 xor %g4, %o2, %o4
1964 xor %g5, %o3, %o5
1965 .word 0x89b0230c !movxtod %o4,%f4
1966 .word 0x8db0230d !movxtod %o5,%f6
1967
1968 prefetch [%i0 + 32+63], 20
1969 call _aes128_decrypt_2x
1970 add %i0, 32, %i0
1971 subcc %i2, 2, %i2
1972
1973 .word 0x91b02308 !movxtod %o0,%f8
1974 .word 0x95b02309 !movxtod %o1,%f10
1975 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1976 .word 0x85b38d82 !fxor %f14,%f2,%f2
1977 .word 0x99b0230a !movxtod %o2,%f12
1978 .word 0x9db0230b !movxtod %o3,%f14
1979 .word 0x89b20d84 !fxor %f8,%f4,%f4
1980 .word 0x8db28d86 !fxor %f10,%f6,%f6
1981
1982 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1983 add %i1, 8, %i1
1984 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1985 add %i1, 8, %i1
1986 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1987 add %i1, 8, %i1
1988 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1989 bgu,pt SIZE_T_CC, .L128_cbc_dec_blk_loop2x
1990 add %i1, 8, %i1
1991
! Finish the tail with the ordinary loops; membar orders the block-init
! stores before any subsequent loads from the destination.
1992 add %l5, %i2, %i2
1993 andcc %i2, 1, %g0 ! is number of blocks even?
1994 membar #StoreLoad|#StoreStore
1995 bnz,pt %icc, .L128_cbc_dec_loop
1996 srl %i2, 0, %i2
1997 brnz,pn %i2, .L128_cbc_dec_loop2x
1998 nop
1999 st %f12, [%i4 + 0] ! write out ivec
2000 st %f13, [%i4 + 4]
2001 st %f14, [%i4 + 8]
2002 st %f15, [%i4 + 12]
2003 ret
2004 restore
2005 .type aes128_t4_cbc_decrypt,#function
2006 .size aes128_t4_cbc_decrypt,.-aes128_t4_cbc_decrypt
!----------------------------------------------------------------------
! _aes128_decrypt_1x — decrypt one 128-bit block.
! In/Out: block in %f0:%f2 (already xored with rk[0] by the caller —
!         see the "^= rk[0]" sites above). Clobbers %f4.
! Round keys: %f16..%f54 (10 rounds; final round uses the _l forms
! with %f52/%f54). Leaf routine: returns via retl, no window save.
! Opcodes are hand-encoded .words because older assemblers lack the
! T4 aes_dround mnemonics; each word's decoding is in its comment.
!----------------------------------------------------------------------
2007 .align 32
2008 _aes128_decrypt_1x:
2009 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
2010 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
2011 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
2012 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
2013 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
2014 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
2015 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
2016 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
2017 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
2018 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
2019 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
2020 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
2021 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
2022 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
2023 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
2024 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
2025 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
2026 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
! Final round (no MixColumns) executes in the delay slot of retl.
2027 .word 0x80cd44c4 !aes_dround01_l %f52,%f4,%f2,%f0
2028 retl
2029 .word 0x84cdc4e4 !aes_dround23_l %f54,%f4,%f2,%f2
2030 .type _aes128_decrypt_1x,#function
2031 .size _aes128_decrypt_1x,.-_aes128_decrypt_1x
2032
!----------------------------------------------------------------------
! _aes128_decrypt_2x — decrypt two 128-bit blocks, rounds interleaved
! to hide instruction latency.
! In/Out: block0 in %f0:%f2, block1 in %f4:%f6 (both pre-xored with
!         rk[0] by the caller). Clobbers %f8, %f10.
! Round keys: %f16..%f54 (10 rounds; final round uses the _l forms).
! Leaf routine: returns via retl; last round op sits in the delay slot.
!----------------------------------------------------------------------
2033 .align 32
2034 _aes128_decrypt_2x:
2035 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
2036 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
2037 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
2038 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
2039 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
2040 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
2041 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
2042 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
2043 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
2044 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
2045 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
2046 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
2047 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
2048 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
2049 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
2050 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
2051 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
2052 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
2053 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
2054 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
2055 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
2056 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
2057 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
2058 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
2059 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
2060 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
2061 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
2062 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
2063 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
2064 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
2065 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
2066 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
2067 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
2068 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
2069 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
2070 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
! Final round (no MixColumns); last op executes in the retl delay slot.
2071 .word 0x80cd44c8 !aes_dround01_l %f52,%f8,%f2,%f0
2072 .word 0x84cdc4e8 !aes_dround23_l %f54,%f8,%f2,%f2
2073 .word 0x88cd4cca !aes_dround01_l %f52,%f10,%f6,%f4
2074 retl
2075 .word 0x8ccdccea !aes_dround23_l %f54,%f10,%f6,%f6
2076 .type _aes128_decrypt_2x,#function
2077 .size _aes128_decrypt_2x,.-_aes128_decrypt_2x
!----------------------------------------------------------------------
! _aes192_encrypt_1x — encrypt one 128-bit block (AES-192, 12 rounds).
! In/Out: block in %f0:%f2 (pre-xored with rk[0] by the caller).
! Clobbers %f4. Round keys: %f16..%f62; final round uses the _l forms
! with %f60/%f62. Leaf routine: returns via retl.
!----------------------------------------------------------------------
2078 .align 32
2079 _aes192_encrypt_1x:
2080 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2081 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2082 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
2083 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
2084 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
2085 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2086 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
2087 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
2088 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
2089 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2090 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
2091 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
2092 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
2093 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2094 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
2095 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
2096 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
2097 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2098 .word 0x80cd4404 !aes_eround01 %f52,%f4,%f2,%f0
2099 .word 0x84cdc424 !aes_eround23 %f54,%f4,%f2,%f2
2100 .word 0x88ce4400 !aes_eround01 %f56,%f0,%f2,%f4
2101 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
! Final round (no MixColumns) executes in the delay slot of retl.
2102 .word 0x80cf4484 !aes_eround01_l %f60,%f4,%f2,%f0
2103 retl
2104 .word 0x84cfc4a4 !aes_eround23_l %f62,%f4,%f2,%f2
2105 .type _aes192_encrypt_1x,#function
2106 .size _aes192_encrypt_1x,.-_aes192_encrypt_1x
2107
!----------------------------------------------------------------------
! _aes192_encrypt_2x — encrypt two 128-bit blocks (AES-192, 12 rounds),
! rounds interleaved to hide instruction latency.
! In/Out: block0 in %f0:%f2, block1 in %f4:%f6 (both pre-xored with
!         rk[0] by the caller). Clobbers %f8, %f10.
! Round keys: %f16..%f62; final round uses the _l forms. Leaf routine.
!----------------------------------------------------------------------
2108 .align 32
2109 _aes192_encrypt_2x:
2110 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2111 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2112 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2113 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
2114 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
2115 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
2116 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
2117 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
2118 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
2119 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2120 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
2121 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
2122 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
2123 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
2124 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
2125 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
2126 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
2127 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2128 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
2129 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
2130 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
2131 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
2132 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
2133 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
2134 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
2135 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2136 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
2137 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
2138 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
2139 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
2140 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
2141 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
2142 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
2143 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2144 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
2145 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
2146 .word 0x80cd4408 !aes_eround01 %f52,%f8,%f2,%f0
2147 .word 0x84cdc428 !aes_eround23 %f54,%f8,%f2,%f2
2148 .word 0x88cd4c0a !aes_eround01 %f52,%f10,%f6,%f4
2149 .word 0x8ccdcc2a !aes_eround23 %f54,%f10,%f6,%f6
2150 .word 0x90ce4400 !aes_eround01 %f56,%f0,%f2,%f8
2151 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2152 .word 0x94ce4c04 !aes_eround01 %f56,%f4,%f6,%f10
2153 .word 0x8ccecc24 !aes_eround23 %f58,%f4,%f6,%f6
! Final round (no MixColumns); last op executes in the retl delay slot.
2154 .word 0x80cf4488 !aes_eround01_l %f60,%f8,%f2,%f0
2155 .word 0x84cfc4a8 !aes_eround23_l %f62,%f8,%f2,%f2
2156 .word 0x88cf4c8a !aes_eround01_l %f60,%f10,%f6,%f4
2157 retl
2158 .word 0x8ccfccaa !aes_eround23_l %f62,%f10,%f6,%f6
2159 .type _aes192_encrypt_2x,#function
2160 .size _aes192_encrypt_2x,.-_aes192_encrypt_2x
2161
!----------------------------------------------------------------------
! _aes256_encrypt_1x — encrypt one 128-bit block (AES-256, 14 rounds).
! In/Out: block in %f0:%f2 (pre-xored with rk[0] by the caller).
! Clobbers %f4, and temporarily %f16..%f22.
! AES-256 has more round keys than fit in the resident %f16..%f62 bank,
! so the last two round-key pairs are fetched from memory at
! %i3+208..232 into %f16..%f22, and the originals are reloaded from
! %i3+16..40 before returning (so the register bank is preserved for
! the next call). NOTE(review): %i3 is therefore assumed to hold the
! key-schedule pointer in all callers — confirm.
! Leaf routine: returns via retl; last reload is in the delay slot.
!----------------------------------------------------------------------
2162 .align 32
2163 _aes256_encrypt_1x:
2164 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2165 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
! %f16..%f22 are now free; overlay them with round keys 13/14 from memory.
2166 ldd [%i3 + 208], %f16
2167 ldd [%i3 + 216], %f18
2168 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
2169 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
2170 ldd [%i3 + 224], %f20
2171 ldd [%i3 + 232], %f22
2172 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
2173 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2174 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
2175 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
2176 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
2177 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2178 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
2179 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
2180 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
2181 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2182 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
2183 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
2184 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
2185 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2186 .word 0x80cd4404 !aes_eround01 %f52,%f4,%f2,%f0
2187 .word 0x84cdc424 !aes_eround23 %f54,%f4,%f2,%f2
2188 .word 0x88ce4400 !aes_eround01 %f56,%f0,%f2,%f4
2189 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2190 .word 0x80cf4404 !aes_eround01 %f60,%f4,%f2,%f0
2191 .word 0x84cfc424 !aes_eround23 %f62,%f4,%f2,%f2
! Rounds 13/14 use the overlaid keys in %f16..%f22 (final round = _l).
2192 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2193 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
! Restore the resident round keys 1/2 for the next invocation.
2194 ldd [%i3 + 16], %f16
2195 ldd [%i3 + 24], %f18
2196 .word 0x80cd0484 !aes_eround01_l %f20,%f4,%f2,%f0
2197 .word 0x84cd84a4 !aes_eround23_l %f22,%f4,%f2,%f2
2198 ldd [%i3 + 32], %f20
2199 retl
2200 ldd [%i3 + 40], %f22
2201 .type _aes256_encrypt_1x,#function
2202 .size _aes256_encrypt_1x,.-_aes256_encrypt_1x
2203
! _aes256_encrypt_2x
!
! Same round sequence as the 1x variant, but interleaves two
! independent 128-bit blocks to hide instruction latency: block A in
! %f0:%f2 and block B in %f4:%f6 (results in place; %f8/%f10 are
! scratch).  Round keys preloaded in %f16-%f62; the high rounds' keys
! are fetched from [%i3+208..232] mid-stream and %f16-%f22 are
! restored from [%i3+16..40] before returning.  Leaf routine (retl).
2204 .align 32
2205 _aes256_encrypt_2x:
2206 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2207 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2208 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2209 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
2210 ldd [%i3 + 208], %f16 ! pull rounds 13/14 keys from memory
2211 ldd [%i3 + 216], %f18
2212 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
2213 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
2214 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
2215 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
2216 ldd [%i3 + 224], %f20
2217 ldd [%i3 + 232], %f22
2218 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
2219 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2220 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
2221 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
2222 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
2223 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
2224 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
2225 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
2226 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
2227 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2228 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
2229 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
2230 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
2231 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
2232 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
2233 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
2234 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
2235 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2236 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
2237 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
2238 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
2239 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
2240 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
2241 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
2242 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
2243 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2244 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
2245 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
2246 .word 0x80cd4408 !aes_eround01 %f52,%f8,%f2,%f0
2247 .word 0x84cdc428 !aes_eround23 %f54,%f8,%f2,%f2
2248 .word 0x88cd4c0a !aes_eround01 %f52,%f10,%f6,%f4
2249 .word 0x8ccdcc2a !aes_eround23 %f54,%f10,%f6,%f6
2250 .word 0x90ce4400 !aes_eround01 %f56,%f0,%f2,%f8
2251 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2252 .word 0x94ce4c04 !aes_eround01 %f56,%f4,%f6,%f10
2253 .word 0x8ccecc24 !aes_eround23 %f58,%f4,%f6,%f6
2254 .word 0x80cf4408 !aes_eround01 %f60,%f8,%f2,%f0
2255 .word 0x84cfc428 !aes_eround23 %f62,%f8,%f2,%f2
2256 .word 0x88cf4c0a !aes_eround01 %f60,%f10,%f6,%f4
2257 .word 0x8ccfcc2a !aes_eround23 %f62,%f10,%f6,%f6
2258 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2259 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2260 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2261 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
2262 ldd [%i3 + 16], %f16 ! restore %f16-%f22 for the next call
2263 ldd [%i3 + 24], %f18
2264 .word 0x80cd0488 !aes_eround01_l %f20,%f8,%f2,%f0
2265 .word 0x84cd84a8 !aes_eround23_l %f22,%f8,%f2,%f2
2266 .word 0x88cd0c8a !aes_eround01_l %f20,%f10,%f6,%f4
2267 .word 0x8ccd8caa !aes_eround23_l %f22,%f10,%f6,%f6
2268 ldd [%i3 + 32], %f20
2269 retl
2270 ldd [%i3 + 40], %f22 ! (delay slot) completes the key reload
2271 .type _aes256_encrypt_2x,#function
2272 .size _aes256_encrypt_2x,.-_aes256_encrypt_2x
2273
! _aes192_loadkey
!
! Load an expanded AES key schedule from [%i3]: rk[0] goes to %g4:%g5,
! and the following 24 doublewords (offsets 16-200) populate
! %f16-%f62.  Shared by the 192- and 256-bit enc/dec paths via the
! aliases below; any round keys beyond offset 200 stay in memory and
! are fetched by the compute routines themselves.  Leaf routine;
! clobbers only %g4, %g5 and %f16-%f62.
2274 .align 32
2275 _aes192_loadkey:
2276 ldx [%i3 + 0], %g4 ! %g4:%g5 = rk[0], applied by callers via xor
2277 ldx [%i3 + 8], %g5
2278 ldd [%i3 + 16], %f16
2279 ldd [%i3 + 24], %f18
2280 ldd [%i3 + 32], %f20
2281 ldd [%i3 + 40], %f22
2282 ldd [%i3 + 48], %f24
2283 ldd [%i3 + 56], %f26
2284 ldd [%i3 + 64], %f28
2285 ldd [%i3 + 72], %f30
2286 ldd [%i3 + 80], %f32
2287 ldd [%i3 + 88], %f34
2288 ldd [%i3 + 96], %f36
2289 ldd [%i3 + 104], %f38
2290 ldd [%i3 + 112], %f40
2291 ldd [%i3 + 120], %f42
2292 ldd [%i3 + 128], %f44
2293 ldd [%i3 + 136], %f46
2294 ldd [%i3 + 144], %f48
2295 ldd [%i3 + 152], %f50
2296 ldd [%i3 + 160], %f52
2297 ldd [%i3 + 168], %f54
2298 ldd [%i3 + 176], %f56
2299 ldd [%i3 + 184], %f58
2300 ldd [%i3 + 192], %f60
2301 ldd [%i3 + 200], %f62
2302 retl
2303 nop
2304 .type _aes192_loadkey,#function
2305 .size _aes192_loadkey,.-_aes192_loadkey
! One loader serves every key size and direction:
2306 _aes256_loadkey=_aes192_loadkey
2307 _aes192_load_enckey=_aes192_loadkey
2308 _aes192_load_deckey=_aes192_loadkey
2309 _aes256_load_enckey=_aes192_loadkey
2310 _aes256_load_deckey=_aes192_loadkey
! aes256_t4_cbc_encrypt
!
! AES-256 CBC encryption.
! In:  %i0 = input, %i1 = output, %i2 = length in bytes,
!      %i4 = 16-byte ivec (rewritten with the final ciphertext block);
!      %i3 is not set here but is read by _aes256_load_enckey /
!      _aes256_encrypt_1x -- presumably the expanded key schedule,
!      TODO confirm against the C caller.
! Misaligned input is handled by shift/merge of 8-byte loads (%l0 =
! misalignment in bits, %l1 = 64-%l0); misaligned output goes through
! faligndata + partial stores using byte mask %l3.  When the output is
! 8-byte aligned, len >= 128 and in != out, the ASI_BLK_INIT bulk path
! .L256cbc_enc_blk is taken instead.
2311 .globl aes256_t4_cbc_encrypt
2312 .align 32
2313 aes256_t4_cbc_encrypt:
2314 save %sp, -STACK_FRAME, %sp
2315 cmp %i2, 0
2316 be,pn SIZE_T_CC, .L256_cbc_enc_abort
2317 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2318 sub %i0, %i1, %l5 ! %i0!=%i1
2319 ld [%i4 + 0], %f0 ! %f0:%f2 = ivec
2320 ld [%i4 + 4], %f1
2321 ld [%i4 + 8], %f2
2322 ld [%i4 + 12], %f3
2323 prefetch [%i0], 20
2324 prefetch [%i0 + 63], 20
2325 call _aes256_load_enckey
2326 and %i0, 7, %l0 ! (delay slot) input misalignment in bytes
2327 andn %i0, 7, %i0
2328 sll %l0, 3, %l0 ! ... and in bits
2329 mov 64, %l1
2330 mov 0xff, %l3
2331 sub %l1, %l0, %l1
2332 and %i1, 7, %l2 ! output misalignment
2333 cmp %i2, 127
2334 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2335 movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
2336 brnz,pn %l5, .L256cbc_enc_blk ! %i0==%i1)
2337 srl %l3, %l2, %l3 ! partial-store mask for leading bytes
2338
2339 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2340 srlx %i2, 4, %i2 ! byte length -> 16-byte block count
2341 prefetch [%i1], 22
2342
! Scalar path: one block per iteration.
2343 .L256_cbc_enc_loop:
2344 ldx [%i0 + 0], %o0
2345 brz,pt %l0, 4f
2346 ldx [%i0 + 8], %o1
2347
! Unaligned input: merge three 8-byte loads into two shifted words.
2348 ldx [%i0 + 16], %o2
2349 sllx %o0, %l0, %o0
2350 srlx %o1, %l1, %g1
2351 sllx %o1, %l0, %o1
2352 or %g1, %o0, %o0
2353 srlx %o2, %l1, %o2
2354 or %o2, %o1, %o1
2355 4:
2356 xor %g4, %o0, %o0 ! ^= rk[0]
2357 xor %g5, %o1, %o1
2358 .word 0x99b02308 !movxtod %o0,%f12
2359 .word 0x9db02309 !movxtod %o1,%f14
2360
2361 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2362 .word 0x85b38d82 !fxor %f14,%f2,%f2
2363 prefetch [%i1 + 63], 22
2364 prefetch [%i0 + 16+63], 20
2365 call _aes256_encrypt_1x
2366 add %i0, 16, %i0 ! (delay slot) advance input
2367
2368 brnz,pn %l2, 2f ! misaligned output?
2369 sub %i2, 1, %i2
2370
2371 std %f0, [%i1 + 0]
2372 std %f2, [%i1 + 8]
2373 brnz,pt %i2, .L256_cbc_enc_loop
2374 add %i1, 16, %i1
! Done: last ciphertext block becomes the new ivec.
2375 st %f0, [%i4 + 0]
2376 st %f1, [%i4 + 4]
2377 st %f2, [%i4 + 8]
2378 st %f3, [%i4 + 12]
2379 .L256_cbc_enc_abort:
2380 ret
2381 restore
2382
! Misaligned-output store path for the scalar loop.
2383 .align 16
2384 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2385 ! and ~3x deterioration
2386 ! in inp==out case
2387 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2388 .word 0x8db00902 !faligndata %f0,%f2,%f6
2389 .word 0x91b08902 !faligndata %f2,%f2,%f8
2390
2391 stda %f4, [%i1 + %l3]0xc0 ! partial store
2392 std %f6, [%i1 + 8]
2393 add %i1, 16, %i1
2394 orn %g0, %l3, %l3 ! invert mask for trailing bytes
2395 stda %f8, [%i1 + %l3]0xc0 ! partial store
2396
2397 brnz,pt %i2, .L256_cbc_enc_loop+4 ! +4 skips the first ldx (ldxa above did it)
2398 orn %g0, %l3, %l3 ! restore leading-byte mask
2399 st %f0, [%i4 + 0]
2400 st %f1, [%i4 + 4]
2401 st %f2, [%i4 + 8]
2402 st %f3, [%i4 + 12]
2403 ret
2404 restore
2405
2406 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: output 8-byte aligned, in != out, len >= 128.
2407 .align 32
2408 .L256cbc_enc_blk:
2409 add %i1, %i2, %l5
2410 and %l5, 63, %l5 ! tail
2411 sub %i2, %l5, %i2
2412 add %l5, 15, %l5 ! round up to 16n
2413 srlx %i2, 4, %i2
2414 srl %l5, 4, %l5 ! %l5 = tail block count for the scalar loop
2415
2416 .L256_cbc_enc_blk_loop:
2417 ldx [%i0 + 0], %o0
2418 brz,pt %l0, 5f
2419 ldx [%i0 + 8], %o1
2420
2421 ldx [%i0 + 16], %o2
2422 sllx %o0, %l0, %o0
2423 srlx %o1, %l1, %g1
2424 sllx %o1, %l0, %o1
2425 or %g1, %o0, %o0
2426 srlx %o2, %l1, %o2
2427 or %o2, %o1, %o1
2428 5:
2429 xor %g4, %o0, %o0 ! ^= rk[0]
2430 xor %g5, %o1, %o1
2431 .word 0x99b02308 !movxtod %o0,%f12
2432 .word 0x9db02309 !movxtod %o1,%f14
2433
2434 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2435 .word 0x85b38d82 !fxor %f14,%f2,%f2
2436 prefetch [%i0 + 16+63], 20
2437 call _aes256_encrypt_1x
2438 add %i0, 16, %i0
2439 sub %i2, 1, %i2
2440
2441 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2442 add %i1, 8, %i1
2443 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2444 brnz,pt %i2, .L256_cbc_enc_blk_loop
2445 add %i1, 8, %i1
2446
! Fence after the ASI_BLK_INIT stores, then hand any <64-byte tail
! back to the scalar loop (%l5 = remaining blocks).
2447 membar #StoreLoad|#StoreStore
2448 brnz,pt %l5, .L256_cbc_enc_loop
2449 mov %l5, %i2
2450 st %f0, [%i4 + 0] ! write back final ivec
2451 st %f1, [%i4 + 4]
2452 st %f2, [%i4 + 8]
2453 st %f3, [%i4 + 12]
2454 ret
2455 restore
2456 .type aes256_t4_cbc_encrypt,#function
2457 .size aes256_t4_cbc_encrypt,.-aes256_t4_cbc_encrypt
! aes192_t4_cbc_encrypt
!
! AES-192 CBC encryption; structurally identical to the 256-bit
! variant, differing only in the load/encrypt helpers invoked.
! In:  %i0 = input, %i1 = output, %i2 = length in bytes,
!      %i4 = 16-byte ivec (rewritten with the final ciphertext block);
!      %i3 is not set here but is read by _aes192_load_enckey --
!      presumably the expanded key schedule, TODO confirm.
! Path selection: scalar loop (with faligndata/partial-store epilogue
! for misaligned output), or the ASI_BLK_INIT bulk path
! .L192cbc_enc_blk when out is 8-byte aligned, len >= 128, in != out.
2458 .globl aes192_t4_cbc_encrypt
2459 .align 32
2460 aes192_t4_cbc_encrypt:
2461 save %sp, -STACK_FRAME, %sp
2462 cmp %i2, 0
2463 be,pn SIZE_T_CC, .L192_cbc_enc_abort
2464 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2465 sub %i0, %i1, %l5 ! %i0!=%i1
2466 ld [%i4 + 0], %f0 ! %f0:%f2 = ivec
2467 ld [%i4 + 4], %f1
2468 ld [%i4 + 8], %f2
2469 ld [%i4 + 12], %f3
2470 prefetch [%i0], 20
2471 prefetch [%i0 + 63], 20
2472 call _aes192_load_enckey
2473 and %i0, 7, %l0 ! (delay slot) input misalignment in bytes
2474 andn %i0, 7, %i0
2475 sll %l0, 3, %l0 ! ... and in bits
2476 mov 64, %l1
2477 mov 0xff, %l3
2478 sub %l1, %l0, %l1
2479 and %i1, 7, %l2 ! output misalignment
2480 cmp %i2, 127
2481 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2482 movleu SIZE_T_CC, 0, %l5 ! %i2<128 ||
2483 brnz,pn %l5, .L192cbc_enc_blk ! %i0==%i1)
2484 srl %l3, %l2, %l3 ! partial-store mask for leading bytes
2485
2486 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2487 srlx %i2, 4, %i2 ! byte length -> 16-byte block count
2488 prefetch [%i1], 22
2489
! Scalar path: one block per iteration.
2490 .L192_cbc_enc_loop:
2491 ldx [%i0 + 0], %o0
2492 brz,pt %l0, 4f
2493 ldx [%i0 + 8], %o1
2494
! Unaligned input: merge three 8-byte loads into two shifted words.
2495 ldx [%i0 + 16], %o2
2496 sllx %o0, %l0, %o0
2497 srlx %o1, %l1, %g1
2498 sllx %o1, %l0, %o1
2499 or %g1, %o0, %o0
2500 srlx %o2, %l1, %o2
2501 or %o2, %o1, %o1
2502 4:
2503 xor %g4, %o0, %o0 ! ^= rk[0]
2504 xor %g5, %o1, %o1
2505 .word 0x99b02308 !movxtod %o0,%f12
2506 .word 0x9db02309 !movxtod %o1,%f14
2507
2508 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2509 .word 0x85b38d82 !fxor %f14,%f2,%f2
2510 prefetch [%i1 + 63], 22
2511 prefetch [%i0 + 16+63], 20
2512 call _aes192_encrypt_1x
2513 add %i0, 16, %i0 ! (delay slot) advance input
2514
2515 brnz,pn %l2, 2f ! misaligned output?
2516 sub %i2, 1, %i2
2517
2518 std %f0, [%i1 + 0]
2519 std %f2, [%i1 + 8]
2520 brnz,pt %i2, .L192_cbc_enc_loop
2521 add %i1, 16, %i1
! Done: last ciphertext block becomes the new ivec.
2522 st %f0, [%i4 + 0]
2523 st %f1, [%i4 + 4]
2524 st %f2, [%i4 + 8]
2525 st %f3, [%i4 + 12]
2526 .L192_cbc_enc_abort:
2527 ret
2528 restore
2529
! Misaligned-output store path for the scalar loop.
2530 .align 16
2531 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2532 ! and ~3x deterioration
2533 ! in inp==out case
2534 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2535 .word 0x8db00902 !faligndata %f0,%f2,%f6
2536 .word 0x91b08902 !faligndata %f2,%f2,%f8
2537
2538 stda %f4, [%i1 + %l3]0xc0 ! partial store
2539 std %f6, [%i1 + 8]
2540 add %i1, 16, %i1
2541 orn %g0, %l3, %l3 ! invert mask for trailing bytes
2542 stda %f8, [%i1 + %l3]0xc0 ! partial store
2543
2544 brnz,pt %i2, .L192_cbc_enc_loop+4 ! +4 skips the first ldx (ldxa above did it)
2545 orn %g0, %l3, %l3 ! restore leading-byte mask
2546 st %f0, [%i4 + 0]
2547 st %f1, [%i4 + 4]
2548 st %f2, [%i4 + 8]
2549 st %f3, [%i4 + 12]
2550 ret
2551 restore
2552
2553 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: output 8-byte aligned, in != out, len >= 128.
2554 .align 32
2555 .L192cbc_enc_blk:
2556 add %i1, %i2, %l5
2557 and %l5, 63, %l5 ! tail
2558 sub %i2, %l5, %i2
2559 add %l5, 15, %l5 ! round up to 16n
2560 srlx %i2, 4, %i2
2561 srl %l5, 4, %l5 ! %l5 = tail block count for the scalar loop
2562
2563 .L192_cbc_enc_blk_loop:
2564 ldx [%i0 + 0], %o0
2565 brz,pt %l0, 5f
2566 ldx [%i0 + 8], %o1
2567
2568 ldx [%i0 + 16], %o2
2569 sllx %o0, %l0, %o0
2570 srlx %o1, %l1, %g1
2571 sllx %o1, %l0, %o1
2572 or %g1, %o0, %o0
2573 srlx %o2, %l1, %o2
2574 or %o2, %o1, %o1
2575 5:
2576 xor %g4, %o0, %o0 ! ^= rk[0]
2577 xor %g5, %o1, %o1
2578 .word 0x99b02308 !movxtod %o0,%f12
2579 .word 0x9db02309 !movxtod %o1,%f14
2580
2581 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2582 .word 0x85b38d82 !fxor %f14,%f2,%f2
2583 prefetch [%i0 + 16+63], 20
2584 call _aes192_encrypt_1x
2585 add %i0, 16, %i0
2586 sub %i2, 1, %i2
2587
2588 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2589 add %i1, 8, %i1
2590 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2591 brnz,pt %i2, .L192_cbc_enc_blk_loop
2592 add %i1, 8, %i1
2593
! Fence after the ASI_BLK_INIT stores, then hand any <64-byte tail
! back to the scalar loop (%l5 = remaining blocks).
2594 membar #StoreLoad|#StoreStore
2595 brnz,pt %l5, .L192_cbc_enc_loop
2596 mov %l5, %i2
2597 st %f0, [%i4 + 0] ! write back final ivec
2598 st %f1, [%i4 + 4]
2599 st %f2, [%i4 + 8]
2600 st %f3, [%i4 + 12]
2601 ret
2602 restore
2603 .type aes192_t4_cbc_encrypt,#function
2604 .size aes192_t4_cbc_encrypt,.-aes192_t4_cbc_encrypt
! aes256_t4_ctr32_encrypt
!
! AES-256 CTR mode with a 32-bit counter.
! In:  %i0 = input, %i1 = output, %i2 = block count (converted to
!      bytes via sllx 4 below), %i4 = 16-byte counter block;
!      %i3 is read by _aes256_load_enckey -- presumably the expanded
!      key schedule, TODO confirm.
! The upper 96 bits of the counter block are xor-ed with rk[0] once up
! front (%g4/%g5, high half staged in %f14); only the low word %l7 is
! incremented per block ("srl %l7,0" keeps it a clean 32-bit value).
! The first AES round of each counter block is issued inline here and
! the helper is entered past its own first round (call _1x+8 / _2x+16).
! Paths: 1x loop, 2x interleaved loop, unaligned-output partial-store
! epilogues, and an ASI_BLK_INIT bulk path for aligned long buffers.
2605 .globl aes256_t4_ctr32_encrypt
2606 .align 32
2607 aes256_t4_ctr32_encrypt:
2608 save %sp, -STACK_FRAME, %sp
2609 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2610
2611 prefetch [%i0], 20
2612 prefetch [%i0 + 63], 20
2613 call _aes256_load_enckey
2614 sllx %i2, 4, %i2 ! (delay slot) blocks -> bytes
2615
2616 ld [%i4 + 0], %l4 ! counter
2617 ld [%i4 + 4], %l5
2618 ld [%i4 + 8], %l6
2619 ld [%i4 + 12], %l7 ! %l7 = 32-bit running counter word
2620
2621 sllx %l4, 32, %o5
2622 or %l5, %o5, %o5
2623 sllx %l6, 32, %g1
2624 xor %o5, %g4, %g4 ! ^= rk[0]
2625 xor %g1, %g5, %g5 ! %g5 = rk[0]-masked third word, low word free
2626 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
2627
2628 sub %i0, %i1, %l5 ! %i0!=%i1
2629 and %i0, 7, %l0 ! input misalignment in bytes
2630 andn %i0, 7, %i0
2631 sll %l0, 3, %l0 ! ... and in bits
2632 mov 64, %l1
2633 mov 0xff, %l3
2634 sub %l1, %l0, %l1
2635 and %i1, 7, %l2 ! output misalignment
2636 cmp %i2, 255
2637 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2638 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
2639 brnz,pn %l5, .L256_ctr32_blk ! %i0==%i1)
2640 srl %l3, %l2, %l3 ! partial-store mask
2641
2642 andcc %i2, 16, %g0 ! is number of blocks even?
2643 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2644 bz %icc, .L256_ctr32_loop2x
2645 srlx %i2, 4, %i2 ! bytes -> block count
! Single-block loop (also consumes the odd leading block).
2646 .L256_ctr32_loop:
2647 ldx [%i0 + 0], %o0
2648 brz,pt %l0, 4f
2649 ldx [%i0 + 8], %o1
2650
! Unaligned input: merge three 8-byte loads into two shifted words.
2651 ldx [%i0 + 16], %o2
2652 sllx %o0, %l0, %o0
2653 srlx %o1, %l1, %g1
2654 sllx %o1, %l0, %o1
2655 or %g1, %o0, %o0
2656 srlx %o2, %l1, %o2
2657 or %o2, %o1, %o1
2658 4:
2659 xor %g5, %l7, %g1 ! ^= rk[0]
2660 add %l7, 1, %l7 ! bump 32-bit counter
2661 .word 0x85b02301 !movxtod %g1,%f2
2662 srl %l7, 0, %l7 ! clruw
2663 prefetch [%i1 + 63], 22
2664 prefetch [%i0 + 16+63], 20
2665 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4 ! first round inline
2666 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2667 call _aes256_encrypt_1x+8 ! +8 skips helper's first round pair
2668 add %i0, 16, %i0
2669
2670 .word 0x95b02308 !movxtod %o0,%f10
2671 .word 0x99b02309 !movxtod %o1,%f12
2672 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
2673 .word 0x85b30d82 !fxor %f12,%f2,%f2
2674
2675 brnz,pn %l2, 2f ! misaligned output?
2676 sub %i2, 1, %i2
2677
2678 std %f0, [%i1 + 0]
2679 std %f2, [%i1 + 8]
2680 brnz,pt %i2, .L256_ctr32_loop2x ! remainder is even, go 2x
2681 add %i1, 16, %i1
2682
2683 ret
2684 restore
2685
! Misaligned-output epilogue for the 1x loop.
2686 .align 16
2687 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2688 ! and ~3x deterioration
2689 ! in inp==out case
2690 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2691 .word 0x8db00902 !faligndata %f0,%f2,%f6
2692 .word 0x91b08902 !faligndata %f2,%f2,%f8
2693 stda %f4, [%i1 + %l3]0xc0 ! partial store
2694 std %f6, [%i1 + 8]
2695 add %i1, 16, %i1
2696 orn %g0, %l3, %l3 ! invert mask for trailing bytes
2697 stda %f8, [%i1 + %l3]0xc0 ! partial store
2698
2699 brnz,pt %i2, .L256_ctr32_loop2x+4
2700 orn %g0, %l3, %l3 ! restore leading-byte mask
2701
2702 ret
2703 restore
2704
2705 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration loop.
2706 .align 32
2707 .L256_ctr32_loop2x:
2708 ldx [%i0 + 0], %o0
2709 ldx [%i0 + 8], %o1
2710 ldx [%i0 + 16], %o2
2711 brz,pt %l0, 4f
2712 ldx [%i0 + 24], %o3
2713
! Unaligned input: merge five 8-byte loads into four shifted words.
2714 ldx [%i0 + 32], %o4
2715 sllx %o0, %l0, %o0
2716 srlx %o1, %l1, %g1
2717 or %g1, %o0, %o0
2718 sllx %o1, %l0, %o1
2719 srlx %o2, %l1, %g1
2720 or %g1, %o1, %o1
2721 sllx %o2, %l0, %o2
2722 srlx %o3, %l1, %g1
2723 or %g1, %o2, %o2
2724 sllx %o3, %l0, %o3
2725 srlx %o4, %l1, %o4
2726 or %o4, %o3, %o3
2727 4:
2728 xor %g5, %l7, %g1 ! ^= rk[0]
2729 add %l7, 1, %l7 ! counter for block A
2730 .word 0x85b02301 !movxtod %g1,%f2
2731 srl %l7, 0, %l7 ! clruw
2732 xor %g5, %l7, %g1
2733 add %l7, 1, %l7 ! counter for block B
2734 .word 0x8db02301 !movxtod %g1,%f6
2735 srl %l7, 0, %l7 ! clruw
2736 prefetch [%i1 + 63], 22
2737 prefetch [%i0 + 32+63], 20
2738 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8 ! first round inline
2739 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2740 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
2741 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
2742 call _aes256_encrypt_2x+16 ! +16 skips helper's first round pairs
2743 add %i0, 32, %i0
2744
2745 .word 0x91b02308 !movxtod %o0,%f8
2746 .word 0x95b02309 !movxtod %o1,%f10
2747 .word 0x99b0230a !movxtod %o2,%f12
2748 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
2749 .word 0x91b0230b !movxtod %o3,%f8
2750 .word 0x85b28d82 !fxor %f10,%f2,%f2
2751 .word 0x89b30d84 !fxor %f12,%f4,%f4
2752 .word 0x8db20d86 !fxor %f8,%f6,%f6
2753
2754 brnz,pn %l2, 2f ! misaligned output?
2755 sub %i2, 2, %i2
2756
2757 std %f0, [%i1 + 0]
2758 std %f2, [%i1 + 8]
2759 std %f4, [%i1 + 16]
2760 std %f6, [%i1 + 24]
2761 brnz,pt %i2, .L256_ctr32_loop2x
2762 add %i1, 32, %i1
2763
2764 ret
2765 restore
2766
! Misaligned-output epilogue for the 2x loop.
2767 .align 16
2768 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2769 ! and ~3x deterioration
2770 ! in inp==out case
2771 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
2772 .word 0x81b00902 !faligndata %f0,%f2,%f0
2773 .word 0x85b08904 !faligndata %f2,%f4,%f2
2774 .word 0x89b10906 !faligndata %f4,%f6,%f4
2775 .word 0x8db18906 !faligndata %f6,%f6,%f6
2776
2777 stda %f8, [%i1 + %l3]0xc0 ! partial store
2778 std %f0, [%i1 + 8]
2779 std %f2, [%i1 + 16]
2780 std %f4, [%i1 + 24]
2781 add %i1, 32, %i1
2782 orn %g0, %l3, %l3 ! invert mask for trailing bytes
2783 stda %f6, [%i1 + %l3]0xc0 ! partial store
2784
2785 brnz,pt %i2, .L256_ctr32_loop2x+4
2786 orn %g0, %l3, %l3 ! restore leading-byte mask
2787
2788 ret
2789 restore
2790
2791 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: ASI_BLK_INIT stores, two blocks per iteration.
2792 .align 32
2793 .L256_ctr32_blk:
2794 add %i1, %i2, %l5
2795 and %l5, 63, %l5 ! tail
2796 sub %i2, %l5, %i2
2797 add %l5, 15, %l5 ! round up to 16n
2798 srlx %i2, 4, %i2
2799 srl %l5, 4, %l5
2800 sub %i2, 1, %i2 ! bias counts for the bgu loop test
2801 add %l5, 1, %l5
2802
2803 .L256_ctr32_blk_loop2x:
2804 ldx [%i0 + 0], %o0
2805 ldx [%i0 + 8], %o1
2806 ldx [%i0 + 16], %o2
2807 brz,pt %l0, 5f
2808 ldx [%i0 + 24], %o3
2809
2810 ldx [%i0 + 32], %o4
2811 sllx %o0, %l0, %o0
2812 srlx %o1, %l1, %g1
2813 or %g1, %o0, %o0
2814 sllx %o1, %l0, %o1
2815 srlx %o2, %l1, %g1
2816 or %g1, %o1, %o1
2817 sllx %o2, %l0, %o2
2818 srlx %o3, %l1, %g1
2819 or %g1, %o2, %o2
2820 sllx %o3, %l0, %o3
2821 srlx %o4, %l1, %o4
2822 or %o4, %o3, %o3
2823 5:
2824 xor %g5, %l7, %g1 ! ^= rk[0]
2825 add %l7, 1, %l7 ! counter for block A
2826 .word 0x85b02301 !movxtod %g1,%f2
2827 srl %l7, 0, %l7 ! clruw
2828 xor %g5, %l7, %g1
2829 add %l7, 1, %l7 ! counter for block B
2830 .word 0x8db02301 !movxtod %g1,%f6
2831 srl %l7, 0, %l7 ! clruw
2832 prefetch [%i0 + 32+63], 20
2833 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8 ! first round inline
2834 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2835 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
2836 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
2837 call _aes256_encrypt_2x+16
2838 add %i0, 32, %i0
2839 subcc %i2, 2, %i2
2840
2841 .word 0x91b02308 !movxtod %o0,%f8
2842 .word 0x95b02309 !movxtod %o1,%f10
2843 .word 0x99b0230a !movxtod %o2,%f12
2844 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
2845 .word 0x91b0230b !movxtod %o3,%f8
2846 .word 0x85b28d82 !fxor %f10,%f2,%f2
2847 .word 0x89b30d84 !fxor %f12,%f4,%f4
2848 .word 0x8db20d86 !fxor %f8,%f6,%f6
2849
2850 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2851 add %i1, 8, %i1
2852 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2853 add %i1, 8, %i1
2854 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2855 add %i1, 8, %i1
2856 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2857 bgu,pt SIZE_T_CC, .L256_ctr32_blk_loop2x
2858 add %i1, 8, %i1
2859
! Tail: fence the block-init stores, then finish with the scalar
! loops -- odd remainder starts at the 1x loop, even at the 2x loop.
2860 add %l5, %i2, %i2
2861 andcc %i2, 1, %g0 ! is number of blocks even?
2862 membar #StoreLoad|#StoreStore
2863 bnz,pt %icc, .L256_ctr32_loop
2864 srl %i2, 0, %i2
2865 brnz,pn %i2, .L256_ctr32_loop2x
2866 nop
2867
2868 ret
2869 restore
2870 .type aes256_t4_ctr32_encrypt,#function
2871 .size aes256_t4_ctr32_encrypt,.-aes256_t4_ctr32_encrypt
! aes256_t4_xts_encrypt
!
! AES-256 XTS-mode encryption (the 0x87 feedback constant and the
! ciphertext-stealing tail match the IEEE P1619 construction).
! In:  %i0 = input, %i1 = output, %i2 = length in bytes (need not be
!      a multiple of 16), %i4 = tweak-key schedule (passed to
!      aes_t4_encrypt below), %i5 = pointer to the 16-byte IV/sector
!      number on entry; %i3 is read by _aes256_load_enckey --
!      presumably the data-key schedule, TODO confirm.
! The IV is first encrypted with the tweak key into a 16-byte stack
! slot, then kept little-endian in %g3:%g2 and advanced per block by
! the srax/addcc/addxc/xor-0x87 sequence (GF(2^128) multiply by x);
! bshuffle applies the byte-swap mask set up via bmask.  After the
! length split, %i5 holds len&15: a non-zero tail ends in the
! ciphertext-stealing code at .L256_xts_ensteal.
! Paths: 1x loop, 2x loop, partial-store epilogues for misaligned
! output, and an ASI_BLK_INIT bulk path for aligned long buffers.
2872 .globl aes256_t4_xts_encrypt
2873 .align 32
2874 aes256_t4_xts_encrypt:
2875 save %sp, -STACK_FRAME-16, %sp ! extra 16 bytes: tweak scratch slot
2876 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2877
! tweak = AES(tweak_key, IV): encrypt [%i5] into the stack slot.
2878 mov %i5, %o0
2879 add %fp, STACK_BIAS-16, %o1
2880 call aes_t4_encrypt
2881 mov %i4, %o2
2882
2883 add %fp, STACK_BIAS-16, %l7
2884 ldxa [%l7]0x88, %g2 ! little-endian load
2885 add %fp, STACK_BIAS-8, %l7
2886 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
2887
2888 sethi %hi(0x76543210), %l7
2889 or %l7, %lo(0x76543210), %l7
2890 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
2891
2892 prefetch [%i0], 20
2893 prefetch [%i0 + 63], 20
2894 call _aes256_load_enckey
2895 and %i2, 15, %i5 ! (delay slot) %i5 = stealing tail length
2896 and %i2, -16, %i2
2897
2898 sub %i0, %i1, %l5 ! %i0!=%i1
2899 and %i0, 7, %l0 ! input misalignment in bytes
2900 andn %i0, 7, %i0
2901 sll %l0, 3, %l0 ! ... and in bits
2902 mov 64, %l1
2903 mov 0xff, %l3
2904 sub %l1, %l0, %l1
2905 and %i1, 7, %l2 ! output misalignment
2906 cmp %i2, 255
2907 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2908 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
2909 brnz,pn %l5, .L256_xts_enblk ! %i0==%i1)
2910 srl %l3, %l2, %l3 ! partial-store mask
2911
2912 andcc %i2, 16, %g0 ! is number of blocks even?
2913 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2914 bz %icc, .L256_xts_enloop2x
2915 srlx %i2, 4, %i2 ! bytes -> block count
! Single-block loop (also consumes the odd leading block).
2916 .L256_xts_enloop:
2917 ldx [%i0 + 0], %o0
2918 brz,pt %l0, 4f
2919 ldx [%i0 + 8], %o1
2920
! Unaligned input: merge three 8-byte loads into two shifted words.
2921 ldx [%i0 + 16], %o2
2922 sllx %o0, %l0, %o0
2923 srlx %o1, %l1, %g1
2924 sllx %o1, %l0, %o1
2925 or %g1, %o0, %o0
2926 srlx %o2, %l1, %o2
2927 or %o2, %o1, %o1
2928 4:
2929 .word 0x99b02302 !movxtod %g2,%f12
2930 .word 0x9db02303 !movxtod %g3,%f14
2931 .word 0x99b3098c !bshuffle %f12,%f12,%f12 ! byte-swap tweak
2932 .word 0x9db3898e !bshuffle %f14,%f14,%f14
2933
2934 xor %g4, %o0, %o0 ! ^= rk[0]
2935 xor %g5, %o1, %o1
2936 .word 0x81b02308 !movxtod %o0,%f0
2937 .word 0x85b02309 !movxtod %o1,%f2
2938
2939 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
2940 .word 0x85b38d82 !fxor %f14,%f2,%f2
2941
2942 prefetch [%i1 + 63], 22
2943 prefetch [%i0 + 16+63], 20
2944 call _aes256_encrypt_1x
2945 add %i0, 16, %i0
2946
2947 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
2948 .word 0x85b38d82 !fxor %f14,%f2,%f2
2949
! Advance tweak: multiply by x in GF(2^128), feedback poly 0x87.
2950 srax %g3, 63, %l7 ! next tweak value
2951 addcc %g2, %g2, %g2
2952 and %l7, 0x87, %l7
2953 .word 0x87b0c223 !addxc %g3,%g3,%g3
2954 xor %l7, %g2, %g2
2955
2956 brnz,pn %l2, 2f ! misaligned output?
2957 sub %i2, 1, %i2
2958
2959 std %f0, [%i1 + 0]
2960 std %f2, [%i1 + 8]
2961 brnz,pt %i2, .L256_xts_enloop2x ! remainder is even, go 2x
2962 add %i1, 16, %i1
2963
2964 brnz,pn %i5, .L256_xts_ensteal ! partial final block?
2965 nop
2966
2967 ret
2968 restore
2969
! Misaligned-output epilogue for the 1x loop.
2970 .align 16
2971 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2972 ! and ~3x deterioration
2973 ! in inp==out case
2974 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2975 .word 0x8db00902 !faligndata %f0,%f2,%f6
2976 .word 0x91b08902 !faligndata %f2,%f2,%f8
2977 stda %f4, [%i1 + %l3]0xc0 ! partial store
2978 std %f6, [%i1 + 8]
2979 add %i1, 16, %i1
2980 orn %g0, %l3, %l3 ! invert mask for trailing bytes
2981 stda %f8, [%i1 + %l3]0xc0 ! partial store
2982
2983 brnz,pt %i2, .L256_xts_enloop2x+4
2984 orn %g0, %l3, %l3 ! restore leading-byte mask
2985
2986 brnz,pn %i5, .L256_xts_ensteal
2987 nop
2988
2989 ret
2990 restore
2991
2992 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration loop (tweaks %f12:%f14 and %f8:%f10).
2993 .align 32
2994 .L256_xts_enloop2x:
2995 ldx [%i0 + 0], %o0
2996 ldx [%i0 + 8], %o1
2997 ldx [%i0 + 16], %o2
2998 brz,pt %l0, 4f
2999 ldx [%i0 + 24], %o3
3000
! Unaligned input: merge five 8-byte loads into four shifted words.
3001 ldx [%i0 + 32], %o4
3002 sllx %o0, %l0, %o0
3003 srlx %o1, %l1, %g1
3004 or %g1, %o0, %o0
3005 sllx %o1, %l0, %o1
3006 srlx %o2, %l1, %g1
3007 or %g1, %o1, %o1
3008 sllx %o2, %l0, %o2
3009 srlx %o3, %l1, %g1
3010 or %g1, %o2, %o2
3011 sllx %o3, %l0, %o3
3012 srlx %o4, %l1, %o4
3013 or %o4, %o3, %o3
3014 4:
3015 .word 0x99b02302 !movxtod %g2,%f12 ! tweak for block A
3016 .word 0x9db02303 !movxtod %g3,%f14
3017 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3018 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3019
3020 srax %g3, 63, %l7 ! next tweak value
3021 addcc %g2, %g2, %g2
3022 and %l7, 0x87, %l7
3023 .word 0x87b0c223 !addxc %g3,%g3,%g3
3024 xor %l7, %g2, %g2
3025
3026 .word 0x91b02302 !movxtod %g2,%f8 ! tweak for block B
3027 .word 0x95b02303 !movxtod %g3,%f10
3028 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3029 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3030
3031 xor %g4, %o0, %o0 ! ^= rk[0]
3032 xor %g5, %o1, %o1
3033 xor %g4, %o2, %o2 ! ^= rk[0]
3034 xor %g5, %o3, %o3
3035 .word 0x81b02308 !movxtod %o0,%f0
3036 .word 0x85b02309 !movxtod %o1,%f2
3037 .word 0x89b0230a !movxtod %o2,%f4
3038 .word 0x8db0230b !movxtod %o3,%f6
3039
3040 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3041 .word 0x85b38d82 !fxor %f14,%f2,%f2
3042 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3043 .word 0x8db28d86 !fxor %f10,%f6,%f6
3044
3045 prefetch [%i1 + 63], 22
3046 prefetch [%i0 + 32+63], 20
3047 call _aes256_encrypt_2x
3048 add %i0, 32, %i0
3049
3050 .word 0x91b02302 !movxtod %g2,%f8 ! re-materialize block B tweak
3051 .word 0x95b02303 !movxtod %g3,%f10
3052
3053 srax %g3, 63, %l7 ! next tweak value
3054 addcc %g2, %g2, %g2
3055 and %l7, 0x87, %l7
3056 .word 0x87b0c223 !addxc %g3,%g3,%g3
3057 xor %l7, %g2, %g2
3058
3059 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3060 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3061
3062 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3063 .word 0x85b38d82 !fxor %f14,%f2,%f2
3064 .word 0x89b20d84 !fxor %f8,%f4,%f4
3065 .word 0x8db28d86 !fxor %f10,%f6,%f6
3066
3067 brnz,pn %l2, 2f ! misaligned output?
3068 sub %i2, 2, %i2
3069
3070 std %f0, [%i1 + 0]
3071 std %f2, [%i1 + 8]
3072 std %f4, [%i1 + 16]
3073 std %f6, [%i1 + 24]
3074 brnz,pt %i2, .L256_xts_enloop2x
3075 add %i1, 32, %i1
3076
! Keep the last ciphertext block in %f0:%f2 for possible stealing.
3077 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3078 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3079 brnz,pn %i5, .L256_xts_ensteal
3080 nop
3081
3082 ret
3083 restore
3084
! Misaligned-output epilogue for the 2x loop.
3085 .align 16
3086 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3087 ! and ~3x deterioration
3088 ! in inp==out case
3089 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3090 .word 0x95b00902 !faligndata %f0,%f2,%f10
3091 .word 0x99b08904 !faligndata %f2,%f4,%f12
3092 .word 0x9db10906 !faligndata %f4,%f6,%f14
3093 .word 0x81b18906 !faligndata %f6,%f6,%f0
3094
3095 stda %f8, [%i1 + %l3]0xc0 ! partial store
3096 std %f10, [%i1 + 8]
3097 std %f12, [%i1 + 16]
3098 std %f14, [%i1 + 24]
3099 add %i1, 32, %i1
3100 orn %g0, %l3, %l3 ! invert mask for trailing bytes
3101 stda %f0, [%i1 + %l3]0xc0 ! partial store
3102
3103 brnz,pt %i2, .L256_xts_enloop2x+4
3104 orn %g0, %l3, %l3 ! restore leading-byte mask
3105
! Keep the last ciphertext block in %f0:%f2 for possible stealing.
3106 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3107 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3108 brnz,pn %i5, .L256_xts_ensteal
3109 nop
3110
3111 ret
3112 restore
3113
3114 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: ASI_BLK_INIT stores, two blocks per iteration.
3115 .align 32
3116 .L256_xts_enblk:
3117 add %i1, %i2, %l5
3118 and %l5, 63, %l5 ! tail
3119 sub %i2, %l5, %i2
3120 add %l5, 15, %l5 ! round up to 16n
3121 srlx %i2, 4, %i2
3122 srl %l5, 4, %l5
3123 sub %i2, 1, %i2 ! bias counts for the bgu loop test
3124 add %l5, 1, %l5
3125
3126 .L256_xts_enblk2x:
3127 ldx [%i0 + 0], %o0
3128 ldx [%i0 + 8], %o1
3129 ldx [%i0 + 16], %o2
3130 brz,pt %l0, 5f
3131 ldx [%i0 + 24], %o3
3132
3133 ldx [%i0 + 32], %o4
3134 sllx %o0, %l0, %o0
3135 srlx %o1, %l1, %g1
3136 or %g1, %o0, %o0
3137 sllx %o1, %l0, %o1
3138 srlx %o2, %l1, %g1
3139 or %g1, %o1, %o1
3140 sllx %o2, %l0, %o2
3141 srlx %o3, %l1, %g1
3142 or %g1, %o2, %o2
3143 sllx %o3, %l0, %o3
3144 srlx %o4, %l1, %o4
3145 or %o4, %o3, %o3
3146 5:
3147 .word 0x99b02302 !movxtod %g2,%f12 ! tweak for block A
3148 .word 0x9db02303 !movxtod %g3,%f14
3149 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3150 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3151
3152 srax %g3, 63, %l7 ! next tweak value
3153 addcc %g2, %g2, %g2
3154 and %l7, 0x87, %l7
3155 .word 0x87b0c223 !addxc %g3,%g3,%g3
3156 xor %l7, %g2, %g2
3157
3158 .word 0x91b02302 !movxtod %g2,%f8 ! tweak for block B
3159 .word 0x95b02303 !movxtod %g3,%f10
3160 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3161 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3162
3163 xor %g4, %o0, %o0 ! ^= rk[0]
3164 xor %g5, %o1, %o1
3165 xor %g4, %o2, %o2 ! ^= rk[0]
3166 xor %g5, %o3, %o3
3167 .word 0x81b02308 !movxtod %o0,%f0
3168 .word 0x85b02309 !movxtod %o1,%f2
3169 .word 0x89b0230a !movxtod %o2,%f4
3170 .word 0x8db0230b !movxtod %o3,%f6
3171
3172 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3173 .word 0x85b38d82 !fxor %f14,%f2,%f2
3174 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3175 .word 0x8db28d86 !fxor %f10,%f6,%f6
3176
3177 prefetch [%i0 + 32+63], 20
3178 call _aes256_encrypt_2x
3179 add %i0, 32, %i0
3180
3181 .word 0x91b02302 !movxtod %g2,%f8 ! re-materialize block B tweak
3182 .word 0x95b02303 !movxtod %g3,%f10
3183
3184 srax %g3, 63, %l7 ! next tweak value
3185 addcc %g2, %g2, %g2
3186 and %l7, 0x87, %l7
3187 .word 0x87b0c223 !addxc %g3,%g3,%g3
3188 xor %l7, %g2, %g2
3189
3190 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3191 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3192
3193 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3194 .word 0x85b38d82 !fxor %f14,%f2,%f2
3195 .word 0x89b20d84 !fxor %f8,%f4,%f4
3196 .word 0x8db28d86 !fxor %f10,%f6,%f6
3197
3198 subcc %i2, 2, %i2
3199 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3200 add %i1, 8, %i1
3201 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3202 add %i1, 8, %i1
3203 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3204 add %i1, 8, %i1
3205 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3206 bgu,pt SIZE_T_CC, .L256_xts_enblk2x
3207 add %i1, 8, %i1
3208
! Tail: fence the block-init stores, then finish with the scalar
! loops -- odd remainder starts at the 1x loop, even at the 2x loop.
3209 add %l5, %i2, %i2
3210 andcc %i2, 1, %g0 ! is number of blocks even?
3211 membar #StoreLoad|#StoreStore
3212 bnz,pt %icc, .L256_xts_enloop
3213 srl %i2, 0, %i2
3214 brnz,pn %i2, .L256_xts_enloop2x
3215 nop
3216
! Keep the last ciphertext block in %f0:%f2 for possible stealing.
3217 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3218 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3219 brnz,pn %i5, .L256_xts_ensteal
3220 nop
3221
3222 ret
3223 restore
3224 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext stealing: the last full ciphertext block (in %f0:%f2) is
! parked on the stack; its leading %i5 bytes are swapped with the
! remaining plaintext bytes, then the patched block is re-encrypted by
! looping back into .L256_xts_enloop with %i2=1 and %i5=0.
3225 .align 32
3226 .L256_xts_ensteal:
3227 std %f0, [%fp + STACK_BIAS-16] ! copy of output
3228 std %f2, [%fp + STACK_BIAS-8]
3229
3230 srl %l0, 3, %l0 ! bit shift back to byte offset
3231 add %fp, STACK_BIAS-16, %l7
3232 add %i0, %l0, %i0 ! original %i0+%i2&-15
3233 add %i1, %l2, %i1 ! original %i1+%i2&-15
3234 mov 0, %l0
3235 nop ! align
3236
! Byte loop: steal from the stashed block while emitting its bytes.
3237 .L256_xts_enstealing:
3238 ldub [%i0 + %l0], %o0
3239 ldub [%l7 + %l0], %o1
3240 dec %i5
3241 stb %o0, [%l7 + %l0]
3242 stb %o1, [%i1 + %l0]
3243 brnz %i5, .L256_xts_enstealing
3244 inc %l0
3245
3246 mov %l7, %i0 ! re-encrypt the patched stack block
3247 sub %i1, 16, %i1
3248 mov 0, %l0
3249 sub %i1, %l2, %i1
3250 ba .L256_xts_enloop ! one more time
3251 mov 1, %i2 ! %i5 is 0
3252 ret ! NOTE(review): appears unreachable -- the re-entered loop returns itself
3253 restore
3254 .type aes256_t4_xts_encrypt,#function
3255 .size aes256_t4_xts_encrypt,.-aes256_t4_xts_encrypt
!----------------------------------------------------------------------
! aes256_t4_xts_decrypt(inp=%i0, out=%i1, len=%i2, key1=%i3, key2=%i4,
!                       ivec=%i5)
! AES-256/XTS decryption using SPARC T4 hardware AES instructions
! (emitted as raw .word constants for old assemblers).
! The initial tweak is ivec encrypted under key2 (aes_t4_encrypt call
! below); subsequent tweaks are the GF(2^128) doubling computed with
! srax/addcc/addxc and the 0x87 reduction constant.
! Register roles after setup:
!   %g2:%g3 = current tweak, %g4:%g5 = rk[0] pair loaded by
!   _aes256_load_deckey, %l0/%l1 = input misalignment shift counts,
!   %l2 = out&7, %l3 = edge mask for partial stores, %i5 = residual
!   byte count for ciphertext stealing, %l5 = path-selection scratch.
!----------------------------------------------------------------------
3256 .globl aes256_t4_xts_decrypt
3257 .align 32
3258 aes256_t4_xts_decrypt:
3259 save %sp, -STACK_FRAME-16, %sp
3260 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3261
! Encrypt ivec with key2 to produce the initial tweak in a 16-byte
! stack slot, then pull it back as two 64-bit halves (%g2:%g3).
3262 mov %i5, %o0
3263 add %fp, STACK_BIAS-16, %o1
3264 call aes_t4_encrypt
3265 mov %i4, %o2
3266
3267 add %fp, STACK_BIAS-16, %l7
3268 ldxa [%l7]0x88, %g2
3269 add %fp, STACK_BIAS-8, %l7
3270 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
3271
3272 sethi %hi(0x76543210), %l7
3273 or %l7, %lo(0x76543210), %l7
3274 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
3275
3276 prefetch [%i0], 20
3277 prefetch [%i0 + 63], 20
3278 call _aes256_load_deckey
3279 and %i2, 15, %i5
! Split len into whole blocks (%i2) and stealing tail (%i5); if a tail
! exists, hold back one extra full block for the stealing sequence.
3280 and %i2, -16, %i2
3281 mov 0, %l7
3282 movrnz %i5, 16, %l7
3283 sub %i2, %l7, %i2
3284
3285 sub %i0, %i1, %l5 ! %i0!=%i1
3286 and %i0, 7, %l0
3287 andn %i0, 7, %i0
3288 sll %l0, 3, %l0
3289 mov 64, %l1
3290 mov 0xff, %l3
3291 sub %l1, %l0, %l1
3292 and %i1, 7, %l2
3293 cmp %i2, 255
3294 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3295 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
3296 brnz,pn %l5, .L256_xts_deblk ! %i0==%i1)
3297 srl %l3, %l2, %l3
3298
3299 andcc %i2, 16, %g0 ! is number of blocks even?
3300 brz,pn %i2, .L256_xts_desteal
3301 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3302 bz %icc, .L256_xts_deloop2x
3303 srlx %i2, 4, %i2
! Single-block loop: used for an odd leading block before entering the
! 2x loop, and as the final pass from the stealing path.
3304 .L256_xts_deloop:
3305 ldx [%i0 + 0], %o0
3306 brz,pt %l0, 4f
3307 ldx [%i0 + 8], %o1
3308
! Input is not 8-byte aligned: merge three doublewords with shifts.
3309 ldx [%i0 + 16], %o2
3310 sllx %o0, %l0, %o0
3311 srlx %o1, %l1, %g1
3312 sllx %o1, %l0, %o1
3313 or %g1, %o0, %o0
3314 srlx %o2, %l1, %o2
3315 or %o2, %o1, %o1
3316 4:
3317 .word 0x99b02302 !movxtod %g2,%f12
3318 .word 0x9db02303 !movxtod %g3,%f14
3319 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3320 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3321
3322 xor %g4, %o0, %o0 ! ^= rk[0]
3323 xor %g5, %o1, %o1
3324 .word 0x81b02308 !movxtod %o0,%f0
3325 .word 0x85b02309 !movxtod %o1,%f2
3326
3327 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3328 .word 0x85b38d82 !fxor %f14,%f2,%f2
3329
3330 prefetch [%i1 + 63], 22
3331 prefetch [%i0 + 16+63], 20
3332 call _aes256_decrypt_1x
3333 add %i0, 16, %i0
3334
3335 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3336 .word 0x85b38d82 !fxor %f14,%f2,%f2
3337
3338 srax %g3, 63, %l7 ! next tweak value
3339 addcc %g2, %g2, %g2
3340 and %l7, 0x87, %l7
3341 .word 0x87b0c223 !addxc %g3,%g3,%g3
3342 xor %l7, %g2, %g2
3343
3344 brnz,pn %l2, 2f
3345 sub %i2, 1, %i2
3346
3347 std %f0, [%i1 + 0]
3348 std %f2, [%i1 + 8]
3349 brnz,pt %i2, .L256_xts_deloop2x
3350 add %i1, 16, %i1
3351
3352 brnz,pn %i5, .L256_xts_desteal
3353 nop
3354
3355 ret
3356 restore
3357
3358 .align 16
3359 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3360 ! and ~3x deterioration
3361 ! in inp==out case
3362 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
3363 .word 0x8db00902 !faligndata %f0,%f2,%f6
3364 .word 0x91b08902 !faligndata %f2,%f2,%f8
3365 stda %f4, [%i1 + %l3]0xc0 ! partial store
3366 std %f6, [%i1 + 8]
3367 add %i1, 16, %i1
3368 orn %g0, %l3, %l3
3369 stda %f8, [%i1 + %l3]0xc0 ! partial store
3370
3371 brnz,pt %i2, .L256_xts_deloop2x+4
3372 orn %g0, %l3, %l3
3373
3374 brnz,pn %i5, .L256_xts_desteal
3375 nop
3376
3377 ret
3378 restore
3379
3380 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two blocks per iteration, two tweaks (%f12/%f14 and
! %f8/%f10) live at once.
3381 .align 32
3382 .L256_xts_deloop2x:
3383 ldx [%i0 + 0], %o0
3384 ldx [%i0 + 8], %o1
3385 ldx [%i0 + 16], %o2
3386 brz,pt %l0, 4f
3387 ldx [%i0 + 24], %o3
3388
3389 ldx [%i0 + 32], %o4
3390 sllx %o0, %l0, %o0
3391 srlx %o1, %l1, %g1
3392 or %g1, %o0, %o0
3393 sllx %o1, %l0, %o1
3394 srlx %o2, %l1, %g1
3395 or %g1, %o1, %o1
3396 sllx %o2, %l0, %o2
3397 srlx %o3, %l1, %g1
3398 or %g1, %o2, %o2
3399 sllx %o3, %l0, %o3
3400 srlx %o4, %l1, %o4
3401 or %o4, %o3, %o3
3402 4:
3403 .word 0x99b02302 !movxtod %g2,%f12
3404 .word 0x9db02303 !movxtod %g3,%f14
3405 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3406 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3407
3408 srax %g3, 63, %l7 ! next tweak value
3409 addcc %g2, %g2, %g2
3410 and %l7, 0x87, %l7
3411 .word 0x87b0c223 !addxc %g3,%g3,%g3
3412 xor %l7, %g2, %g2
3413
3414 .word 0x91b02302 !movxtod %g2,%f8
3415 .word 0x95b02303 !movxtod %g3,%f10
3416 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3417 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3418
3419 xor %g4, %o0, %o0 ! ^= rk[0]
3420 xor %g5, %o1, %o1
3421 xor %g4, %o2, %o2 ! ^= rk[0]
3422 xor %g5, %o3, %o3
3423 .word 0x81b02308 !movxtod %o0,%f0
3424 .word 0x85b02309 !movxtod %o1,%f2
3425 .word 0x89b0230a !movxtod %o2,%f4
3426 .word 0x8db0230b !movxtod %o3,%f6
3427
3428 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3429 .word 0x85b38d82 !fxor %f14,%f2,%f2
3430 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3431 .word 0x8db28d86 !fxor %f10,%f6,%f6
3432
3433 prefetch [%i1 + 63], 22
3434 prefetch [%i0 + 32+63], 20
3435 call _aes256_decrypt_2x
3436 add %i0, 32, %i0
3437
3438 .word 0x91b02302 !movxtod %g2,%f8
3439 .word 0x95b02303 !movxtod %g3,%f10
3440
3441 srax %g3, 63, %l7 ! next tweak value
3442 addcc %g2, %g2, %g2
3443 and %l7, 0x87, %l7
3444 .word 0x87b0c223 !addxc %g3,%g3,%g3
3445 xor %l7, %g2, %g2
3446
3447 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3448 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3449
3450 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3451 .word 0x85b38d82 !fxor %f14,%f2,%f2
3452 .word 0x89b20d84 !fxor %f8,%f4,%f4
3453 .word 0x8db28d86 !fxor %f10,%f6,%f6
3454
3455 brnz,pn %l2, 2f
3456 sub %i2, 2, %i2
3457
3458 std %f0, [%i1 + 0]
3459 std %f2, [%i1 + 8]
3460 std %f4, [%i1 + 16]
3461 std %f6, [%i1 + 24]
3462 brnz,pt %i2, .L256_xts_deloop2x
3463 add %i1, 32, %i1
3464
3465 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3466 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3467 brnz,pn %i5, .L256_xts_desteal
3468 nop
3469
3470 ret
3471 restore
3472
3473 .align 16
3474 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3475 ! and ~3x deterioration
3476 ! in inp==out case
3477 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3478 .word 0x95b00902 !faligndata %f0,%f2,%f10
3479 .word 0x99b08904 !faligndata %f2,%f4,%f12
3480 .word 0x9db10906 !faligndata %f4,%f6,%f14
3481 .word 0x81b18906 !faligndata %f6,%f6,%f0
3482
3483 stda %f8, [%i1 + %l3]0xc0 ! partial store
3484 std %f10, [%i1 + 8]
3485 std %f12, [%i1 + 16]
3486 std %f14, [%i1 + 24]
3487 add %i1, 32, %i1
3488 orn %g0, %l3, %l3
3489 stda %f0, [%i1 + %l3]0xc0 ! partial store
3490
3491 brnz,pt %i2, .L256_xts_deloop2x+4
3492 orn %g0, %l3, %l3
3493
3494 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3495 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3496 brnz,pn %i5, .L256_xts_desteal
3497 nop
3498
3499 ret
3500 restore
3501
3502 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for large, output-aligned, non-overlapping buffers:
! stores go through ASI 0xe2 (ASI_BLK_INIT, T4-specific), with the
! tail (last <64 bytes plus one iteration) redirected back to the
! ordinary loops after a membar.
3503 .align 32
3504 .L256_xts_deblk:
3505 add %i1, %i2, %l5
3506 and %l5, 63, %l5 ! tail
3507 sub %i2, %l5, %i2
3508 add %l5, 15, %l5 ! round up to 16n
3509 srlx %i2, 4, %i2
3510 srl %l5, 4, %l5
3511 sub %i2, 1, %i2
3512 add %l5, 1, %l5
3513
3514 .L256_xts_deblk2x:
3515 ldx [%i0 + 0], %o0
3516 ldx [%i0 + 8], %o1
3517 ldx [%i0 + 16], %o2
3518 brz,pt %l0, 5f
3519 ldx [%i0 + 24], %o3
3520
3521 ldx [%i0 + 32], %o4
3522 sllx %o0, %l0, %o0
3523 srlx %o1, %l1, %g1
3524 or %g1, %o0, %o0
3525 sllx %o1, %l0, %o1
3526 srlx %o2, %l1, %g1
3527 or %g1, %o1, %o1
3528 sllx %o2, %l0, %o2
3529 srlx %o3, %l1, %g1
3530 or %g1, %o2, %o2
3531 sllx %o3, %l0, %o3
3532 srlx %o4, %l1, %o4
3533 or %o4, %o3, %o3
3534 5:
3535 .word 0x99b02302 !movxtod %g2,%f12
3536 .word 0x9db02303 !movxtod %g3,%f14
3537 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3538 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3539
3540 srax %g3, 63, %l7 ! next tweak value
3541 addcc %g2, %g2, %g2
3542 and %l7, 0x87, %l7
3543 .word 0x87b0c223 !addxc %g3,%g3,%g3
3544 xor %l7, %g2, %g2
3545
3546 .word 0x91b02302 !movxtod %g2,%f8
3547 .word 0x95b02303 !movxtod %g3,%f10
3548 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3549 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3550
3551 xor %g4, %o0, %o0 ! ^= rk[0]
3552 xor %g5, %o1, %o1
3553 xor %g4, %o2, %o2 ! ^= rk[0]
3554 xor %g5, %o3, %o3
3555 .word 0x81b02308 !movxtod %o0,%f0
3556 .word 0x85b02309 !movxtod %o1,%f2
3557 .word 0x89b0230a !movxtod %o2,%f4
3558 .word 0x8db0230b !movxtod %o3,%f6
3559
3560 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3561 .word 0x85b38d82 !fxor %f14,%f2,%f2
3562 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3563 .word 0x8db28d86 !fxor %f10,%f6,%f6
3564
3565 prefetch [%i0 + 32+63], 20
3566 call _aes256_decrypt_2x
3567 add %i0, 32, %i0
3568
3569 .word 0x91b02302 !movxtod %g2,%f8
3570 .word 0x95b02303 !movxtod %g3,%f10
3571
3572 srax %g3, 63, %l7 ! next tweak value
3573 addcc %g2, %g2, %g2
3574 and %l7, 0x87, %l7
3575 .word 0x87b0c223 !addxc %g3,%g3,%g3
3576 xor %l7, %g2, %g2
3577
3578 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3579 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3580
3581 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3582 .word 0x85b38d82 !fxor %f14,%f2,%f2
3583 .word 0x89b20d84 !fxor %f8,%f4,%f4
3584 .word 0x8db28d86 !fxor %f10,%f6,%f6
3585
3586 subcc %i2, 2, %i2
3587 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3588 add %i1, 8, %i1
3589 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3590 add %i1, 8, %i1
3591 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3592 add %i1, 8, %i1
3593 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3594 bgu,pt SIZE_T_CC, .L256_xts_deblk2x
3595 add %i1, 8, %i1
3596
3597 add %l5, %i2, %i2
3598 andcc %i2, 1, %g0 ! is number of blocks even?
3599 membar #StoreLoad|#StoreStore
3600 bnz,pt %icc, .L256_xts_deloop
3601 srl %i2, 0, %i2
3602 brnz,pn %i2, .L256_xts_deloop2x
3603 nop
3604
3605 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3606 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3607 brnz,pn %i5, .L256_xts_desteal
3608 nop
3609
3610 ret
3611 restore
3612 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext stealing: the held-back full block is decrypted with the
! *advanced* tweak (computed into %o2:%o3 below), its plaintext is
! parked on the stack, and the byte-copy loop swaps the %i5 tail bytes
! before looping back through .L256_xts_deloop for the final block.
3613 .align 32
3614 .L256_xts_desteal:
3615 ldx [%i0 + 0], %o0
3616 brz,pt %l0, 8f
3617 ldx [%i0 + 8], %o1
3618
3619 ldx [%i0 + 16], %o2
3620 sllx %o0, %l0, %o0
3621 srlx %o1, %l1, %g1
3622 sllx %o1, %l0, %o1
3623 or %g1, %o0, %o0
3624 srlx %o2, %l1, %o2
3625 or %o2, %o1, %o1
3626 8:
3627 srax %g3, 63, %l7 ! next tweak value
3628 addcc %g2, %g2, %o2
3629 and %l7, 0x87, %l7
3630 .word 0x97b0c223 !addxc %g3,%g3,%o3
3631 xor %l7, %o2, %o2
3632
3633 .word 0x99b0230a !movxtod %o2,%f12
3634 .word 0x9db0230b !movxtod %o3,%f14
3635 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3636 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3637
3638 xor %g4, %o0, %o0 ! ^= rk[0]
3639 xor %g5, %o1, %o1
3640 .word 0x81b02308 !movxtod %o0,%f0
3641 .word 0x85b02309 !movxtod %o1,%f2
3642
3643 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3644 .word 0x85b38d82 !fxor %f14,%f2,%f2
3645
3646 call _aes256_decrypt_1x
3647 add %i0, 16, %i0
3648
3649 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3650 .word 0x85b38d82 !fxor %f14,%f2,%f2
3651
3652 std %f0, [%fp + STACK_BIAS-16]
3653 std %f2, [%fp + STACK_BIAS-8]
3654
3655 srl %l0, 3, %l0
3656 add %fp, STACK_BIAS-16, %l7
3657 add %i0, %l0, %i0 ! original %i0+%i2&-15
3658 add %i1, %l2, %i1 ! original %i1+%i2&-15
3659 mov 0, %l0
3660 add %i1, 16, %i1
3661 nop ! align
3662
3663 .L256_xts_destealing:
3664 ldub [%i0 + %l0], %o0
3665 ldub [%l7 + %l0], %o1
3666 dec %i5
3667 stb %o0, [%l7 + %l0]
3668 stb %o1, [%i1 + %l0]
3669 brnz %i5, .L256_xts_destealing
3670 inc %l0
3671
3672 mov %l7, %i0
3673 sub %i1, 16, %i1
3674 mov 0, %l0
3675 sub %i1, %l2, %i1
3676 ba .L256_xts_deloop ! one more time
3677 mov 1, %i2 ! %i5 is 0
3678 ret
3679 restore
3680 .type aes256_t4_xts_decrypt,#function
3681 .size aes256_t4_xts_decrypt,.-aes256_t4_xts_decrypt
!----------------------------------------------------------------------
! aes192_t4_ctr32_encrypt(inp=%i0, out=%i1, blocks=%i2, key=%i3,
!                         ivec=%i4)
! AES-192 in 32-bit counter mode on SPARC T4.  %i2 arrives as a block
! count and is converted to bytes (sllx %i2,4) right after entry.
! The top 96 bits of the counter block are folded into rk[0]
! (%g4 -> %f14, %g5) once, so only the low 32-bit word %l7 is
! xored/incremented per block; "srl %l7,0" keeps it a clean 32-bit
! wrap (clruw).  The first AES round is issued inline before tailing
! into _aes192_encrypt_1x/2x past their first round.
! %l0/%l1 = input misalignment shifts, %l2 = out&7, %l3 = edge mask
! for partial stores, %l5 = path-selection scratch.
!----------------------------------------------------------------------
3682 .globl aes192_t4_ctr32_encrypt
3683 .align 32
3684 aes192_t4_ctr32_encrypt:
3685 save %sp, -STACK_FRAME, %sp
3686 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3687
3688 prefetch [%i0], 20
3689 prefetch [%i0 + 63], 20
3690 call _aes192_load_enckey
3691 sllx %i2, 4, %i2
3692
3693 ld [%i4 + 0], %l4 ! counter
3694 ld [%i4 + 4], %l5
3695 ld [%i4 + 8], %l6
3696 ld [%i4 + 12], %l7
3697
3698 sllx %l4, 32, %o5
3699 or %l5, %o5, %o5
3700 sllx %l6, 32, %g1
3701 xor %o5, %g4, %g4 ! ^= rk[0]
3702 xor %g1, %g5, %g5
3703 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
3704
3705 sub %i0, %i1, %l5 ! %i0!=%i1
3706 and %i0, 7, %l0
3707 andn %i0, 7, %i0
3708 sll %l0, 3, %l0
3709 mov 64, %l1
3710 mov 0xff, %l3
3711 sub %l1, %l0, %l1
3712 and %i1, 7, %l2
3713 cmp %i2, 255
3714 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3715 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
3716 brnz,pn %l5, .L192_ctr32_blk ! %i0==%i1)
3717 srl %l3, %l2, %l3
3718
3719 andcc %i2, 16, %g0 ! is number of blocks even?
3720 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3721 bz %icc, .L192_ctr32_loop2x
3722 srlx %i2, 4, %i2
! Single-block loop (odd leading block).
3723 .L192_ctr32_loop:
3724 ldx [%i0 + 0], %o0
3725 brz,pt %l0, 4f
3726 ldx [%i0 + 8], %o1
3727
3728 ldx [%i0 + 16], %o2
3729 sllx %o0, %l0, %o0
3730 srlx %o1, %l1, %g1
3731 sllx %o1, %l0, %o1
3732 or %g1, %o0, %o0
3733 srlx %o2, %l1, %o2
3734 or %o2, %o1, %o1
3735 4:
3736 xor %g5, %l7, %g1 ! ^= rk[0]
3737 add %l7, 1, %l7
3738 .word 0x85b02301 !movxtod %g1,%f2
3739 srl %l7, 0, %l7 ! clruw
3740 prefetch [%i1 + 63], 22
3741 prefetch [%i0 + 16+63], 20
3742 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
3743 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3744 call _aes192_encrypt_1x+8
3745 add %i0, 16, %i0
3746
3747 .word 0x95b02308 !movxtod %o0,%f10
3748 .word 0x99b02309 !movxtod %o1,%f12
3749 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
3750 .word 0x85b30d82 !fxor %f12,%f2,%f2
3751
3752 brnz,pn %l2, 2f
3753 sub %i2, 1, %i2
3754
3755 std %f0, [%i1 + 0]
3756 std %f2, [%i1 + 8]
3757 brnz,pt %i2, .L192_ctr32_loop2x
3758 add %i1, 16, %i1
3759
3760 ret
3761 restore
3762
3763 .align 16
3764 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3765 ! and ~3x deterioration
3766 ! in inp==out case
3767 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
3768 .word 0x8db00902 !faligndata %f0,%f2,%f6
3769 .word 0x91b08902 !faligndata %f2,%f2,%f8
3770 stda %f4, [%i1 + %l3]0xc0 ! partial store
3771 std %f6, [%i1 + 8]
3772 add %i1, 16, %i1
3773 orn %g0, %l3, %l3
3774 stda %f8, [%i1 + %l3]0xc0 ! partial store
3775
3776 brnz,pt %i2, .L192_ctr32_loop2x+4
3777 orn %g0, %l3, %l3
3778
3779 ret
3780 restore
3781
3782 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two counter blocks per iteration.
3783 .align 32
3784 .L192_ctr32_loop2x:
3785 ldx [%i0 + 0], %o0
3786 ldx [%i0 + 8], %o1
3787 ldx [%i0 + 16], %o2
3788 brz,pt %l0, 4f
3789 ldx [%i0 + 24], %o3
3790
3791 ldx [%i0 + 32], %o4
3792 sllx %o0, %l0, %o0
3793 srlx %o1, %l1, %g1
3794 or %g1, %o0, %o0
3795 sllx %o1, %l0, %o1
3796 srlx %o2, %l1, %g1
3797 or %g1, %o1, %o1
3798 sllx %o2, %l0, %o2
3799 srlx %o3, %l1, %g1
3800 or %g1, %o2, %o2
3801 sllx %o3, %l0, %o3
3802 srlx %o4, %l1, %o4
3803 or %o4, %o3, %o3
3804 4:
3805 xor %g5, %l7, %g1 ! ^= rk[0]
3806 add %l7, 1, %l7
3807 .word 0x85b02301 !movxtod %g1,%f2
3808 srl %l7, 0, %l7 ! clruw
3809 xor %g5, %l7, %g1
3810 add %l7, 1, %l7
3811 .word 0x8db02301 !movxtod %g1,%f6
3812 srl %l7, 0, %l7 ! clruw
3813 prefetch [%i1 + 63], 22
3814 prefetch [%i0 + 32+63], 20
3815 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
3816 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3817 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
3818 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
3819 call _aes192_encrypt_2x+16
3820 add %i0, 32, %i0
3821
3822 .word 0x91b02308 !movxtod %o0,%f8
3823 .word 0x95b02309 !movxtod %o1,%f10
3824 .word 0x99b0230a !movxtod %o2,%f12
3825 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
3826 .word 0x91b0230b !movxtod %o3,%f8
3827 .word 0x85b28d82 !fxor %f10,%f2,%f2
3828 .word 0x89b30d84 !fxor %f12,%f4,%f4
3829 .word 0x8db20d86 !fxor %f8,%f6,%f6
3830
3831 brnz,pn %l2, 2f
3832 sub %i2, 2, %i2
3833
3834 std %f0, [%i1 + 0]
3835 std %f2, [%i1 + 8]
3836 std %f4, [%i1 + 16]
3837 std %f6, [%i1 + 24]
3838 brnz,pt %i2, .L192_ctr32_loop2x
3839 add %i1, 32, %i1
3840
3841 ret
3842 restore
3843
3844 .align 16
3845 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3846 ! and ~3x deterioration
3847 ! in inp==out case
3848 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3849 .word 0x81b00902 !faligndata %f0,%f2,%f0
3850 .word 0x85b08904 !faligndata %f2,%f4,%f2
3851 .word 0x89b10906 !faligndata %f4,%f6,%f4
3852 .word 0x8db18906 !faligndata %f6,%f6,%f6
3853
3854 stda %f8, [%i1 + %l3]0xc0 ! partial store
3855 std %f0, [%i1 + 8]
3856 std %f2, [%i1 + 16]
3857 std %f4, [%i1 + 24]
3858 add %i1, 32, %i1
3859 orn %g0, %l3, %l3
3860 stda %f6, [%i1 + %l3]0xc0 ! partial store
3861
3862 brnz,pt %i2, .L192_ctr32_loop2x+4
3863 orn %g0, %l3, %l3
3864
3865 ret
3866 restore
3867
3868 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: ASI_BLK_INIT (0xe2) stores for large aligned output,
! tail handed back to the ordinary loops after a membar.
3869 .align 32
3870 .L192_ctr32_blk:
3871 add %i1, %i2, %l5
3872 and %l5, 63, %l5 ! tail
3873 sub %i2, %l5, %i2
3874 add %l5, 15, %l5 ! round up to 16n
3875 srlx %i2, 4, %i2
3876 srl %l5, 4, %l5
3877 sub %i2, 1, %i2
3878 add %l5, 1, %l5
3879
3880 .L192_ctr32_blk_loop2x:
3881 ldx [%i0 + 0], %o0
3882 ldx [%i0 + 8], %o1
3883 ldx [%i0 + 16], %o2
3884 brz,pt %l0, 5f
3885 ldx [%i0 + 24], %o3
3886
3887 ldx [%i0 + 32], %o4
3888 sllx %o0, %l0, %o0
3889 srlx %o1, %l1, %g1
3890 or %g1, %o0, %o0
3891 sllx %o1, %l0, %o1
3892 srlx %o2, %l1, %g1
3893 or %g1, %o1, %o1
3894 sllx %o2, %l0, %o2
3895 srlx %o3, %l1, %g1
3896 or %g1, %o2, %o2
3897 sllx %o3, %l0, %o3
3898 srlx %o4, %l1, %o4
3899 or %o4, %o3, %o3
3900 5:
3901 xor %g5, %l7, %g1 ! ^= rk[0]
3902 add %l7, 1, %l7
3903 .word 0x85b02301 !movxtod %g1,%f2
3904 srl %l7, 0, %l7 ! clruw
3905 xor %g5, %l7, %g1
3906 add %l7, 1, %l7
3907 .word 0x8db02301 !movxtod %g1,%f6
3908 srl %l7, 0, %l7 ! clruw
3909 prefetch [%i0 + 32+63], 20
3910 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
3911 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3912 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
3913 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
3914 call _aes192_encrypt_2x+16
3915 add %i0, 32, %i0
3916 subcc %i2, 2, %i2
3917
3918 .word 0x91b02308 !movxtod %o0,%f8
3919 .word 0x95b02309 !movxtod %o1,%f10
3920 .word 0x99b0230a !movxtod %o2,%f12
3921 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
3922 .word 0x91b0230b !movxtod %o3,%f8
3923 .word 0x85b28d82 !fxor %f10,%f2,%f2
3924 .word 0x89b30d84 !fxor %f12,%f4,%f4
3925 .word 0x8db20d86 !fxor %f8,%f6,%f6
3926
3927 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3928 add %i1, 8, %i1
3929 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3930 add %i1, 8, %i1
3931 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3932 add %i1, 8, %i1
3933 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3934 bgu,pt SIZE_T_CC, .L192_ctr32_blk_loop2x
3935 add %i1, 8, %i1
3936
3937 add %l5, %i2, %i2
3938 andcc %i2, 1, %g0 ! is number of blocks even?
3939 membar #StoreLoad|#StoreStore
3940 bnz,pt %icc, .L192_ctr32_loop
3941 srl %i2, 0, %i2
3942 brnz,pn %i2, .L192_ctr32_loop2x
3943 nop
3944
3945 ret
3946 restore
3947 .type aes192_t4_ctr32_encrypt,#function
3948 .size aes192_t4_ctr32_encrypt,.-aes192_t4_ctr32_encrypt
!----------------------------------------------------------------------
! aes192_t4_cbc_decrypt(inp=%i0, out=%i1, len=%i2, key=%i3, ivec=%i4)
! AES-192 CBC decryption on SPARC T4.  Returns immediately when
! len==0.  The running IV lives in %f12-%f15: after each block the
! just-consumed ciphertext (%o0/%o1, or %o2/%o3 in the 2x loop) is
! moved into %f12/%f14 to become the next IV, and the final IV is
! written back to [%i4] at every exit path.
! %l0/%l1 = input misalignment shifts, %l2 = out&7, %l3 = edge mask
! for partial stores, %l5 = path-selection scratch (in-out distance).
! Three code paths, as in the sibling routines: 1x loop, 2x loop, and
! an ASI_BLK_INIT bulk path for large aligned non-overlapping buffers.
!----------------------------------------------------------------------
3949 .globl aes192_t4_cbc_decrypt
3950 .align 32
3951 aes192_t4_cbc_decrypt:
3952 save %sp, -STACK_FRAME, %sp
3953 cmp %i2, 0
3954 be,pn SIZE_T_CC, .L192_cbc_dec_abort
3955 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3956 sub %i0, %i1, %l5 ! %i0!=%i1
3957 ld [%i4 + 0], %f12 ! load ivec
3958 ld [%i4 + 4], %f13
3959 ld [%i4 + 8], %f14
3960 ld [%i4 + 12], %f15
3961 prefetch [%i0], 20
3962 prefetch [%i0 + 63], 20
3963 call _aes192_load_deckey
3964 and %i0, 7, %l0
3965 andn %i0, 7, %i0
3966 sll %l0, 3, %l0
3967 mov 64, %l1
3968 mov 0xff, %l3
3969 sub %l1, %l0, %l1
3970 and %i1, 7, %l2
3971 cmp %i2, 255
3972 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3973 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
3974 brnz,pn %l5, .L192cbc_dec_blk ! %i0==%i1)
3975 srl %l3, %l2, %l3
3976
3977 andcc %i2, 16, %g0 ! is number of blocks even?
3978 srlx %i2, 4, %i2
3979 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3980 bz %icc, .L192_cbc_dec_loop2x
3981 prefetch [%i1], 22
! Single-block loop (odd leading block).
3982 .L192_cbc_dec_loop:
3983 ldx [%i0 + 0], %o0
3984 brz,pt %l0, 4f
3985 ldx [%i0 + 8], %o1
3986
3987 ldx [%i0 + 16], %o2
3988 sllx %o0, %l0, %o0
3989 srlx %o1, %l1, %g1
3990 sllx %o1, %l0, %o1
3991 or %g1, %o0, %o0
3992 srlx %o2, %l1, %o2
3993 or %o2, %o1, %o1
3994 4:
3995 xor %g4, %o0, %o2 ! ^= rk[0]
3996 xor %g5, %o1, %o3
3997 .word 0x81b0230a !movxtod %o2,%f0
3998 .word 0x85b0230b !movxtod %o3,%f2
3999
4000 prefetch [%i1 + 63], 22
4001 prefetch [%i0 + 16+63], 20
4002 call _aes192_decrypt_1x
4003 add %i0, 16, %i0
4004
4005 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4006 .word 0x85b38d82 !fxor %f14,%f2,%f2
4007 .word 0x99b02308 !movxtod %o0,%f12
4008 .word 0x9db02309 !movxtod %o1,%f14
4009
4010 brnz,pn %l2, 2f
4011 sub %i2, 1, %i2
4012
4013 std %f0, [%i1 + 0]
4014 std %f2, [%i1 + 8]
4015 brnz,pt %i2, .L192_cbc_dec_loop2x
4016 add %i1, 16, %i1
4017 st %f12, [%i4 + 0]
4018 st %f13, [%i4 + 4]
4019 st %f14, [%i4 + 8]
4020 st %f15, [%i4 + 12]
4021 .L192_cbc_dec_abort:
4022 ret
4023 restore
4024
4025 .align 16
4026 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4027 ! and ~3x deterioration
4028 ! in inp==out case
4029 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
4030 .word 0x8db00902 !faligndata %f0,%f2,%f6
4031 .word 0x91b08902 !faligndata %f2,%f2,%f8
4032
4033 stda %f4, [%i1 + %l3]0xc0 ! partial store
4034 std %f6, [%i1 + 8]
4035 add %i1, 16, %i1
4036 orn %g0, %l3, %l3
4037 stda %f8, [%i1 + %l3]0xc0 ! partial store
4038
4039 brnz,pt %i2, .L192_cbc_dec_loop2x+4
4040 orn %g0, %l3, %l3
4041 st %f12, [%i4 + 0]
4042 st %f13, [%i4 + 4]
4043 st %f14, [%i4 + 8]
4044 st %f15, [%i4 + 12]
4045 ret
4046 restore
4047
4048 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two blocks per iteration.
4049 .align 32
4050 .L192_cbc_dec_loop2x:
4051 ldx [%i0 + 0], %o0
4052 ldx [%i0 + 8], %o1
4053 ldx [%i0 + 16], %o2
4054 brz,pt %l0, 4f
4055 ldx [%i0 + 24], %o3
4056
4057 ldx [%i0 + 32], %o4
4058 sllx %o0, %l0, %o0
4059 srlx %o1, %l1, %g1
4060 or %g1, %o0, %o0
4061 sllx %o1, %l0, %o1
4062 srlx %o2, %l1, %g1
4063 or %g1, %o1, %o1
4064 sllx %o2, %l0, %o2
4065 srlx %o3, %l1, %g1
4066 or %g1, %o2, %o2
4067 sllx %o3, %l0, %o3
4068 srlx %o4, %l1, %o4
4069 or %o4, %o3, %o3
4070 4:
4071 xor %g4, %o0, %o4 ! ^= rk[0]
4072 xor %g5, %o1, %o5
4073 .word 0x81b0230c !movxtod %o4,%f0
4074 .word 0x85b0230d !movxtod %o5,%f2
4075 xor %g4, %o2, %o4
4076 xor %g5, %o3, %o5
4077 .word 0x89b0230c !movxtod %o4,%f4
4078 .word 0x8db0230d !movxtod %o5,%f6
4079
4080 prefetch [%i1 + 63], 22
4081 prefetch [%i0 + 32+63], 20
4082 call _aes192_decrypt_2x
4083 add %i0, 32, %i0
4084
4085 .word 0x91b02308 !movxtod %o0,%f8
4086 .word 0x95b02309 !movxtod %o1,%f10
4087 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4088 .word 0x85b38d82 !fxor %f14,%f2,%f2
4089 .word 0x99b0230a !movxtod %o2,%f12
4090 .word 0x9db0230b !movxtod %o3,%f14
4091 .word 0x89b20d84 !fxor %f8,%f4,%f4
4092 .word 0x8db28d86 !fxor %f10,%f6,%f6
4093
4094 brnz,pn %l2, 2f
4095 sub %i2, 2, %i2
4096
4097 std %f0, [%i1 + 0]
4098 std %f2, [%i1 + 8]
4099 std %f4, [%i1 + 16]
4100 std %f6, [%i1 + 24]
4101 brnz,pt %i2, .L192_cbc_dec_loop2x
4102 add %i1, 32, %i1
4103 st %f12, [%i4 + 0]
4104 st %f13, [%i4 + 4]
4105 st %f14, [%i4 + 8]
4106 st %f15, [%i4 + 12]
4107 ret
4108 restore
4109
4110 .align 16
4111 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4112 ! and ~3x deterioration
4113 ! in inp==out case
4114 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
4115 .word 0x81b00902 !faligndata %f0,%f2,%f0
4116 .word 0x85b08904 !faligndata %f2,%f4,%f2
4117 .word 0x89b10906 !faligndata %f4,%f6,%f4
4118 .word 0x8db18906 !faligndata %f6,%f6,%f6
4119 stda %f8, [%i1 + %l3]0xc0 ! partial store
4120 std %f0, [%i1 + 8]
4121 std %f2, [%i1 + 16]
4122 std %f4, [%i1 + 24]
4123 add %i1, 32, %i1
4124 orn %g0, %l3, %l3
4125 stda %f6, [%i1 + %l3]0xc0 ! partial store
4126
4127 brnz,pt %i2, .L192_cbc_dec_loop2x+4
4128 orn %g0, %l3, %l3
4129 st %f12, [%i4 + 0]
4130 st %f13, [%i4 + 4]
4131 st %f14, [%i4 + 8]
4132 st %f15, [%i4 + 12]
4133 ret
4134 restore
4135
4136 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: ASI_BLK_INIT (0xe2) stores for large aligned output,
! tail handed back to the ordinary loops after a membar.
4137 .align 32
4138 .L192cbc_dec_blk:
4139 add %i1, %i2, %l5
4140 and %l5, 63, %l5 ! tail
4141 sub %i2, %l5, %i2
4142 add %l5, 15, %l5 ! round up to 16n
4143 srlx %i2, 4, %i2
4144 srl %l5, 4, %l5
4145 sub %i2, 1, %i2
4146 add %l5, 1, %l5
4147
4148 .L192_cbc_dec_blk_loop2x:
4149 ldx [%i0 + 0], %o0
4150 ldx [%i0 + 8], %o1
4151 ldx [%i0 + 16], %o2
4152 brz,pt %l0, 5f
4153 ldx [%i0 + 24], %o3
4154
4155 ldx [%i0 + 32], %o4
4156 sllx %o0, %l0, %o0
4157 srlx %o1, %l1, %g1
4158 or %g1, %o0, %o0
4159 sllx %o1, %l0, %o1
4160 srlx %o2, %l1, %g1
4161 or %g1, %o1, %o1
4162 sllx %o2, %l0, %o2
4163 srlx %o3, %l1, %g1
4164 or %g1, %o2, %o2
4165 sllx %o3, %l0, %o3
4166 srlx %o4, %l1, %o4
4167 or %o4, %o3, %o3
4168 5:
4169 xor %g4, %o0, %o4 ! ^= rk[0]
4170 xor %g5, %o1, %o5
4171 .word 0x81b0230c !movxtod %o4,%f0
4172 .word 0x85b0230d !movxtod %o5,%f2
4173 xor %g4, %o2, %o4
4174 xor %g5, %o3, %o5
4175 .word 0x89b0230c !movxtod %o4,%f4
4176 .word 0x8db0230d !movxtod %o5,%f6
4177
4178 prefetch [%i0 + 32+63], 20
4179 call _aes192_decrypt_2x
4180 add %i0, 32, %i0
4181 subcc %i2, 2, %i2
4182
4183 .word 0x91b02308 !movxtod %o0,%f8
4184 .word 0x95b02309 !movxtod %o1,%f10
4185 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4186 .word 0x85b38d82 !fxor %f14,%f2,%f2
4187 .word 0x99b0230a !movxtod %o2,%f12
4188 .word 0x9db0230b !movxtod %o3,%f14
4189 .word 0x89b20d84 !fxor %f8,%f4,%f4
4190 .word 0x8db28d86 !fxor %f10,%f6,%f6
4191
4192 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4193 add %i1, 8, %i1
4194 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4195 add %i1, 8, %i1
4196 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4197 add %i1, 8, %i1
4198 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4199 bgu,pt SIZE_T_CC, .L192_cbc_dec_blk_loop2x
4200 add %i1, 8, %i1
4201
4202 add %l5, %i2, %i2
4203 andcc %i2, 1, %g0 ! is number of blocks even?
4204 membar #StoreLoad|#StoreStore
4205 bnz,pt %icc, .L192_cbc_dec_loop
4206 srl %i2, 0, %i2
4207 brnz,pn %i2, .L192_cbc_dec_loop2x
4208 nop
4209 st %f12, [%i4 + 0] ! write out ivec
4210 st %f13, [%i4 + 4]
4211 st %f14, [%i4 + 8]
4212 st %f15, [%i4 + 12]
4213 ret
4214 restore
4215 .type aes192_t4_cbc_decrypt,#function
4216 .size aes192_t4_cbc_decrypt,.-aes192_t4_cbc_decrypt
!----------------------------------------------------------------------
! aes256_t4_cbc_decrypt(inp=%i0, out=%i1, len=%i2, key=%i3, ivec=%i4)
! AES-256 CBC decryption on SPARC T4; structurally identical to
! aes192_t4_cbc_decrypt except for the key-load and round helpers
! (_aes256_load_deckey, _aes256_decrypt_1x/2x).  Returns immediately
! when len==0.  The running IV lives in %f12-%f15; each consumed
! ciphertext block becomes the next IV, and the final IV is written
! back to [%i4] at every exit path.
! %l0/%l1 = input misalignment shifts, %l2 = out&7, %l3 = edge mask
! for partial stores, %l5 = path-selection scratch (in-out distance).
!----------------------------------------------------------------------
4217 .globl aes256_t4_cbc_decrypt
4218 .align 32
4219 aes256_t4_cbc_decrypt:
4220 save %sp, -STACK_FRAME, %sp
4221 cmp %i2, 0
4222 be,pn SIZE_T_CC, .L256_cbc_dec_abort
4223 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
4224 sub %i0, %i1, %l5 ! %i0!=%i1
4225 ld [%i4 + 0], %f12 ! load ivec
4226 ld [%i4 + 4], %f13
4227 ld [%i4 + 8], %f14
4228 ld [%i4 + 12], %f15
4229 prefetch [%i0], 20
4230 prefetch [%i0 + 63], 20
4231 call _aes256_load_deckey
4232 and %i0, 7, %l0
4233 andn %i0, 7, %i0
4234 sll %l0, 3, %l0
4235 mov 64, %l1
4236 mov 0xff, %l3
4237 sub %l1, %l0, %l1
4238 and %i1, 7, %l2
4239 cmp %i2, 255
4240 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
4241 movleu SIZE_T_CC, 0, %l5 ! %i2<256 ||
4242 brnz,pn %l5, .L256cbc_dec_blk ! %i0==%i1)
4243 srl %l3, %l2, %l3
4244
4245 andcc %i2, 16, %g0 ! is number of blocks even?
4246 srlx %i2, 4, %i2
4247 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
4248 bz %icc, .L256_cbc_dec_loop2x
4249 prefetch [%i1], 22
! Single-block loop (odd leading block).
4250 .L256_cbc_dec_loop:
4251 ldx [%i0 + 0], %o0
4252 brz,pt %l0, 4f
4253 ldx [%i0 + 8], %o1
4254
4255 ldx [%i0 + 16], %o2
4256 sllx %o0, %l0, %o0
4257 srlx %o1, %l1, %g1
4258 sllx %o1, %l0, %o1
4259 or %g1, %o0, %o0
4260 srlx %o2, %l1, %o2
4261 or %o2, %o1, %o1
4262 4:
4263 xor %g4, %o0, %o2 ! ^= rk[0]
4264 xor %g5, %o1, %o3
4265 .word 0x81b0230a !movxtod %o2,%f0
4266 .word 0x85b0230b !movxtod %o3,%f2
4267
4268 prefetch [%i1 + 63], 22
4269 prefetch [%i0 + 16+63], 20
4270 call _aes256_decrypt_1x
4271 add %i0, 16, %i0
4272
4273 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4274 .word 0x85b38d82 !fxor %f14,%f2,%f2
4275 .word 0x99b02308 !movxtod %o0,%f12
4276 .word 0x9db02309 !movxtod %o1,%f14
4277
4278 brnz,pn %l2, 2f
4279 sub %i2, 1, %i2
4280
4281 std %f0, [%i1 + 0]
4282 std %f2, [%i1 + 8]
4283 brnz,pt %i2, .L256_cbc_dec_loop2x
4284 add %i1, 16, %i1
4285 st %f12, [%i4 + 0]
4286 st %f13, [%i4 + 4]
4287 st %f14, [%i4 + 8]
4288 st %f15, [%i4 + 12]
4289 .L256_cbc_dec_abort:
4290 ret
4291 restore
4292
4293 .align 16
4294 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4295 ! and ~3x deterioration
4296 ! in inp==out case
4297 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
4298 .word 0x8db00902 !faligndata %f0,%f2,%f6
4299 .word 0x91b08902 !faligndata %f2,%f2,%f8
4300
4301 stda %f4, [%i1 + %l3]0xc0 ! partial store
4302 std %f6, [%i1 + 8]
4303 add %i1, 16, %i1
4304 orn %g0, %l3, %l3
4305 stda %f8, [%i1 + %l3]0xc0 ! partial store
4306
4307 brnz,pt %i2, .L256_cbc_dec_loop2x+4
4308 orn %g0, %l3, %l3
4309 st %f12, [%i4 + 0]
4310 st %f13, [%i4 + 4]
4311 st %f14, [%i4 + 8]
4312 st %f15, [%i4 + 12]
4313 ret
4314 restore
4315
4316 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two blocks per iteration.
4317 .align 32
4318 .L256_cbc_dec_loop2x:
4319 ldx [%i0 + 0], %o0
4320 ldx [%i0 + 8], %o1
4321 ldx [%i0 + 16], %o2
4322 brz,pt %l0, 4f
4323 ldx [%i0 + 24], %o3
4324
4325 ldx [%i0 + 32], %o4
4326 sllx %o0, %l0, %o0
4327 srlx %o1, %l1, %g1
4328 or %g1, %o0, %o0
4329 sllx %o1, %l0, %o1
4330 srlx %o2, %l1, %g1
4331 or %g1, %o1, %o1
4332 sllx %o2, %l0, %o2
4333 srlx %o3, %l1, %g1
4334 or %g1, %o2, %o2
4335 sllx %o3, %l0, %o3
4336 srlx %o4, %l1, %o4
4337 or %o4, %o3, %o3
4338 4:
4339 xor %g4, %o0, %o4 ! ^= rk[0]
4340 xor %g5, %o1, %o5
4341 .word 0x81b0230c !movxtod %o4,%f0
4342 .word 0x85b0230d !movxtod %o5,%f2
4343 xor %g4, %o2, %o4
4344 xor %g5, %o3, %o5
4345 .word 0x89b0230c !movxtod %o4,%f4
4346 .word 0x8db0230d !movxtod %o5,%f6
4347
4348 prefetch [%i1 + 63], 22
4349 prefetch [%i0 + 32+63], 20
4350 call _aes256_decrypt_2x
4351 add %i0, 32, %i0
4352
4353 .word 0x91b02308 !movxtod %o0,%f8
4354 .word 0x95b02309 !movxtod %o1,%f10
4355 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4356 .word 0x85b38d82 !fxor %f14,%f2,%f2
4357 .word 0x99b0230a !movxtod %o2,%f12
4358 .word 0x9db0230b !movxtod %o3,%f14
4359 .word 0x89b20d84 !fxor %f8,%f4,%f4
4360 .word 0x8db28d86 !fxor %f10,%f6,%f6
4361
4362 brnz,pn %l2, 2f
4363 sub %i2, 2, %i2
4364
4365 std %f0, [%i1 + 0]
4366 std %f2, [%i1 + 8]
4367 std %f4, [%i1 + 16]
4368 std %f6, [%i1 + 24]
4369 brnz,pt %i2, .L256_cbc_dec_loop2x
4370 add %i1, 32, %i1
4371 st %f12, [%i4 + 0]
4372 st %f13, [%i4 + 4]
4373 st %f14, [%i4 + 8]
4374 st %f15, [%i4 + 12]
4375 ret
4376 restore
4377
4378 .align 16
4379 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4380 ! and ~3x deterioration
4381 ! in inp==out case
4382 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
4383 .word 0x81b00902 !faligndata %f0,%f2,%f0
4384 .word 0x85b08904 !faligndata %f2,%f4,%f2
4385 .word 0x89b10906 !faligndata %f4,%f6,%f4
4386 .word 0x8db18906 !faligndata %f6,%f6,%f6
4387 stda %f8, [%i1 + %l3]0xc0 ! partial store
4388 std %f0, [%i1 + 8]
4389 std %f2, [%i1 + 16]
4390 std %f4, [%i1 + 24]
4391 add %i1, 32, %i1
4392 orn %g0, %l3, %l3
4393 stda %f6, [%i1 + %l3]0xc0 ! partial store
4394
4395 brnz,pt %i2, .L256_cbc_dec_loop2x+4
4396 orn %g0, %l3, %l3
4397 st %f12, [%i4 + 0]
4398 st %f13, [%i4 + 4]
4399 st %f14, [%i4 + 8]
4400 st %f15, [%i4 + 12]
4401 ret
4402 restore
4403
4404 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: ASI_BLK_INIT (0xe2) stores for large aligned output,
! tail handed back to the ordinary loops after a membar.
4405 .align 32
4406 .L256cbc_dec_blk:
4407 add %i1, %i2, %l5
4408 and %l5, 63, %l5 ! tail
4409 sub %i2, %l5, %i2
4410 add %l5, 15, %l5 ! round up to 16n
4411 srlx %i2, 4, %i2
4412 srl %l5, 4, %l5
4413 sub %i2, 1, %i2
4414 add %l5, 1, %l5
4415
4416 .L256_cbc_dec_blk_loop2x:
4417 ldx [%i0 + 0], %o0
4418 ldx [%i0 + 8], %o1
4419 ldx [%i0 + 16], %o2
4420 brz,pt %l0, 5f
4421 ldx [%i0 + 24], %o3
4422
4423 ldx [%i0 + 32], %o4
4424 sllx %o0, %l0, %o0
4425 srlx %o1, %l1, %g1
4426 or %g1, %o0, %o0
4427 sllx %o1, %l0, %o1
4428 srlx %o2, %l1, %g1
4429 or %g1, %o1, %o1
4430 sllx %o2, %l0, %o2
4431 srlx %o3, %l1, %g1
4432 or %g1, %o2, %o2
4433 sllx %o3, %l0, %o3
4434 srlx %o4, %l1, %o4
4435 or %o4, %o3, %o3
4436 5:
4437 xor %g4, %o0, %o4 ! ^= rk[0]
4438 xor %g5, %o1, %o5
4439 .word 0x81b0230c !movxtod %o4,%f0
4440 .word 0x85b0230d !movxtod %o5,%f2
4441 xor %g4, %o2, %o4
4442 xor %g5, %o3, %o5
4443 .word 0x89b0230c !movxtod %o4,%f4
4444 .word 0x8db0230d !movxtod %o5,%f6
4445
4446 prefetch [%i0 + 32+63], 20
4447 call _aes256_decrypt_2x
4448 add %i0, 32, %i0
4449 subcc %i2, 2, %i2
4450
4451 .word 0x91b02308 !movxtod %o0,%f8
4452 .word 0x95b02309 !movxtod %o1,%f10
4453 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4454 .word 0x85b38d82 !fxor %f14,%f2,%f2
4455 .word 0x99b0230a !movxtod %o2,%f12
4456 .word 0x9db0230b !movxtod %o3,%f14
4457 .word 0x89b20d84 !fxor %f8,%f4,%f4
4458 .word 0x8db28d86 !fxor %f10,%f6,%f6
4459
4460 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4461 add %i1, 8, %i1
4462 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4463 add %i1, 8, %i1
4464 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4465 add %i1, 8, %i1
4466 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4467 bgu,pt SIZE_T_CC, .L256_cbc_dec_blk_loop2x
4468 add %i1, 8, %i1
4469
4470 add %l5, %i2, %i2
4471 andcc %i2, 1, %g0 ! is number of blocks even?
4472 membar #StoreLoad|#StoreStore
4473 bnz,pt %icc, .L256_cbc_dec_loop
4474 srl %i2, 0, %i2
4475 brnz,pn %i2, .L256_cbc_dec_loop2x
4476 nop
4477 st %f12, [%i4 + 0] ! write out ivec
4478 st %f13, [%i4 + 4]
4479 st %f14, [%i4 + 8]
4480 st %f15, [%i4 + 12]
4481 ret
4482 restore
4483 .type aes256_t4_cbc_decrypt,#function
4484 .size aes256_t4_cbc_decrypt,.-aes256_t4_cbc_decrypt
!----------------------------------------------------------------------
! _aes256_decrypt_1x - decrypt one 16-byte block using the SPARC T4
! AES instructions (14 rounds, AES-256).  The round instructions are
! hand-encoded as .word; the trailing ! comment on each is the decoded
! mnemonic.
!
! In:   %f0/%f2   = input block, already XOR-ed with round key 0 by the
!                   caller (callers xor %g4/%g5 before movxtod)
!       %f16-%f62 = round keys for rounds 1-12, pre-loaded by the caller
!                   from [%i3 + 16] .. [%i3 + 200]
!       %i3       = expanded key schedule pointer
! Out:  %f0/%f2   = decrypted block
! Uses: %f4 as the per-round temporary; %f16-%f22 are reloaded below
! Note: leaf routine (retl); it runs inside the caller's register
!       window, hence the %i3 key pointer.
4485	.align	32
4486	_aes256_decrypt_1x:
4487	.word	0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4488	.word	0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
! rounds 1-2 consumed %f16-%f22; reuse those registers for the last two
! round keys (offsets 208..232), which do not fit in %f16-%f62
4489	ldd	[%i3 + 208], %f16
4490	ldd	[%i3 + 216], %f18
4491	.word	0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
4492	.word	0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
4493	ldd	[%i3 + 224], %f20
4494	ldd	[%i3 + 232], %f22
! rounds 3-12 from the resident schedule in %f24-%f62
4495	.word	0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
4496	.word	0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4497	.word	0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
4498	.word	0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
4499	.word	0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
4500	.word	0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4501	.word	0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
4502	.word	0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
4503	.word	0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
4504	.word	0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4505	.word	0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
4506	.word	0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
4507	.word	0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
4508	.word	0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4509	.word	0x80cd4444 !aes_dround01 %f52,%f4,%f2,%f0
4510	.word	0x84cdc464 !aes_dround23 %f54,%f4,%f2,%f2
4511	.word	0x88ce4440 !aes_dround01 %f56,%f0,%f2,%f4
4512	.word	0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4513	.word	0x80cf4444 !aes_dround01 %f60,%f4,%f2,%f0
4514	.word	0x84cfc464 !aes_dround23 %f62,%f4,%f2,%f2
! round 13 uses the keys loaded into %f16/%f18 above
4515	.word	0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4516	.word	0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
! restore %f16/%f18 (round-1 keys) so the resident schedule is intact
! for the next call
4517	ldd	[%i3 + 16], %f16
4518	ldd	[%i3 + 24], %f18
! round 14: final round uses the _l ("last") flavor
4519	.word	0x80cd04c4 !aes_dround01_l %f20,%f4,%f2,%f0
4520	.word	0x84cd84e4 !aes_dround23_l %f22,%f4,%f2,%f2
4521	ldd	[%i3 + 32], %f20
4522	retl
! branch delay slot: restore the last resident key register
4523	ldd	[%i3 + 40], %f22
4524	.type	_aes256_decrypt_1x,#function
4525	.size	_aes256_decrypt_1x,.-_aes256_decrypt_1x
4526
!----------------------------------------------------------------------
! _aes256_decrypt_2x - decrypt two independent 16-byte blocks in
! parallel with the T4 AES instructions (14 rounds, AES-256), with the
! two streams interleaved to hide instruction latency.
!
! In:   %f0/%f2   = block 0, pre-XOR-ed with round key 0 by the caller
!       %f4/%f6   = block 1, pre-XOR-ed with round key 0 by the caller
!       %f16-%f62 = round keys for rounds 1-12 ([%i3 + 16] .. [%i3 + 200])
!       %i3       = expanded key schedule pointer
! Out:  %f0/%f2, %f4/%f6 = decrypted blocks
! Uses: %f8/%f10 as per-round temporaries; %f16-%f22 reloaded below
! Note: leaf routine (retl), executed inside the caller's window.
4527	.align	32
4528	_aes256_decrypt_2x:
4529	.word	0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4530	.word	0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4531	.word	0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4532	.word	0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
! %f16/%f18 consumed for both streams; fetch round-13 keys into them
4533	ldd	[%i3 + 208], %f16
4534	ldd	[%i3 + 216], %f18
4535	.word	0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
4536	.word	0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
4537	.word	0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
4538	.word	0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
! likewise round-14 keys replace %f20/%f22
4539	ldd	[%i3 + 224], %f20
4540	ldd	[%i3 + 232], %f22
! rounds 3-12, two blocks interleaved, keys resident in %f24-%f62
4541	.word	0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
4542	.word	0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4543	.word	0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
4544	.word	0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
4545	.word	0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
4546	.word	0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
4547	.word	0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
4548	.word	0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
4549	.word	0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
4550	.word	0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4551	.word	0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
4552	.word	0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
4553	.word	0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
4554	.word	0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
4555	.word	0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
4556	.word	0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
4557	.word	0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
4558	.word	0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4559	.word	0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
4560	.word	0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
4561	.word	0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
4562	.word	0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
4563	.word	0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
4564	.word	0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
4565	.word	0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
4566	.word	0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4567	.word	0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
4568	.word	0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
4569	.word	0x80cd4448 !aes_dround01 %f52,%f8,%f2,%f0
4570	.word	0x84cdc468 !aes_dround23 %f54,%f8,%f2,%f2
4571	.word	0x88cd4c4a !aes_dround01 %f52,%f10,%f6,%f4
4572	.word	0x8ccdcc6a !aes_dround23 %f54,%f10,%f6,%f6
4573	.word	0x90ce4440 !aes_dround01 %f56,%f0,%f2,%f8
4574	.word	0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4575	.word	0x94ce4c44 !aes_dround01 %f56,%f4,%f6,%f10
4576	.word	0x8ccecc64 !aes_dround23 %f58,%f4,%f6,%f6
4577	.word	0x80cf4448 !aes_dround01 %f60,%f8,%f2,%f0
4578	.word	0x84cfc468 !aes_dround23 %f62,%f8,%f2,%f2
4579	.word	0x88cf4c4a !aes_dround01 %f60,%f10,%f6,%f4
4580	.word	0x8ccfcc6a !aes_dround23 %f62,%f10,%f6,%f6
! round 13 with the keys reloaded into %f16/%f18 above
4581	.word	0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4582	.word	0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4583	.word	0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4584	.word	0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
! restore %f16/%f18 so the caller's resident schedule survives the call
4585	ldd	[%i3 + 16], %f16
4586	ldd	[%i3 + 24], %f18
! round 14: final round, _l ("last") flavor, both streams
4587	.word	0x80cd04c8 !aes_dround01_l %f20,%f8,%f2,%f0
4588	.word	0x84cd84e8 !aes_dround23_l %f22,%f8,%f2,%f2
4589	.word	0x88cd0cca !aes_dround01_l %f20,%f10,%f6,%f4
4590	.word	0x8ccd8cea !aes_dround23_l %f22,%f10,%f6,%f6
4591	ldd	[%i3 + 32], %f20
4592	retl
! branch delay slot: restore the last resident key register
4593	ldd	[%i3 + 40], %f22
4594	.type	_aes256_decrypt_2x,#function
4595	.size	_aes256_decrypt_2x,.-_aes256_decrypt_2x
4596
!----------------------------------------------------------------------
! _aes192_decrypt_1x - decrypt one 16-byte block using the T4 AES
! instructions (12 rounds, AES-192).  Unlike the AES-256 variant, the
! entire schedule for rounds 1-12 fits in %f16-%f62, so no key
! registers are reloaded mid-stream.
!
! In:   %f0/%f2   = input block, pre-XOR-ed with round key 0 by caller
!       %f16-%f62 = round keys for rounds 1-12, pre-loaded by the caller
! Out:  %f0/%f2   = decrypted block
! Uses: %f4 as the per-round temporary; no other registers touched
! Note: leaf routine (retl); final round instruction sits in the
!       branch delay slot.
4597	.align	32
4598	_aes192_decrypt_1x:
4599	.word	0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4600	.word	0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4601	.word	0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
4602	.word	0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
4603	.word	0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
4604	.word	0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4605	.word	0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
4606	.word	0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
4607	.word	0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
4608	.word	0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4609	.word	0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
4610	.word	0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
4611	.word	0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
4612	.word	0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4613	.word	0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
4614	.word	0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
4615	.word	0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
4616	.word	0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4617	.word	0x80cd4444 !aes_dround01 %f52,%f4,%f2,%f0
4618	.word	0x84cdc464 !aes_dround23 %f54,%f4,%f2,%f2
4619	.word	0x88ce4440 !aes_dround01 %f56,%f0,%f2,%f4
4620	.word	0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
! round 12: final round (_l flavor); second half executes in the
! retl delay slot
4621	.word	0x80cf44c4 !aes_dround01_l %f60,%f4,%f2,%f0
4622	retl
4623	.word	0x84cfc4e4 !aes_dround23_l %f62,%f4,%f2,%f2
4624	.type	_aes192_decrypt_1x,#function
4625	.size	_aes192_decrypt_1x,.-_aes192_decrypt_1x
4626
!----------------------------------------------------------------------
! _aes192_decrypt_2x - decrypt two independent 16-byte blocks in
! parallel with the T4 AES instructions (12 rounds, AES-192), streams
! interleaved to hide round-instruction latency.  The full schedule
! for rounds 1-12 is resident in %f16-%f62, so no key reloads occur.
!
! In:   %f0/%f2   = block 0, pre-XOR-ed with round key 0 by the caller
!       %f4/%f6   = block 1, pre-XOR-ed with round key 0 by the caller
!       %f16-%f62 = round keys for rounds 1-12, pre-loaded by the caller
! Out:  %f0/%f2, %f4/%f6 = decrypted blocks
! Uses: %f8/%f10 as per-round temporaries
! Note: leaf routine (retl); last round instruction is in the delay slot.
4627	.align	32
4628	_aes192_decrypt_2x:
4629	.word	0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4630	.word	0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4631	.word	0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4632	.word	0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
4633	.word	0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
4634	.word	0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
4635	.word	0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
4636	.word	0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
4637	.word	0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
4638	.word	0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4639	.word	0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
4640	.word	0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
4641	.word	0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
4642	.word	0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
4643	.word	0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
4644	.word	0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
4645	.word	0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
4646	.word	0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4647	.word	0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
4648	.word	0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
4649	.word	0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
4650	.word	0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
4651	.word	0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
4652	.word	0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
4653	.word	0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
4654	.word	0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4655	.word	0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
4656	.word	0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
4657	.word	0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
4658	.word	0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
4659	.word	0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
4660	.word	0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
4661	.word	0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
4662	.word	0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4663	.word	0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
4664	.word	0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
4665	.word	0x80cd4448 !aes_dround01 %f52,%f8,%f2,%f0
4666	.word	0x84cdc468 !aes_dround23 %f54,%f8,%f2,%f2
4667	.word	0x88cd4c4a !aes_dround01 %f52,%f10,%f6,%f4
4668	.word	0x8ccdcc6a !aes_dround23 %f54,%f10,%f6,%f6
4669	.word	0x90ce4440 !aes_dround01 %f56,%f0,%f2,%f8
4670	.word	0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4671	.word	0x94ce4c44 !aes_dround01 %f56,%f4,%f6,%f10
4672	.word	0x8ccecc64 !aes_dround23 %f58,%f4,%f6,%f6
! round 12: final round (_l flavor) for both streams; the last
! instruction executes in the retl delay slot
4673	.word	0x80cf44c8 !aes_dround01_l %f60,%f8,%f2,%f0
4674	.word	0x84cfc4e8 !aes_dround23_l %f62,%f8,%f2,%f2
4675	.word	0x88cf4cca !aes_dround01_l %f60,%f10,%f6,%f4
4676	retl
4677	.word	0x8ccfccea !aes_dround23_l %f62,%f10,%f6,%f6
4678	.type	_aes192_decrypt_2x,#function
4679	.size	_aes192_decrypt_2x,.-_aes192_decrypt_2x
4680 .asciz "AES for SPARC T4, David S. Miller, Andy Polyakov"
4681 .align 4
4682