! aest4-sparcv9.S — AES for SPARC T4.
! T4 crypto instructions are emitted as .word constants (with the decoded
! mnemonic in a comment) so that pre-T4 assemblers can still build the file.
	.text

! void aes_t4_encrypt(const unsigned char *inp, unsigned char *out,
!                     const AES_KEY *key);
! In:  %o0 = input block (16 bytes, may be unaligned)
!      %o1 = output block (may be unaligned)
!      %o2 = key schedule: round keys at +0, round count (u32) at +240
! Clobbers: %g1, %g4, %g5, %o3-%o5, %f0-%f18
.globl	aes_t4_encrypt
.align	32
aes_t4_encrypt:
	andcc	%o0, 7, %g1		! is input aligned?
	andn	%o0, 7, %o0		! round input pointer down to 8

	ldx	[%o2 + 0], %g4		! rk[0] as two 64-bit halves
	ldx	[%o2 + 8], %g5

	ldx	[%o0 + 0], %o4
	bz,pt	%icc, 1f		! aligned: skip the shift/merge fixup
	ldx	[%o0 + 8], %o5
	ldx	[%o0 + 16], %o0		! third dword covers the overhang
	sll	%g1, 3, %g1		! misalignment in bytes -> bits
	sub	%g0, %g1, %o3		! complementary shift count
	sllx	%o4, %g1, %o4
	sllx	%o5, %g1, %g1
	srlx	%o5, %o3, %o5
	srlx	%o0, %o3, %o3
	or	%o5, %o4, %o4		! %o4:%o5 = realigned input block
	or	%o3, %g1, %o5
1:
	ld	[%o2 + 240], %o3	! number of rounds
	ldd	[%o2 + 16], %f12	! pre-load rk[1]
	ldd	[%o2 + 24], %f14
	xor	%g4, %o4, %o4		! ^= rk[0]
	xor	%g5, %o5, %o5
	.word	0x81b0230c !movxtod %o4,%f0
	.word	0x85b0230d !movxtod %o5,%f2
	srl	%o3, 1, %o3		! loop does two rounds per iteration
	ldd	[%o2 + 32], %f16	! pre-load rk[2]
	sub	%o3, 1, %o3		! last two rounds handled after loop
	ldd	[%o2 + 40], %f18
	add	%o2, 48, %o2

.Lenc:
	.word	0x88cb0400 !aes_eround01 %f12,%f0,%f2,%f4
	.word	0x84cb8420 !aes_eround23 %f14,%f0,%f2,%f2
	ldd	[%o2 + 0], %f12		! load next round keys in the shadow
	ldd	[%o2 + 8], %f14
	sub	%o3,1,%o3
	.word	0x80cc0404 !aes_eround01 %f16,%f4,%f2,%f0
	.word	0x84cc8424 !aes_eround23 %f18,%f4,%f2,%f2
	ldd	[%o2 + 16], %f16
	ldd	[%o2 + 24], %f18
	brnz,pt	%o3, .Lenc
	add	%o2, 32, %o2

	andcc	%o1, 7, %o4		! is output aligned?
	.word	0x88cb0400 !aes_eround01 %f12,%f0,%f2,%f4
	.word	0x84cb8420 !aes_eround23 %f14,%f0,%f2,%f2
	.word	0x80cc0484 !aes_eround01_l %f16,%f4,%f2,%f0
	.word	0x84cc84a4 !aes_eround23_l %f18,%f4,%f2,%f2

	bnz,pn	%icc, 2f
	nop

	std	%f0, [%o1 + 0]		! aligned output: plain stores
	retl
	std	%f2, [%o1 + 8]

2:	.word	0x93b24340 !alignaddrl %o1,%g0,%o1
	mov	0xff, %o5		! build byte mask for partial store
	srl	%o5, %o4, %o5

	.word	0x89b00900 !faligndata %f0,%f0,%f4
	.word	0x8db00902 !faligndata %f0,%f2,%f6
	.word	0x91b08902 !faligndata %f2,%f2,%f8

	stda	%f4, [%o1 + %o5]0xc0	! partial store (leading bytes)
	std	%f6, [%o1 + 8]
	add	%o1, 16, %o1
	orn	%g0, %o5, %o5		! invert mask for the tail
	retl
	stda	%f8, [%o1 + %o5]0xc0	! partial store (trailing bytes)
.type	aes_t4_encrypt,#function
.size	aes_t4_encrypt,.-aes_t4_encrypt
80
! void aes_t4_decrypt(const unsigned char *inp, unsigned char *out,
!                     const AES_KEY *key);
! Mirror image of aes_t4_encrypt using the T4 decrypt-round opcodes
! (aes_dround*); expects an inverse key schedule (see
! aes_t4_set_decrypt_key).
! In:  %o0 = input block (may be unaligned), %o1 = output (may be
!      unaligned), %o2 = key schedule with round count at +240.
! Clobbers: %g1, %g4, %g5, %o3-%o5, %f0-%f18
.globl	aes_t4_decrypt
.align	32
aes_t4_decrypt:
	andcc	%o0, 7, %g1		! is input aligned?
	andn	%o0, 7, %o0		! round input pointer down to 8

	ldx	[%o2 + 0], %g4		! rk[0]
	ldx	[%o2 + 8], %g5

	ldx	[%o0 + 0], %o4
	bz,pt	%icc, 1f		! aligned: skip shift/merge fixup
	ldx	[%o0 + 8], %o5
	ldx	[%o0 + 16], %o0		! third dword covers the overhang
	sll	%g1, 3, %g1		! misalignment in bytes -> bits
	sub	%g0, %g1, %o3
	sllx	%o4, %g1, %o4
	sllx	%o5, %g1, %g1
	srlx	%o5, %o3, %o5
	srlx	%o0, %o3, %o3
	or	%o5, %o4, %o4		! %o4:%o5 = realigned input block
	or	%o3, %g1, %o5
1:
	ld	[%o2 + 240], %o3	! number of rounds
	ldd	[%o2 + 16], %f12	! pre-load rk[1]
	ldd	[%o2 + 24], %f14
	xor	%g4, %o4, %o4		! ^= rk[0]
	xor	%g5, %o5, %o5
	.word	0x81b0230c !movxtod %o4,%f0
	.word	0x85b0230d !movxtod %o5,%f2
	srl	%o3, 1, %o3		! two rounds per loop iteration
	ldd	[%o2 + 32], %f16	! pre-load rk[2]
	sub	%o3, 1, %o3		! last two rounds done after loop
	ldd	[%o2 + 40], %f18
	add	%o2, 48, %o2

.Ldec:
	.word	0x88cb0440 !aes_dround01 %f12,%f0,%f2,%f4
	.word	0x84cb8460 !aes_dround23 %f14,%f0,%f2,%f2
	ldd	[%o2 + 0], %f12		! load next round keys in the shadow
	ldd	[%o2 + 8], %f14
	sub	%o3,1,%o3
	.word	0x80cc0444 !aes_dround01 %f16,%f4,%f2,%f0
	.word	0x84cc8464 !aes_dround23 %f18,%f4,%f2,%f2
	ldd	[%o2 + 16], %f16
	ldd	[%o2 + 24], %f18
	brnz,pt	%o3, .Ldec
	add	%o2, 32, %o2

	andcc	%o1, 7, %o4		! is output aligned?
	.word	0x88cb0440 !aes_dround01 %f12,%f0,%f2,%f4
	.word	0x84cb8460 !aes_dround23 %f14,%f0,%f2,%f2
	.word	0x80cc04c4 !aes_dround01_l %f16,%f4,%f2,%f0
	.word	0x84cc84e4 !aes_dround23_l %f18,%f4,%f2,%f2

	bnz,pn	%icc, 2f
	nop

	std	%f0, [%o1 + 0]		! aligned output: plain stores
	retl
	std	%f2, [%o1 + 8]

2:	.word	0x93b24340 !alignaddrl %o1,%g0,%o1
	mov	0xff, %o5		! byte mask for partial store
	srl	%o5, %o4, %o5

	.word	0x89b00900 !faligndata %f0,%f0,%f4
	.word	0x8db00902 !faligndata %f0,%f2,%f6
	.word	0x91b08902 !faligndata %f2,%f2,%f8

	stda	%f4, [%o1 + %o5]0xc0	! partial store (leading bytes)
	std	%f6, [%o1 + 8]
	add	%o1, 16, %o1
	orn	%g0, %o5, %o5		! invert mask for the tail
	retl
	stda	%f8, [%o1 + %o5]0xc0	! partial store (trailing bytes)
.type	aes_t4_decrypt,#function
.size	aes_t4_decrypt,.-aes_t4_decrypt
! int aes_t4_set_encrypt_key(const unsigned char *inp, int bits,
!                            AES_KEY *key);
! In:  %o0 = user key (may be unaligned), %o1 = key length in bits
!      (128/192/256), %o2 = output key schedule.
! Out: round keys written at %o2+0.., round count (10/12/14) stored at
!      %o2+240; returns 0 in %o0.
! Dispatches on key size: <192 -> .L128, ==192 -> .L192, else 256-bit
! fall-through. Each path uses the T4 kexpand opcodes to unroll the full
! expansion.
.globl	aes_t4_set_encrypt_key
.align	32
aes_t4_set_encrypt_key:
.Lset_encrypt_key:
	and	%o0, 7, %o3		! remember misalignment
	.word	0x91b20300 !alignaddr %o0,%g0,%o0
	cmp	%o1, 192
	ldd	[%o0 + 0], %f0
	bl,pt	%icc,.L128
	ldd	[%o0 + 8], %f2

	be,pt	%icc,.L192
	ldd	[%o0 + 16], %f4
	brz,pt	%o3, .L256aligned
	ldd	[%o0 + 24], %f6

	ldd	[%o0 + 32], %f8		! extra dword for realignment
	.word	0x81b00902 !faligndata %f0,%f2,%f0
	.word	0x85b08904 !faligndata %f2,%f4,%f2
	.word	0x89b10906 !faligndata %f4,%f6,%f4
	.word	0x8db18908 !faligndata %f6,%f8,%f6
.L256aligned:
	std	%f0, [%o2 + 0]
	.word	0x80c80106 !aes_kexpand1 %f0,%f6,0,%f0
	std	%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 16]
	.word	0x89b12602 !aes_kexpand0 %f4,%f2,%f4
	std	%f6, [%o2 + 24]
	.word	0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
	std	%f0, [%o2 + 32]
	.word	0x80c80306 !aes_kexpand1 %f0,%f6,1,%f0
	std	%f2, [%o2 + 40]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 48]
	.word	0x89b12602 !aes_kexpand0 %f4,%f2,%f4
	std	%f6, [%o2 + 56]
	.word	0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
	std	%f0, [%o2 + 64]
	.word	0x80c80506 !aes_kexpand1 %f0,%f6,2,%f0
	std	%f2, [%o2 + 72]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 80]
	.word	0x89b12602 !aes_kexpand0 %f4,%f2,%f4
	std	%f6, [%o2 + 88]
	.word	0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
	std	%f0, [%o2 + 96]
	.word	0x80c80706 !aes_kexpand1 %f0,%f6,3,%f0
	std	%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 112]
	.word	0x89b12602 !aes_kexpand0 %f4,%f2,%f4
	std	%f6, [%o2 + 120]
	.word	0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
	std	%f0, [%o2 + 128]
	.word	0x80c80906 !aes_kexpand1 %f0,%f6,4,%f0
	std	%f2, [%o2 + 136]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 144]
	.word	0x89b12602 !aes_kexpand0 %f4,%f2,%f4
	std	%f6, [%o2 + 152]
	.word	0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
	std	%f0, [%o2 + 160]
	.word	0x80c80b06 !aes_kexpand1 %f0,%f6,5,%f0
	std	%f2, [%o2 + 168]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 176]
	.word	0x89b12602 !aes_kexpand0 %f4,%f2,%f4
	std	%f6, [%o2 + 184]
	.word	0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
	std	%f0, [%o2 + 192]
	.word	0x80c80d06 !aes_kexpand1 %f0,%f6,6,%f0
	std	%f2, [%o2 + 200]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 208]
	std	%f6, [%o2 + 216]
	std	%f0, [%o2 + 224]
	std	%f2, [%o2 + 232]

	mov	14, %o3			! 14 rounds for AES-256
	st	%o3, [%o2 + 240]
	retl
	xor	%o0, %o0, %o0		! return 0

.align	16
.L192:
	brz,pt	%o3, .L192aligned
	nop

	ldd	[%o0 + 24], %f6		! extra dword for realignment
	.word	0x81b00902 !faligndata %f0,%f2,%f0
	.word	0x85b08904 !faligndata %f2,%f4,%f2
	.word	0x89b10906 !faligndata %f4,%f6,%f4
.L192aligned:
	std	%f0, [%o2 + 0]
	.word	0x80c80104 !aes_kexpand1 %f0,%f4,0,%f0
	std	%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 16]
	.word	0x89b12622 !aes_kexpand2 %f4,%f2,%f4
	std	%f0, [%o2 + 24]
	.word	0x80c80304 !aes_kexpand1 %f0,%f4,1,%f0
	std	%f2, [%o2 + 32]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 40]
	.word	0x89b12622 !aes_kexpand2 %f4,%f2,%f4
	std	%f0, [%o2 + 48]
	.word	0x80c80504 !aes_kexpand1 %f0,%f4,2,%f0
	std	%f2, [%o2 + 56]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 64]
	.word	0x89b12622 !aes_kexpand2 %f4,%f2,%f4
	std	%f0, [%o2 + 72]
	.word	0x80c80704 !aes_kexpand1 %f0,%f4,3,%f0
	std	%f2, [%o2 + 80]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 88]
	.word	0x89b12622 !aes_kexpand2 %f4,%f2,%f4
	std	%f0, [%o2 + 96]
	.word	0x80c80904 !aes_kexpand1 %f0,%f4,4,%f0
	std	%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 112]
	.word	0x89b12622 !aes_kexpand2 %f4,%f2,%f4
	std	%f0, [%o2 + 120]
	.word	0x80c80b04 !aes_kexpand1 %f0,%f4,5,%f0
	std	%f2, [%o2 + 128]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 136]
	.word	0x89b12622 !aes_kexpand2 %f4,%f2,%f4
	std	%f0, [%o2 + 144]
	.word	0x80c80d04 !aes_kexpand1 %f0,%f4,6,%f0
	std	%f2, [%o2 + 152]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 160]
	.word	0x89b12622 !aes_kexpand2 %f4,%f2,%f4
	std	%f0, [%o2 + 168]
	.word	0x80c80f04 !aes_kexpand1 %f0,%f4,7,%f0
	std	%f2, [%o2 + 176]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f4, [%o2 + 184]
	std	%f0, [%o2 + 192]
	std	%f2, [%o2 + 200]

	mov	12, %o3			! 12 rounds for AES-192
	st	%o3, [%o2 + 240]
	retl
	xor	%o0, %o0, %o0		! return 0

.align	16
.L128:
	brz,pt	%o3, .L128aligned
	nop

	ldd	[%o0 + 16], %f4		! extra dword for realignment
	.word	0x81b00902 !faligndata %f0,%f2,%f0
	.word	0x85b08904 !faligndata %f2,%f4,%f2
.L128aligned:
	std	%f0, [%o2 + 0]
	.word	0x80c80102 !aes_kexpand1 %f0,%f2,0,%f0
	std	%f2, [%o2 + 8]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 16]
	.word	0x80c80302 !aes_kexpand1 %f0,%f2,1,%f0
	std	%f2, [%o2 + 24]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 32]
	.word	0x80c80502 !aes_kexpand1 %f0,%f2,2,%f0
	std	%f2, [%o2 + 40]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 48]
	.word	0x80c80702 !aes_kexpand1 %f0,%f2,3,%f0
	std	%f2, [%o2 + 56]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 64]
	.word	0x80c80902 !aes_kexpand1 %f0,%f2,4,%f0
	std	%f2, [%o2 + 72]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 80]
	.word	0x80c80b02 !aes_kexpand1 %f0,%f2,5,%f0
	std	%f2, [%o2 + 88]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 96]
	.word	0x80c80d02 !aes_kexpand1 %f0,%f2,6,%f0
	std	%f2, [%o2 + 104]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 112]
	.word	0x80c80f02 !aes_kexpand1 %f0,%f2,7,%f0
	std	%f2, [%o2 + 120]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 128]
	.word	0x80c81102 !aes_kexpand1 %f0,%f2,8,%f0
	std	%f2, [%o2 + 136]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 144]
	.word	0x80c81302 !aes_kexpand1 %f0,%f2,9,%f0
	std	%f2, [%o2 + 152]
	.word	0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
	std	%f0, [%o2 + 160]
	std	%f2, [%o2 + 168]

	mov	10, %o3			! 10 rounds for AES-128
	st	%o3, [%o2 + 240]
	retl
	xor	%o0, %o0, %o0		! return 0
.type	aes_t4_set_encrypt_key,#function
.size	aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
365
! int aes_t4_set_decrypt_key(const unsigned char *inp, int bits,
!                            AES_KEY *key);
! Builds the encrypt schedule via .Lset_encrypt_key (saving/restoring the
! return address around the non-standard internal call), then reverses the
! round-key order in place: 32-byte chunks from both ends of the schedule
! are swapped toward the middle, which is what the T4 dround opcodes
! expect. Returns 0 in %o0.
.globl	aes_t4_set_decrypt_key
.align	32
aes_t4_set_decrypt_key:
	mov	%o7, %o5		! preserve return address
	call	.Lset_encrypt_key
	nop

	mov	%o5, %o7		! restore return address
	sll	%o3, 4, %o0		! %o3 is number of rounds
	add	%o3, 2, %o3
	add	%o2, %o0, %o0		! %o0=%o2+16*rounds
	srl	%o3, 2, %o3		! %o3=(rounds+2)/4

.Lkey_flip:
	ldd	[%o2 + 0], %f0		! two round keys from the front...
	ldd	[%o2 + 8], %f2
	ldd	[%o2 + 16], %f4
	ldd	[%o2 + 24], %f6
	ldd	[%o0 + 0], %f8		! ...two from the back
	ldd	[%o0 + 8], %f10
	ldd	[%o0 - 16], %f12
	ldd	[%o0 - 8], %f14
	sub	%o3, 1, %o3
	std	%f0, [%o0 + 0]		! swap them
	std	%f2, [%o0 + 8]
	std	%f4, [%o0 - 16]
	std	%f6, [%o0 - 8]
	std	%f8, [%o2 + 0]
	std	%f10, [%o2 + 8]
	std	%f12, [%o2 + 16]
	std	%f14, [%o2 + 24]
	add	%o2, 32, %o2		! walk pointers toward each other
	brnz	%o3, .Lkey_flip
	sub	%o0, 32, %o0

	retl
	xor	%o0, %o0, %o0		! return 0
.type	aes_t4_set_decrypt_key,#function
.size	aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
! _aes128_encrypt_1x: encrypt one block already whitened with rk[0].
! Internal helper; assumes _aes128_loadkey has filled %f16-%f54 with the
! AES-128 round keys.
! In/Out: %f0:%f2 = block (input already XORed with rk[0]; output is the
!         final ciphertext). Clobbers %f4. Leaf routine (retl).
! Callers may also enter at +8 to skip the first round pair (see ctr32).
.align	32
_aes128_encrypt_1x:
	.word	0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
	.word	0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
	.word	0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
	.word	0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
	.word	0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
	.word	0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
	.word	0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
	.word	0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
	.word	0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
	.word	0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
	.word	0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
	.word	0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
	.word	0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
	.word	0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
	.word	0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
	.word	0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
	.word	0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
	.word	0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
	.word	0x80cd4484 !aes_eround01_l %f52,%f4,%f2,%f0
	retl
	.word	0x84cdc4a4 !aes_eround23_l %f54,%f4,%f2,%f2
.type	_aes128_encrypt_1x,#function
.size	_aes128_encrypt_1x,.-_aes128_encrypt_1x
430
! _aes128_encrypt_2x: encrypt two independent blocks, interleaved to hide
! instruction latency. Internal helper; round keys in %f16-%f54 via
! _aes128_loadkey.
! In/Out: %f0:%f2 = block 0, %f4:%f6 = block 1 (inputs already XORed with
!         rk[0]). Clobbers %f8, %f10. Leaf routine (retl).
! Callers may enter at +16 to skip the first round pair (see ctr32).
.align	32
_aes128_encrypt_2x:
	.word	0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
	.word	0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
	.word	0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
	.word	0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
	.word	0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
	.word	0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
	.word	0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
	.word	0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
	.word	0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
	.word	0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
	.word	0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
	.word	0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
	.word	0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
	.word	0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
	.word	0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
	.word	0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
	.word	0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
	.word	0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
	.word	0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
	.word	0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
	.word	0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
	.word	0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
	.word	0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
	.word	0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
	.word	0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
	.word	0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
	.word	0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
	.word	0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
	.word	0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
	.word	0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
	.word	0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
	.word	0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
	.word	0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
	.word	0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
	.word	0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
	.word	0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
	.word	0x80cd4488 !aes_eround01_l %f52,%f8,%f2,%f0
	.word	0x84cdc4a8 !aes_eround23_l %f54,%f8,%f2,%f2
	.word	0x88cd4c8a !aes_eround01_l %f52,%f10,%f6,%f4
	retl
	.word	0x8ccdccaa !aes_eround23_l %f54,%f10,%f6,%f6
.type	_aes128_encrypt_2x,#function
.size	_aes128_encrypt_2x,.-_aes128_encrypt_2x
476
! _aes128_loadkey: cache the whole AES-128 key schedule in registers.
! In:  %i3 = key schedule pointer (caller's register window; callers are
!      the windowed aes128_t4_* routines below).
! Out: %g4:%g5 = rk[0] halves; %f16-%f54 = remaining round keys.
! Leaf routine (retl). Same layout serves encrypt and decrypt, hence the
! two alias symbols at the bottom.
.align	32
_aes128_loadkey:
	ldx	[%i3 + 0], %g4		! rk[0]
	ldx	[%i3 + 8], %g5
	ldd	[%i3 + 16], %f16
	ldd	[%i3 + 24], %f18
	ldd	[%i3 + 32], %f20
	ldd	[%i3 + 40], %f22
	ldd	[%i3 + 48], %f24
	ldd	[%i3 + 56], %f26
	ldd	[%i3 + 64], %f28
	ldd	[%i3 + 72], %f30
	ldd	[%i3 + 80], %f32
	ldd	[%i3 + 88], %f34
	ldd	[%i3 + 96], %f36
	ldd	[%i3 + 104], %f38
	ldd	[%i3 + 112], %f40
	ldd	[%i3 + 120], %f42
	ldd	[%i3 + 128], %f44
	ldd	[%i3 + 136], %f46
	ldd	[%i3 + 144], %f48
	ldd	[%i3 + 152], %f50
	ldd	[%i3 + 160], %f52
	ldd	[%i3 + 168], %f54
	retl
	nop
.type	_aes128_loadkey,#function
.size	_aes128_loadkey,.-_aes128_loadkey
_aes128_load_enckey=_aes128_loadkey
_aes128_load_deckey=_aes128_loadkey
507
! void aes128_t4_cbc_encrypt(const unsigned char *inp, unsigned char *out,
!                            size_t len, const AES_KEY *key,
!                            unsigned char *ivec);
! In (register window): %i0=inp, %i1=out, %i2=len (bytes), %i3=key,
!      %i4=ivec. IV is updated in place on return.
! Two paths: the general loop handles unaligned in/out via shift/merge and
! partial stores; .L128cbc_enc_blk uses ASI_BLK_INIT stores when the
! output is aligned, len >= 128 and inp != out, with the sub-64-byte tail
! finished by the general loop.
.globl	aes128_t4_cbc_encrypt
.align	32
aes128_t4_cbc_encrypt:
	save	%sp, -112, %sp
	cmp	%i2, 0
	be,pn	%icc, .L128_cbc_enc_abort
	srln	%i2, 0, %i2		! needed on v8+, "nop" on v9
	sub	%i0, %i1, %l5		! %i0!=%i1
	ld	[%i4 + 0], %f0		! load ivec
	ld	[%i4 + 4], %f1
	ld	[%i4 + 8], %f2
	ld	[%i4 + 12], %f3
	prefetch [%i0], 20
	prefetch [%i0 + 63], 20
	call	_aes128_load_enckey
	and	%i0, 7, %l0
	andn	%i0, 7, %i0
	sll	%l0, 3, %l0		! input misalignment in bits
	mov	64, %l1
	mov	0xff, %l3
	sub	%l1, %l0, %l1		! complementary shift
	and	%i1, 7, %l2		! output misalignment in bytes
	cmp	%i2, 127
	movrnz	%l2, 0, %l5		! if (	%i1&7 ||
	movleu	%icc, 0, %l5		!	%i2<128 ||
	brnz,pn	%l5, .L128cbc_enc_blk	!	%i0==%i1)
	srl	%l3, %l2, %l3		! partial-store byte mask

	.word	0xb3b64340 !alignaddrl %i1,%g0,%i1
	srlx	%i2, 4, %i2		! length -> block count
	prefetch [%i1], 22

.L128_cbc_enc_loop:
	ldx	[%i0 + 0], %o0
	brz,pt	%l0, 4f			! input aligned: skip fixup
	ldx	[%i0 + 8], %o1

	ldx	[%i0 + 16], %o2
	sllx	%o0, %l0, %o0		! realign the 16-byte block
	srlx	%o1, %l1, %g1
	sllx	%o1, %l0, %o1
	or	%g1, %o0, %o0
	srlx	%o2, %l1, %o2
	or	%o2, %o1, %o1
4:
	xor	%g4, %o0, %o0		! ^= rk[0]
	xor	%g5, %o1, %o1
	.word	0x99b02308 !movxtod %o0,%f12
	.word	0x9db02309 !movxtod %o1,%f14

	.word	0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
	.word	0x85b38d82 !fxor %f14,%f2,%f2
	prefetch [%i1 + 63], 22
	prefetch [%i0 + 16+63], 20
	call	_aes128_encrypt_1x
	add	%i0, 16, %i0

	brnz,pn	%l2, 2f			! unaligned output path
	sub	%i2, 1, %i2

	std	%f0, [%i1 + 0]
	std	%f2, [%i1 + 8]
	brnz,pt	%i2, .L128_cbc_enc_loop
	add	%i1, 16, %i1
	st	%f0, [%i4 + 0]		! write back ivec
	st	%f1, [%i4 + 4]
	st	%f2, [%i4 + 8]
	st	%f3, [%i4 + 12]
.L128_cbc_enc_abort:
	ret
	restore

.align	16
2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
					! and ~3x deterioration
					! in inp==out case
	.word	0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
	.word	0x8db00902 !faligndata %f0,%f2,%f6
	.word	0x91b08902 !faligndata %f2,%f2,%f8

	stda	%f4, [%i1 + %l3]0xc0	! partial store
	std	%f6, [%i1 + 8]
	add	%i1, 16, %i1
	orn	%g0, %l3, %l3
	stda	%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt	%i2, .L128_cbc_enc_loop+4
	orn	%g0, %l3, %l3
	st	%f0, [%i4 + 0]		! write back ivec
	st	%f1, [%i4 + 4]
	st	%f2, [%i4 + 8]
	st	%f3, [%i4 + 12]
	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128cbc_enc_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5		! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5		! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5

.L128_cbc_enc_blk_loop:
	ldx	[%i0 + 0], %o0
	brz,pt	%l0, 5f			! input aligned: skip fixup
	ldx	[%i0 + 8], %o1

	ldx	[%i0 + 16], %o2
	sllx	%o0, %l0, %o0
	srlx	%o1, %l1, %g1
	sllx	%o1, %l0, %o1
	or	%g1, %o0, %o0
	srlx	%o2, %l1, %o2
	or	%o2, %o1, %o1
5:
	xor	%g4, %o0, %o0		! ^= rk[0]
	xor	%g5, %o1, %o1
	.word	0x99b02308 !movxtod %o0,%f12
	.word	0x9db02309 !movxtod %o1,%f14

	.word	0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
	.word	0x85b38d82 !fxor %f14,%f2,%f2
	prefetch [%i0 + 16+63], 20
	call	_aes128_encrypt_1x
	add	%i0, 16, %i0
	sub	%i2, 1, %i2

	stda	%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add	%i1, 8, %i1
	stda	%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	brnz,pt	%i2, .L128_cbc_enc_blk_loop
	add	%i1, 8, %i1

	membar	#StoreLoad|#StoreStore
	brnz,pt	%l5, .L128_cbc_enc_loop	! finish tail in the general loop
	mov	%l5, %i2
	st	%f0, [%i4 + 0]		! write back ivec
	st	%f1, [%i4 + 4]
	st	%f2, [%i4 + 8]
	st	%f3, [%i4 + 12]
	ret
	restore
.type	aes128_t4_cbc_encrypt,#function
.size	aes128_t4_cbc_encrypt,.-aes128_t4_cbc_encrypt
! void aes128_t4_ctr32_encrypt(const unsigned char *inp,
!                              unsigned char *out, size_t blocks,
!                              const AES_KEY *key,
!                              const unsigned char *ivec);
! In (register window): %i0=inp, %i1=out, %i2=block count, %i3=key,
!      %i4=counter block. The low 32-bit counter word (%l7) is
!      incremented per block with wraparound (srl ..,0 = clruw).
! The upper 96 bits of the counter are pre-XORed with rk[0] and kept in
! %g4/%f14 so only the low word needs per-block work. Paths: 1x loop,
! 2x interleaved loop, and an ASI_BLK_INIT bulk path (.L128_ctr32_blk)
! for aligned output, len >= 256, inp != out.
.globl	aes128_t4_ctr32_encrypt
.align	32
aes128_t4_ctr32_encrypt:
	save	%sp, -112, %sp
	srln	%i2, 0, %i2		! needed on v8+, "nop" on v9

	prefetch [%i0], 20
	prefetch [%i0 + 63], 20
	call	_aes128_load_enckey
	sllx	%i2, 4, %i2		! blocks -> bytes

	ld	[%i4 + 0], %l4		! counter
	ld	[%i4 + 4], %l5
	ld	[%i4 + 8], %l6
	ld	[%i4 + 12], %l7		! low word, incremented per block

	sllx	%l4, 32, %o5
	or	%l5, %o5, %o5
	sllx	%l6, 32, %g1
	xor	%o5, %g4, %g4		! ^= rk[0]
	xor	%g1, %g5, %g5
	.word	0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits

	sub	%i0, %i1, %l5		! %i0!=%i1
	and	%i0, 7, %l0
	andn	%i0, 7, %i0
	sll	%l0, 3, %l0		! input misalignment in bits
	mov	64, %l1
	mov	0xff, %l3
	sub	%l1, %l0, %l1
	and	%i1, 7, %l2		! output misalignment in bytes
	cmp	%i2, 255
	movrnz	%l2, 0, %l5		! if (	%i1&7 ||
	movleu	%icc, 0, %l5		!	%i2<256 ||
	brnz,pn	%l5, .L128_ctr32_blk	!	%i0==%i1)
	srl	%l3, %l2, %l3		! partial-store byte mask

	andcc	%i2, 16, %g0		! is number of blocks even?
	.word	0xb3b64340 !alignaddrl %i1,%g0,%i1
	bz	%icc, .L128_ctr32_loop2x
	srlx	%i2, 4, %i2
.L128_ctr32_loop:
	ldx	[%i0 + 0], %o0
	brz,pt	%l0, 4f			! input aligned: skip fixup
	ldx	[%i0 + 8], %o1

	ldx	[%i0 + 16], %o2
	sllx	%o0, %l0, %o0		! realign the 16-byte block
	srlx	%o1, %l1, %g1
	sllx	%o1, %l0, %o1
	or	%g1, %o0, %o0
	srlx	%o2, %l1, %o2
	or	%o2, %o1, %o1
4:
	xor	%g5, %l7, %g1		! ^= rk[0]
	add	%l7, 1, %l7
	.word	0x85b02301 !movxtod %g1,%f2
	srl	%l7, 0, %l7		! clruw
	prefetch [%i1 + 63], 22
	prefetch [%i0 + 16+63], 20
	.word	0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
	.word	0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
	call	_aes128_encrypt_1x+8	! first round done above, skip it
	add	%i0, 16, %i0

	.word	0x95b02308 !movxtod %o0,%f10
	.word	0x99b02309 !movxtod %o1,%f12
	.word	0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
	.word	0x85b30d82 !fxor %f12,%f2,%f2

	brnz,pn	%l2, 2f			! unaligned output path
	sub	%i2, 1, %i2

	std	%f0, [%i1 + 0]
	std	%f2, [%i1 + 8]
	brnz,pt	%i2, .L128_ctr32_loop2x	! remaining count is even
	add	%i1, 16, %i1

	ret
	restore

.align	16
2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
					! and ~3x deterioration
					! in inp==out case
	.word	0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
	.word	0x8db00902 !faligndata %f0,%f2,%f6
	.word	0x91b08902 !faligndata %f2,%f2,%f8
	stda	%f4, [%i1 + %l3]0xc0	! partial store
	std	%f6, [%i1 + 8]
	add	%i1, 16, %i1
	orn	%g0, %l3, %l3
	stda	%f8, [%i1 + %l3]0xc0	! partial store

	brnz,pt	%i2, .L128_ctr32_loop2x+4
	orn	%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_ctr32_loop2x:
	ldx	[%i0 + 0], %o0
	ldx	[%i0 + 8], %o1
	ldx	[%i0 + 16], %o2
	brz,pt	%l0, 4f			! input aligned: skip fixup
	ldx	[%i0 + 24], %o3

	ldx	[%i0 + 32], %o4
	sllx	%o0, %l0, %o0		! realign two 16-byte blocks
	srlx	%o1, %l1, %g1
	or	%g1, %o0, %o0
	sllx	%o1, %l0, %o1
	srlx	%o2, %l1, %g1
	or	%g1, %o1, %o1
	sllx	%o2, %l0, %o2
	srlx	%o3, %l1, %g1
	or	%g1, %o2, %o2
	sllx	%o3, %l0, %o3
	srlx	%o4, %l1, %o4
	or	%o4, %o3, %o3
4:
	xor	%g5, %l7, %g1		! ^= rk[0]
	add	%l7, 1, %l7
	.word	0x85b02301 !movxtod %g1,%f2
	srl	%l7, 0, %l7		! clruw
	xor	%g5, %l7, %g1		! second counter value
	add	%l7, 1, %l7
	.word	0x8db02301 !movxtod %g1,%f6
	srl	%l7, 0, %l7		! clruw
	prefetch [%i1 + 63], 22
	prefetch [%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
	call	_aes128_encrypt_2x+16	! first round done above, skip it
	add	%i0, 32, %i0

	.word	0x91b02308 !movxtod %o0,%f8
	.word	0x95b02309 !movxtod %o1,%f10
	.word	0x99b0230a !movxtod %o2,%f12
	.word	0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
	.word	0x91b0230b !movxtod %o3,%f8
	.word	0x85b28d82 !fxor %f10,%f2,%f2
	.word	0x89b30d84 !fxor %f12,%f4,%f4
	.word	0x8db20d86 !fxor %f8,%f6,%f6

	brnz,pn	%l2, 2f			! unaligned output path
	sub	%i2, 2, %i2

	std	%f0, [%i1 + 0]
	std	%f2, [%i1 + 8]
	std	%f4, [%i1 + 16]
	std	%f6, [%i1 + 24]
	brnz,pt	%i2, .L128_ctr32_loop2x
	add	%i1, 32, %i1

	ret
	restore

.align	16
2:	ldxa	[%i0]0x82, %o0		! avoid read-after-write hazard
					! and ~3x deterioration
					! in inp==out case
	.word	0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
	.word	0x81b00902 !faligndata %f0,%f2,%f0
	.word	0x85b08904 !faligndata %f2,%f4,%f2
	.word	0x89b10906 !faligndata %f4,%f6,%f4
	.word	0x8db18906 !faligndata %f6,%f6,%f6

	stda	%f8, [%i1 + %l3]0xc0	! partial store
	std	%f0, [%i1 + 8]
	std	%f2, [%i1 + 16]
	std	%f4, [%i1 + 24]
	add	%i1, 32, %i1
	orn	%g0, %l3, %l3
	stda	%f6, [%i1 + %l3]0xc0	! partial store

	brnz,pt	%i2, .L128_ctr32_loop2x+4
	orn	%g0, %l3, %l3

	ret
	restore

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
.align	32
.L128_ctr32_blk:
	add	%i1, %i2, %l5
	and	%l5, 63, %l5		! tail
	sub	%i2, %l5, %i2
	add	%l5, 15, %l5		! round up to 16n
	srlx	%i2, 4, %i2
	srl	%l5, 4, %l5
	sub	%i2, 1, %i2
	add	%l5, 1, %l5

.L128_ctr32_blk_loop2x:
	ldx	[%i0 + 0], %o0
	ldx	[%i0 + 8], %o1
	ldx	[%i0 + 16], %o2
	brz,pt	%l0, 5f			! input aligned: skip fixup
	ldx	[%i0 + 24], %o3

	ldx	[%i0 + 32], %o4
	sllx	%o0, %l0, %o0		! realign two 16-byte blocks
	srlx	%o1, %l1, %g1
	or	%g1, %o0, %o0
	sllx	%o1, %l0, %o1
	srlx	%o2, %l1, %g1
	or	%g1, %o1, %o1
	sllx	%o2, %l0, %o2
	srlx	%o3, %l1, %g1
	or	%g1, %o2, %o2
	sllx	%o3, %l0, %o3
	srlx	%o4, %l1, %o4
	or	%o4, %o3, %o3
5:
	xor	%g5, %l7, %g1		! ^= rk[0]
	add	%l7, 1, %l7
	.word	0x85b02301 !movxtod %g1,%f2
	srl	%l7, 0, %l7		! clruw
	xor	%g5, %l7, %g1		! second counter value
	add	%l7, 1, %l7
	.word	0x8db02301 !movxtod %g1,%f6
	srl	%l7, 0, %l7		! clruw
	prefetch [%i0 + 32+63], 20
	.word	0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
	.word	0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
	.word	0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
	.word	0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
	call	_aes128_encrypt_2x+16	! first round done above, skip it
	add	%i0, 32, %i0
	subcc	%i2, 2, %i2

	.word	0x91b02308 !movxtod %o0,%f8
	.word	0x95b02309 !movxtod %o1,%f10
	.word	0x99b0230a !movxtod %o2,%f12
	.word	0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
	.word	0x91b0230b !movxtod %o3,%f8
	.word	0x85b28d82 !fxor %f10,%f2,%f2
	.word	0x89b30d84 !fxor %f12,%f4,%f4
	.word	0x8db20d86 !fxor %f8,%f6,%f6

	stda	%f0, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add	%i1, 8, %i1
	stda	%f2, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add	%i1, 8, %i1
	stda	%f4, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	add	%i1, 8, %i1
	stda	%f6, [%i1]0xe2		! ASI_BLK_INIT, T4-specific
	bgu,pt	%icc, .L128_ctr32_blk_loop2x
	add	%i1, 8, %i1

	add	%l5, %i2, %i2		! finish tail in general loops
	andcc	%i2, 1, %g0		! is number of blocks even?
	membar	#StoreLoad|#StoreStore
	bnz,pt	%icc, .L128_ctr32_loop
	srl	%i2, 0, %i2
	brnz,pn	%i2, .L128_ctr32_loop2x
	nop

	ret
	restore
.type	aes128_t4_ctr32_encrypt,#function
.size	aes128_t4_ctr32_encrypt,.-aes128_t4_ctr32_encrypt
922 .globl aes128_t4_xts_encrypt
923 .align 32
924 aes128_t4_xts_encrypt:
925 save %sp, -112-16, %sp
926 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
927
928 mov %i5, %o0
929 add %fp, 0-16, %o1
930 call aes_t4_encrypt
931 mov %i4, %o2
932
933 add %fp, 0-16, %l7
934 ldxa [%l7]0x88, %g2
935 add %fp, 0-8, %l7
936 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
937
938 sethi %hi(0x76543210), %l7
939 or %l7, %lo(0x76543210), %l7
940 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
941
942 prefetch [%i0], 20
943 prefetch [%i0 + 63], 20
944 call _aes128_load_enckey
945 and %i2, 15, %i5
946 and %i2, -16, %i2
947
948 sub %i0, %i1, %l5 ! %i0!=%i1
949 and %i0, 7, %l0
950 andn %i0, 7, %i0
951 sll %l0, 3, %l0
952 mov 64, %l1
953 mov 0xff, %l3
954 sub %l1, %l0, %l1
955 and %i1, 7, %l2
956 cmp %i2, 255
957 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
958 movleu %icc, 0, %l5 ! %i2<256 ||
959 brnz,pn %l5, .L128_xts_enblk ! %i0==%i1)
960 srl %l3, %l2, %l3
961
962 andcc %i2, 16, %g0 ! is number of blocks even?
963 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
964 bz %icc, .L128_xts_enloop2x
965 srlx %i2, 4, %i2
966 .L128_xts_enloop:
967 ldx [%i0 + 0], %o0
968 brz,pt %l0, 4f
969 ldx [%i0 + 8], %o1
970
971 ldx [%i0 + 16], %o2
972 sllx %o0, %l0, %o0
973 srlx %o1, %l1, %g1
974 sllx %o1, %l0, %o1
975 or %g1, %o0, %o0
976 srlx %o2, %l1, %o2
977 or %o2, %o1, %o1
978 4:
979 .word 0x99b02302 !movxtod %g2,%f12
980 .word 0x9db02303 !movxtod %g3,%f14
981 .word 0x99b3098c !bshuffle %f12,%f12,%f12
982 .word 0x9db3898e !bshuffle %f14,%f14,%f14
983
984 xor %g4, %o0, %o0 ! ^= rk[0]
985 xor %g5, %o1, %o1
986 .word 0x81b02308 !movxtod %o0,%f0
987 .word 0x85b02309 !movxtod %o1,%f2
988
989 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
990 .word 0x85b38d82 !fxor %f14,%f2,%f2
991
992 prefetch [%i1 + 63], 22
993 prefetch [%i0 + 16+63], 20
994 call _aes128_encrypt_1x
995 add %i0, 16, %i0
996
997 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
998 .word 0x85b38d82 !fxor %f14,%f2,%f2
999
1000 srax %g3, 63, %l7 ! next tweak value
1001 addcc %g2, %g2, %g2
1002 and %l7, 0x87, %l7
1003 .word 0x87b0c223 !addxc %g3,%g3,%g3
1004 xor %l7, %g2, %g2
1005
1006 brnz,pn %l2, 2f
1007 sub %i2, 1, %i2
1008
1009 std %f0, [%i1 + 0]
1010 std %f2, [%i1 + 8]
1011 brnz,pt %i2, .L128_xts_enloop2x
1012 add %i1, 16, %i1
1013
1014 brnz,pn %i5, .L128_xts_ensteal
1015 nop
1016
1017 ret
1018 restore
1019
1020 .align 16
1021 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1022 ! and ~3x deterioration
1023 ! in inp==out case
1024 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1025 .word 0x8db00902 !faligndata %f0,%f2,%f6
1026 .word 0x91b08902 !faligndata %f2,%f2,%f8
1027 stda %f4, [%i1 + %l3]0xc0 ! partial store
1028 std %f6, [%i1 + 8]
1029 add %i1, 16, %i1
1030 orn %g0, %l3, %l3
1031 stda %f8, [%i1 + %l3]0xc0 ! partial store
1032
1033 brnz,pt %i2, .L128_xts_enloop2x+4
1034 orn %g0, %l3, %l3
1035
1036 brnz,pn %i5, .L128_xts_ensteal
1037 nop
1038
1039 ret
1040 restore
1041
1042 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1043 .align 32
1044 .L128_xts_enloop2x:
1045 ldx [%i0 + 0], %o0
1046 ldx [%i0 + 8], %o1
1047 ldx [%i0 + 16], %o2
1048 brz,pt %l0, 4f
1049 ldx [%i0 + 24], %o3
1050
1051 ldx [%i0 + 32], %o4
1052 sllx %o0, %l0, %o0
1053 srlx %o1, %l1, %g1
1054 or %g1, %o0, %o0
1055 sllx %o1, %l0, %o1
1056 srlx %o2, %l1, %g1
1057 or %g1, %o1, %o1
1058 sllx %o2, %l0, %o2
1059 srlx %o3, %l1, %g1
1060 or %g1, %o2, %o2
1061 sllx %o3, %l0, %o3
1062 srlx %o4, %l1, %o4
1063 or %o4, %o3, %o3
1064 4:
1065 .word 0x99b02302 !movxtod %g2,%f12
1066 .word 0x9db02303 !movxtod %g3,%f14
1067 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1068 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1069
1070 srax %g3, 63, %l7 ! next tweak value
1071 addcc %g2, %g2, %g2
1072 and %l7, 0x87, %l7
1073 .word 0x87b0c223 !addxc %g3,%g3,%g3
1074 xor %l7, %g2, %g2
1075
1076 .word 0x91b02302 !movxtod %g2,%f8
1077 .word 0x95b02303 !movxtod %g3,%f10
1078 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1079 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1080
1081 xor %g4, %o0, %o0 ! ^= rk[0]
1082 xor %g5, %o1, %o1
1083 xor %g4, %o2, %o2 ! ^= rk[0]
1084 xor %g5, %o3, %o3
1085 .word 0x81b02308 !movxtod %o0,%f0
1086 .word 0x85b02309 !movxtod %o1,%f2
1087 .word 0x89b0230a !movxtod %o2,%f4
1088 .word 0x8db0230b !movxtod %o3,%f6
1089
1090 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1091 .word 0x85b38d82 !fxor %f14,%f2,%f2
1092 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1093 .word 0x8db28d86 !fxor %f10,%f6,%f6
1094
1095 prefetch [%i1 + 63], 22
1096 prefetch [%i0 + 32+63], 20
1097 call _aes128_encrypt_2x
1098 add %i0, 32, %i0
1099
1100 .word 0x91b02302 !movxtod %g2,%f8
1101 .word 0x95b02303 !movxtod %g3,%f10
1102
1103 srax %g3, 63, %l7 ! next tweak value
1104 addcc %g2, %g2, %g2
1105 and %l7, 0x87, %l7
1106 .word 0x87b0c223 !addxc %g3,%g3,%g3
1107 xor %l7, %g2, %g2
1108
1109 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1110 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1111
1112 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1113 .word 0x85b38d82 !fxor %f14,%f2,%f2
1114 .word 0x89b20d84 !fxor %f8,%f4,%f4
1115 .word 0x8db28d86 !fxor %f10,%f6,%f6
1116
1117 brnz,pn %l2, 2f
1118 sub %i2, 2, %i2
1119
1120 std %f0, [%i1 + 0]
1121 std %f2, [%i1 + 8]
1122 std %f4, [%i1 + 16]
1123 std %f6, [%i1 + 24]
1124 brnz,pt %i2, .L128_xts_enloop2x
1125 add %i1, 32, %i1
1126
1127 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1128 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1129 brnz,pn %i5, .L128_xts_ensteal
1130 nop
1131
1132 ret
1133 restore
1134
1135 .align 16
1136 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1137 ! and ~3x deterioration
1138 ! in inp==out case
1139 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1140 .word 0x95b00902 !faligndata %f0,%f2,%f10
1141 .word 0x99b08904 !faligndata %f2,%f4,%f12
1142 .word 0x9db10906 !faligndata %f4,%f6,%f14
1143 .word 0x81b18906 !faligndata %f6,%f6,%f0
1144
1145 stda %f8, [%i1 + %l3]0xc0 ! partial store
1146 std %f10, [%i1 + 8]
1147 std %f12, [%i1 + 16]
1148 std %f14, [%i1 + 24]
1149 add %i1, 32, %i1
1150 orn %g0, %l3, %l3
1151 stda %f0, [%i1 + %l3]0xc0 ! partial store
1152
1153 brnz,pt %i2, .L128_xts_enloop2x+4
1154 orn %g0, %l3, %l3
1155
1156 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1157 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1158 brnz,pn %i5, .L128_xts_ensteal
1159 nop
1160
1161 ret
1162 restore
1163
1164 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1165 .align 32
1166 .L128_xts_enblk:
1167 add %i1, %i2, %l5
1168 and %l5, 63, %l5 ! tail
1169 sub %i2, %l5, %i2
1170 add %l5, 15, %l5 ! round up to 16n
1171 srlx %i2, 4, %i2
1172 srl %l5, 4, %l5
1173 sub %i2, 1, %i2
1174 add %l5, 1, %l5
1175
1176 .L128_xts_enblk2x:
1177 ldx [%i0 + 0], %o0
1178 ldx [%i0 + 8], %o1
1179 ldx [%i0 + 16], %o2
1180 brz,pt %l0, 5f
1181 ldx [%i0 + 24], %o3
1182
1183 ldx [%i0 + 32], %o4
1184 sllx %o0, %l0, %o0
1185 srlx %o1, %l1, %g1
1186 or %g1, %o0, %o0
1187 sllx %o1, %l0, %o1
1188 srlx %o2, %l1, %g1
1189 or %g1, %o1, %o1
1190 sllx %o2, %l0, %o2
1191 srlx %o3, %l1, %g1
1192 or %g1, %o2, %o2
1193 sllx %o3, %l0, %o3
1194 srlx %o4, %l1, %o4
1195 or %o4, %o3, %o3
1196 5:
1197 .word 0x99b02302 !movxtod %g2,%f12
1198 .word 0x9db02303 !movxtod %g3,%f14
1199 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1200 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1201
1202 srax %g3, 63, %l7 ! next tweak value
1203 addcc %g2, %g2, %g2
1204 and %l7, 0x87, %l7
1205 .word 0x87b0c223 !addxc %g3,%g3,%g3
1206 xor %l7, %g2, %g2
1207
1208 .word 0x91b02302 !movxtod %g2,%f8
1209 .word 0x95b02303 !movxtod %g3,%f10
1210 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1211 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1212
1213 xor %g4, %o0, %o0 ! ^= rk[0]
1214 xor %g5, %o1, %o1
1215 xor %g4, %o2, %o2 ! ^= rk[0]
1216 xor %g5, %o3, %o3
1217 .word 0x81b02308 !movxtod %o0,%f0
1218 .word 0x85b02309 !movxtod %o1,%f2
1219 .word 0x89b0230a !movxtod %o2,%f4
1220 .word 0x8db0230b !movxtod %o3,%f6
1221
1222 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1223 .word 0x85b38d82 !fxor %f14,%f2,%f2
1224 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1225 .word 0x8db28d86 !fxor %f10,%f6,%f6
1226
1227 prefetch [%i0 + 32+63], 20
1228 call _aes128_encrypt_2x
1229 add %i0, 32, %i0
1230
1231 .word 0x91b02302 !movxtod %g2,%f8
1232 .word 0x95b02303 !movxtod %g3,%f10
1233
1234 srax %g3, 63, %l7 ! next tweak value
1235 addcc %g2, %g2, %g2
1236 and %l7, 0x87, %l7
1237 .word 0x87b0c223 !addxc %g3,%g3,%g3
1238 xor %l7, %g2, %g2
1239
1240 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1241 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1242
1243 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1244 .word 0x85b38d82 !fxor %f14,%f2,%f2
1245 .word 0x89b20d84 !fxor %f8,%f4,%f4
1246 .word 0x8db28d86 !fxor %f10,%f6,%f6
1247
1248 subcc %i2, 2, %i2
1249 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1250 add %i1, 8, %i1
1251 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1252 add %i1, 8, %i1
1253 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1254 add %i1, 8, %i1
1255 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1256 bgu,pt %icc, .L128_xts_enblk2x
1257 add %i1, 8, %i1
1258
1259 add %l5, %i2, %i2
1260 andcc %i2, 1, %g0 ! is number of blocks even?
1261 membar #StoreLoad|#StoreStore
1262 bnz,pt %icc, .L128_xts_enloop
1263 srl %i2, 0, %i2
1264 brnz,pn %i2, .L128_xts_enloop2x
1265 nop
1266
1267 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1268 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1269 brnz,pn %i5, .L128_xts_ensteal
1270 nop
1271
1272 ret
1273 restore
1274 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1275 .align 32
1276 .L128_xts_ensteal:
1277 std %f0, [%fp + 0-16] ! copy of output
1278 std %f2, [%fp + 0-8]
1279
1280 srl %l0, 3, %l0
1281 add %fp, 0-16, %l7
1282 add %i0, %l0, %i0 ! original %i0+%i2&-15
1283 add %i1, %l2, %i1 ! original %i1+%i2&-15
1284 mov 0, %l0
1285 nop ! align
1286
1287 .L128_xts_enstealing:
1288 ldub [%i0 + %l0], %o0
1289 ldub [%l7 + %l0], %o1
1290 dec %i5
1291 stb %o0, [%l7 + %l0]
1292 stb %o1, [%i1 + %l0]
1293 brnz %i5, .L128_xts_enstealing
1294 inc %l0
1295
1296 mov %l7, %i0
1297 sub %i1, 16, %i1
1298 mov 0, %l0
1299 sub %i1, %l2, %i1
1300 ba .L128_xts_enloop ! one more time
1301 mov 1, %i2 ! %i5 is 0
1302 ret
1303 restore
1304 .type aes128_t4_xts_encrypt,#function
1305 .size aes128_t4_xts_encrypt,.-aes128_t4_xts_encrypt
1306 .globl aes128_t4_xts_decrypt
1307 .align 32
1308 aes128_t4_xts_decrypt:
! ---------------------------------------------------------------------
! AES-128-XTS decrypt using SPARC T4 AES opcodes (hand-encoded .word).
! In:  %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!      %i4 = tweak ("key2") schedule, %i5 = 16-byte IV/sector number.
! The IV at %i5 is first encrypted with the key at %i4 (aes_t4_encrypt)
! to produce the initial tweak in %g3:%g2.  The data ("key1") schedule
! is then loaded into FP registers by _aes128_load_deckey; its key
! pointer register is set up by the caller side of that helper --
! NOTE(review): not visible in this chunk, confirm against caller.
! Tweak update throughout is tweak *= x in GF(2^128) with reduction
! polynomial 0x87 (standard XTS), done on the integer pair %g3:%g2.
! ---------------------------------------------------------------------
1309 	save %sp, -112-16, %sp
1310 	srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
1311 
1312 	mov %i5, %o0
1313 	add %fp, 0-16, %o1
1314 	call aes_t4_encrypt
1315 	mov %i4, %o2
1316 
1317 	add %fp, 0-16, %l7
1318 	ldxa [%l7]0x88, %g2
1319 	add %fp, 0-8, %l7
1320 	ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
1321 
1322 	sethi %hi(0x76543210), %l7
1323 	or %l7, %lo(0x76543210), %l7
1324 	.word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
1325 
1326 	prefetch [%i0], 20
1327 	prefetch [%i0 + 63], 20
1328 	call _aes128_load_deckey
1329 	and %i2, 15, %i5 ! %i5 = residue bytes (drives ciphertext stealing)
1330 	and %i2, -16, %i2
1331 	mov 0, %l7
1332 	movrnz %i5, 16, %l7
1333 	sub %i2, %l7, %i2 ! hold back one full block if stealing is needed
1334 
! %l0/%l1 = input misalignment shift pair, %l2 = output misalignment,
! %l3 = partial-store byte mask, %l5 = 0 selects the block-store path.
1335 	sub %i0, %i1, %l5 ! %i0!=%i1
1336 	and %i0, 7, %l0
1337 	andn %i0, 7, %i0
1338 	sll %l0, 3, %l0
1339 	mov 64, %l1
1340 	mov 0xff, %l3
1341 	sub %l1, %l0, %l1
1342 	and %i1, 7, %l2
1343 	cmp %i2, 255
1344 	movrnz %l2, 0, %l5 ! if ( %i1&7 ||
1345 	movleu %icc, 0, %l5 ! %i2<256 ||
1346 	brnz,pn %l5, .L128_xts_deblk ! %i0==%i1)
1347 	srl %l3, %l2, %l3
1348 
1349 	andcc %i2, 16, %g0 ! is number of blocks even?
1350 	brz,pn %i2, .L128_xts_desteal
1351 	.word 0xb3b64340 !alignaddrl %i1,%g0,%i1
1352 	bz %icc, .L128_xts_deloop2x
1353 	srlx %i2, 4, %i2 ! %i2 = block count from here on
! One block per iteration; entered once to make the remaining count even.
1354 .L128_xts_deloop:
1355 	ldx [%i0 + 0], %o0
1356 	brz,pt %l0, 4f
1357 	ldx [%i0 + 8], %o1
1358 
! Unaligned input: merge three dwords into one aligned 16-byte block.
1359 	ldx [%i0 + 16], %o2
1360 	sllx %o0, %l0, %o0
1361 	srlx %o1, %l1, %g1
1362 	sllx %o1, %l0, %o1
1363 	or %g1, %o0, %o0
1364 	srlx %o2, %l1, %o2
1365 	or %o2, %o1, %o1
1366 4:
1367 	.word 0x99b02302 !movxtod %g2,%f12
1368 	.word 0x9db02303 !movxtod %g3,%f14
1369 	.word 0x99b3098c !bshuffle %f12,%f12,%f12
1370 	.word 0x9db3898e !bshuffle %f14,%f14,%f14
1371 
1372 	xor %g4, %o0, %o0 ! ^= rk[0]
1373 	xor %g5, %o1, %o1
1374 	.word 0x81b02308 !movxtod %o0,%f0
1375 	.word 0x85b02309 !movxtod %o1,%f2
1376 
1377 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1378 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1379 
1380 	prefetch [%i1 + 63], 22
1381 	prefetch [%i0 + 16+63], 20
1382 	call _aes128_decrypt_1x
1383 	add %i0, 16, %i0
1384 
1385 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1386 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1387 
1388 	srax %g3, 63, %l7 ! next tweak value
1389 	addcc %g2, %g2, %g2
1390 	and %l7, 0x87, %l7 ! GF(2^128) reduction polynomial
1391 	.word 0x87b0c223 !addxc %g3,%g3,%g3
1392 	xor %l7, %g2, %g2
1393 
1394 	brnz,pn %l2, 2f
1395 	sub %i2, 1, %i2
1396 
1397 	std %f0, [%i1 + 0]
1398 	std %f2, [%i1 + 8]
1399 	brnz,pt %i2, .L128_xts_deloop2x
1400 	add %i1, 16, %i1
1401 
1402 	brnz,pn %i5, .L128_xts_desteal
1403 	nop
1404 
1405 	ret
1406 	restore
1407 
1408 .align 16
1409 2:	ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1410 					! and ~3x deterioration
1411 					! in inp==out case
1412 	.word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1413 	.word 0x8db00902 !faligndata %f0,%f2,%f6
1414 	.word 0x91b08902 !faligndata %f2,%f2,%f8
1415 	stda %f4, [%i1 + %l3]0xc0 ! partial store
1416 	std %f6, [%i1 + 8]
1417 	add %i1, 16, %i1
1418 	orn %g0, %l3, %l3
1419 	stda %f8, [%i1 + %l3]0xc0 ! partial store
1420 
1421 	brnz,pt %i2, .L128_xts_deloop2x+4
1422 	orn %g0, %l3, %l3
1423 
1424 	brnz,pn %i5, .L128_xts_desteal
1425 	nop
1426 
1427 	ret
1428 	restore
1429 
1430 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two blocks per iteration, tweaks in %f12:%f14 and %f8:%f10.
1431 .align 32
1432 .L128_xts_deloop2x:
1433 	ldx [%i0 + 0], %o0
1434 	ldx [%i0 + 8], %o1
1435 	ldx [%i0 + 16], %o2
1436 	brz,pt %l0, 4f
1437 	ldx [%i0 + 24], %o3
1438 
1439 	ldx [%i0 + 32], %o4
1440 	sllx %o0, %l0, %o0
1441 	srlx %o1, %l1, %g1
1442 	or %g1, %o0, %o0
1443 	sllx %o1, %l0, %o1
1444 	srlx %o2, %l1, %g1
1445 	or %g1, %o1, %o1
1446 	sllx %o2, %l0, %o2
1447 	srlx %o3, %l1, %g1
1448 	or %g1, %o2, %o2
1449 	sllx %o3, %l0, %o3
1450 	srlx %o4, %l1, %o4
1451 	or %o4, %o3, %o3
1452 4:
1453 	.word 0x99b02302 !movxtod %g2,%f12
1454 	.word 0x9db02303 !movxtod %g3,%f14
1455 	.word 0x99b3098c !bshuffle %f12,%f12,%f12
1456 	.word 0x9db3898e !bshuffle %f14,%f14,%f14
1457 
1458 	srax %g3, 63, %l7 ! next tweak value
1459 	addcc %g2, %g2, %g2
1460 	and %l7, 0x87, %l7
1461 	.word 0x87b0c223 !addxc %g3,%g3,%g3
1462 	xor %l7, %g2, %g2
1463 
! %f8:%f10 = tweak for the second block of this pair
1464 	.word 0x91b02302 !movxtod %g2,%f8
1465 	.word 0x95b02303 !movxtod %g3,%f10
1466 	.word 0x91b20988 !bshuffle %f8,%f8,%f8
1467 	.word 0x95b2898a !bshuffle %f10,%f10,%f10
1468 
1469 	xor %g4, %o0, %o0 ! ^= rk[0]
1470 	xor %g5, %o1, %o1
1471 	xor %g4, %o2, %o2 ! ^= rk[0]
1472 	xor %g5, %o3, %o3
1473 	.word 0x81b02308 !movxtod %o0,%f0
1474 	.word 0x85b02309 !movxtod %o1,%f2
1475 	.word 0x89b0230a !movxtod %o2,%f4
1476 	.word 0x8db0230b !movxtod %o3,%f6
1477 
1478 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1479 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1480 	.word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1481 	.word 0x8db28d86 !fxor %f10,%f6,%f6
1482 
1483 	prefetch [%i1 + 63], 22
1484 	prefetch [%i0 + 32+63], 20
1485 	call _aes128_decrypt_2x
1486 	add %i0, 32, %i0
1487 
1488 	.word 0x91b02302 !movxtod %g2,%f8
1489 	.word 0x95b02303 !movxtod %g3,%f10
1490 
1491 	srax %g3, 63, %l7 ! next tweak value
1492 	addcc %g2, %g2, %g2
1493 	and %l7, 0x87, %l7
1494 	.word 0x87b0c223 !addxc %g3,%g3,%g3
1495 	xor %l7, %g2, %g2
1496 
1497 	.word 0x91b20988 !bshuffle %f8,%f8,%f8
1498 	.word 0x95b2898a !bshuffle %f10,%f10,%f10
1499 
1500 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1501 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1502 	.word 0x89b20d84 !fxor %f8,%f4,%f4
1503 	.word 0x8db28d86 !fxor %f10,%f6,%f6
1504 
1505 	brnz,pn %l2, 2f
1506 	sub %i2, 2, %i2
1507 
1508 	std %f0, [%i1 + 0]
1509 	std %f2, [%i1 + 8]
1510 	std %f4, [%i1 + 16]
1511 	std %f6, [%i1 + 24]
1512 	brnz,pt %i2, .L128_xts_deloop2x
1513 	add %i1, 32, %i1
1514 
1515 	.word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1516 	.word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1517 	brnz,pn %i5, .L128_xts_desteal
1518 	nop
1519 
1520 	ret
1521 	restore
1522 
1523 .align 16
1524 2:	ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1525 					! and ~3x deterioration
1526 					! in inp==out case
1527 	.word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1528 	.word 0x95b00902 !faligndata %f0,%f2,%f10
1529 	.word 0x99b08904 !faligndata %f2,%f4,%f12
1530 	.word 0x9db10906 !faligndata %f4,%f6,%f14
1531 	.word 0x81b18906 !faligndata %f6,%f6,%f0
1532 
1533 	stda %f8, [%i1 + %l3]0xc0 ! partial store
1534 	std %f10, [%i1 + 8]
1535 	std %f12, [%i1 + 16]
1536 	std %f14, [%i1 + 24]
1537 	add %i1, 32, %i1
1538 	orn %g0, %l3, %l3
1539 	stda %f0, [%i1 + %l3]0xc0 ! partial store
1540 
1541 	brnz,pt %i2, .L128_xts_deloop2x+4
1542 	orn %g0, %l3, %l3
1543 
1544 	.word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1545 	.word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1546 	brnz,pn %i5, .L128_xts_desteal
1547 	nop
1548 
1549 	ret
1550 	restore
1551 
1552 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for large (>=256 byte) transfers with 8-byte-aligned, non-
! overlapping output: uses ASI 0xe2 block-init stores, then drains the
! tail (%l5 blocks) through the register loops above after a membar.
1553 .align 32
1554 .L128_xts_deblk:
1555 	add %i1, %i2, %l5
1556 	and %l5, 63, %l5 ! tail
1557 	sub %i2, %l5, %i2
1558 	add %l5, 15, %l5 ! round up to 16n
1559 	srlx %i2, 4, %i2
1560 	srl %l5, 4, %l5
1561 	sub %i2, 1, %i2
1562 	add %l5, 1, %l5
1563 
1564 .L128_xts_deblk2x:
1565 	ldx [%i0 + 0], %o0
1566 	ldx [%i0 + 8], %o1
1567 	ldx [%i0 + 16], %o2
1568 	brz,pt %l0, 5f
1569 	ldx [%i0 + 24], %o3
1570 
1571 	ldx [%i0 + 32], %o4
1572 	sllx %o0, %l0, %o0
1573 	srlx %o1, %l1, %g1
1574 	or %g1, %o0, %o0
1575 	sllx %o1, %l0, %o1
1576 	srlx %o2, %l1, %g1
1577 	or %g1, %o1, %o1
1578 	sllx %o2, %l0, %o2
1579 	srlx %o3, %l1, %g1
1580 	or %g1, %o2, %o2
1581 	sllx %o3, %l0, %o3
1582 	srlx %o4, %l1, %o4
1583 	or %o4, %o3, %o3
1584 5:
1585 	.word 0x99b02302 !movxtod %g2,%f12
1586 	.word 0x9db02303 !movxtod %g3,%f14
1587 	.word 0x99b3098c !bshuffle %f12,%f12,%f12
1588 	.word 0x9db3898e !bshuffle %f14,%f14,%f14
1589 
1590 	srax %g3, 63, %l7 ! next tweak value
1591 	addcc %g2, %g2, %g2
1592 	and %l7, 0x87, %l7
1593 	.word 0x87b0c223 !addxc %g3,%g3,%g3
1594 	xor %l7, %g2, %g2
1595 
1596 	.word 0x91b02302 !movxtod %g2,%f8
1597 	.word 0x95b02303 !movxtod %g3,%f10
1598 	.word 0x91b20988 !bshuffle %f8,%f8,%f8
1599 	.word 0x95b2898a !bshuffle %f10,%f10,%f10
1600 
1601 	xor %g4, %o0, %o0 ! ^= rk[0]
1602 	xor %g5, %o1, %o1
1603 	xor %g4, %o2, %o2 ! ^= rk[0]
1604 	xor %g5, %o3, %o3
1605 	.word 0x81b02308 !movxtod %o0,%f0
1606 	.word 0x85b02309 !movxtod %o1,%f2
1607 	.word 0x89b0230a !movxtod %o2,%f4
1608 	.word 0x8db0230b !movxtod %o3,%f6
1609 
1610 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1611 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1612 	.word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1613 	.word 0x8db28d86 !fxor %f10,%f6,%f6
1614 
1615 	prefetch [%i0 + 32+63], 20
1616 	call _aes128_decrypt_2x
1617 	add %i0, 32, %i0
1618 
1619 	.word 0x91b02302 !movxtod %g2,%f8
1620 	.word 0x95b02303 !movxtod %g3,%f10
1621 
1622 	srax %g3, 63, %l7 ! next tweak value
1623 	addcc %g2, %g2, %g2
1624 	and %l7, 0x87, %l7
1625 	.word 0x87b0c223 !addxc %g3,%g3,%g3
1626 	xor %l7, %g2, %g2
1627 
1628 	.word 0x91b20988 !bshuffle %f8,%f8,%f8
1629 	.word 0x95b2898a !bshuffle %f10,%f10,%f10
1630 
1631 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1632 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1633 	.word 0x89b20d84 !fxor %f8,%f4,%f4
1634 	.word 0x8db28d86 !fxor %f10,%f6,%f6
1635 
1636 	subcc %i2, 2, %i2
1637 	stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1638 	add %i1, 8, %i1
1639 	stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1640 	add %i1, 8, %i1
1641 	stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1642 	add %i1, 8, %i1
1643 	stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1644 	bgu,pt %icc, .L128_xts_deblk2x
1645 	add %i1, 8, %i1
1646 
1647 	add %l5, %i2, %i2
1648 	andcc %i2, 1, %g0 ! is number of blocks even?
1649 	membar #StoreLoad|#StoreStore
1650 	bnz,pt %icc, .L128_xts_deloop
1651 	srl %i2, 0, %i2
1652 	brnz,pn %i2, .L128_xts_deloop2x
1653 	nop
1654 
1655 	.word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1656 	.word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1657 	brnz,pn %i5, .L128_xts_desteal
1658 	nop
1659 
1660 	ret
1661 	restore
1662 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext stealing: decrypt the held-back block with the *next*
! tweak, swap its tail with the %i5 residue bytes in a stack buffer at
! %fp-16, then jump back to decrypt that buffer as one final block.
1663 .align 32
1664 .L128_xts_desteal:
1665 	ldx [%i0 + 0], %o0
1666 	brz,pt %l0, 8f
1667 	ldx [%i0 + 8], %o1
1668 
1669 	ldx [%i0 + 16], %o2
1670 	sllx %o0, %l0, %o0
1671 	srlx %o1, %l1, %g1
1672 	sllx %o1, %l0, %o1
1673 	or %g1, %o0, %o0
1674 	srlx %o2, %l1, %o2
1675 	or %o2, %o1, %o1
1676 8:
! next tweak computed into %o3:%o2 -- %g3:%g2 is deliberately kept for
! the final block decrypted after the byte swap below
1677 	srax %g3, 63, %l7 ! next tweak value
1678 	addcc %g2, %g2, %o2
1679 	and %l7, 0x87, %l7
1680 	.word 0x97b0c223 !addxc %g3,%g3,%o3
1681 	xor %l7, %o2, %o2
1682 
1683 	.word 0x99b0230a !movxtod %o2,%f12
1684 	.word 0x9db0230b !movxtod %o3,%f14
1685 	.word 0x99b3098c !bshuffle %f12,%f12,%f12
1686 	.word 0x9db3898e !bshuffle %f14,%f14,%f14
1687 
1688 	xor %g4, %o0, %o0 ! ^= rk[0]
1689 	xor %g5, %o1, %o1
1690 	.word 0x81b02308 !movxtod %o0,%f0
1691 	.word 0x85b02309 !movxtod %o1,%f2
1692 
1693 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1694 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1695 
1696 	call _aes128_decrypt_1x
1697 	add %i0, 16, %i0
1698 
1699 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1700 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1701 
1702 	std %f0, [%fp + 0-16]
1703 	std %f2, [%fp + 0-8]
1704 
1705 	srl %l0, 3, %l0
1706 	add %fp, 0-16, %l7
1707 	add %i0, %l0, %i0 ! original %i0+%i2&-15
1708 	add %i1, %l2, %i1 ! original %i1+%i2&-15
1709 	mov 0, %l0
1710 	add %i1, 16, %i1
1711 	nop ! align
1712 
! Byte loop: steal %i5 bytes -- input residue into the stack buffer,
! decrypted bytes out to the tail of the output.
1713 .L128_xts_destealing:
1714 	ldub [%i0 + %l0], %o0
1715 	ldub [%l7 + %l0], %o1
1716 	dec %i5
1717 	stb %o0, [%l7 + %l0]
1718 	stb %o1, [%i1 + %l0]
1719 	brnz %i5, .L128_xts_destealing
1720 	inc %l0
1721 
1722 	mov %l7, %i0
1723 	sub %i1, 16, %i1
1724 	mov 0, %l0
1725 	sub %i1, %l2, %i1
1726 	ba .L128_xts_deloop ! one more time
1727 	mov 1, %i2 ! %i5 is 0
! not reached -- the ba above is unconditional
1728 	ret
1729 	restore
1730 .type aes128_t4_xts_decrypt,#function
1731 .size aes128_t4_xts_decrypt,.-aes128_t4_xts_decrypt
1732 .globl aes128_t4_cbc_decrypt
1733 .align 32
1734 aes128_t4_cbc_decrypt:
! ---------------------------------------------------------------------
! AES-128-CBC decrypt using SPARC T4 AES opcodes.
! In:  %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!      %i4 = ivec pointer (read into %f12-%f15; updated on return with
!      the last ciphertext block).  Round-key schedule is loaded into
!      FP registers by _aes128_load_deckey; rk[0] is kept in %g4:%g5
!      and applied with integer xor before moving data to FP regs.
! Three paths: 1x loop (odd block), 2x loop, and a block-init-store
! bulk path for large aligned non-overlapping transfers.
! ---------------------------------------------------------------------
1735 	save %sp, -112, %sp
1736 	cmp %i2, 0
1737 	be,pn %icc, .L128_cbc_dec_abort
1738 	srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
1739 	sub %i0, %i1, %l5 ! %i0!=%i1
1740 	ld [%i4 + 0], %f12 ! load ivec
1741 	ld [%i4 + 4], %f13
1742 	ld [%i4 + 8], %f14
1743 	ld [%i4 + 12], %f15
1744 	prefetch [%i0], 20
1745 	prefetch [%i0 + 63], 20
1746 	call _aes128_load_deckey
1747 	and %i0, 7, %l0
! %l0/%l1 = input misalignment shift pair, %l2 = output misalignment,
! %l3 = partial-store byte mask, %l5 = 0 selects the block-store path.
1748 	andn %i0, 7, %i0
1749 	sll %l0, 3, %l0
1750 	mov 64, %l1
1751 	mov 0xff, %l3
1752 	sub %l1, %l0, %l1
1753 	and %i1, 7, %l2
1754 	cmp %i2, 255
1755 	movrnz %l2, 0, %l5 ! if ( %i1&7 ||
1756 	movleu %icc, 0, %l5 ! %i2<256 ||
1757 	brnz,pn %l5, .L128cbc_dec_blk ! %i0==%i1)
1758 	srl %l3, %l2, %l3
1759 
1760 	andcc %i2, 16, %g0 ! is number of blocks even?
1761 	srlx %i2, 4, %i2
1762 	.word 0xb3b64340 !alignaddrl %i1,%g0,%i1
1763 	bz %icc, .L128_cbc_dec_loop2x
1764 	prefetch [%i1], 22
! One block per iteration; entered once to make the count even.
1765 .L128_cbc_dec_loop:
1766 	ldx [%i0 + 0], %o0
1767 	brz,pt %l0, 4f
1768 	ldx [%i0 + 8], %o1
1769 
! Unaligned input: merge three dwords into one aligned 16-byte block.
1770 	ldx [%i0 + 16], %o2
1771 	sllx %o0, %l0, %o0
1772 	srlx %o1, %l1, %g1
1773 	sllx %o1, %l0, %o1
1774 	or %g1, %o0, %o0
1775 	srlx %o2, %l1, %o2
1776 	or %o2, %o1, %o1
1777 4:
1778 	xor %g4, %o0, %o2 ! ^= rk[0]
1779 	xor %g5, %o1, %o3
1780 	.word 0x81b0230a !movxtod %o2,%f0
1781 	.word 0x85b0230b !movxtod %o3,%f2
1782 
1783 	prefetch [%i1 + 63], 22
1784 	prefetch [%i0 + 16+63], 20
1785 	call _aes128_decrypt_1x
1786 	add %i0, 16, %i0
1787 
1788 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1789 	.word 0x85b38d82 !fxor %f14,%f2,%f2
! this ciphertext block (still in %o0:%o1) becomes the next "ivec"
1790 	.word 0x99b02308 !movxtod %o0,%f12
1791 	.word 0x9db02309 !movxtod %o1,%f14
1792 
1793 	brnz,pn %l2, 2f
1794 	sub %i2, 1, %i2
1795 
1796 	std %f0, [%i1 + 0]
1797 	std %f2, [%i1 + 8]
1798 	brnz,pt %i2, .L128_cbc_dec_loop2x
1799 	add %i1, 16, %i1
! done: write back the final ivec for chaining
1800 	st %f12, [%i4 + 0]
1801 	st %f13, [%i4 + 4]
1802 	st %f14, [%i4 + 8]
1803 	st %f15, [%i4 + 12]
1804 .L128_cbc_dec_abort:
1805 	ret
1806 	restore
1807 
1808 .align 16
1809 2:	ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1810 					! and ~3x deterioration
1811 					! in inp==out case
1812 	.word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1813 	.word 0x8db00902 !faligndata %f0,%f2,%f6
1814 	.word 0x91b08902 !faligndata %f2,%f2,%f8
1815 
1816 	stda %f4, [%i1 + %l3]0xc0 ! partial store
1817 	std %f6, [%i1 + 8]
1818 	add %i1, 16, %i1
1819 	orn %g0, %l3, %l3
1820 	stda %f8, [%i1 + %l3]0xc0 ! partial store
1821 
1822 	brnz,pt %i2, .L128_cbc_dec_loop2x+4
1823 	orn %g0, %l3, %l3
1824 	st %f12, [%i4 + 0]
1825 	st %f13, [%i4 + 4]
1826 	st %f14, [%i4 + 8]
1827 	st %f15, [%i4 + 12]
1828 	ret
1829 	restore
1830 
1831 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main loop: two blocks per iteration.
1832 .align 32
1833 .L128_cbc_dec_loop2x:
1834 	ldx [%i0 + 0], %o0
1835 	ldx [%i0 + 8], %o1
1836 	ldx [%i0 + 16], %o2
1837 	brz,pt %l0, 4f
1838 	ldx [%i0 + 24], %o3
1839 
1840 	ldx [%i0 + 32], %o4
1841 	sllx %o0, %l0, %o0
1842 	srlx %o1, %l1, %g1
1843 	or %g1, %o0, %o0
1844 	sllx %o1, %l0, %o1
1845 	srlx %o2, %l1, %g1
1846 	or %g1, %o1, %o1
1847 	sllx %o2, %l0, %o2
1848 	srlx %o3, %l1, %g1
1849 	or %g1, %o2, %o2
1850 	sllx %o3, %l0, %o3
1851 	srlx %o4, %l1, %o4
1852 	or %o4, %o3, %o3
1853 4:
1854 	xor %g4, %o0, %o4 ! ^= rk[0]
1855 	xor %g5, %o1, %o5
1856 	.word 0x81b0230c !movxtod %o4,%f0
1857 	.word 0x85b0230d !movxtod %o5,%f2
1858 	xor %g4, %o2, %o4
1859 	xor %g5, %o3, %o5
1860 	.word 0x89b0230c !movxtod %o4,%f4
1861 	.word 0x8db0230d !movxtod %o5,%f6
1862 
1863 	prefetch [%i1 + 63], 22
1864 	prefetch [%i0 + 32+63], 20
1865 	call _aes128_decrypt_2x
1866 	add %i0, 32, %i0
1867 
! chain: block0 ^= ivec, block1 ^= ciphertext0; ciphertext1 -> new ivec
1868 	.word 0x91b02308 !movxtod %o0,%f8
1869 	.word 0x95b02309 !movxtod %o1,%f10
1870 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1871 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1872 	.word 0x99b0230a !movxtod %o2,%f12
1873 	.word 0x9db0230b !movxtod %o3,%f14
1874 	.word 0x89b20d84 !fxor %f8,%f4,%f4
1875 	.word 0x8db28d86 !fxor %f10,%f6,%f6
1876 
1877 	brnz,pn %l2, 2f
1878 	sub %i2, 2, %i2
1879 
1880 	std %f0, [%i1 + 0]
1881 	std %f2, [%i1 + 8]
1882 	std %f4, [%i1 + 16]
1883 	std %f6, [%i1 + 24]
1884 	brnz,pt %i2, .L128_cbc_dec_loop2x
1885 	add %i1, 32, %i1
1886 	st %f12, [%i4 + 0]
1887 	st %f13, [%i4 + 4]
1888 	st %f14, [%i4 + 8]
1889 	st %f15, [%i4 + 12]
1890 	ret
1891 	restore
1892 
1893 .align 16
1894 2:	ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1895 					! and ~3x deterioration
1896 					! in inp==out case
1897 	.word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1898 	.word 0x81b00902 !faligndata %f0,%f2,%f0
1899 	.word 0x85b08904 !faligndata %f2,%f4,%f2
1900 	.word 0x89b10906 !faligndata %f4,%f6,%f4
1901 	.word 0x8db18906 !faligndata %f6,%f6,%f6
1902 	stda %f8, [%i1 + %l3]0xc0 ! partial store
1903 	std %f0, [%i1 + 8]
1904 	std %f2, [%i1 + 16]
1905 	std %f4, [%i1 + 24]
1906 	add %i1, 32, %i1
1907 	orn %g0, %l3, %l3
1908 	stda %f6, [%i1 + %l3]0xc0 ! partial store
1909 
1910 	brnz,pt %i2, .L128_cbc_dec_loop2x+4
1911 	orn %g0, %l3, %l3
1912 	st %f12, [%i4 + 0]
1913 	st %f13, [%i4 + 4]
1914 	st %f14, [%i4 + 8]
1915 	st %f15, [%i4 + 12]
1916 	ret
1917 	restore
1918 
1919 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for large (>=256 byte) transfers with 8-byte-aligned, non-
! overlapping output: ASI 0xe2 block-init stores; the tail (%l5 blocks)
! is drained through the register loops above after a membar.
1920 .align 32
1921 .L128cbc_dec_blk:
1922 	add %i1, %i2, %l5
1923 	and %l5, 63, %l5 ! tail
1924 	sub %i2, %l5, %i2
1925 	add %l5, 15, %l5 ! round up to 16n
1926 	srlx %i2, 4, %i2
1927 	srl %l5, 4, %l5
1928 	sub %i2, 1, %i2
1929 	add %l5, 1, %l5
1930 
1931 .L128_cbc_dec_blk_loop2x:
1932 	ldx [%i0 + 0], %o0
1933 	ldx [%i0 + 8], %o1
1934 	ldx [%i0 + 16], %o2
1935 	brz,pt %l0, 5f
1936 	ldx [%i0 + 24], %o3
1937 
1938 	ldx [%i0 + 32], %o4
1939 	sllx %o0, %l0, %o0
1940 	srlx %o1, %l1, %g1
1941 	or %g1, %o0, %o0
1942 	sllx %o1, %l0, %o1
1943 	srlx %o2, %l1, %g1
1944 	or %g1, %o1, %o1
1945 	sllx %o2, %l0, %o2
1946 	srlx %o3, %l1, %g1
1947 	or %g1, %o2, %o2
1948 	sllx %o3, %l0, %o3
1949 	srlx %o4, %l1, %o4
1950 	or %o4, %o3, %o3
1951 5:
1952 	xor %g4, %o0, %o4 ! ^= rk[0]
1953 	xor %g5, %o1, %o5
1954 	.word 0x81b0230c !movxtod %o4,%f0
1955 	.word 0x85b0230d !movxtod %o5,%f2
1956 	xor %g4, %o2, %o4
1957 	xor %g5, %o3, %o5
1958 	.word 0x89b0230c !movxtod %o4,%f4
1959 	.word 0x8db0230d !movxtod %o5,%f6
1960 
1961 	prefetch [%i0 + 32+63], 20
1962 	call _aes128_decrypt_2x
1963 	add %i0, 32, %i0
1964 	subcc %i2, 2, %i2
1965 
1966 	.word 0x91b02308 !movxtod %o0,%f8
1967 	.word 0x95b02309 !movxtod %o1,%f10
1968 	.word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1969 	.word 0x85b38d82 !fxor %f14,%f2,%f2
1970 	.word 0x99b0230a !movxtod %o2,%f12
1971 	.word 0x9db0230b !movxtod %o3,%f14
1972 	.word 0x89b20d84 !fxor %f8,%f4,%f4
1973 	.word 0x8db28d86 !fxor %f10,%f6,%f6
1974 
1975 	stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1976 	add %i1, 8, %i1
1977 	stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1978 	add %i1, 8, %i1
1979 	stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1980 	add %i1, 8, %i1
1981 	stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1982 	bgu,pt %icc, .L128_cbc_dec_blk_loop2x
1983 	add %i1, 8, %i1
1984 
1985 	add %l5, %i2, %i2
1986 	andcc %i2, 1, %g0 ! is number of blocks even?
1987 	membar #StoreLoad|#StoreStore
1988 	bnz,pt %icc, .L128_cbc_dec_loop
1989 	srl %i2, 0, %i2
1990 	brnz,pn %i2, .L128_cbc_dec_loop2x
1991 	nop
1992 	st %f12, [%i4 + 0] ! write out ivec
1993 	st %f13, [%i4 + 4]
1994 	st %f14, [%i4 + 8]
1995 	st %f15, [%i4 + 12]
1996 	ret
1997 	restore
1998 .type aes128_t4_cbc_decrypt,#function
1999 .size aes128_t4_cbc_decrypt,.-aes128_t4_cbc_decrypt
2000 .align 32
! ---------------------------------------------------------------------
! _aes128_decrypt_1x: decrypt one 128-bit block held in %f0:%f2.
! Assumes the full AES-128 decryption key schedule has been pre-loaded
! into %f16..%f54 (rounds 1..10; round 0 is applied by the caller via
! integer xor with %g4:%g5).  Rounds alternate through %f4 as a
! temporary; the final round uses the _l ("last") opcode forms.
! Leaf routine: retl with the closing dround in the delay slot.
! Out: plaintext block in %f0:%f2.  Clobbers %f4.
! ---------------------------------------------------------------------
2001 _aes128_decrypt_1x:
2002 	.word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
2003 	.word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
2004 	.word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
2005 	.word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
2006 	.word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
2007 	.word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
2008 	.word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
2009 	.word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
2010 	.word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
2011 	.word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
2012 	.word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
2013 	.word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
2014 	.word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
2015 	.word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
2016 	.word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
2017 	.word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
2018 	.word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
2019 	.word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
2020 	.word 0x80cd44c4 !aes_dround01_l %f52,%f4,%f2,%f0
2021 	retl
2022 	.word 0x84cdc4e4 !aes_dround23_l %f54,%f4,%f2,%f2 ! in delay slot
2023 .type _aes128_decrypt_1x,#function
2024 .size _aes128_decrypt_1x,.-_aes128_decrypt_1x
2025
2026 .align 32
! ---------------------------------------------------------------------
! _aes128_decrypt_2x: decrypt two interleaved 128-bit blocks,
! block A in %f0:%f2 and block B in %f4:%f6, with the AES-128
! decryption key schedule pre-loaded in %f16..%f54 (round 0 applied by
! the caller).  Rounds for the two blocks are interleaved to hide the
! AES-instruction latency; %f8/%f10 serve as the per-block temporaries.
! Leaf routine: retl with the closing dround in the delay slot.
! Out: plaintext blocks in %f0:%f2 and %f4:%f6.  Clobbers %f8, %f10.
! ---------------------------------------------------------------------
2027 _aes128_decrypt_2x:
2028 	.word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
2029 	.word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
2030 	.word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
2031 	.word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
2032 	.word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
2033 	.word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
2034 	.word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
2035 	.word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
2036 	.word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
2037 	.word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
2038 	.word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
2039 	.word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
2040 	.word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
2041 	.word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
2042 	.word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
2043 	.word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
2044 	.word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
2045 	.word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
2046 	.word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
2047 	.word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
2048 	.word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
2049 	.word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
2050 	.word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
2051 	.word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
2052 	.word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
2053 	.word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
2054 	.word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
2055 	.word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
2056 	.word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
2057 	.word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
2058 	.word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
2059 	.word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
2060 	.word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
2061 	.word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
2062 	.word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
2063 	.word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
2064 	.word 0x80cd44c8 !aes_dround01_l %f52,%f8,%f2,%f0
2065 	.word 0x84cdc4e8 !aes_dround23_l %f54,%f8,%f2,%f2
2066 	.word 0x88cd4cca !aes_dround01_l %f52,%f10,%f6,%f4
2067 	retl
2068 	.word 0x8ccdccea !aes_dround23_l %f54,%f10,%f6,%f6 ! in delay slot
2069 .type _aes128_decrypt_2x,#function
2070 .size _aes128_decrypt_2x,.-_aes128_decrypt_2x
2071 .align 32
! ---------------------------------------------------------------------
! _aes192_encrypt_1x: encrypt one 128-bit block held in %f0:%f2.
! Assumes the full AES-192 encryption key schedule (12 rounds) has been
! pre-loaded into %f16..%f62; round 0 is applied by the caller via
! integer xor.  %f4 is the round temporary; the final round uses the
! _l ("last") opcode forms.  Leaf routine: retl with the closing
! eround in the delay slot.
! Out: ciphertext block in %f0:%f2.  Clobbers %f4.
! ---------------------------------------------------------------------
2072 _aes192_encrypt_1x:
2073 	.word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2074 	.word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2075 	.word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
2076 	.word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
2077 	.word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
2078 	.word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2079 	.word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
2080 	.word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
2081 	.word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
2082 	.word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2083 	.word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
2084 	.word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
2085 	.word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
2086 	.word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2087 	.word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
2088 	.word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
2089 	.word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
2090 	.word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2091 	.word 0x80cd4404 !aes_eround01 %f52,%f4,%f2,%f0
2092 	.word 0x84cdc424 !aes_eround23 %f54,%f4,%f2,%f2
2093 	.word 0x88ce4400 !aes_eround01 %f56,%f0,%f2,%f4
2094 	.word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2095 	.word 0x80cf4484 !aes_eround01_l %f60,%f4,%f2,%f0
2096 	retl
2097 	.word 0x84cfc4a4 !aes_eround23_l %f62,%f4,%f2,%f2 ! in delay slot
2098 .type _aes192_encrypt_1x,#function
2099 .size _aes192_encrypt_1x,.-_aes192_encrypt_1x
2100
2101 .align 32
! ---------------------------------------------------------------------
! _aes192_encrypt_2x: encrypt two interleaved 128-bit blocks,
! block A in %f0:%f2 and block B in %f4:%f6, with the AES-192
! encryption key schedule pre-loaded in %f16..%f62 (round 0 applied by
! the caller).  Round instructions for the two blocks are interleaved
! to hide instruction latency; %f8/%f10 are the per-block temporaries.
! Leaf routine: retl with the closing eround in the delay slot.
! Out: ciphertext blocks in %f0:%f2 and %f4:%f6.  Clobbers %f8, %f10.
! ---------------------------------------------------------------------
2102 _aes192_encrypt_2x:
2103 	.word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2104 	.word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2105 	.word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2106 	.word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
2107 	.word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
2108 	.word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
2109 	.word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
2110 	.word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
2111 	.word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
2112 	.word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2113 	.word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
2114 	.word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
2115 	.word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
2116 	.word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
2117 	.word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
2118 	.word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
2119 	.word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
2120 	.word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2121 	.word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
2122 	.word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
2123 	.word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
2124 	.word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
2125 	.word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
2126 	.word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
2127 	.word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
2128 	.word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2129 	.word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
2130 	.word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
2131 	.word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
2132 	.word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
2133 	.word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
2134 	.word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
2135 	.word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
2136 	.word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2137 	.word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
2138 	.word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
2139 	.word 0x80cd4408 !aes_eround01 %f52,%f8,%f2,%f0
2140 	.word 0x84cdc428 !aes_eround23 %f54,%f8,%f2,%f2
2141 	.word 0x88cd4c0a !aes_eround01 %f52,%f10,%f6,%f4
2142 	.word 0x8ccdcc2a !aes_eround23 %f54,%f10,%f6,%f6
2143 	.word 0x90ce4400 !aes_eround01 %f56,%f0,%f2,%f8
2144 	.word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2145 	.word 0x94ce4c04 !aes_eround01 %f56,%f4,%f6,%f10
2146 	.word 0x8ccecc24 !aes_eround23 %f58,%f4,%f6,%f6
2147 	.word 0x80cf4488 !aes_eround01_l %f60,%f8,%f2,%f0
2148 	.word 0x84cfc4a8 !aes_eround23_l %f62,%f8,%f2,%f2
2149 	.word 0x88cf4c8a !aes_eround01_l %f60,%f10,%f6,%f4
2150 	retl
2151 	.word 0x8ccfccaa !aes_eround23_l %f62,%f10,%f6,%f6 ! in delay slot
2152 .type _aes192_encrypt_2x,#function
2153 .size _aes192_encrypt_2x,.-_aes192_encrypt_2x
2154
2155 .align 32
! ---------------------------------------------------------------------
! _aes256_encrypt_1x: encrypt one 128-bit block held in %f0:%f2.
! The AES-256 schedule (14 rounds) does not fit in %f16..%f62, so the
! last rounds' keys are reloaded from the key schedule in memory at
! %i3 (offsets 208..232), reusing %f16..%f22; on exit those registers
! are restored from [%i3 + 16..40] so the resident schedule is intact
! for the next call.  Round 0 is applied by the caller via integer xor.
! Leaf routine: retl with a restore-load in the delay slot.
! Out: ciphertext block in %f0:%f2.  Clobbers %f4.
! ---------------------------------------------------------------------
2156 _aes256_encrypt_1x:
2157 	.word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2158 	.word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2159 	ldd [%i3 + 208], %f16 ! reload keys for rounds 13-14
2160 	ldd [%i3 + 216], %f18
2161 	.word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
2162 	.word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
2163 	ldd [%i3 + 224], %f20
2164 	ldd [%i3 + 232], %f22
2165 	.word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
2166 	.word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2167 	.word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
2168 	.word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
2169 	.word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
2170 	.word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2171 	.word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
2172 	.word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
2173 	.word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
2174 	.word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2175 	.word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
2176 	.word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
2177 	.word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
2178 	.word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2179 	.word 0x80cd4404 !aes_eround01 %f52,%f4,%f2,%f0
2180 	.word 0x84cdc424 !aes_eround23 %f54,%f4,%f2,%f2
2181 	.word 0x88ce4400 !aes_eround01 %f56,%f0,%f2,%f4
2182 	.word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2183 	.word 0x80cf4404 !aes_eround01 %f60,%f4,%f2,%f0
2184 	.word 0x84cfc424 !aes_eround23 %f62,%f4,%f2,%f2
2185 	.word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2186 	.word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2187 	ldd [%i3 + 16], %f16 ! restore resident schedule for next call
2188 	ldd [%i3 + 24], %f18
2189 	.word 0x80cd0484 !aes_eround01_l %f20,%f4,%f2,%f0
2190 	.word 0x84cd84a4 !aes_eround23_l %f22,%f4,%f2,%f2
2191 	ldd [%i3 + 32], %f20
2192 	retl
2193 	ldd [%i3 + 40], %f22 ! in delay slot
2194 .type _aes256_encrypt_1x,#function
2195 .size _aes256_encrypt_1x,.-_aes256_encrypt_1x
2196
!----------------------------------------------------------------------
! _aes256_encrypt_2x: interleaved AES-256 encryption of TWO 16-byte
! blocks for higher ILP.
! In:  %f0:%f2   = block A (round key 0 already xor-ed in by caller)
!      %f4:%f6   = block B (likewise)
!      %f16-%f62 = round keys from [%i3+16..200]
!      %i3       = key schedule pointer (remaining round keys fetched
!                  from [%i3+208..232])
! Out: %f0:%f2 = encrypted A, %f4:%f6 = encrypted B;
!      %f16-%f22 restored from [%i3+16..40]
! Clobbers %f8/%f10 (round temporaries).  Leaf routine (retl).
! The CTR path calls _aes256_encrypt_2x+16, skipping the first four
! instructions, which it issues itself with different operands.
!----------------------------------------------------------------------
2197 .align 32
2198 _aes256_encrypt_2x:
2199 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2200 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2201 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2202 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
2203 ldd [%i3 + 208], %f16 ! fetch next-to-last round key early
2204 ldd [%i3 + 216], %f18
2205 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
2206 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
2207 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
2208 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
2209 ldd [%i3 + 224], %f20 ! fetch last round key
2210 ldd [%i3 + 232], %f22
2211 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
2212 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2213 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
2214 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
2215 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
2216 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
2217 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
2218 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
2219 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
2220 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2221 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
2222 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
2223 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
2224 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
2225 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
2226 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
2227 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
2228 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2229 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
2230 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
2231 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
2232 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
2233 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
2234 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
2235 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
2236 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2237 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
2238 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
2239 .word 0x80cd4408 !aes_eround01 %f52,%f8,%f2,%f0
2240 .word 0x84cdc428 !aes_eround23 %f54,%f8,%f2,%f2
2241 .word 0x88cd4c0a !aes_eround01 %f52,%f10,%f6,%f4
2242 .word 0x8ccdcc2a !aes_eround23 %f54,%f10,%f6,%f6
2243 .word 0x90ce4400 !aes_eround01 %f56,%f0,%f2,%f8
2244 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2245 .word 0x94ce4c04 !aes_eround01 %f56,%f4,%f6,%f10
2246 .word 0x8ccecc24 !aes_eround23 %f58,%f4,%f6,%f6
2247 .word 0x80cf4408 !aes_eround01 %f60,%f8,%f2,%f0
2248 .word 0x84cfc428 !aes_eround23 %f62,%f8,%f2,%f2
2249 .word 0x88cf4c0a !aes_eround01 %f60,%f10,%f6,%f4
2250 .word 0x8ccfcc2a !aes_eround23 %f62,%f10,%f6,%f6
! last two rounds with the keys loaded above; *_l forms close the cipher
2251 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2252 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2253 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2254 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
2255 ldd [%i3 + 16], %f16 ! restore %f16-%f22 for the next call
2256 ldd [%i3 + 24], %f18
2257 .word 0x80cd0488 !aes_eround01_l %f20,%f8,%f2,%f0
2258 .word 0x84cd84a8 !aes_eround23_l %f22,%f8,%f2,%f2
2259 .word 0x88cd0c8a !aes_eround01_l %f20,%f10,%f6,%f4
2260 .word 0x8ccd8caa !aes_eround23_l %f22,%f10,%f6,%f6
2261 ldd [%i3 + 32], %f20
2262 retl
2263 ldd [%i3 + 40], %f22 ! delay slot
2264 .type _aes256_encrypt_2x,#function
2265 .size _aes256_encrypt_2x,.-_aes256_encrypt_2x
2266
!----------------------------------------------------------------------
! _aes192_loadkey: pre-load an AES key schedule into registers.
! In:  %i3 = key schedule pointer
! Out: %g4:%g5  = round key 0 ([%i3+0..8])
!      %f16-%f62 = 24 doubles of round-key material ([%i3+16..200])
! Leaf routine (retl/nop).  Aliased below for the 192- and 256-bit
! encrypt/decrypt entry points: 24 doubles cover the full AES-192
! schedule, while the AES-256 round routines fetch their remaining
! keys ([%i3+208..232]) themselves.
!----------------------------------------------------------------------
2267 .align 32
2268 _aes192_loadkey:
2269 ldx [%i3 + 0], %g4
2270 ldx [%i3 + 8], %g5
2271 ldd [%i3 + 16], %f16
2272 ldd [%i3 + 24], %f18
2273 ldd [%i3 + 32], %f20
2274 ldd [%i3 + 40], %f22
2275 ldd [%i3 + 48], %f24
2276 ldd [%i3 + 56], %f26
2277 ldd [%i3 + 64], %f28
2278 ldd [%i3 + 72], %f30
2279 ldd [%i3 + 80], %f32
2280 ldd [%i3 + 88], %f34
2281 ldd [%i3 + 96], %f36
2282 ldd [%i3 + 104], %f38
2283 ldd [%i3 + 112], %f40
2284 ldd [%i3 + 120], %f42
2285 ldd [%i3 + 128], %f44
2286 ldd [%i3 + 136], %f46
2287 ldd [%i3 + 144], %f48
2288 ldd [%i3 + 152], %f50
2289 ldd [%i3 + 160], %f52
2290 ldd [%i3 + 168], %f54
2291 ldd [%i3 + 176], %f56
2292 ldd [%i3 + 184], %f58
2293 ldd [%i3 + 192], %f60
2294 ldd [%i3 + 200], %f62
2295 retl
2296 nop
2297 .type _aes192_loadkey,#function
2298 .size _aes192_loadkey,.-_aes192_loadkey
! all load-key entry points share one implementation
2299 _aes256_loadkey=_aes192_loadkey
2300 _aes192_load_enckey=_aes192_loadkey
2301 _aes192_load_deckey=_aes192_loadkey
2302 _aes256_load_enckey=_aes192_loadkey
2303 _aes256_load_deckey=_aes192_loadkey
!----------------------------------------------------------------------
! aes256_t4_cbc_encrypt(inp=%i0, out=%i1, len=%i2 bytes,
!                       key=%i3, ivec=%i4)
! AES-256-CBC encryption using SPARC T4 AES instructions.
! Register roles after setup:
!   %l0 = (inp & 7) * 8   shift for realigning unaligned input
!   %l1 = 64 - %l0
!   %l2 = out & 7         nonzero => unaligned-output store path
!   %l3 = partial-store byte mask derived from %l2
!   %l5 = path selector: inp-out, forced to 0 when out is unaligned
!         or len < 128 (see movrnz/movleu below)
!   %g4:%g5 = round key 0;  %f0:%f2 = running IV / last ciphertext
! Three store strategies: plain std loop, faligndata + stda partial
! stores for unaligned out, and an ASI_BLK_INIT bulk path
! (.L256cbc_enc_blk) when out is 8-aligned, len >= 128 and inp != out.
! The final IV is written back to [%i4] on every exit path.
!----------------------------------------------------------------------
2304 .globl aes256_t4_cbc_encrypt
2305 .align 32
2306 aes256_t4_cbc_encrypt:
2307 save %sp, -112, %sp
2308 cmp %i2, 0
2309 be,pn %icc, .L256_cbc_enc_abort
2310 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2311 sub %i0, %i1, %l5 ! %i0!=%i1
2312 ld [%i4 + 0], %f0
2313 ld [%i4 + 4], %f1
2314 ld [%i4 + 8], %f2
2315 ld [%i4 + 12], %f3
2316 prefetch [%i0], 20
2317 prefetch [%i0 + 63], 20
2318 call _aes256_load_enckey
2319 and %i0, 7, %l0 ! delay slot
2320 andn %i0, 7, %i0
2321 sll %l0, 3, %l0
2322 mov 64, %l1
2323 mov 0xff, %l3
2324 sub %l1, %l0, %l1
2325 and %i1, 7, %l2
2326 cmp %i2, 127
2327 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2328 movleu %icc, 0, %l5 ! %i2<128 ||
2329 brnz,pn %l5, .L256cbc_enc_blk ! %i0==%i1)
2330 srl %l3, %l2, %l3
2331
2332 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2333 srlx %i2, 4, %i2 ! byte count -> block count
2334 prefetch [%i1], 22
2335
! main loop: one block per iteration, input realigned in integer regs
2336 .L256_cbc_enc_loop:
2337 ldx [%i0 + 0], %o0
2338 brz,pt %l0, 4f ! aligned input: skip shifting
2339 ldx [%i0 + 8], %o1
2340
2341 ldx [%i0 + 16], %o2
2342 sllx %o0, %l0, %o0
2343 srlx %o1, %l1, %g1
2344 sllx %o1, %l0, %o1
2345 or %g1, %o0, %o0
2346 srlx %o2, %l1, %o2
2347 or %o2, %o1, %o1
2348 4:
2349 xor %g4, %o0, %o0 ! ^= rk[0]
2350 xor %g5, %o1, %o1
2351 .word 0x99b02308 !movxtod %o0,%f12
2352 .word 0x9db02309 !movxtod %o1,%f14
2353
2354 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2355 .word 0x85b38d82 !fxor %f14,%f2,%f2
2356 prefetch [%i1 + 63], 22
2357 prefetch [%i0 + 16+63], 20
2358 call _aes256_encrypt_1x
2359 add %i0, 16, %i0 ! delay slot
2360
2361 brnz,pn %l2, 2f ! unaligned-output store path
2362 sub %i2, 1, %i2 ! delay slot
2363
2364 std %f0, [%i1 + 0]
2365 std %f2, [%i1 + 8]
2366 brnz,pt %i2, .L256_cbc_enc_loop
2367 add %i1, 16, %i1 ! delay slot
! done: write updated IV back to ivec
2368 st %f0, [%i4 + 0]
2369 st %f1, [%i4 + 4]
2370 st %f2, [%i4 + 8]
2371 st %f3, [%i4 + 12]
2372 .L256_cbc_enc_abort:
2373 ret
2374 restore
2375
2376 .align 16
2377 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2378 ! and ~3x deterioration
2379 ! in inp==out case
2380 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2381 .word 0x8db00902 !faligndata %f0,%f2,%f6
2382 .word 0x91b08902 !faligndata %f2,%f2,%f8
2383
2384 stda %f4, [%i1 + %l3]0xc0 ! partial store
2385 std %f6, [%i1 + 8]
2386 add %i1, 16, %i1
2387 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2388 stda %f8, [%i1 + %l3]0xc0 ! partial store
2389
2390 brnz,pt %i2, .L256_cbc_enc_loop+4
2391 orn %g0, %l3, %l3 ! delay slot: restore mask
2392 st %f0, [%i4 + 0]
2393 st %f1, [%i4 + 4]
2394 st %f2, [%i4 + 8]
2395 st %f3, [%i4 + 12]
2396 ret
2397 restore
2398
2399 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: 64-byte-aligned tail split off, body written with
! ASI_BLK_INIT stores (no prior read of the destination line)
2400 .align 32
2401 .L256cbc_enc_blk:
2402 add %i1, %i2, %l5
2403 and %l5, 63, %l5 ! tail
2404 sub %i2, %l5, %i2
2405 add %l5, 15, %l5 ! round up to 16n
2406 srlx %i2, 4, %i2
2407 srl %l5, 4, %l5
2408
2409 .L256_cbc_enc_blk_loop:
2410 ldx [%i0 + 0], %o0
2411 brz,pt %l0, 5f
2412 ldx [%i0 + 8], %o1
2413
2414 ldx [%i0 + 16], %o2
2415 sllx %o0, %l0, %o0
2416 srlx %o1, %l1, %g1
2417 sllx %o1, %l0, %o1
2418 or %g1, %o0, %o0
2419 srlx %o2, %l1, %o2
2420 or %o2, %o1, %o1
2421 5:
2422 xor %g4, %o0, %o0 ! ^= rk[0]
2423 xor %g5, %o1, %o1
2424 .word 0x99b02308 !movxtod %o0,%f12
2425 .word 0x9db02309 !movxtod %o1,%f14
2426
2427 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2428 .word 0x85b38d82 !fxor %f14,%f2,%f2
2429 prefetch [%i0 + 16+63], 20
2430 call _aes256_encrypt_1x
2431 add %i0, 16, %i0 ! delay slot
2432 sub %i2, 1, %i2
2433
2434 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2435 add %i1, 8, %i1
2436 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2437 brnz,pt %i2, .L256_cbc_enc_blk_loop
2438 add %i1, 8, %i1 ! delay slot
2439
! order block-init stores before anything else, then let the normal
! loop finish the sub-64-byte tail (%l5 blocks)
2440 membar #StoreLoad|#StoreStore
2441 brnz,pt %l5, .L256_cbc_enc_loop
2442 mov %l5, %i2 ! delay slot
2443 st %f0, [%i4 + 0]
2444 st %f1, [%i4 + 4]
2445 st %f2, [%i4 + 8]
2446 st %f3, [%i4 + 12]
2447 ret
2448 restore
2449 .type aes256_t4_cbc_encrypt,#function
2450 .size aes256_t4_cbc_encrypt,.-aes256_t4_cbc_encrypt
!----------------------------------------------------------------------
! aes192_t4_cbc_encrypt(inp=%i0, out=%i1, len=%i2 bytes,
!                       key=%i3, ivec=%i4)
! AES-192-CBC encryption; structurally identical to
! aes256_t4_cbc_encrypt above, but the whole AES-192 schedule fits in
! %f16-%f62 so the round routine (_aes192_encrypt_1x) needs no extra
! key fetches.  See the 256-bit variant for the register roles and the
! three store strategies (aligned, partial-store, ASI_BLK_INIT bulk).
!----------------------------------------------------------------------
2451 .globl aes192_t4_cbc_encrypt
2452 .align 32
2453 aes192_t4_cbc_encrypt:
2454 save %sp, -112, %sp
2455 cmp %i2, 0
2456 be,pn %icc, .L192_cbc_enc_abort
2457 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2458 sub %i0, %i1, %l5 ! %i0!=%i1
2459 ld [%i4 + 0], %f0
2460 ld [%i4 + 4], %f1
2461 ld [%i4 + 8], %f2
2462 ld [%i4 + 12], %f3
2463 prefetch [%i0], 20
2464 prefetch [%i0 + 63], 20
2465 call _aes192_load_enckey
2466 and %i0, 7, %l0 ! delay slot
2467 andn %i0, 7, %i0
2468 sll %l0, 3, %l0
2469 mov 64, %l1
2470 mov 0xff, %l3
2471 sub %l1, %l0, %l1
2472 and %i1, 7, %l2
2473 cmp %i2, 127
2474 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2475 movleu %icc, 0, %l5 ! %i2<128 ||
2476 brnz,pn %l5, .L192cbc_enc_blk ! %i0==%i1)
2477 srl %l3, %l2, %l3
2478
2479 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2480 srlx %i2, 4, %i2 ! byte count -> block count
2481 prefetch [%i1], 22
2482
! main loop: one block per iteration, input realigned in integer regs
2483 .L192_cbc_enc_loop:
2484 ldx [%i0 + 0], %o0
2485 brz,pt %l0, 4f ! aligned input: skip shifting
2486 ldx [%i0 + 8], %o1
2487
2488 ldx [%i0 + 16], %o2
2489 sllx %o0, %l0, %o0
2490 srlx %o1, %l1, %g1
2491 sllx %o1, %l0, %o1
2492 or %g1, %o0, %o0
2493 srlx %o2, %l1, %o2
2494 or %o2, %o1, %o1
2495 4:
2496 xor %g4, %o0, %o0 ! ^= rk[0]
2497 xor %g5, %o1, %o1
2498 .word 0x99b02308 !movxtod %o0,%f12
2499 .word 0x9db02309 !movxtod %o1,%f14
2500
2501 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2502 .word 0x85b38d82 !fxor %f14,%f2,%f2
2503 prefetch [%i1 + 63], 22
2504 prefetch [%i0 + 16+63], 20
2505 call _aes192_encrypt_1x
2506 add %i0, 16, %i0 ! delay slot
2507
2508 brnz,pn %l2, 2f ! unaligned-output store path
2509 sub %i2, 1, %i2 ! delay slot
2510
2511 std %f0, [%i1 + 0]
2512 std %f2, [%i1 + 8]
2513 brnz,pt %i2, .L192_cbc_enc_loop
2514 add %i1, 16, %i1 ! delay slot
! done: write updated IV back to ivec
2515 st %f0, [%i4 + 0]
2516 st %f1, [%i4 + 4]
2517 st %f2, [%i4 + 8]
2518 st %f3, [%i4 + 12]
2519 .L192_cbc_enc_abort:
2520 ret
2521 restore
2522
2523 .align 16
2524 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2525 ! and ~3x deterioration
2526 ! in inp==out case
2527 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2528 .word 0x8db00902 !faligndata %f0,%f2,%f6
2529 .word 0x91b08902 !faligndata %f2,%f2,%f8
2530
2531 stda %f4, [%i1 + %l3]0xc0 ! partial store
2532 std %f6, [%i1 + 8]
2533 add %i1, 16, %i1
2534 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2535 stda %f8, [%i1 + %l3]0xc0 ! partial store
2536
2537 brnz,pt %i2, .L192_cbc_enc_loop+4
2538 orn %g0, %l3, %l3 ! delay slot: restore mask
2539 st %f0, [%i4 + 0]
2540 st %f1, [%i4 + 4]
2541 st %f2, [%i4 + 8]
2542 st %f3, [%i4 + 12]
2543 ret
2544 restore
2545
2546 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path with ASI_BLK_INIT stores; tail finished by the main loop
2547 .align 32
2548 .L192cbc_enc_blk:
2549 add %i1, %i2, %l5
2550 and %l5, 63, %l5 ! tail
2551 sub %i2, %l5, %i2
2552 add %l5, 15, %l5 ! round up to 16n
2553 srlx %i2, 4, %i2
2554 srl %l5, 4, %l5
2555
2556 .L192_cbc_enc_blk_loop:
2557 ldx [%i0 + 0], %o0
2558 brz,pt %l0, 5f
2559 ldx [%i0 + 8], %o1
2560
2561 ldx [%i0 + 16], %o2
2562 sllx %o0, %l0, %o0
2563 srlx %o1, %l1, %g1
2564 sllx %o1, %l0, %o1
2565 or %g1, %o0, %o0
2566 srlx %o2, %l1, %o2
2567 or %o2, %o1, %o1
2568 5:
2569 xor %g4, %o0, %o0 ! ^= rk[0]
2570 xor %g5, %o1, %o1
2571 .word 0x99b02308 !movxtod %o0,%f12
2572 .word 0x9db02309 !movxtod %o1,%f14
2573
2574 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2575 .word 0x85b38d82 !fxor %f14,%f2,%f2
2576 prefetch [%i0 + 16+63], 20
2577 call _aes192_encrypt_1x
2578 add %i0, 16, %i0 ! delay slot
2579 sub %i2, 1, %i2
2580
2581 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2582 add %i1, 8, %i1
2583 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2584 brnz,pt %i2, .L192_cbc_enc_blk_loop
2585 add %i1, 8, %i1 ! delay slot
2586
2587 membar #StoreLoad|#StoreStore
2588 brnz,pt %l5, .L192_cbc_enc_loop
2589 mov %l5, %i2 ! delay slot
2590 st %f0, [%i4 + 0]
2591 st %f1, [%i4 + 4]
2592 st %f2, [%i4 + 8]
2593 st %f3, [%i4 + 12]
2594 ret
2595 restore
2596 .type aes192_t4_cbc_encrypt,#function
2597 .size aes192_t4_cbc_encrypt,.-aes192_t4_cbc_encrypt
!----------------------------------------------------------------------
! aes256_t4_ctr32_encrypt(inp=%i0, out=%i1, blocks=%i2,
!                         key=%i3, ivec=%i4)
! AES-256 in CTR mode with a 32-bit counter.  NOTE: %i2 counts 16-byte
! BLOCKS here (sllx %i2,4 converts to bytes), unlike the CBC entry
! points which take bytes.
! Counter handling: the 128-bit counter block from [%i4] is split as
!   %f14     = upper 64 bits, pre-xor-ed with rk[0] high (%g4)
!   %g5      = rk[0] low xor-ed with counter bits 64..95
!   %l7      = low 32-bit counter word, incremented per block and
!              wrapped to 32 bits with "srl %l7,0" (clruw)
! Since only the low word changes, the first AES round for each block
! is issued inline and the tail of the round routine is entered past
! those instructions (_aes256_encrypt_1x+8 / _aes256_encrypt_2x+16).
! Paths: single-block loop, 2x-interleaved loop (even block counts),
! and an ASI_BLK_INIT bulk path for large aligned non-overlapping
! buffers.  The updated counter is NOT written back to [%i4].
!----------------------------------------------------------------------
2598 .globl aes256_t4_ctr32_encrypt
2599 .align 32
2600 aes256_t4_ctr32_encrypt:
2601 save %sp, -112, %sp
2602 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2603
2604 prefetch [%i0], 20
2605 prefetch [%i0 + 63], 20
2606 call _aes256_load_enckey
2607 sllx %i2, 4, %i2 ! delay slot: blocks -> bytes
2608
2609 ld [%i4 + 0], %l4 ! counter
2610 ld [%i4 + 4], %l5
2611 ld [%i4 + 8], %l6
2612 ld [%i4 + 12], %l7
2613
2614 sllx %l4, 32, %o5
2615 or %l5, %o5, %o5
2616 sllx %l6, 32, %g1
2617 xor %o5, %g4, %g4 ! ^= rk[0]
2618 xor %g1, %g5, %g5
2619 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
2620
2621 sub %i0, %i1, %l5 ! %i0!=%i1
2622 and %i0, 7, %l0
2623 andn %i0, 7, %i0
2624 sll %l0, 3, %l0
2625 mov 64, %l1
2626 mov 0xff, %l3
2627 sub %l1, %l0, %l1
2628 and %i1, 7, %l2
2629 cmp %i2, 255
2630 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2631 movleu %icc, 0, %l5 ! %i2<256 ||
2632 brnz,pn %l5, .L256_ctr32_blk ! %i0==%i1)
2633 srl %l3, %l2, %l3
2634
2635 andcc %i2, 16, %g0 ! is number of blocks even?
2636 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2637 bz %icc, .L256_ctr32_loop2x
2638 srlx %i2, 4, %i2 ! delay slot: bytes -> blocks
! odd block count: do one block here, then fall into the 2x loop
2639 .L256_ctr32_loop:
2640 ldx [%i0 + 0], %o0
2641 brz,pt %l0, 4f ! aligned input: skip shifting
2642 ldx [%i0 + 8], %o1
2643
2644 ldx [%i0 + 16], %o2
2645 sllx %o0, %l0, %o0
2646 srlx %o1, %l1, %g1
2647 sllx %o1, %l0, %o1
2648 or %g1, %o0, %o0
2649 srlx %o2, %l1, %o2
2650 or %o2, %o1, %o1
2651 4:
2652 xor %g5, %l7, %g1 ! ^= rk[0]
2653 add %l7, 1, %l7 ! bump 32-bit counter
2654 .word 0x85b02301 !movxtod %g1,%f2
2655 srl %l7, 0, %l7 ! clruw
2656 prefetch [%i1 + 63], 22
2657 prefetch [%i0 + 16+63], 20
! first round issued here with the counter block, then enter the
! round routine past its own first round pair
2658 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
2659 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2660 call _aes256_encrypt_1x+8
2661 add %i0, 16, %i0 ! delay slot
2662
2663 .word 0x95b02308 !movxtod %o0,%f10
2664 .word 0x99b02309 !movxtod %o1,%f12
2665 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
2666 .word 0x85b30d82 !fxor %f12,%f2,%f2
2667
2668 brnz,pn %l2, 2f ! unaligned-output store path
2669 sub %i2, 1, %i2 ! delay slot
2670
2671 std %f0, [%i1 + 0]
2672 std %f2, [%i1 + 8]
2673 brnz,pt %i2, .L256_ctr32_loop2x
2674 add %i1, 16, %i1 ! delay slot
2675
2676 ret
2677 restore
2678
2679 .align 16
2680 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2681 ! and ~3x deterioration
2682 ! in inp==out case
2683 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2684 .word 0x8db00902 !faligndata %f0,%f2,%f6
2685 .word 0x91b08902 !faligndata %f2,%f2,%f8
2686 stda %f4, [%i1 + %l3]0xc0 ! partial store
2687 std %f6, [%i1 + 8]
2688 add %i1, 16, %i1
2689 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2690 stda %f8, [%i1 + %l3]0xc0 ! partial store
2691
2692 brnz,pt %i2, .L256_ctr32_loop2x+4
2693 orn %g0, %l3, %l3 ! delay slot: restore mask
2694
2695 ret
2696 restore
2697
2698 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! 2x-interleaved loop: two counter blocks per iteration
2699 .align 32
2700 .L256_ctr32_loop2x:
2701 ldx [%i0 + 0], %o0
2702 ldx [%i0 + 8], %o1
2703 ldx [%i0 + 16], %o2
2704 brz,pt %l0, 4f ! aligned input: skip shifting
2705 ldx [%i0 + 24], %o3
2706
2707 ldx [%i0 + 32], %o4
2708 sllx %o0, %l0, %o0
2709 srlx %o1, %l1, %g1
2710 or %g1, %o0, %o0
2711 sllx %o1, %l0, %o1
2712 srlx %o2, %l1, %g1
2713 or %g1, %o1, %o1
2714 sllx %o2, %l0, %o2
2715 srlx %o3, %l1, %g1
2716 or %g1, %o2, %o2
2717 sllx %o3, %l0, %o3
2718 srlx %o4, %l1, %o4
2719 or %o4, %o3, %o3
2720 4:
2721 xor %g5, %l7, %g1 ! ^= rk[0]
2722 add %l7, 1, %l7 ! counter for block A
2723 .word 0x85b02301 !movxtod %g1,%f2
2724 srl %l7, 0, %l7 ! clruw
2725 xor %g5, %l7, %g1
2726 add %l7, 1, %l7 ! counter for block B
2727 .word 0x8db02301 !movxtod %g1,%f6
2728 srl %l7, 0, %l7 ! clruw
2729 prefetch [%i1 + 63], 22
2730 prefetch [%i0 + 32+63], 20
! first round for both blocks, then enter the 2x routine past its own
2731 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
2732 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2733 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
2734 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
2735 call _aes256_encrypt_2x+16
2736 add %i0, 32, %i0 ! delay slot
2737
2738 .word 0x91b02308 !movxtod %o0,%f8
2739 .word 0x95b02309 !movxtod %o1,%f10
2740 .word 0x99b0230a !movxtod %o2,%f12
2741 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
2742 .word 0x91b0230b !movxtod %o3,%f8
2743 .word 0x85b28d82 !fxor %f10,%f2,%f2
2744 .word 0x89b30d84 !fxor %f12,%f4,%f4
2745 .word 0x8db20d86 !fxor %f8,%f6,%f6
2746
2747 brnz,pn %l2, 2f ! unaligned-output store path
2748 sub %i2, 2, %i2 ! delay slot
2749
2750 std %f0, [%i1 + 0]
2751 std %f2, [%i1 + 8]
2752 std %f4, [%i1 + 16]
2753 std %f6, [%i1 + 24]
2754 brnz,pt %i2, .L256_ctr32_loop2x
2755 add %i1, 32, %i1 ! delay slot
2756
2757 ret
2758 restore
2759
2760 .align 16
2761 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2762 ! and ~3x deterioration
2763 ! in inp==out case
2764 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
2765 .word 0x81b00902 !faligndata %f0,%f2,%f0
2766 .word 0x85b08904 !faligndata %f2,%f4,%f2
2767 .word 0x89b10906 !faligndata %f4,%f6,%f4
2768 .word 0x8db18906 !faligndata %f6,%f6,%f6
2769
2770 stda %f8, [%i1 + %l3]0xc0 ! partial store
2771 std %f0, [%i1 + 8]
2772 std %f2, [%i1 + 16]
2773 std %f4, [%i1 + 24]
2774 add %i1, 32, %i1
2775 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2776 stda %f6, [%i1 + %l3]0xc0 ! partial store
2777
2778 brnz,pt %i2, .L256_ctr32_loop2x+4
2779 orn %g0, %l3, %l3 ! delay slot: restore mask
2780
2781 ret
2782 restore
2783
2784 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: ASI_BLK_INIT stores, sub-64-byte tail handled afterwards
! by the normal loops
2785 .align 32
2786 .L256_ctr32_blk:
2787 add %i1, %i2, %l5
2788 and %l5, 63, %l5 ! tail
2789 sub %i2, %l5, %i2
2790 add %l5, 15, %l5 ! round up to 16n
2791 srlx %i2, 4, %i2
2792 srl %l5, 4, %l5
2793 sub %i2, 1, %i2
2794 add %l5, 1, %l5
2795
2796 .L256_ctr32_blk_loop2x:
2797 ldx [%i0 + 0], %o0
2798 ldx [%i0 + 8], %o1
2799 ldx [%i0 + 16], %o2
2800 brz,pt %l0, 5f ! aligned input: skip shifting
2801 ldx [%i0 + 24], %o3
2802
2803 ldx [%i0 + 32], %o4
2804 sllx %o0, %l0, %o0
2805 srlx %o1, %l1, %g1
2806 or %g1, %o0, %o0
2807 sllx %o1, %l0, %o1
2808 srlx %o2, %l1, %g1
2809 or %g1, %o1, %o1
2810 sllx %o2, %l0, %o2
2811 srlx %o3, %l1, %g1
2812 or %g1, %o2, %o2
2813 sllx %o3, %l0, %o3
2814 srlx %o4, %l1, %o4
2815 or %o4, %o3, %o3
2816 5:
2817 xor %g5, %l7, %g1 ! ^= rk[0]
2818 add %l7, 1, %l7 ! counter for block A
2819 .word 0x85b02301 !movxtod %g1,%f2
2820 srl %l7, 0, %l7 ! clruw
2821 xor %g5, %l7, %g1
2822 add %l7, 1, %l7 ! counter for block B
2823 .word 0x8db02301 !movxtod %g1,%f6
2824 srl %l7, 0, %l7 ! clruw
2825 prefetch [%i0 + 32+63], 20
2826 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
2827 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2828 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
2829 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
2830 call _aes256_encrypt_2x+16
2831 add %i0, 32, %i0 ! delay slot
2832 subcc %i2, 2, %i2
2833
2834 .word 0x91b02308 !movxtod %o0,%f8
2835 .word 0x95b02309 !movxtod %o1,%f10
2836 .word 0x99b0230a !movxtod %o2,%f12
2837 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
2838 .word 0x91b0230b !movxtod %o3,%f8
2839 .word 0x85b28d82 !fxor %f10,%f2,%f2
2840 .word 0x89b30d84 !fxor %f12,%f4,%f4
2841 .word 0x8db20d86 !fxor %f8,%f6,%f6
2842
2843 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2844 add %i1, 8, %i1
2845 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2846 add %i1, 8, %i1
2847 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2848 add %i1, 8, %i1
2849 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2850 bgu,pt %icc, .L256_ctr32_blk_loop2x
2851 add %i1, 8, %i1 ! delay slot
2852
! tail: %l5 blocks remain; pick the 1x loop if odd, 2x loop otherwise
2853 add %l5, %i2, %i2
2854 andcc %i2, 1, %g0 ! is number of blocks even?
2855 membar #StoreLoad|#StoreStore
2856 bnz,pt %icc, .L256_ctr32_loop
2857 srl %i2, 0, %i2 ! delay slot
2858 brnz,pn %i2, .L256_ctr32_loop2x
2859 nop
2860
2861 ret
2862 restore
2863 .type aes256_t4_ctr32_encrypt,#function
2864 .size aes256_t4_ctr32_encrypt,.-aes256_t4_ctr32_encrypt
!----------------------------------------------------------------------
! aes256_t4_xts_encrypt(inp=%i0, out=%i1, len=%i2 bytes,
!                       key1=%i3, key2=%i4, ivec=%i5)
! AES-256-XTS encryption.  The initial tweak is computed by encrypting
! the 16 bytes at %i5 under key2 (%i4) via aes_t4_encrypt into a stack
! slot; thereafter %g3:%g2 holds the tweak and is advanced per block by
! doubling in GF(2^128) (srax/addcc/addxc with the 0x87 feedback
! polynomial).  bmask/bshuffle byte-swap the tweak when moving it into
! FP registers (it is kept little-endian in %g3:%g2).
! %i5 is then reused as len&15: a nonzero value triggers ciphertext
! stealing for the final partial block (.L256_xts_ensteal).
! Alignment/path-selection registers (%l0-%l3, %l5) work exactly as in
! aes256_t4_cbc_encrypt: 1x loop, 2x-interleaved loop, partial-store
! path for unaligned output, and an ASI_BLK_INIT bulk path
! (.L256_xts_enblk).
!----------------------------------------------------------------------
2865 .globl aes256_t4_xts_encrypt
2866 .align 32
2867 aes256_t4_xts_encrypt:
2868 save %sp, -112-16, %sp ! extra 16 bytes: tweak / stealing buffer
2869 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2870
! initial tweak = AES-encrypt(ivec) under key2, into [%fp-16]
2871 mov %i5, %o0
2872 add %fp, 0-16, %o1
2873 call aes_t4_encrypt
2874 mov %i4, %o2 ! delay slot
2875
2876 add %fp, 0-16, %l7
2877 ldxa [%l7]0x88, %g2 ! ASI_PL: little-endian load
2878 add %fp, 0-8, %l7
2879 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
2880
2881 sethi %hi(0x76543210), %l7
2882 or %l7, %lo(0x76543210), %l7
2883 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
2884
2885 prefetch [%i0], 20
2886 prefetch [%i0 + 63], 20
2887 call _aes256_load_enckey
2888 and %i2, 15, %i5 ! delay slot: %i5 = tail bytes for stealing
2889 and %i2, -16, %i2
2890
2891 sub %i0, %i1, %l5 ! %i0!=%i1
2892 and %i0, 7, %l0
2893 andn %i0, 7, %i0
2894 sll %l0, 3, %l0
2895 mov 64, %l1
2896 mov 0xff, %l3
2897 sub %l1, %l0, %l1
2898 and %i1, 7, %l2
2899 cmp %i2, 255
2900 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2901 movleu %icc, 0, %l5 ! %i2<256 ||
2902 brnz,pn %l5, .L256_xts_enblk ! %i0==%i1)
2903 srl %l3, %l2, %l3
2904
2905 andcc %i2, 16, %g0 ! is number of blocks even?
2906 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2907 bz %icc, .L256_xts_enloop2x
2908 srlx %i2, 4, %i2 ! delay slot: bytes -> blocks
! odd block count: one block here, then fall into the 2x loop
2909 .L256_xts_enloop:
2910 ldx [%i0 + 0], %o0
2911 brz,pt %l0, 4f ! aligned input: skip shifting
2912 ldx [%i0 + 8], %o1
2913
2914 ldx [%i0 + 16], %o2
2915 sllx %o0, %l0, %o0
2916 srlx %o1, %l1, %g1
2917 sllx %o1, %l0, %o1
2918 or %g1, %o0, %o0
2919 srlx %o2, %l1, %o2
2920 or %o2, %o1, %o1
2921 4:
2922 .word 0x99b02302 !movxtod %g2,%f12
2923 .word 0x9db02303 !movxtod %g3,%f14
2924 .word 0x99b3098c !bshuffle %f12,%f12,%f12 ! byte-swap tweak
2925 .word 0x9db3898e !bshuffle %f14,%f14,%f14
2926
2927 xor %g4, %o0, %o0 ! ^= rk[0]
2928 xor %g5, %o1, %o1
2929 .word 0x81b02308 !movxtod %o0,%f0
2930 .word 0x85b02309 !movxtod %o1,%f2
2931
2932 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
2933 .word 0x85b38d82 !fxor %f14,%f2,%f2
2934
2935 prefetch [%i1 + 63], 22
2936 prefetch [%i0 + 16+63], 20
2937 call _aes256_encrypt_1x
2938 add %i0, 16, %i0 ! delay slot
2939
2940 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
2941 .word 0x85b38d82 !fxor %f14,%f2,%f2
2942
! tweak *= x in GF(2^128): shift left 1, conditional 0x87 feedback
2943 srax %g3, 63, %l7 ! next tweak value
2944 addcc %g2, %g2, %g2
2945 and %l7, 0x87, %l7
2946 .word 0x87b0c223 !addxc %g3,%g3,%g3
2947 xor %l7, %g2, %g2
2948
2949 brnz,pn %l2, 2f ! unaligned-output store path
2950 sub %i2, 1, %i2 ! delay slot
2951
2952 std %f0, [%i1 + 0]
2953 std %f2, [%i1 + 8]
2954 brnz,pt %i2, .L256_xts_enloop2x
2955 add %i1, 16, %i1 ! delay slot
2956
2957 brnz,pn %i5, .L256_xts_ensteal ! partial final block?
2958 nop
2959
2960 ret
2961 restore
2962
2963 .align 16
2964 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2965 ! and ~3x deterioration
2966 ! in inp==out case
2967 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2968 .word 0x8db00902 !faligndata %f0,%f2,%f6
2969 .word 0x91b08902 !faligndata %f2,%f2,%f8
2970 stda %f4, [%i1 + %l3]0xc0 ! partial store
2971 std %f6, [%i1 + 8]
2972 add %i1, 16, %i1
2973 orn %g0, %l3, %l3 ! invert mask for trailing partial store
2974 stda %f8, [%i1 + %l3]0xc0 ! partial store
2975
2976 brnz,pt %i2, .L256_xts_enloop2x+4
2977 orn %g0, %l3, %l3 ! delay slot: restore mask
2978
2979 brnz,pn %i5, .L256_xts_ensteal
2980 nop
2981
2982 ret
2983 restore
2984
2985 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! 2x-interleaved loop: two blocks, two consecutive tweaks per pass
2986 .align 32
2987 .L256_xts_enloop2x:
2988 ldx [%i0 + 0], %o0
2989 ldx [%i0 + 8], %o1
2990 ldx [%i0 + 16], %o2
2991 brz,pt %l0, 4f ! aligned input: skip shifting
2992 ldx [%i0 + 24], %o3
2993
2994 ldx [%i0 + 32], %o4
2995 sllx %o0, %l0, %o0
2996 srlx %o1, %l1, %g1
2997 or %g1, %o0, %o0
2998 sllx %o1, %l0, %o1
2999 srlx %o2, %l1, %g1
3000 or %g1, %o1, %o1
3001 sllx %o2, %l0, %o2
3002 srlx %o3, %l1, %g1
3003 or %g1, %o2, %o2
3004 sllx %o3, %l0, %o3
3005 srlx %o4, %l1, %o4
3006 or %o4, %o3, %o3
3007 4:
3008 .word 0x99b02302 !movxtod %g2,%f12 ! tweak for block A
3009 .word 0x9db02303 !movxtod %g3,%f14
3010 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3011 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3012
3013 srax %g3, 63, %l7 ! next tweak value
3014 addcc %g2, %g2, %g2
3015 and %l7, 0x87, %l7
3016 .word 0x87b0c223 !addxc %g3,%g3,%g3
3017 xor %l7, %g2, %g2
3018
3019 .word 0x91b02302 !movxtod %g2,%f8 ! tweak for block B
3020 .word 0x95b02303 !movxtod %g3,%f10
3021 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3022 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3023
3024 xor %g4, %o0, %o0 ! ^= rk[0]
3025 xor %g5, %o1, %o1
3026 xor %g4, %o2, %o2 ! ^= rk[0]
3027 xor %g5, %o3, %o3
3028 .word 0x81b02308 !movxtod %o0,%f0
3029 .word 0x85b02309 !movxtod %o1,%f2
3030 .word 0x89b0230a !movxtod %o2,%f4
3031 .word 0x8db0230b !movxtod %o3,%f6
3032
3033 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3034 .word 0x85b38d82 !fxor %f14,%f2,%f2
3035 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3036 .word 0x8db28d86 !fxor %f10,%f6,%f6
3037
3038 prefetch [%i1 + 63], 22
3039 prefetch [%i0 + 32+63], 20
3040 call _aes256_encrypt_2x
3041 add %i0, 32, %i0 ! delay slot
3042
! re-materialize tweak B (the round call clobbered %f8/%f10),
! advance the tweak once more, and finish the XTS xors
3043 .word 0x91b02302 !movxtod %g2,%f8
3044 .word 0x95b02303 !movxtod %g3,%f10
3045
3046 srax %g3, 63, %l7 ! next tweak value
3047 addcc %g2, %g2, %g2
3048 and %l7, 0x87, %l7
3049 .word 0x87b0c223 !addxc %g3,%g3,%g3
3050 xor %l7, %g2, %g2
3051
3052 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3053 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3054
3055 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3056 .word 0x85b38d82 !fxor %f14,%f2,%f2
3057 .word 0x89b20d84 !fxor %f8,%f4,%f4
3058 .word 0x8db28d86 !fxor %f10,%f6,%f6
3059
3060 brnz,pn %l2, 2f ! unaligned-output store path
3061 sub %i2, 2, %i2 ! delay slot
3062
3063 std %f0, [%i1 + 0]
3064 std %f2, [%i1 + 8]
3065 std %f4, [%i1 + 16]
3066 std %f6, [%i1 + 24]
3067 brnz,pt %i2, .L256_xts_enloop2x
3068 add %i1, 32, %i1 ! delay slot
3069
! move last ciphertext block into %f0:%f2 for possible stealing
3070 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3071 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3072 brnz,pn %i5, .L256_xts_ensteal
3073 nop
3074
3075 ret
3076 restore
3077
3078 .align 16
3079 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3080 ! and ~3x deterioration
3081 ! in inp==out case
3082 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3083 .word 0x95b00902 !faligndata %f0,%f2,%f10
3084 .word 0x99b08904 !faligndata %f2,%f4,%f12
3085 .word 0x9db10906 !faligndata %f4,%f6,%f14
3086 .word 0x81b18906 !faligndata %f6,%f6,%f0
3087
3088 stda %f8, [%i1 + %l3]0xc0 ! partial store
3089 std %f10, [%i1 + 8]
3090 std %f12, [%i1 + 16]
3091 std %f14, [%i1 + 24]
3092 add %i1, 32, %i1
3093 orn %g0, %l3, %l3 ! invert mask for trailing partial store
3094 stda %f0, [%i1 + %l3]0xc0 ! partial store
3095
3096 brnz,pt %i2, .L256_xts_enloop2x+4
3097 orn %g0, %l3, %l3 ! delay slot: restore mask
3098
3099 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3100 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3101 brnz,pn %i5, .L256_xts_ensteal
3102 nop
3103
3104 ret
3105 restore
3106
3107 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: ASI_BLK_INIT stores; tail finished by the normal loops
3108 .align 32
3109 .L256_xts_enblk:
3110 add %i1, %i2, %l5
3111 and %l5, 63, %l5 ! tail
3112 sub %i2, %l5, %i2
3113 add %l5, 15, %l5 ! round up to 16n
3114 srlx %i2, 4, %i2
3115 srl %l5, 4, %l5
3116 sub %i2, 1, %i2
3117 add %l5, 1, %l5
3118
3119 .L256_xts_enblk2x:
3120 ldx [%i0 + 0], %o0
3121 ldx [%i0 + 8], %o1
3122 ldx [%i0 + 16], %o2
3123 brz,pt %l0, 5f ! aligned input: skip shifting
3124 ldx [%i0 + 24], %o3
3125
3126 ldx [%i0 + 32], %o4
3127 sllx %o0, %l0, %o0
3128 srlx %o1, %l1, %g1
3129 or %g1, %o0, %o0
3130 sllx %o1, %l0, %o1
3131 srlx %o2, %l1, %g1
3132 or %g1, %o1, %o1
3133 sllx %o2, %l0, %o2
3134 srlx %o3, %l1, %g1
3135 or %g1, %o2, %o2
3136 sllx %o3, %l0, %o3
3137 srlx %o4, %l1, %o4
3138 or %o4, %o3, %o3
3139 5:
3140 .word 0x99b02302 !movxtod %g2,%f12 ! tweak for block A
3141 .word 0x9db02303 !movxtod %g3,%f14
3142 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3143 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3144
3145 srax %g3, 63, %l7 ! next tweak value
3146 addcc %g2, %g2, %g2
3147 and %l7, 0x87, %l7
3148 .word 0x87b0c223 !addxc %g3,%g3,%g3
3149 xor %l7, %g2, %g2
3150
3151 .word 0x91b02302 !movxtod %g2,%f8 ! tweak for block B
3152 .word 0x95b02303 !movxtod %g3,%f10
3153 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3154 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3155
3156 xor %g4, %o0, %o0 ! ^= rk[0]
3157 xor %g5, %o1, %o1
3158 xor %g4, %o2, %o2 ! ^= rk[0]
3159 xor %g5, %o3, %o3
3160 .word 0x81b02308 !movxtod %o0,%f0
3161 .word 0x85b02309 !movxtod %o1,%f2
3162 .word 0x89b0230a !movxtod %o2,%f4
3163 .word 0x8db0230b !movxtod %o3,%f6
3164
3165 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3166 .word 0x85b38d82 !fxor %f14,%f2,%f2
3167 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3168 .word 0x8db28d86 !fxor %f10,%f6,%f6
3169
3170 prefetch [%i0 + 32+63], 20
3171 call _aes256_encrypt_2x
3172 add %i0, 32, %i0 ! delay slot
3173
3174 .word 0x91b02302 !movxtod %g2,%f8 ! re-materialize tweak B
3175 .word 0x95b02303 !movxtod %g3,%f10
3176
3177 srax %g3, 63, %l7 ! next tweak value
3178 addcc %g2, %g2, %g2
3179 and %l7, 0x87, %l7
3180 .word 0x87b0c223 !addxc %g3,%g3,%g3
3181 xor %l7, %g2, %g2
3182
3183 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3184 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3185
3186 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3187 .word 0x85b38d82 !fxor %f14,%f2,%f2
3188 .word 0x89b20d84 !fxor %f8,%f4,%f4
3189 .word 0x8db28d86 !fxor %f10,%f6,%f6
3190
3191 subcc %i2, 2, %i2
3192 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3193 add %i1, 8, %i1
3194 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3195 add %i1, 8, %i1
3196 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3197 add %i1, 8, %i1
3198 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3199 bgu,pt %icc, .L256_xts_enblk2x
3200 add %i1, 8, %i1 ! delay slot
3201
! tail: %l5 blocks remain; pick the 1x loop if odd, 2x loop otherwise
3202 add %l5, %i2, %i2
3203 andcc %i2, 1, %g0 ! is number of blocks even?
3204 membar #StoreLoad|#StoreStore
3205 bnz,pt %icc, .L256_xts_enloop
3206 srl %i2, 0, %i2 ! delay slot
3207 brnz,pn %i2, .L256_xts_enloop2x
3208 nop
3209
3210 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0 ! last block -> %f0:%f2
3211 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3212 brnz,pn %i5, .L256_xts_ensteal
3213 nop
3214
3215 ret
3216 restore
3217 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ciphertext stealing: swap the first %i5 bytes of the last full
! ciphertext block (saved at [%fp-16]) with the trailing partial
! plaintext, then loop back to encrypt the stitched block once more
3218 .align 32
3219 .L256_xts_ensteal:
3220 std %f0, [%fp + 0-16] ! copy of output
3221 std %f2, [%fp + 0-8]
3222
3223 srl %l0, 3, %l0 ! undo the *8 input-shift scaling
3224 add %fp, 0-16, %l7
3225 add %i0, %l0, %i0 ! original %i0+%i2&-15
3226 add %i1, %l2, %i1 ! original %i1+%i2&-15
3227 mov 0, %l0
3228 nop ! align
3229
3230 .L256_xts_enstealing:
3231 ldub [%i0 + %l0], %o0
3232 ldub [%l7 + %l0], %o1
3233 dec %i5
3234 stb %o0, [%l7 + %l0]
3235 stb %o1, [%i1 + %l0]
3236 brnz %i5, .L256_xts_enstealing
3237 inc %l0 ! delay slot
3238
! re-run the main loop once on the stitched block at [%fp-16]
3239 mov %l7, %i0
3240 sub %i1, 16, %i1
3241 mov 0, %l0
3242 sub %i1, %l2, %i1
3243 ba .L256_xts_enloop ! one more time
3244 mov 1, %i2 ! %i5 is 0
3245 ret
3246 restore
3247 .type aes256_t4_xts_encrypt,#function
3248 .size aes256_t4_xts_encrypt,.-aes256_t4_xts_encrypt
!----------------------------------------------------------------------
! aes256_t4_xts_decrypt — AES-256/XTS decryption using SPARC T4 AES
! opcodes (emitted as .word constants with the intended mnemonic in the
! trailing comment).
! Register roles, as inferred from the code below (NOTE(review): confirm
! against the generating perlasm script):
!   %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!   %i4 = tweak key (key2, used to encrypt the sector IV),
!   %i5 = IV/sector-number ptr on entry; reused below as the count of
!         residual bytes for ciphertext stealing.
!   %g3:%g2 = running 128-bit XTS tweak; %g4:%g5 = rk[0] (set by
!   _aes256_load_deckey, which also preloads round keys into %f16..%f62).
!----------------------------------------------------------------------
3249 .globl aes256_t4_xts_decrypt
3250 .align 32
3251 aes256_t4_xts_decrypt:
3252 save %sp, -112-16, %sp
3253 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3254
! Encrypt the sector IV with the tweak key (key2) into 16 bytes of stack
! scratch at %fp-16; the result becomes the initial XTS tweak.
3255 mov %i5, %o0
3256 add %fp, 0-16, %o1
3257 call aes_t4_encrypt
3258 mov %i4, %o2
3259
! Reload the tweak as two 64-bit words. ASI 0x88 is a little-endian ASI
! (presumably ASI_PRIMARY_LITTLE), so the load byte-swaps.
3260 add %fp, 0-16, %l7
3261 ldxa [%l7]0x88, %g2
3262 add %fp, 0-8, %l7
3263 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
3264
! Program the GSR with a byte-reversal mask so the bshuffle ops below can
! swap the tweak back to memory byte order in the FP registers.
3265 sethi %hi(0x76543210), %l7
3266 or %l7, %lo(0x76543210), %l7
3267 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
3268
3269 prefetch [%i0], 20
3270 prefetch [%i0 + 63], 20
3271 call _aes256_load_deckey
3272 and %i2, 15, %i5
! %i5 = residual (ciphertext-stealing) bytes. If the length is not a
! multiple of 16, hold back one full block for the stealing path.
3273 and %i2, -16, %i2
3274 mov 0, %l7
3275 movrnz %i5, 16, %l7
3276 sub %i2, %l7, %i2
3277
! Path selection: the block-store fast path (.L256_xts_deblk) is only
! taken for 8-byte-aligned output, length >= 256 and non-overlapping
! buffers; otherwise fall through to the generic loops.
3278 sub %i0, %i1, %l5 ! %i0!=%i1
3279 and %i0, 7, %l0
3280 andn %i0, 7, %i0
3281 sll %l0, 3, %l0
3282 mov 64, %l1
3283 mov 0xff, %l3
3284 sub %l1, %l0, %l1
3285 and %i1, 7, %l2
3286 cmp %i2, 255
3287 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3288 movleu %icc, 0, %l5 ! %i2<256 ||
3289 brnz,pn %l5, .L256_xts_deblk ! %i0==%i1)
3290 srl %l3, %l2, %l3
3291
3292 andcc %i2, 16, %g0 ! is number of blocks even?
3293 brz,pn %i2, .L256_xts_desteal
3294 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3295 bz %icc, .L256_xts_deloop2x
3296 srlx %i2, 4, %i2
! Single-block loop: used for the odd block (when the block count is
! odd) before falling into the 2x-unrolled loop.
3297 .L256_xts_deloop:
3298 ldx [%i0 + 0], %o0
3299 brz,pt %l0, 4f
3300 ldx [%i0 + 8], %o1
3301
! Input is not 8-byte aligned: merge three dwords with shifts (%l0/%l1
! hold the bit shift and its 64-bit complement).
3302 ldx [%i0 + 16], %o2
3303 sllx %o0, %l0, %o0
3304 srlx %o1, %l1, %g1
3305 sllx %o1, %l0, %o1
3306 or %g1, %o0, %o0
3307 srlx %o2, %l1, %o2
3308 or %o2, %o1, %o1
3309 4:
3310 .word 0x99b02302 !movxtod %g2,%f12
3311 .word 0x9db02303 !movxtod %g3,%f14
3312 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3313 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3314
3315 xor %g4, %o0, %o0 ! ^= rk[0]
3316 xor %g5, %o1, %o1
3317 .word 0x81b02308 !movxtod %o0,%f0
3318 .word 0x85b02309 !movxtod %o1,%f2
3319
3320 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3321 .word 0x85b38d82 !fxor %f14,%f2,%f2
3322
3323 prefetch [%i1 + 63], 22
3324 prefetch [%i0 + 16+63], 20
3325 call _aes256_decrypt_1x
3326 add %i0, 16, %i0
3327
3328 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3329 .word 0x85b38d82 !fxor %f14,%f2,%f2
3330
! Advance the tweak: multiply by x in GF(2^128); 0x87 is the XTS
! reduction polynomial, folded in when the top bit shifts out.
3331 srax %g3, 63, %l7 ! next tweak value
3332 addcc %g2, %g2, %g2
3333 and %l7, 0x87, %l7
3334 .word 0x87b0c223 !addxc %g3,%g3,%g3
3335 xor %l7, %g2, %g2
3336
3337 brnz,pn %l2, 2f
3338 sub %i2, 1, %i2
3339
3340 std %f0, [%i1 + 0]
3341 std %f2, [%i1 + 8]
3342 brnz,pt %i2, .L256_xts_deloop2x
3343 add %i1, 16, %i1
3344
3345 brnz,pn %i5, .L256_xts_desteal
3346 nop
3347
3348 ret
3349 restore
3350
3351 .align 16
3352 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3353 ! and ~3x deterioration
3354 ! in inp==out case
3355 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
3356 .word 0x8db00902 !faligndata %f0,%f2,%f6
3357 .word 0x91b08902 !faligndata %f2,%f2,%f8
3358 stda %f4, [%i1 + %l3]0xc0 ! partial store
3359 std %f6, [%i1 + 8]
3360 add %i1, 16, %i1
3361 orn %g0, %l3, %l3
3362 stda %f8, [%i1 + %l3]0xc0 ! partial store
3363
3364 brnz,pt %i2, .L256_xts_deloop2x+4
3365 orn %g0, %l3, %l3
3366
3367 brnz,pn %i5, .L256_xts_desteal
3368 nop
3369
3370 ret
3371 restore
3372
3373 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main 2x-unrolled loop: decrypts two blocks per iteration with two
! independent tweaks (%f12:%f14 and %f8:%f10).
3374 .align 32
3375 .L256_xts_deloop2x:
3376 ldx [%i0 + 0], %o0
3377 ldx [%i0 + 8], %o1
3378 ldx [%i0 + 16], %o2
3379 brz,pt %l0, 4f
3380 ldx [%i0 + 24], %o3
3381
3382 ldx [%i0 + 32], %o4
3383 sllx %o0, %l0, %o0
3384 srlx %o1, %l1, %g1
3385 or %g1, %o0, %o0
3386 sllx %o1, %l0, %o1
3387 srlx %o2, %l1, %g1
3388 or %g1, %o1, %o1
3389 sllx %o2, %l0, %o2
3390 srlx %o3, %l1, %g1
3391 or %g1, %o2, %o2
3392 sllx %o3, %l0, %o3
3393 srlx %o4, %l1, %o4
3394 or %o4, %o3, %o3
3395 4:
3396 .word 0x99b02302 !movxtod %g2,%f12
3397 .word 0x9db02303 !movxtod %g3,%f14
3398 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3399 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3400
3401 srax %g3, 63, %l7 ! next tweak value
3402 addcc %g2, %g2, %g2
3403 and %l7, 0x87, %l7
3404 .word 0x87b0c223 !addxc %g3,%g3,%g3
3405 xor %l7, %g2, %g2
3406
3407 .word 0x91b02302 !movxtod %g2,%f8
3408 .word 0x95b02303 !movxtod %g3,%f10
3409 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3410 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3411
3412 xor %g4, %o0, %o0 ! ^= rk[0]
3413 xor %g5, %o1, %o1
3414 xor %g4, %o2, %o2 ! ^= rk[0]
3415 xor %g5, %o3, %o3
3416 .word 0x81b02308 !movxtod %o0,%f0
3417 .word 0x85b02309 !movxtod %o1,%f2
3418 .word 0x89b0230a !movxtod %o2,%f4
3419 .word 0x8db0230b !movxtod %o3,%f6
3420
3421 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3422 .word 0x85b38d82 !fxor %f14,%f2,%f2
3423 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3424 .word 0x8db28d86 !fxor %f10,%f6,%f6
3425
3426 prefetch [%i1 + 63], 22
3427 prefetch [%i0 + 32+63], 20
3428 call _aes256_decrypt_2x
3429 add %i0, 32, %i0
3430
3431 .word 0x91b02302 !movxtod %g2,%f8
3432 .word 0x95b02303 !movxtod %g3,%f10
3433
3434 srax %g3, 63, %l7 ! next tweak value
3435 addcc %g2, %g2, %g2
3436 and %l7, 0x87, %l7
3437 .word 0x87b0c223 !addxc %g3,%g3,%g3
3438 xor %l7, %g2, %g2
3439
3440 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3441 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3442
3443 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3444 .word 0x85b38d82 !fxor %f14,%f2,%f2
3445 .word 0x89b20d84 !fxor %f8,%f4,%f4
3446 .word 0x8db28d86 !fxor %f10,%f6,%f6
3447
3448 brnz,pn %l2, 2f
3449 sub %i2, 2, %i2
3450
3451 std %f0, [%i1 + 0]
3452 std %f2, [%i1 + 8]
3453 std %f4, [%i1 + 16]
3454 std %f6, [%i1 + 24]
3455 brnz,pt %i2, .L256_xts_deloop2x
3456 add %i1, 32, %i1
3457
! Loop done: move the last block into %f0:%f2 in case the stealing path
! (or the caller's expectations) need it there.
3458 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3459 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3460 brnz,pn %i5, .L256_xts_desteal
3461 nop
3462
3463 ret
3464 restore
3465
3466 .align 16
3467 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3468 ! and ~3x deterioration
3469 ! in inp==out case
3470 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3471 .word 0x95b00902 !faligndata %f0,%f2,%f10
3472 .word 0x99b08904 !faligndata %f2,%f4,%f12
3473 .word 0x9db10906 !faligndata %f4,%f6,%f14
3474 .word 0x81b18906 !faligndata %f6,%f6,%f0
3475
3476 stda %f8, [%i1 + %l3]0xc0 ! partial store
3477 std %f10, [%i1 + 8]
3478 std %f12, [%i1 + 16]
3479 std %f14, [%i1 + 24]
3480 add %i1, 32, %i1
3481 orn %g0, %l3, %l3
3482 stda %f0, [%i1 + %l3]0xc0 ! partial store
3483
3484 brnz,pt %i2, .L256_xts_deloop2x+4
3485 orn %g0, %l3, %l3
3486
3487 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3488 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3489 brnz,pn %i5, .L256_xts_desteal
3490 nop
3491
3492 ret
3493 restore
3494
3495 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path for long, aligned, non-overlapping buffers: stores through
! ASI 0xe2 (block-init, T4-specific) and leaves a <=64-byte tail (plus
! one extra block) for the scalar loops above.
3496 .align 32
3497 .L256_xts_deblk:
3498 add %i1, %i2, %l5
3499 and %l5, 63, %l5 ! tail
3500 sub %i2, %l5, %i2
3501 add %l5, 15, %l5 ! round up to 16n
3502 srlx %i2, 4, %i2
3503 srl %l5, 4, %l5
3504 sub %i2, 1, %i2
3505 add %l5, 1, %l5
3506
3507 .L256_xts_deblk2x:
3508 ldx [%i0 + 0], %o0
3509 ldx [%i0 + 8], %o1
3510 ldx [%i0 + 16], %o2
3511 brz,pt %l0, 5f
3512 ldx [%i0 + 24], %o3
3513
3514 ldx [%i0 + 32], %o4
3515 sllx %o0, %l0, %o0
3516 srlx %o1, %l1, %g1
3517 or %g1, %o0, %o0
3518 sllx %o1, %l0, %o1
3519 srlx %o2, %l1, %g1
3520 or %g1, %o1, %o1
3521 sllx %o2, %l0, %o2
3522 srlx %o3, %l1, %g1
3523 or %g1, %o2, %o2
3524 sllx %o3, %l0, %o3
3525 srlx %o4, %l1, %o4
3526 or %o4, %o3, %o3
3527 5:
3528 .word 0x99b02302 !movxtod %g2,%f12
3529 .word 0x9db02303 !movxtod %g3,%f14
3530 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3531 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3532
3533 srax %g3, 63, %l7 ! next tweak value
3534 addcc %g2, %g2, %g2
3535 and %l7, 0x87, %l7
3536 .word 0x87b0c223 !addxc %g3,%g3,%g3
3537 xor %l7, %g2, %g2
3538
3539 .word 0x91b02302 !movxtod %g2,%f8
3540 .word 0x95b02303 !movxtod %g3,%f10
3541 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3542 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3543
3544 xor %g4, %o0, %o0 ! ^= rk[0]
3545 xor %g5, %o1, %o1
3546 xor %g4, %o2, %o2 ! ^= rk[0]
3547 xor %g5, %o3, %o3
3548 .word 0x81b02308 !movxtod %o0,%f0
3549 .word 0x85b02309 !movxtod %o1,%f2
3550 .word 0x89b0230a !movxtod %o2,%f4
3551 .word 0x8db0230b !movxtod %o3,%f6
3552
3553 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3554 .word 0x85b38d82 !fxor %f14,%f2,%f2
3555 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3556 .word 0x8db28d86 !fxor %f10,%f6,%f6
3557
3558 prefetch [%i0 + 32+63], 20
3559 call _aes256_decrypt_2x
3560 add %i0, 32, %i0
3561
3562 .word 0x91b02302 !movxtod %g2,%f8
3563 .word 0x95b02303 !movxtod %g3,%f10
3564
3565 srax %g3, 63, %l7 ! next tweak value
3566 addcc %g2, %g2, %g2
3567 and %l7, 0x87, %l7
3568 .word 0x87b0c223 !addxc %g3,%g3,%g3
3569 xor %l7, %g2, %g2
3570
3571 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3572 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3573
3574 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3575 .word 0x85b38d82 !fxor %f14,%f2,%f2
3576 .word 0x89b20d84 !fxor %f8,%f4,%f4
3577 .word 0x8db28d86 !fxor %f10,%f6,%f6
3578
3579 subcc %i2, 2, %i2
3580 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3581 add %i1, 8, %i1
3582 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3583 add %i1, 8, %i1
3584 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3585 add %i1, 8, %i1
3586 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3587 bgu,pt %icc, .L256_xts_deblk2x
3588 add %i1, 8, %i1
3589
! Bulk loop done: fold the held-back tail count into %i2 and finish via
! the scalar loops. The membar orders the block-init stores before any
! subsequent loads from the same region.
3590 add %l5, %i2, %i2
3591 andcc %i2, 1, %g0 ! is number of blocks even?
3592 membar #StoreLoad|#StoreStore
3593 bnz,pt %icc, .L256_xts_deloop
3594 srl %i2, 0, %i2
3595 brnz,pn %i2, .L256_xts_deloop2x
3596 nop
3597
3598 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3599 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3600 brnz,pn %i5, .L256_xts_desteal
3601 nop
3602
3603 ret
3604 restore
3605 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext stealing: decrypt the final full block with the *next*
! tweak value, stash the plaintext on the stack, then build the last
! partial block byte-by-byte and re-run the single-block loop once.
3606 .align 32
3607 .L256_xts_desteal:
3608 ldx [%i0 + 0], %o0
3609 brz,pt %l0, 8f
3610 ldx [%i0 + 8], %o1
3611
3612 ldx [%i0 + 16], %o2
3613 sllx %o0, %l0, %o0
3614 srlx %o1, %l1, %g1
3615 sllx %o1, %l0, %o1
3616 or %g1, %o0, %o0
3617 srlx %o2, %l1, %o2
3618 or %o2, %o1, %o1
3619 8:
3620 srax %g3, 63, %l7 ! next tweak value
3621 addcc %g2, %g2, %o2
3622 and %l7, 0x87, %l7
3623 .word 0x97b0c223 !addxc %g3,%g3,%o3
3624 xor %l7, %o2, %o2
3625
3626 .word 0x99b0230a !movxtod %o2,%f12
3627 .word 0x9db0230b !movxtod %o3,%f14
3628 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3629 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3630
3631 xor %g4, %o0, %o0 ! ^= rk[0]
3632 xor %g5, %o1, %o1
3633 .word 0x81b02308 !movxtod %o0,%f0
3634 .word 0x85b02309 !movxtod %o1,%f2
3635
3636 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3637 .word 0x85b38d82 !fxor %f14,%f2,%f2
3638
3639 call _aes256_decrypt_1x
3640 add %i0, 16, %i0
3641
3642 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3643 .word 0x85b38d82 !fxor %f14,%f2,%f2
3644
3645 std %f0, [%fp + 0-16]
3646 std %f2, [%fp + 0-8]
3647
3648 srl %l0, 3, %l0
3649 add %fp, 0-16, %l7
3650 add %i0, %l0, %i0 ! original %i0+%i2&-15
3651 add %i1, %l2, %i1 ! original %i1+%i2&-15
3652 mov 0, %l0
3653 add %i1, 16, %i1
3654 nop ! align
3655
! Byte loop: swap the stack-resident plaintext with the trailing partial
! ciphertext — emits the final partial plaintext and forms the block
! that is decrypted one more time below. %i5 counts down to 0.
3656 .L256_xts_destealing:
3657 ldub [%i0 + %l0], %o0
3658 ldub [%l7 + %l0], %o1
3659 dec %i5
3660 stb %o0, [%l7 + %l0]
3661 stb %o1, [%i1 + %l0]
3662 brnz %i5, .L256_xts_destealing
3663 inc %l0
3664
! Redirect input to the stack buffer and run the 1x loop exactly once.
3665 mov %l7, %i0
3666 sub %i1, 16, %i1
3667 mov 0, %l0
3668 sub %i1, %l2, %i1
3669 ba .L256_xts_deloop ! one more time
3670 mov 1, %i2 ! %i5 is 0
3671 ret
3672 restore
3673 .type aes256_t4_xts_decrypt,#function
3674 .size aes256_t4_xts_decrypt,.-aes256_t4_xts_decrypt
!----------------------------------------------------------------------
! aes192_t4_ctr32_encrypt — AES-192 in CTR mode with a 32-bit counter,
! using SPARC T4 AES opcodes.
! Register roles, inferred from the code below (NOTE(review): confirm
! against the generating perlasm script):
!   %i0 = input ptr, %i1 = output ptr,
!   %i2 = number of 16-byte blocks on entry (converted to bytes below),
!   %i4 = 16-byte big-endian counter block.
! %l4..%l7 hold the counter words; only %l7 (the low 32-bit word) is
! incremented, with an explicit clruw after each increment. The top
! 96 bits of the counter are pre-XORed with rk[0] into %g4/%g5, so the
! first AES round for each block only needs the fresh low word.
!----------------------------------------------------------------------
3675 .globl aes192_t4_ctr32_encrypt
3676 .align 32
3677 aes192_t4_ctr32_encrypt:
3678 save %sp, -112, %sp
3679 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3680
3681 prefetch [%i0], 20
3682 prefetch [%i0 + 63], 20
3683 call _aes192_load_enckey
3684 sllx %i2, 4, %i2 ! blocks -> bytes
3685
3686 ld [%i4 + 0], %l4 ! counter
3687 ld [%i4 + 4], %l5
3688 ld [%i4 + 8], %l6
3689 ld [%i4 + 12], %l7
3690
3691 sllx %l4, 32, %o5
3692 or %l5, %o5, %o5
3693 sllx %l6, 32, %g1
3694 xor %o5, %g4, %g4 ! ^= rk[0]
3695 xor %g1, %g5, %g5
3696 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
3697
! Path selection: the bulk block-store path is only taken for 8-byte-
! aligned output, length >= 256 and non-overlapping buffers.
3698 sub %i0, %i1, %l5 ! %i0!=%i1
3699 and %i0, 7, %l0
3700 andn %i0, 7, %i0
3701 sll %l0, 3, %l0
3702 mov 64, %l1
3703 mov 0xff, %l3
3704 sub %l1, %l0, %l1
3705 and %i1, 7, %l2
3706 cmp %i2, 255
3707 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3708 movleu %icc, 0, %l5 ! %i2<256 ||
3709 brnz,pn %l5, .L192_ctr32_blk ! %i0==%i1)
3710 srl %l3, %l2, %l3
3711
3712 andcc %i2, 16, %g0 ! is number of blocks even?
3713 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3714 bz %icc, .L192_ctr32_loop2x
3715 srlx %i2, 4, %i2
! Single-block loop, used for the odd block.
3716 .L192_ctr32_loop:
3717 ldx [%i0 + 0], %o0
3718 brz,pt %l0, 4f
3719 ldx [%i0 + 8], %o1
3720
! Unaligned input: merge three dwords with shifts.
3721 ldx [%i0 + 16], %o2
3722 sllx %o0, %l0, %o0
3723 srlx %o1, %l1, %g1
3724 sllx %o1, %l0, %o1
3725 or %g1, %o0, %o0
3726 srlx %o2, %l1, %o2
3727 or %o2, %o1, %o1
3728 4:
3729 xor %g5, %l7, %g1 ! ^= rk[0]
3730 add %l7, 1, %l7
3731 .word 0x85b02301 !movxtod %g1,%f2
3732 srl %l7, 0, %l7 ! clruw
3733 prefetch [%i1 + 63], 22
3734 prefetch [%i0 + 16+63], 20
! First round is done here inline (the counter low word just changed);
! the helper is entered 8 bytes in, past its own first round pair.
3735 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
3736 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3737 call _aes192_encrypt_1x+8
3738 add %i0, 16, %i0
3739
! XOR the keystream block with the input.
3740 .word 0x95b02308 !movxtod %o0,%f10
3741 .word 0x99b02309 !movxtod %o1,%f12
3742 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
3743 .word 0x85b30d82 !fxor %f12,%f2,%f2
3744
3745 brnz,pn %l2, 2f
3746 sub %i2, 1, %i2
3747
3748 std %f0, [%i1 + 0]
3749 std %f2, [%i1 + 8]
3750 brnz,pt %i2, .L192_ctr32_loop2x
3751 add %i1, 16, %i1
3752
3753 ret
3754 restore
3755
3756 .align 16
3757 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3758 ! and ~3x deterioration
3759 ! in inp==out case
3760 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
3761 .word 0x8db00902 !faligndata %f0,%f2,%f6
3762 .word 0x91b08902 !faligndata %f2,%f2,%f8
3763 stda %f4, [%i1 + %l3]0xc0 ! partial store
3764 std %f6, [%i1 + 8]
3765 add %i1, 16, %i1
3766 orn %g0, %l3, %l3
3767 stda %f8, [%i1 + %l3]0xc0 ! partial store
3768
3769 brnz,pt %i2, .L192_ctr32_loop2x+4
3770 orn %g0, %l3, %l3
3771
3772 ret
3773 restore
3774
3775 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main 2x-unrolled loop: two counter values per iteration.
3776 .align 32
3777 .L192_ctr32_loop2x:
3778 ldx [%i0 + 0], %o0
3779 ldx [%i0 + 8], %o1
3780 ldx [%i0 + 16], %o2
3781 brz,pt %l0, 4f
3782 ldx [%i0 + 24], %o3
3783
3784 ldx [%i0 + 32], %o4
3785 sllx %o0, %l0, %o0
3786 srlx %o1, %l1, %g1
3787 or %g1, %o0, %o0
3788 sllx %o1, %l0, %o1
3789 srlx %o2, %l1, %g1
3790 or %g1, %o1, %o1
3791 sllx %o2, %l0, %o2
3792 srlx %o3, %l1, %g1
3793 or %g1, %o2, %o2
3794 sllx %o3, %l0, %o3
3795 srlx %o4, %l1, %o4
3796 or %o4, %o3, %o3
3797 4:
3798 xor %g5, %l7, %g1 ! ^= rk[0]
3799 add %l7, 1, %l7
3800 .word 0x85b02301 !movxtod %g1,%f2
3801 srl %l7, 0, %l7 ! clruw
3802 xor %g5, %l7, %g1
3803 add %l7, 1, %l7
3804 .word 0x8db02301 !movxtod %g1,%f6
3805 srl %l7, 0, %l7 ! clruw
3806 prefetch [%i1 + 63], 22
3807 prefetch [%i0 + 32+63], 20
! First round for both blocks inline; helper entered 16 bytes in.
3808 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
3809 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3810 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
3811 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
3812 call _aes192_encrypt_2x+16
3813 add %i0, 32, %i0
3814
3815 .word 0x91b02308 !movxtod %o0,%f8
3816 .word 0x95b02309 !movxtod %o1,%f10
3817 .word 0x99b0230a !movxtod %o2,%f12
3818 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
3819 .word 0x91b0230b !movxtod %o3,%f8
3820 .word 0x85b28d82 !fxor %f10,%f2,%f2
3821 .word 0x89b30d84 !fxor %f12,%f4,%f4
3822 .word 0x8db20d86 !fxor %f8,%f6,%f6
3823
3824 brnz,pn %l2, 2f
3825 sub %i2, 2, %i2
3826
3827 std %f0, [%i1 + 0]
3828 std %f2, [%i1 + 8]
3829 std %f4, [%i1 + 16]
3830 std %f6, [%i1 + 24]
3831 brnz,pt %i2, .L192_ctr32_loop2x
3832 add %i1, 32, %i1
3833
3834 ret
3835 restore
3836
3837 .align 16
3838 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3839 ! and ~3x deterioration
3840 ! in inp==out case
3841 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3842 .word 0x81b00902 !faligndata %f0,%f2,%f0
3843 .word 0x85b08904 !faligndata %f2,%f4,%f2
3844 .word 0x89b10906 !faligndata %f4,%f6,%f4
3845 .word 0x8db18906 !faligndata %f6,%f6,%f6
3846
3847 stda %f8, [%i1 + %l3]0xc0 ! partial store
3848 std %f0, [%i1 + 8]
3849 std %f2, [%i1 + 16]
3850 std %f4, [%i1 + 24]
3851 add %i1, 32, %i1
3852 orn %g0, %l3, %l3
3853 stda %f6, [%i1 + %l3]0xc0 ! partial store
3854
3855 brnz,pt %i2, .L192_ctr32_loop2x+4
3856 orn %g0, %l3, %l3
3857
3858 ret
3859 restore
3860
3861 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: block-init stores (ASI 0xe2); leaves a <=64-byte tail plus
! one extra block to be finished by the scalar loops.
3862 .align 32
3863 .L192_ctr32_blk:
3864 add %i1, %i2, %l5
3865 and %l5, 63, %l5 ! tail
3866 sub %i2, %l5, %i2
3867 add %l5, 15, %l5 ! round up to 16n
3868 srlx %i2, 4, %i2
3869 srl %l5, 4, %l5
3870 sub %i2, 1, %i2
3871 add %l5, 1, %l5
3872
3873 .L192_ctr32_blk_loop2x:
3874 ldx [%i0 + 0], %o0
3875 ldx [%i0 + 8], %o1
3876 ldx [%i0 + 16], %o2
3877 brz,pt %l0, 5f
3878 ldx [%i0 + 24], %o3
3879
3880 ldx [%i0 + 32], %o4
3881 sllx %o0, %l0, %o0
3882 srlx %o1, %l1, %g1
3883 or %g1, %o0, %o0
3884 sllx %o1, %l0, %o1
3885 srlx %o2, %l1, %g1
3886 or %g1, %o1, %o1
3887 sllx %o2, %l0, %o2
3888 srlx %o3, %l1, %g1
3889 or %g1, %o2, %o2
3890 sllx %o3, %l0, %o3
3891 srlx %o4, %l1, %o4
3892 or %o4, %o3, %o3
3893 5:
3894 xor %g5, %l7, %g1 ! ^= rk[0]
3895 add %l7, 1, %l7
3896 .word 0x85b02301 !movxtod %g1,%f2
3897 srl %l7, 0, %l7 ! clruw
3898 xor %g5, %l7, %g1
3899 add %l7, 1, %l7
3900 .word 0x8db02301 !movxtod %g1,%f6
3901 srl %l7, 0, %l7 ! clruw
3902 prefetch [%i0 + 32+63], 20
3903 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
3904 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3905 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
3906 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
3907 call _aes192_encrypt_2x+16
3908 add %i0, 32, %i0
3909 subcc %i2, 2, %i2
3910
3911 .word 0x91b02308 !movxtod %o0,%f8
3912 .word 0x95b02309 !movxtod %o1,%f10
3913 .word 0x99b0230a !movxtod %o2,%f12
3914 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
3915 .word 0x91b0230b !movxtod %o3,%f8
3916 .word 0x85b28d82 !fxor %f10,%f2,%f2
3917 .word 0x89b30d84 !fxor %f12,%f4,%f4
3918 .word 0x8db20d86 !fxor %f8,%f6,%f6
3919
3920 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3921 add %i1, 8, %i1
3922 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3923 add %i1, 8, %i1
3924 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3925 add %i1, 8, %i1
3926 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3927 bgu,pt %icc, .L192_ctr32_blk_loop2x
3928 add %i1, 8, %i1
3929
! Fold the held-back tail count into %i2 and finish via scalar loops;
! the membar orders the block-init stores before subsequent accesses.
3930 add %l5, %i2, %i2
3931 andcc %i2, 1, %g0 ! is number of blocks even?
3932 membar #StoreLoad|#StoreStore
3933 bnz,pt %icc, .L192_ctr32_loop
3934 srl %i2, 0, %i2
3935 brnz,pn %i2, .L192_ctr32_loop2x
3936 nop
3937
3938 ret
3939 restore
3940 .type aes192_t4_ctr32_encrypt,#function
3941 .size aes192_t4_ctr32_encrypt,.-aes192_t4_ctr32_encrypt
!----------------------------------------------------------------------
! aes192_t4_cbc_decrypt — AES-192/CBC decryption using SPARC T4 AES
! opcodes.
! Register roles, inferred from the code below (NOTE(review): confirm
! against the generating perlasm script):
!   %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!   %i4 = 16-byte ivec (read on entry, updated with the last ciphertext
!         block on every exit path).
! The running IV lives in %f12:%f14 (as %f12..%f15 words); rk[0] is in
! %g4/%g5 and round keys are preloaded by _aes192_load_deckey.
!----------------------------------------------------------------------
3942 .globl aes192_t4_cbc_decrypt
3943 .align 32
3944 aes192_t4_cbc_decrypt:
3945 save %sp, -112, %sp
! Zero length: return immediately without touching ivec.
3946 cmp %i2, 0
3947 be,pn %icc, .L192_cbc_dec_abort
3948 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3949 sub %i0, %i1, %l5 ! %i0!=%i1
3950 ld [%i4 + 0], %f12 ! load ivec
3951 ld [%i4 + 4], %f13
3952 ld [%i4 + 8], %f14
3953 ld [%i4 + 12], %f15
3954 prefetch [%i0], 20
3955 prefetch [%i0 + 63], 20
3956 call _aes192_load_deckey
3957 and %i0, 7, %l0
! Path selection: bulk block-store path only for 8-byte-aligned output,
! length >= 256 and non-overlapping buffers.
3958 andn %i0, 7, %i0
3959 sll %l0, 3, %l0
3960 mov 64, %l1
3961 mov 0xff, %l3
3962 sub %l1, %l0, %l1
3963 and %i1, 7, %l2
3964 cmp %i2, 255
3965 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3966 movleu %icc, 0, %l5 ! %i2<256 ||
3967 brnz,pn %l5, .L192cbc_dec_blk ! %i0==%i1)
3968 srl %l3, %l2, %l3
3969
3970 andcc %i2, 16, %g0 ! is number of blocks even?
3971 srlx %i2, 4, %i2
3972 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3973 bz %icc, .L192_cbc_dec_loop2x
3974 prefetch [%i1], 22
! Single-block loop, used for the odd block.
3975 .L192_cbc_dec_loop:
3976 ldx [%i0 + 0], %o0
3977 brz,pt %l0, 4f
3978 ldx [%i0 + 8], %o1
3979
! Unaligned input: merge three dwords with shifts.
3980 ldx [%i0 + 16], %o2
3981 sllx %o0, %l0, %o0
3982 srlx %o1, %l1, %g1
3983 sllx %o1, %l0, %o1
3984 or %g1, %o0, %o0
3985 srlx %o2, %l1, %o2
3986 or %o2, %o1, %o1
3987 4:
3988 xor %g4, %o0, %o2 ! ^= rk[0]
3989 xor %g5, %o1, %o3
3990 .word 0x81b0230a !movxtod %o2,%f0
3991 .word 0x85b0230b !movxtod %o3,%f2
3992
3993 prefetch [%i1 + 63], 22
3994 prefetch [%i0 + 16+63], 20
3995 call _aes192_decrypt_1x
3996 add %i0, 16, %i0
3997
! XOR with previous ciphertext (ivec), then the current ciphertext
! (still in %o0/%o1) becomes the next ivec.
3998 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
3999 .word 0x85b38d82 !fxor %f14,%f2,%f2
4000 .word 0x99b02308 !movxtod %o0,%f12
4001 .word 0x9db02309 !movxtod %o1,%f14
4002
4003 brnz,pn %l2, 2f
4004 sub %i2, 1, %i2
4005
4006 std %f0, [%i1 + 0]
4007 std %f2, [%i1 + 8]
4008 brnz,pt %i2, .L192_cbc_dec_loop2x
4009 add %i1, 16, %i1
! Done: write the final ivec back for the caller.
4010 st %f12, [%i4 + 0]
4011 st %f13, [%i4 + 4]
4012 st %f14, [%i4 + 8]
4013 st %f15, [%i4 + 12]
4014 .L192_cbc_dec_abort:
4015 ret
4016 restore
4017
4018 .align 16
4019 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4020 ! and ~3x deterioration
4021 ! in inp==out case
4022 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
4023 .word 0x8db00902 !faligndata %f0,%f2,%f6
4024 .word 0x91b08902 !faligndata %f2,%f2,%f8
4025
4026 stda %f4, [%i1 + %l3]0xc0 ! partial store
4027 std %f6, [%i1 + 8]
4028 add %i1, 16, %i1
4029 orn %g0, %l3, %l3
4030 stda %f8, [%i1 + %l3]0xc0 ! partial store
4031
4032 brnz,pt %i2, .L192_cbc_dec_loop2x+4
4033 orn %g0, %l3, %l3
4034 st %f12, [%i4 + 0]
4035 st %f13, [%i4 + 4]
4036 st %f14, [%i4 + 8]
4037 st %f15, [%i4 + 12]
4038 ret
4039 restore
4040
4041 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main 2x-unrolled loop: two blocks per iteration.
4042 .align 32
4043 .L192_cbc_dec_loop2x:
4044 ldx [%i0 + 0], %o0
4045 ldx [%i0 + 8], %o1
4046 ldx [%i0 + 16], %o2
4047 brz,pt %l0, 4f
4048 ldx [%i0 + 24], %o3
4049
4050 ldx [%i0 + 32], %o4
4051 sllx %o0, %l0, %o0
4052 srlx %o1, %l1, %g1
4053 or %g1, %o0, %o0
4054 sllx %o1, %l0, %o1
4055 srlx %o2, %l1, %g1
4056 or %g1, %o1, %o1
4057 sllx %o2, %l0, %o2
4058 srlx %o3, %l1, %g1
4059 or %g1, %o2, %o2
4060 sllx %o3, %l0, %o3
4061 srlx %o4, %l1, %o4
4062 or %o4, %o3, %o3
4063 4:
4064 xor %g4, %o0, %o4 ! ^= rk[0]
4065 xor %g5, %o1, %o5
4066 .word 0x81b0230c !movxtod %o4,%f0
4067 .word 0x85b0230d !movxtod %o5,%f2
4068 xor %g4, %o2, %o4
4069 xor %g5, %o3, %o5
4070 .word 0x89b0230c !movxtod %o4,%f4
4071 .word 0x8db0230d !movxtod %o5,%f6
4072
4073 prefetch [%i1 + 63], 22
4074 prefetch [%i0 + 32+63], 20
4075 call _aes192_decrypt_2x
4076 add %i0, 32, %i0
4077
! Chain: block0 ^= ivec, block1 ^= ciphertext0; ciphertext1 (%o2/%o3)
! becomes the next ivec.
4078 .word 0x91b02308 !movxtod %o0,%f8
4079 .word 0x95b02309 !movxtod %o1,%f10
4080 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4081 .word 0x85b38d82 !fxor %f14,%f2,%f2
4082 .word 0x99b0230a !movxtod %o2,%f12
4083 .word 0x9db0230b !movxtod %o3,%f14
4084 .word 0x89b20d84 !fxor %f8,%f4,%f4
4085 .word 0x8db28d86 !fxor %f10,%f6,%f6
4086
4087 brnz,pn %l2, 2f
4088 sub %i2, 2, %i2
4089
4090 std %f0, [%i1 + 0]
4091 std %f2, [%i1 + 8]
4092 std %f4, [%i1 + 16]
4093 std %f6, [%i1 + 24]
4094 brnz,pt %i2, .L192_cbc_dec_loop2x
4095 add %i1, 32, %i1
4096 st %f12, [%i4 + 0]
4097 st %f13, [%i4 + 4]
4098 st %f14, [%i4 + 8]
4099 st %f15, [%i4 + 12]
4100 ret
4101 restore
4102
4103 .align 16
4104 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4105 ! and ~3x deterioration
4106 ! in inp==out case
4107 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
4108 .word 0x81b00902 !faligndata %f0,%f2,%f0
4109 .word 0x85b08904 !faligndata %f2,%f4,%f2
4110 .word 0x89b10906 !faligndata %f4,%f6,%f4
4111 .word 0x8db18906 !faligndata %f6,%f6,%f6
4112 stda %f8, [%i1 + %l3]0xc0 ! partial store
4113 std %f0, [%i1 + 8]
4114 std %f2, [%i1 + 16]
4115 std %f4, [%i1 + 24]
4116 add %i1, 32, %i1
4117 orn %g0, %l3, %l3
4118 stda %f6, [%i1 + %l3]0xc0 ! partial store
4119
4120 brnz,pt %i2, .L192_cbc_dec_loop2x+4
4121 orn %g0, %l3, %l3
4122 st %f12, [%i4 + 0]
4123 st %f13, [%i4 + 4]
4124 st %f14, [%i4 + 8]
4125 st %f15, [%i4 + 12]
4126 ret
4127 restore
4128
4129 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: block-init stores (ASI 0xe2); leaves a <=64-byte tail plus
! one extra block for the scalar loops.
4130 .align 32
4131 .L192cbc_dec_blk:
4132 add %i1, %i2, %l5
4133 and %l5, 63, %l5 ! tail
4134 sub %i2, %l5, %i2
4135 add %l5, 15, %l5 ! round up to 16n
4136 srlx %i2, 4, %i2
4137 srl %l5, 4, %l5
4138 sub %i2, 1, %i2
4139 add %l5, 1, %l5
4140
4141 .L192_cbc_dec_blk_loop2x:
4142 ldx [%i0 + 0], %o0
4143 ldx [%i0 + 8], %o1
4144 ldx [%i0 + 16], %o2
4145 brz,pt %l0, 5f
4146 ldx [%i0 + 24], %o3
4147
4148 ldx [%i0 + 32], %o4
4149 sllx %o0, %l0, %o0
4150 srlx %o1, %l1, %g1
4151 or %g1, %o0, %o0
4152 sllx %o1, %l0, %o1
4153 srlx %o2, %l1, %g1
4154 or %g1, %o1, %o1
4155 sllx %o2, %l0, %o2
4156 srlx %o3, %l1, %g1
4157 or %g1, %o2, %o2
4158 sllx %o3, %l0, %o3
4159 srlx %o4, %l1, %o4
4160 or %o4, %o3, %o3
4161 5:
4162 xor %g4, %o0, %o4 ! ^= rk[0]
4163 xor %g5, %o1, %o5
4164 .word 0x81b0230c !movxtod %o4,%f0
4165 .word 0x85b0230d !movxtod %o5,%f2
4166 xor %g4, %o2, %o4
4167 xor %g5, %o3, %o5
4168 .word 0x89b0230c !movxtod %o4,%f4
4169 .word 0x8db0230d !movxtod %o5,%f6
4170
4171 prefetch [%i0 + 32+63], 20
4172 call _aes192_decrypt_2x
4173 add %i0, 32, %i0
4174 subcc %i2, 2, %i2
4175
4176 .word 0x91b02308 !movxtod %o0,%f8
4177 .word 0x95b02309 !movxtod %o1,%f10
4178 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4179 .word 0x85b38d82 !fxor %f14,%f2,%f2
4180 .word 0x99b0230a !movxtod %o2,%f12
4181 .word 0x9db0230b !movxtod %o3,%f14
4182 .word 0x89b20d84 !fxor %f8,%f4,%f4
4183 .word 0x8db28d86 !fxor %f10,%f6,%f6
4184
4185 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4186 add %i1, 8, %i1
4187 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4188 add %i1, 8, %i1
4189 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4190 add %i1, 8, %i1
4191 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4192 bgu,pt %icc, .L192_cbc_dec_blk_loop2x
4193 add %i1, 8, %i1
4194
! Finish the held-back tail via the scalar loops; the membar orders the
! block-init stores before subsequent accesses.
4195 add %l5, %i2, %i2
4196 andcc %i2, 1, %g0 ! is number of blocks even?
4197 membar #StoreLoad|#StoreStore
4198 bnz,pt %icc, .L192_cbc_dec_loop
4199 srl %i2, 0, %i2
4200 brnz,pn %i2, .L192_cbc_dec_loop2x
4201 nop
4202 st %f12, [%i4 + 0] ! write out ivec
4203 st %f13, [%i4 + 4]
4204 st %f14, [%i4 + 8]
4205 st %f15, [%i4 + 12]
4206 ret
4207 restore
4208 .type aes192_t4_cbc_decrypt,#function
4209 .size aes192_t4_cbc_decrypt,.-aes192_t4_cbc_decrypt
!----------------------------------------------------------------------
! aes256_t4_cbc_decrypt — AES-256/CBC decryption using SPARC T4 AES
! opcodes. Structurally identical to aes192_t4_cbc_decrypt above but
! calling the 256-bit key-load and 14-round decrypt helpers.
! Register roles, inferred from the code below (NOTE(review): confirm
! against the generating perlasm script):
!   %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!   %i4 = 16-byte ivec (read on entry, updated with the last ciphertext
!         block on every exit path).
! The running IV lives in %f12:%f14; rk[0] is in %g4/%g5.
!----------------------------------------------------------------------
4210 .globl aes256_t4_cbc_decrypt
4211 .align 32
4212 aes256_t4_cbc_decrypt:
4213 save %sp, -112, %sp
! Zero length: return immediately without touching ivec.
4214 cmp %i2, 0
4215 be,pn %icc, .L256_cbc_dec_abort
4216 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
4217 sub %i0, %i1, %l5 ! %i0!=%i1
4218 ld [%i4 + 0], %f12 ! load ivec
4219 ld [%i4 + 4], %f13
4220 ld [%i4 + 8], %f14
4221 ld [%i4 + 12], %f15
4222 prefetch [%i0], 20
4223 prefetch [%i0 + 63], 20
4224 call _aes256_load_deckey
4225 and %i0, 7, %l0
! Path selection: bulk block-store path only for 8-byte-aligned output,
! length >= 256 and non-overlapping buffers.
4226 andn %i0, 7, %i0
4227 sll %l0, 3, %l0
4228 mov 64, %l1
4229 mov 0xff, %l3
4230 sub %l1, %l0, %l1
4231 and %i1, 7, %l2
4232 cmp %i2, 255
4233 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
4234 movleu %icc, 0, %l5 ! %i2<256 ||
4235 brnz,pn %l5, .L256cbc_dec_blk ! %i0==%i1)
4236 srl %l3, %l2, %l3
4237
4238 andcc %i2, 16, %g0 ! is number of blocks even?
4239 srlx %i2, 4, %i2
4240 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
4241 bz %icc, .L256_cbc_dec_loop2x
4242 prefetch [%i1], 22
! Single-block loop, used for the odd block.
4243 .L256_cbc_dec_loop:
4244 ldx [%i0 + 0], %o0
4245 brz,pt %l0, 4f
4246 ldx [%i0 + 8], %o1
4247
! Unaligned input: merge three dwords with shifts.
4248 ldx [%i0 + 16], %o2
4249 sllx %o0, %l0, %o0
4250 srlx %o1, %l1, %g1
4251 sllx %o1, %l0, %o1
4252 or %g1, %o0, %o0
4253 srlx %o2, %l1, %o2
4254 or %o2, %o1, %o1
4255 4:
4256 xor %g4, %o0, %o2 ! ^= rk[0]
4257 xor %g5, %o1, %o3
4258 .word 0x81b0230a !movxtod %o2,%f0
4259 .word 0x85b0230b !movxtod %o3,%f2
4260
4261 prefetch [%i1 + 63], 22
4262 prefetch [%i0 + 16+63], 20
4263 call _aes256_decrypt_1x
4264 add %i0, 16, %i0
4265
! XOR with previous ciphertext (ivec); current ciphertext (%o0/%o1)
! becomes the next ivec.
4266 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4267 .word 0x85b38d82 !fxor %f14,%f2,%f2
4268 .word 0x99b02308 !movxtod %o0,%f12
4269 .word 0x9db02309 !movxtod %o1,%f14
4270
4271 brnz,pn %l2, 2f
4272 sub %i2, 1, %i2
4273
4274 std %f0, [%i1 + 0]
4275 std %f2, [%i1 + 8]
4276 brnz,pt %i2, .L256_cbc_dec_loop2x
4277 add %i1, 16, %i1
! Done: write the final ivec back for the caller.
4278 st %f12, [%i4 + 0]
4279 st %f13, [%i4 + 4]
4280 st %f14, [%i4 + 8]
4281 st %f15, [%i4 + 12]
4282 .L256_cbc_dec_abort:
4283 ret
4284 restore
4285
4286 .align 16
4287 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4288 ! and ~3x deterioration
4289 ! in inp==out case
4290 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
4291 .word 0x8db00902 !faligndata %f0,%f2,%f6
4292 .word 0x91b08902 !faligndata %f2,%f2,%f8
4293
4294 stda %f4, [%i1 + %l3]0xc0 ! partial store
4295 std %f6, [%i1 + 8]
4296 add %i1, 16, %i1
4297 orn %g0, %l3, %l3
4298 stda %f8, [%i1 + %l3]0xc0 ! partial store
4299
4300 brnz,pt %i2, .L256_cbc_dec_loop2x+4
4301 orn %g0, %l3, %l3
4302 st %f12, [%i4 + 0]
4303 st %f13, [%i4 + 4]
4304 st %f14, [%i4 + 8]
4305 st %f15, [%i4 + 12]
4306 ret
4307 restore
4308
4309 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Main 2x-unrolled loop: two blocks per iteration.
4310 .align 32
4311 .L256_cbc_dec_loop2x:
4312 ldx [%i0 + 0], %o0
4313 ldx [%i0 + 8], %o1
4314 ldx [%i0 + 16], %o2
4315 brz,pt %l0, 4f
4316 ldx [%i0 + 24], %o3
4317
4318 ldx [%i0 + 32], %o4
4319 sllx %o0, %l0, %o0
4320 srlx %o1, %l1, %g1
4321 or %g1, %o0, %o0
4322 sllx %o1, %l0, %o1
4323 srlx %o2, %l1, %g1
4324 or %g1, %o1, %o1
4325 sllx %o2, %l0, %o2
4326 srlx %o3, %l1, %g1
4327 or %g1, %o2, %o2
4328 sllx %o3, %l0, %o3
4329 srlx %o4, %l1, %o4
4330 or %o4, %o3, %o3
4331 4:
4332 xor %g4, %o0, %o4 ! ^= rk[0]
4333 xor %g5, %o1, %o5
4334 .word 0x81b0230c !movxtod %o4,%f0
4335 .word 0x85b0230d !movxtod %o5,%f2
4336 xor %g4, %o2, %o4
4337 xor %g5, %o3, %o5
4338 .word 0x89b0230c !movxtod %o4,%f4
4339 .word 0x8db0230d !movxtod %o5,%f6
4340
4341 prefetch [%i1 + 63], 22
4342 prefetch [%i0 + 32+63], 20
4343 call _aes256_decrypt_2x
4344 add %i0, 32, %i0
4345
! Chain: block0 ^= ivec, block1 ^= ciphertext0; ciphertext1 (%o2/%o3)
! becomes the next ivec.
4346 .word 0x91b02308 !movxtod %o0,%f8
4347 .word 0x95b02309 !movxtod %o1,%f10
4348 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4349 .word 0x85b38d82 !fxor %f14,%f2,%f2
4350 .word 0x99b0230a !movxtod %o2,%f12
4351 .word 0x9db0230b !movxtod %o3,%f14
4352 .word 0x89b20d84 !fxor %f8,%f4,%f4
4353 .word 0x8db28d86 !fxor %f10,%f6,%f6
4354
4355 brnz,pn %l2, 2f
4356 sub %i2, 2, %i2
4357
4358 std %f0, [%i1 + 0]
4359 std %f2, [%i1 + 8]
4360 std %f4, [%i1 + 16]
4361 std %f6, [%i1 + 24]
4362 brnz,pt %i2, .L256_cbc_dec_loop2x
4363 add %i1, 32, %i1
4364 st %f12, [%i4 + 0]
4365 st %f13, [%i4 + 4]
4366 st %f14, [%i4 + 8]
4367 st %f15, [%i4 + 12]
4368 ret
4369 restore
4370
4371 .align 16
4372 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4373 ! and ~3x deterioration
4374 ! in inp==out case
4375 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
4376 .word 0x81b00902 !faligndata %f0,%f2,%f0
4377 .word 0x85b08904 !faligndata %f2,%f4,%f2
4378 .word 0x89b10906 !faligndata %f4,%f6,%f4
4379 .word 0x8db18906 !faligndata %f6,%f6,%f6
4380 stda %f8, [%i1 + %l3]0xc0 ! partial store
4381 std %f0, [%i1 + 8]
4382 std %f2, [%i1 + 16]
4383 std %f4, [%i1 + 24]
4384 add %i1, 32, %i1
4385 orn %g0, %l3, %l3
4386 stda %f6, [%i1 + %l3]0xc0 ! partial store
4387
4388 brnz,pt %i2, .L256_cbc_dec_loop2x+4
4389 orn %g0, %l3, %l3
4390 st %f12, [%i4 + 0]
4391 st %f13, [%i4 + 4]
4392 st %f14, [%i4 + 8]
4393 st %f15, [%i4 + 12]
4394 ret
4395 restore
4396
4397 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: block-init stores (ASI 0xe2); leaves a <=64-byte tail plus
! one extra block for the scalar loops.
4398 .align 32
4399 .L256cbc_dec_blk:
4400 add %i1, %i2, %l5
4401 and %l5, 63, %l5 ! tail
4402 sub %i2, %l5, %i2
4403 add %l5, 15, %l5 ! round up to 16n
4404 srlx %i2, 4, %i2
4405 srl %l5, 4, %l5
4406 sub %i2, 1, %i2
4407 add %l5, 1, %l5
4408
4409 .L256_cbc_dec_blk_loop2x:
4410 ldx [%i0 + 0], %o0
4411 ldx [%i0 + 8], %o1
4412 ldx [%i0 + 16], %o2
4413 brz,pt %l0, 5f
4414 ldx [%i0 + 24], %o3
4415
4416 ldx [%i0 + 32], %o4
4417 sllx %o0, %l0, %o0
4418 srlx %o1, %l1, %g1
4419 or %g1, %o0, %o0
4420 sllx %o1, %l0, %o1
4421 srlx %o2, %l1, %g1
4422 or %g1, %o1, %o1
4423 sllx %o2, %l0, %o2
4424 srlx %o3, %l1, %g1
4425 or %g1, %o2, %o2
4426 sllx %o3, %l0, %o3
4427 srlx %o4, %l1, %o4
4428 or %o4, %o3, %o3
4429 5:
4430 xor %g4, %o0, %o4 ! ^= rk[0]
4431 xor %g5, %o1, %o5
4432 .word 0x81b0230c !movxtod %o4,%f0
4433 .word 0x85b0230d !movxtod %o5,%f2
4434 xor %g4, %o2, %o4
4435 xor %g5, %o3, %o5
4436 .word 0x89b0230c !movxtod %o4,%f4
4437 .word 0x8db0230d !movxtod %o5,%f6
4438
4439 prefetch [%i0 + 32+63], 20
4440 call _aes256_decrypt_2x
4441 add %i0, 32, %i0
4442 subcc %i2, 2, %i2
4443
4444 .word 0x91b02308 !movxtod %o0,%f8
4445 .word 0x95b02309 !movxtod %o1,%f10
4446 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4447 .word 0x85b38d82 !fxor %f14,%f2,%f2
4448 .word 0x99b0230a !movxtod %o2,%f12
4449 .word 0x9db0230b !movxtod %o3,%f14
4450 .word 0x89b20d84 !fxor %f8,%f4,%f4
4451 .word 0x8db28d86 !fxor %f10,%f6,%f6
4452
4453 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4454 add %i1, 8, %i1
4455 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4456 add %i1, 8, %i1
4457 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4458 add %i1, 8, %i1
4459 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4460 bgu,pt %icc, .L256_cbc_dec_blk_loop2x
4461 add %i1, 8, %i1
4462
! Finish the held-back tail via the scalar loops; the membar orders the
! block-init stores before subsequent accesses.
4463 add %l5, %i2, %i2
4464 andcc %i2, 1, %g0 ! is number of blocks even?
4465 membar #StoreLoad|#StoreStore
4466 bnz,pt %icc, .L256_cbc_dec_loop
4467 srl %i2, 0, %i2
4468 brnz,pn %i2, .L256_cbc_dec_loop2x
4469 nop
4470 st %f12, [%i4 + 0] ! write out ivec
4471 st %f13, [%i4 + 4]
4472 st %f14, [%i4 + 8]
4473 st %f15, [%i4 + 12]
4474 ret
4475 restore
4476 .type aes256_t4_cbc_decrypt,#function
4477 .size aes256_t4_cbc_decrypt,.-aes256_t4_cbc_decrypt
!----------------------------------------------------------------------
! _aes256_decrypt_1x — internal leaf helper: run the 14 AES-256 decrypt
! rounds on the single block held in %f0:%f2 (already XORed with rk[0]
! by the caller). Round keys live mostly in %f16..%f62; the last rounds'
! keys are reloaded from the key schedule at [%i3 + 208..232], and
! %f16..%f22 are restored from [%i3 + 16..40] on the way out so the
! register file is ready for the next call.
! Clobbers %f4; result in %f0:%f2. Entered via call, returns via retl.
! NOTE(review): %i3 as key-schedule pointer is inferred from the loads
! below — confirm against _aes256_load_deckey.
!----------------------------------------------------------------------
4478 .align 32
4479 _aes256_decrypt_1x:
4480 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4481 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4482 ldd [%i3 + 208], %f16
4483 ldd [%i3 + 216], %f18
4484 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
4485 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
4486 ldd [%i3 + 224], %f20
4487 ldd [%i3 + 232], %f22
! Middle rounds: keys stay resident in %f24..%f62. Rounds alternate
! between (%f0,%f2)->(%f4,%f2) and back, so %f4 is the only scratch.
4488 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
4489 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4490 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
4491 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
4492 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
4493 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4494 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
4495 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
4496 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
4497 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4498 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
4499 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
4500 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
4501 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4502 .word 0x80cd4444 !aes_dround01 %f52,%f4,%f2,%f0
4503 .word 0x84cdc464 !aes_dround23 %f54,%f4,%f2,%f2
4504 .word 0x88ce4440 !aes_dround01 %f56,%f0,%f2,%f4
4505 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4506 .word 0x80cf4444 !aes_dround01 %f60,%f4,%f2,%f0
4507 .word 0x84cfc464 !aes_dround23 %f62,%f4,%f2,%f2
! Rounds 13/14 use the keys reloaded into %f16..%f22 above; the _l forms
! are the final round (no MixColumns).
4508 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4509 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4510 ldd [%i3 + 16], %f16
4511 ldd [%i3 + 24], %f18
4512 .word 0x80cd04c4 !aes_dround01_l %f20,%f4,%f2,%f0
4513 .word 0x84cd84e4 !aes_dround23_l %f22,%f4,%f2,%f2
4514 ldd [%i3 + 32], %f20
4515 retl
4516 ldd [%i3 + 40], %f22
4517 .type _aes256_decrypt_1x,#function
4518 .size _aes256_decrypt_1x,.-_aes256_decrypt_1x
4519
! ----------------------------------------------------------------------
! _aes256_decrypt_2x: run all 14 AES-256 decryption rounds over TWO
! independent 128-bit blocks in parallel — block A in %f0:%f2 and
! block B in %f4:%f6 — with the instruction streams interleaved to
! hide the AES-round latency.
!
! In:    %f0:%f2, %f4:%f6 = input blocks; the visible caller (the CBC
!                  loop above) XORs rk[0] into the data before the
!                  movxtod moves, so round 0 is already applied
!        %i3     = key-schedule base; assumes round keys preloaded in
!                  %f16-%f62 by the caller (load not visible here —
!                  TODO confirm against the key loader)
! Out:   %f0:%f2, %f4:%f6 = decrypted blocks
!        %f16-%f22 restored from [%i3 + 16..40] for the next call
! Clobbers: %f8 (temp for block A), %f10 (temp for block B)
! Leaf routine: returns with retl; last ldd sits in the delay slot.
! ----------------------------------------------------------------------
4520 .align 32
4521 _aes256_decrypt_2x:
! round 1, both blocks (keys %f16/%f18); then refill %f16/%f18 with
! the round-13 keys from [%i3 + 208..216]
4522 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4523 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4524 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4525 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
4526 ldd [%i3 + 208], %f16
4527 ldd [%i3 + 216], %f18
! round 2, both blocks (keys %f20/%f22); refill %f20/%f22 with the
! final-round keys from [%i3 + 224..232]
4528 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
4529 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
4530 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
4531 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
4532 ldd [%i3 + 224], %f20
4533 ldd [%i3 + 232], %f22
! rounds 3-12, both blocks, from the keys resident in %f24-%f62
4534 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
4535 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4536 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
4537 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
4538 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
4539 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
4540 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
4541 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
4542 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
4543 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4544 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
4545 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
4546 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
4547 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
4548 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
4549 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
4550 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
4551 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4552 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
4553 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
4554 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
4555 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
4556 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
4557 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
4558 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
4559 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4560 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
4561 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
4562 .word 0x80cd4448 !aes_dround01 %f52,%f8,%f2,%f0
4563 .word 0x84cdc468 !aes_dround23 %f54,%f8,%f2,%f2
4564 .word 0x88cd4c4a !aes_dround01 %f52,%f10,%f6,%f4
4565 .word 0x8ccdcc6a !aes_dround23 %f54,%f10,%f6,%f6
4566 .word 0x90ce4440 !aes_dround01 %f56,%f0,%f2,%f8
4567 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4568 .word 0x94ce4c44 !aes_dround01 %f56,%f4,%f6,%f10
4569 .word 0x8ccecc64 !aes_dround23 %f58,%f4,%f6,%f6
4570 .word 0x80cf4448 !aes_dround01 %f60,%f8,%f2,%f0
4571 .word 0x84cfc468 !aes_dround23 %f62,%f8,%f2,%f2
4572 .word 0x88cf4c4a !aes_dround01 %f60,%f10,%f6,%f4
4573 .word 0x8ccfcc6a !aes_dround23 %f62,%f10,%f6,%f6
! round 13 with the refilled %f16/%f18; then restore %f16/%f18 from
! [%i3 + 16..24] for the next call
4574 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4575 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4576 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4577 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
4578 ldd [%i3 + 16], %f16
4579 ldd [%i3 + 24], %f18
! final round 14 for both blocks: the _l forms apply the last-round
! transform; %f20/%f22 restored, second ldd in the retl delay slot
4580 .word 0x80cd04c8 !aes_dround01_l %f20,%f8,%f2,%f0
4581 .word 0x84cd84e8 !aes_dround23_l %f22,%f8,%f2,%f2
4582 .word 0x88cd0cca !aes_dround01_l %f20,%f10,%f6,%f4
4583 .word 0x8ccd8cea !aes_dround23_l %f22,%f10,%f6,%f6
4584 ldd [%i3 + 32], %f20
4585 retl
4586 ldd [%i3 + 40], %f22
4587 .type _aes256_decrypt_2x,#function
4588 .size _aes256_decrypt_2x,.-_aes256_decrypt_2x
4589
! ----------------------------------------------------------------------
! _aes192_decrypt_1x: run all 12 AES-192 decryption rounds over the one
! 128-bit block in %f0:%f2. Unlike the 256-bit path, the full 12-round
! schedule fits in %f16-%f62, so no key reloads are performed and the
! key pointer is never touched here.
!
! In:    %f0:%f2 = input block (NOTE(review): callers presumably XOR
!                  rk[0] into the data first, as the CBC code above
!                  does for the 256-bit path — confirm)
!        round keys preloaded in %f16-%f62 (assumption: the loader is
!        not visible in this routine — TODO confirm)
! Out:   %f0:%f2 = decrypted block
! Clobbers: %f4 (round temporary)
! Leaf routine: the final aes_dround23_l executes in the retl delay slot.
! ----------------------------------------------------------------------
4590 .align 32
4591 _aes192_decrypt_1x:
! rounds 1-11: the block ping-pongs between %f0:%f2 and %f4:%f2
4592 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4593 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4594 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
4595 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
4596 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
4597 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4598 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
4599 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
4600 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
4601 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4602 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
4603 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
4604 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
4605 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4606 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
4607 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
4608 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
4609 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4610 .word 0x80cd4444 !aes_dround01 %f52,%f4,%f2,%f0
4611 .word 0x84cdc464 !aes_dround23 %f54,%f4,%f2,%f2
4612 .word 0x88ce4440 !aes_dround01 %f56,%f0,%f2,%f4
4613 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
! final round 12: _l forms apply the last-round transform; the
! aes_dround23_l fills the retl delay slot
4614 .word 0x80cf44c4 !aes_dround01_l %f60,%f4,%f2,%f0
4615 retl
4616 .word 0x84cfc4e4 !aes_dround23_l %f62,%f4,%f2,%f2
4617 .type _aes192_decrypt_1x,#function
4618 .size _aes192_decrypt_1x,.-_aes192_decrypt_1x
4619
! ----------------------------------------------------------------------
! _aes192_decrypt_2x: run all 12 AES-192 decryption rounds over TWO
! independent 128-bit blocks in parallel — block A in %f0:%f2 and
! block B in %f4:%f6 — interleaved to hide the AES-round latency.
! The full 12-round schedule fits in %f16-%f62, so no key reloads are
! performed and the key pointer is never touched here.
!
! In:    %f0:%f2, %f4:%f6 = input blocks (NOTE(review): callers
!                  presumably XOR rk[0] into the data first, as the
!                  256-bit CBC code above does — confirm)
!        round keys preloaded in %f16-%f62 (assumption: the loader is
!        not visible in this routine — TODO confirm)
! Out:   %f0:%f2, %f4:%f6 = decrypted blocks
! Clobbers: %f8 (temp for block A), %f10 (temp for block B)
! Leaf routine: the last aes_dround23_l executes in the retl delay slot.
! ----------------------------------------------------------------------
4620 .align 32
4621 _aes192_decrypt_2x:
! rounds 1-11, both blocks interleaved
4622 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4623 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4624 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4625 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
4626 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
4627 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
4628 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
4629 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
4630 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
4631 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4632 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
4633 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
4634 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
4635 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
4636 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
4637 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
4638 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
4639 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4640 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
4641 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
4642 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
4643 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
4644 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
4645 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
4646 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
4647 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4648 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
4649 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
4650 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
4651 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
4652 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
4653 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
4654 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
4655 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4656 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
4657 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
4658 .word 0x80cd4448 !aes_dround01 %f52,%f8,%f2,%f0
4659 .word 0x84cdc468 !aes_dround23 %f54,%f8,%f2,%f2
4660 .word 0x88cd4c4a !aes_dround01 %f52,%f10,%f6,%f4
4661 .word 0x8ccdcc6a !aes_dround23 %f54,%f10,%f6,%f6
4662 .word 0x90ce4440 !aes_dround01 %f56,%f0,%f2,%f8
4663 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4664 .word 0x94ce4c44 !aes_dround01 %f56,%f4,%f6,%f10
4665 .word 0x8ccecc64 !aes_dround23 %f58,%f4,%f6,%f6
! final round 12 for both blocks: _l forms apply the last-round
! transform; block B's aes_dround23_l fills the retl delay slot
4666 .word 0x80cf44c8 !aes_dround01_l %f60,%f8,%f2,%f0
4667 .word 0x84cfc4e8 !aes_dround23_l %f62,%f8,%f2,%f2
4668 .word 0x88cf4cca !aes_dround01_l %f60,%f10,%f6,%f4
4669 retl
4670 .word 0x8ccfccea !aes_dround23_l %f62,%f10,%f6,%f6
4671 .type _aes192_decrypt_2x,#function
4672 .size _aes192_decrypt_2x,.-_aes192_decrypt_2x
4673 .asciz "AES for SPARC T4, David S. Miller, Andy Polyakov"
4674 .align 4
4675