! aest4-sparcv9.S revision 1.1.2.2
1 .register %g2,#scratch
2 .register %g3,#scratch
3
4 .text
5
! void aes_t4_encrypt(const void *in, void *out, const void *key)
!   %o0 = in   : one 16-byte block, may be unaligned
!   %o1 = out  : may be unaligned (handled with alignaddrl + partial stores)
!   %o2 = expanded key schedule; 32-bit round count at byte offset 240
!         (presumably OpenSSL AES_KEY layout -- confirm against caller)
! Encrypts a single AES block with the SPARC T4 hardware AES opcodes.
! The T4 instructions are emitted as .word constants; the trailing
! comment on each .word is the decoded instruction.
! Clobbers: %g1, %g4, %g5, %o3-%o5, %f0-%f8, %f12-%f18, flags.
6 .globl aes_t4_encrypt
7 .align 32
8 aes_t4_encrypt:
9 andcc %o0, 7, %g1 ! is input aligned?
10 andn %o0, 7, %o0
11 
! %g4:%g5 = rk[0] (first 128-bit round key)
12 ldx [%o2 + 0], %g4
13 ldx [%o2 + 8], %g5
14 
15 ldx [%o0 + 0], %o4
16 bz,pt %icc, 1f
17 ldx [%o0 + 8], %o5
! misaligned input: load a third doubleword and shift/merge the three
! into an aligned 128-bit block in %o4:%o5
18 ldx [%o0 + 16], %o0
19 sll %g1, 3, %g1
20 sub %g0, %g1, %o3
21 sllx %o4, %g1, %o4
22 sllx %o5, %g1, %g1
23 srlx %o5, %o3, %o5
24 srlx %o0, %o3, %o3
25 or %o5, %o4, %o4
26 or %o3, %g1, %o5
27 1:
! %o3 = rounds (from key+240); AddRoundKey rk[0] in the integer
! registers, then move the state into %f0:%f2
28 ld [%o2 + 240], %o3
29 ldd [%o2 + 16], %f12
30 ldd [%o2 + 24], %f14
31 xor %g4, %o4, %o4
32 xor %g5, %o5, %o5
33 .word 0x81b0230c !movxtod %o4,%f0
34 .word 0x85b0230d !movxtod %o5,%f2
! loop below performs two rounds per iteration: %o3 = rounds/2 - 1
35 srl %o3, 1, %o3
36 ldd [%o2 + 32], %f16
37 sub %o3, 1, %o3
38 ldd [%o2 + 40], %f18
39 add %o2, 48, %o2
40 
! main loop: two AES rounds per iteration, round keys streamed from [%o2]
41 .Lenc:
42 .word 0x88cb0400 !aes_eround01 %f12,%f0,%f2,%f4
43 .word 0x84cb8420 !aes_eround23 %f14,%f0,%f2,%f2
44 ldd [%o2 + 0], %f12
45 ldd [%o2 + 8], %f14
46 sub %o3,1,%o3
47 .word 0x80cc0404 !aes_eround01 %f16,%f4,%f2,%f0
48 .word 0x84cc8424 !aes_eround23 %f18,%f4,%f2,%f2
49 ldd [%o2 + 16], %f16
50 ldd [%o2 + 24], %f18
51 brnz,pt %o3, .Lenc
52 add %o2, 32, %o2
53 
! last two rounds: the *_l forms implement the final (no-MixColumns) round
54 andcc %o1, 7, %o4 ! is output aligned?
55 .word 0x88cb0400 !aes_eround01 %f12,%f0,%f2,%f4
56 .word 0x84cb8420 !aes_eround23 %f14,%f0,%f2,%f2
57 .word 0x80cc0484 !aes_eround01_l %f16,%f4,%f2,%f0
58 .word 0x84cc84a4 !aes_eround23_l %f18,%f4,%f2,%f2
59 
60 bnz,pn %icc, 2f
61 nop
62 
! aligned output: store ciphertext %f0:%f2 and return
63 std %f0, [%o1 + 0]
64 retl
65 std %f2, [%o1 + 8]
66 
! misaligned output: rotate the result through %f4-%f8 with faligndata
! and write the head/tail with byte-masked partial stores (ASI 0xc0)
67 2: .word 0x93b24340 !alignaddrl %o1,%g0,%o1
68 mov 0xff, %o5
69 srl %o5, %o4, %o5
70 
71 .word 0x89b00900 !faligndata %f0,%f0,%f4
72 .word 0x8db00902 !faligndata %f0,%f2,%f6
73 .word 0x91b08902 !faligndata %f2,%f2,%f8
74 
75 stda %f4, [%o1 + %o5]0xc0 ! partial store
76 std %f6, [%o1 + 8]
77 add %o1, 16, %o1
78 orn %g0, %o5, %o5
79 retl
80 stda %f8, [%o1 + %o5]0xc0 ! partial store
81 .type aes_t4_encrypt,#function
82 .size aes_t4_encrypt,.-aes_t4_encrypt
83
! void aes_t4_decrypt(const void *in, void *out, const void *key)
!   %o0 = in   : one 16-byte ciphertext block, may be unaligned
!   %o1 = out  : may be unaligned
!   %o2 = key schedule (decrypt order -- see aes_t4_set_decrypt_key);
!         32-bit round count at byte offset 240
! Exact mirror of aes_t4_encrypt using the aes_dround* opcodes.
! Clobbers: %g1, %g4, %g5, %o3-%o5, %f0-%f8, %f12-%f18, flags.
84 .globl aes_t4_decrypt
85 .align 32
86 aes_t4_decrypt:
87 andcc %o0, 7, %g1 ! is input aligned?
88 andn %o0, 7, %o0
89 
! %g4:%g5 = rk[0]
90 ldx [%o2 + 0], %g4
91 ldx [%o2 + 8], %g5
92 
93 ldx [%o0 + 0], %o4
94 bz,pt %icc, 1f
95 ldx [%o0 + 8], %o5
! misaligned input: shift/merge three doublewords into %o4:%o5
96 ldx [%o0 + 16], %o0
97 sll %g1, 3, %g1
98 sub %g0, %g1, %o3
99 sllx %o4, %g1, %o4
100 sllx %o5, %g1, %g1
101 srlx %o5, %o3, %o5
102 srlx %o0, %o3, %o3
103 or %o5, %o4, %o4
104 or %o3, %g1, %o5
105 1:
! %o3 = rounds; AddRoundKey rk[0], move state to %f0:%f2
106 ld [%o2 + 240], %o3
107 ldd [%o2 + 16], %f12
108 ldd [%o2 + 24], %f14
109 xor %g4, %o4, %o4
110 xor %g5, %o5, %o5
111 .word 0x81b0230c !movxtod %o4,%f0
112 .word 0x85b0230d !movxtod %o5,%f2
! loop does two rounds per iteration: %o3 = rounds/2 - 1
113 srl %o3, 1, %o3
114 ldd [%o2 + 32], %f16
115 sub %o3, 1, %o3
116 ldd [%o2 + 40], %f18
117 add %o2, 48, %o2
118 
! main loop: two inverse rounds per iteration
119 .Ldec:
120 .word 0x88cb0440 !aes_dround01 %f12,%f0,%f2,%f4
121 .word 0x84cb8460 !aes_dround23 %f14,%f0,%f2,%f2
122 ldd [%o2 + 0], %f12
123 ldd [%o2 + 8], %f14
124 sub %o3,1,%o3
125 .word 0x80cc0444 !aes_dround01 %f16,%f4,%f2,%f0
126 .word 0x84cc8464 !aes_dround23 %f18,%f4,%f2,%f2
127 ldd [%o2 + 16], %f16
128 ldd [%o2 + 24], %f18
129 brnz,pt %o3, .Ldec
130 add %o2, 32, %o2
131 
! final two rounds via the *_l (last-round) forms
132 andcc %o1, 7, %o4 ! is output aligned?
133 .word 0x88cb0440 !aes_dround01 %f12,%f0,%f2,%f4
134 .word 0x84cb8460 !aes_dround23 %f14,%f0,%f2,%f2
135 .word 0x80cc04c4 !aes_dround01_l %f16,%f4,%f2,%f0
136 .word 0x84cc84e4 !aes_dround23_l %f18,%f4,%f2,%f2
137 
138 bnz,pn %icc, 2f
139 nop
140 
! aligned output path
141 std %f0, [%o1 + 0]
142 retl
143 std %f2, [%o1 + 8]
144 
! misaligned output: faligndata rotation + byte-masked partial stores
145 2: .word 0x93b24340 !alignaddrl %o1,%g0,%o1
146 mov 0xff, %o5
147 srl %o5, %o4, %o5
148 
149 .word 0x89b00900 !faligndata %f0,%f0,%f4
150 .word 0x8db00902 !faligndata %f0,%f2,%f6
151 .word 0x91b08902 !faligndata %f2,%f2,%f8
152 
153 stda %f4, [%o1 + %o5]0xc0 ! partial store
154 std %f6, [%o1 + 8]
155 add %o1, 16, %o1
156 orn %g0, %o5, %o5
157 retl
158 stda %f8, [%o1 + %o5]0xc0 ! partial store
159 .type aes_t4_decrypt,#function
160 .size aes_t4_decrypt,.-aes_t4_decrypt
! int aes_t4_set_encrypt_key(const void *userKey, int bits, void *key)
!   %o0 = user key (may be unaligned; realigned via alignaddr/faligndata)
!   %o1 = key length in bits: <192 -> .L128, ==192 -> .L192, else 256-bit
!   %o2 = output key schedule; round count (14/12/10) stored at [key+240]
! Returns 0 in %o0. Expansion is fully unrolled using the T4
! aes_kexpand0/1/2 opcodes (emitted as .word constants).
! Clobbers: %o3, %f0-%f8, flags.
161 .globl aes_t4_set_encrypt_key
162 .align 32
163 aes_t4_set_encrypt_key:
! .Lset_encrypt_key is also tail-called from aes_t4_set_decrypt_key
164 .Lset_encrypt_key:
165 and %o0, 7, %o3
166 .word 0x91b20300 !alignaddr %o0,%g0,%o0
167 cmp %o1, 192
168 ldd [%o0 + 0], %f0
169 bl,pt %icc,.L128
170 ldd [%o0 + 8], %f2
171 
172 be,pt %icc,.L192
173 ldd [%o0 + 16], %f4
174 brz,pt %o3, .L256aligned
175 ldd [%o0 + 24], %f6
176 
! misaligned 256-bit key: realign the 32 key bytes through %f0-%f8
177 ldd [%o0 + 32], %f8
178 .word 0x81b00902 !faligndata %f0,%f2,%f0
179 .word 0x85b08904 !faligndata %f2,%f4,%f2
180 .word 0x89b10906 !faligndata %f4,%f6,%f4
181 .word 0x8db18908 !faligndata %f6,%f8,%f6
! AES-256 expansion: 14 rounds, 240 bytes of schedule
182 .L256aligned:
183 std %f0, [%o2 + 0]
184 .word 0x80c80106 !aes_kexpand1 %f0,%f6,0,%f0
185 std %f2, [%o2 + 8]
186 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
187 std %f4, [%o2 + 16]
188 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
189 std %f6, [%o2 + 24]
190 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
191 std %f0, [%o2 + 32]
192 .word 0x80c80306 !aes_kexpand1 %f0,%f6,1,%f0
193 std %f2, [%o2 + 40]
194 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
195 std %f4, [%o2 + 48]
196 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
197 std %f6, [%o2 + 56]
198 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
199 std %f0, [%o2 + 64]
200 .word 0x80c80506 !aes_kexpand1 %f0,%f6,2,%f0
201 std %f2, [%o2 + 72]
202 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
203 std %f4, [%o2 + 80]
204 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
205 std %f6, [%o2 + 88]
206 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
207 std %f0, [%o2 + 96]
208 .word 0x80c80706 !aes_kexpand1 %f0,%f6,3,%f0
209 std %f2, [%o2 + 104]
210 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
211 std %f4, [%o2 + 112]
212 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
213 std %f6, [%o2 + 120]
214 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
215 std %f0, [%o2 + 128]
216 .word 0x80c80906 !aes_kexpand1 %f0,%f6,4,%f0
217 std %f2, [%o2 + 136]
218 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
219 std %f4, [%o2 + 144]
220 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
221 std %f6, [%o2 + 152]
222 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
223 std %f0, [%o2 + 160]
224 .word 0x80c80b06 !aes_kexpand1 %f0,%f6,5,%f0
225 std %f2, [%o2 + 168]
226 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
227 std %f4, [%o2 + 176]
228 .word 0x89b12602 !aes_kexpand0 %f4,%f2,%f4
229 std %f6, [%o2 + 184]
230 .word 0x8db1a624 !aes_kexpand2 %f6,%f4,%f6
231 std %f0, [%o2 + 192]
232 .word 0x80c80d06 !aes_kexpand1 %f0,%f6,6,%f0
233 std %f2, [%o2 + 200]
234 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
235 std %f4, [%o2 + 208]
236 std %f6, [%o2 + 216]
237 std %f0, [%o2 + 224]
238 std %f2, [%o2 + 232]
239 
240 mov 14, %o3
241 st %o3, [%o2 + 240]
242 retl
243 xor %o0, %o0, %o0
244 
245 .align 16
! AES-192 expansion: 12 rounds, 208 bytes of schedule
246 .L192:
247 brz,pt %o3, .L192aligned
248 nop
249 
! misaligned 192-bit key: realign the 24 key bytes through %f0-%f6
250 ldd [%o0 + 24], %f6
251 .word 0x81b00902 !faligndata %f0,%f2,%f0
252 .word 0x85b08904 !faligndata %f2,%f4,%f2
253 .word 0x89b10906 !faligndata %f4,%f6,%f4
254 .L192aligned:
255 std %f0, [%o2 + 0]
256 .word 0x80c80104 !aes_kexpand1 %f0,%f4,0,%f0
257 std %f2, [%o2 + 8]
258 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
259 std %f4, [%o2 + 16]
260 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
261 std %f0, [%o2 + 24]
262 .word 0x80c80304 !aes_kexpand1 %f0,%f4,1,%f0
263 std %f2, [%o2 + 32]
264 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
265 std %f4, [%o2 + 40]
266 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
267 std %f0, [%o2 + 48]
268 .word 0x80c80504 !aes_kexpand1 %f0,%f4,2,%f0
269 std %f2, [%o2 + 56]
270 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
271 std %f4, [%o2 + 64]
272 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
273 std %f0, [%o2 + 72]
274 .word 0x80c80704 !aes_kexpand1 %f0,%f4,3,%f0
275 std %f2, [%o2 + 80]
276 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
277 std %f4, [%o2 + 88]
278 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
279 std %f0, [%o2 + 96]
280 .word 0x80c80904 !aes_kexpand1 %f0,%f4,4,%f0
281 std %f2, [%o2 + 104]
282 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
283 std %f4, [%o2 + 112]
284 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
285 std %f0, [%o2 + 120]
286 .word 0x80c80b04 !aes_kexpand1 %f0,%f4,5,%f0
287 std %f2, [%o2 + 128]
288 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
289 std %f4, [%o2 + 136]
290 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
291 std %f0, [%o2 + 144]
292 .word 0x80c80d04 !aes_kexpand1 %f0,%f4,6,%f0
293 std %f2, [%o2 + 152]
294 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
295 std %f4, [%o2 + 160]
296 .word 0x89b12622 !aes_kexpand2 %f4,%f2,%f4
297 std %f0, [%o2 + 168]
298 .word 0x80c80f04 !aes_kexpand1 %f0,%f4,7,%f0
299 std %f2, [%o2 + 176]
300 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
301 std %f4, [%o2 + 184]
302 std %f0, [%o2 + 192]
303 std %f2, [%o2 + 200]
304 
305 mov 12, %o3
306 st %o3, [%o2 + 240]
307 retl
308 xor %o0, %o0, %o0
309 
310 .align 16
! AES-128 expansion: 10 rounds, 176 bytes of schedule
311 .L128:
312 brz,pt %o3, .L128aligned
313 nop
314 
! misaligned 128-bit key: realign the 16 key bytes through %f0-%f4
315 ldd [%o0 + 16], %f4
316 .word 0x81b00902 !faligndata %f0,%f2,%f0
317 .word 0x85b08904 !faligndata %f2,%f4,%f2
318 .L128aligned:
319 std %f0, [%o2 + 0]
320 .word 0x80c80102 !aes_kexpand1 %f0,%f2,0,%f0
321 std %f2, [%o2 + 8]
322 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
323 std %f0, [%o2 + 16]
324 .word 0x80c80302 !aes_kexpand1 %f0,%f2,1,%f0
325 std %f2, [%o2 + 24]
326 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
327 std %f0, [%o2 + 32]
328 .word 0x80c80502 !aes_kexpand1 %f0,%f2,2,%f0
329 std %f2, [%o2 + 40]
330 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
331 std %f0, [%o2 + 48]
332 .word 0x80c80702 !aes_kexpand1 %f0,%f2,3,%f0
333 std %f2, [%o2 + 56]
334 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
335 std %f0, [%o2 + 64]
336 .word 0x80c80902 !aes_kexpand1 %f0,%f2,4,%f0
337 std %f2, [%o2 + 72]
338 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
339 std %f0, [%o2 + 80]
340 .word 0x80c80b02 !aes_kexpand1 %f0,%f2,5,%f0
341 std %f2, [%o2 + 88]
342 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
343 std %f0, [%o2 + 96]
344 .word 0x80c80d02 !aes_kexpand1 %f0,%f2,6,%f0
345 std %f2, [%o2 + 104]
346 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
347 std %f0, [%o2 + 112]
348 .word 0x80c80f02 !aes_kexpand1 %f0,%f2,7,%f0
349 std %f2, [%o2 + 120]
350 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
351 std %f0, [%o2 + 128]
352 .word 0x80c81102 !aes_kexpand1 %f0,%f2,8,%f0
353 std %f2, [%o2 + 136]
354 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
355 std %f0, [%o2 + 144]
356 .word 0x80c81302 !aes_kexpand1 %f0,%f2,9,%f0
357 std %f2, [%o2 + 152]
358 .word 0x85b0a620 !aes_kexpand2 %f2,%f0,%f2
359 std %f0, [%o2 + 160]
360 std %f2, [%o2 + 168]
361 
362 mov 10, %o3
363 st %o3, [%o2 + 240]
364 retl
365 xor %o0, %o0, %o0
366 .type aes_t4_set_encrypt_key,#function
367 .size aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key
368
! int aes_t4_set_decrypt_key(const void *userKey, int bits, void *key)
! Builds the encryption schedule via .Lset_encrypt_key, then reverses
! the schedule in place in 32-byte groups, swapping from both ends
! toward the middle (the T4 aes_dround* opcodes consume round keys in
! reverse order). Returns 0 in %o0.
369 .globl aes_t4_set_decrypt_key
370 .align 32
371 aes_t4_set_decrypt_key:
! preserve our return address: the call below overwrites %o7
372 mov %o7, %o5
373 call .Lset_encrypt_key
374 nop
375 
376 mov %o5, %o7
377 sll %o3, 4, %o0 ! %o3 is number of rounds
378 add %o3, 2, %o3
379 add %o2, %o0, %o0 ! %o0=%o2+16*rounds
380 srl %o3, 2, %o3 ! %o3=(rounds+2)/4
381 
! swap 32-byte groups: %o2 walks up from the start, %o0 walks down
! from the end, %o3 iterations
382 .Lkey_flip:
383 ldd [%o2 + 0], %f0
384 ldd [%o2 + 8], %f2
385 ldd [%o2 + 16], %f4
386 ldd [%o2 + 24], %f6
387 ldd [%o0 + 0], %f8
388 ldd [%o0 + 8], %f10
389 ldd [%o0 - 16], %f12
390 ldd [%o0 - 8], %f14
391 sub %o3, 1, %o3
392 std %f0, [%o0 + 0]
393 std %f2, [%o0 + 8]
394 std %f4, [%o0 - 16]
395 std %f6, [%o0 - 8]
396 std %f8, [%o2 + 0]
397 std %f10, [%o2 + 8]
398 std %f12, [%o2 + 16]
399 std %f14, [%o2 + 24]
400 add %o2, 32, %o2
401 brnz %o3, .Lkey_flip
402 sub %o0, 32, %o0
403 
404 retl
405 xor %o0, %o0, %o0
406 .type aes_t4_set_decrypt_key,#function
407 .size aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
! _aes128_encrypt_1x: encrypt one block held in %f0:%f2 using the
! AES-128 round keys preloaded into %f16-%f54 by _aes128_loadkey
! (rk[0] is applied by the caller in integer registers beforehand).
! 10 rounds, fully unrolled; the last round uses the *_l forms.
! Result in %f0:%f2; clobbers %f4. Callers may enter at +8 to skip
! the first round pair (see the CTR code).
408 .align 32
409 _aes128_encrypt_1x:
410 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
411 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
412 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
413 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
414 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
415 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
416 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
417 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
418 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
419 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
420 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
421 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
422 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
423 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
424 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
425 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
426 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
427 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
428 .word 0x80cd4484 !aes_eround01_l %f52,%f4,%f2,%f0
429 retl
430 .word 0x84cdc4a4 !aes_eround23_l %f54,%f4,%f2,%f2
431 .type _aes128_encrypt_1x,#function
432 .size _aes128_encrypt_1x,.-_aes128_encrypt_1x
433
! _aes128_encrypt_2x: encrypt two interleaved blocks, %f0:%f2 and
! %f4:%f6, with the AES-128 round keys preloaded in %f16-%f54.
! Interleaving hides instruction latency. Results in %f0:%f2 and
! %f4:%f6; clobbers %f8, %f10. Callers may enter at +16 to skip the
! first round pair (see the CTR code).
434 .align 32
435 _aes128_encrypt_2x:
436 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
437 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
438 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
439 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
440 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
441 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
442 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
443 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
444 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
445 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
446 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
447 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
448 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
449 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
450 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
451 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
452 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
453 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
454 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
455 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
456 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
457 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
458 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
459 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
460 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
461 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
462 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
463 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
464 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
465 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
466 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
467 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
468 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
469 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
470 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
471 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
472 .word 0x80cd4488 !aes_eround01_l %f52,%f8,%f2,%f0
473 .word 0x84cdc4a8 !aes_eround23_l %f54,%f8,%f2,%f2
474 .word 0x88cd4c8a !aes_eround01_l %f52,%f10,%f6,%f4
475 retl
476 .word 0x8ccdccaa !aes_eround23_l %f54,%f10,%f6,%f6
477 .type _aes128_encrypt_2x,#function
478 .size _aes128_encrypt_2x,.-_aes128_encrypt_2x
479
! _aes128_loadkey: load the AES-128 key schedule pointed to by %i3.
! rk[0] goes to integer registers %g4:%g5 (applied by callers before
! the FP rounds); the remaining round keys fill %f16-%f54. The enc
! and dec entry aliases below are the same routine for AES-128.
480 .align 32
481 _aes128_loadkey:
482 ldx [%i3 + 0], %g4
483 ldx [%i3 + 8], %g5
484 ldd [%i3 + 16], %f16
485 ldd [%i3 + 24], %f18
486 ldd [%i3 + 32], %f20
487 ldd [%i3 + 40], %f22
488 ldd [%i3 + 48], %f24
489 ldd [%i3 + 56], %f26
490 ldd [%i3 + 64], %f28
491 ldd [%i3 + 72], %f30
492 ldd [%i3 + 80], %f32
493 ldd [%i3 + 88], %f34
494 ldd [%i3 + 96], %f36
495 ldd [%i3 + 104], %f38
496 ldd [%i3 + 112], %f40
497 ldd [%i3 + 120], %f42
498 ldd [%i3 + 128], %f44
499 ldd [%i3 + 136], %f46
500 ldd [%i3 + 144], %f48
501 ldd [%i3 + 152], %f50
502 ldd [%i3 + 160], %f52
503 ldd [%i3 + 168], %f54
504 retl
505 nop
506 .type _aes128_loadkey,#function
507 .size _aes128_loadkey,.-_aes128_loadkey
508 _aes128_load_enckey=_aes128_loadkey
509 _aes128_load_deckey=_aes128_loadkey
510
! void aes128_t4_cbc_encrypt(const void *inp, void *out, size_t len,
!                            const void *key, void *ivec)
!   %i0=inp %i1=out %i2=len(bytes) %i3=key schedule %i4=ivec
! AES-128 CBC encryption. Three paths:
!   - main 1x loop for aligned output,
!   - a partial-store tail (label 2:) for misaligned output,
!   - .L128cbc_enc_blk: ASI_BLK_INIT stores for long (>=128 byte),
!     aligned, non-overlapping buffers, with the sub-64-byte tail
!     finished by the main loop.
! The final ciphertext block (the running IV) is written back to [%i4].
511 .globl aes128_t4_cbc_encrypt
512 .align 32
513 aes128_t4_cbc_encrypt:
514 save %sp, -192, %sp
515 cmp %i2, 0
516 be,pn %xcc, .L128_cbc_enc_abort
517 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
518 sub %i0, %i1, %l5 ! %i0!=%i1
! IV -> %f0:%f2 (loaded as four 32-bit words)
519 ld [%i4 + 0], %f0
520 ld [%i4 + 4], %f1
521 ld [%i4 + 8], %f2
522 ld [%i4 + 12], %f3
523 prefetch [%i0], 20
524 prefetch [%i0 + 63], 20
525 call _aes128_load_enckey
526 and %i0, 7, %l0
! %l0/%l1 = input misalignment shift counts, %l2 = output misalignment,
! %l3 = byte mask for partial stores
527 andn %i0, 7, %i0
528 sll %l0, 3, %l0
529 mov 64, %l1
530 mov 0xff, %l3
531 sub %l1, %l0, %l1
532 and %i1, 7, %l2
533 cmp %i2, 127
534 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
535 movleu %xcc, 0, %l5 ! %i2<128 ||
536 brnz,pn %l5, .L128cbc_enc_blk ! %i0==%i1)
537 srl %l3, %l2, %l3
538 
539 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
540 srlx %i2, 4, %i2
541 prefetch [%i1], 22
542 
! main loop: one block per iteration; %i2 counts blocks
543 .L128_cbc_enc_loop:
544 ldx [%i0 + 0], %o0
545 brz,pt %l0, 4f
546 ldx [%i0 + 8], %o1
547 
! misaligned input: shift/merge three doublewords into %o0:%o1
548 ldx [%i0 + 16], %o2
549 sllx %o0, %l0, %o0
550 srlx %o1, %l1, %g1
551 sllx %o1, %l0, %o1
552 or %g1, %o0, %o0
553 srlx %o2, %l1, %o2
554 or %o2, %o1, %o1
555 4:
556 xor %g4, %o0, %o0 ! ^= rk[0]
557 xor %g5, %o1, %o1
558 .word 0x99b02308 !movxtod %o0,%f12
559 .word 0x9db02309 !movxtod %o1,%f14
560 
561 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
562 .word 0x85b38d82 !fxor %f14,%f2,%f2
563 prefetch [%i1 + 63], 22
564 prefetch [%i0 + 16+63], 20
565 call _aes128_encrypt_1x
566 add %i0, 16, %i0
567 
568 brnz,pn %l2, 2f
569 sub %i2, 1, %i2
570 
! aligned output: ciphertext (= next IV) stays in %f0:%f2
571 std %f0, [%i1 + 0]
572 std %f2, [%i1 + 8]
573 brnz,pt %i2, .L128_cbc_enc_loop
574 add %i1, 16, %i1
! done: write final IV back
575 st %f0, [%i4 + 0]
576 st %f1, [%i4 + 4]
577 st %f2, [%i4 + 8]
578 st %f3, [%i4 + 12]
579 .L128_cbc_enc_abort:
580 ret
581 restore
582 
.align 16 below: misaligned-output store path
583 .align 16
584 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
585 ! and ~3x deterioration
586 ! in inp==out case
587 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
588 .word 0x8db00902 !faligndata %f0,%f2,%f6
589 .word 0x91b08902 !faligndata %f2,%f2,%f8
590 
591 stda %f4, [%i1 + %l3]0xc0 ! partial store
592 std %f6, [%i1 + 8]
593 add %i1, 16, %i1
594 orn %g0, %l3, %l3
595 stda %f8, [%i1 + %l3]0xc0 ! partial store
596 
! re-enter the loop past its first ldx (already done above)
597 brnz,pt %i2, .L128_cbc_enc_loop+4
598 orn %g0, %l3, %l3
599 st %f0, [%i4 + 0]
600 st %f1, [%i4 + 4]
601 st %f2, [%i4 + 8]
602 st %f3, [%i4 + 12]
603 ret
604 restore
605 
606 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
607 .align 32
! block-store path: carve off a sub-64-byte tail, then stream
! ciphertext with ASI_BLK_INIT stores (bypass L1 allocation)
608 .L128cbc_enc_blk:
609 add %i1, %i2, %l5
610 and %l5, 63, %l5 ! tail
611 sub %i2, %l5, %i2
612 add %l5, 15, %l5 ! round up to 16n
613 srlx %i2, 4, %i2
614 srl %l5, 4, %l5
615 
616 .L128_cbc_enc_blk_loop:
617 ldx [%i0 + 0], %o0
618 brz,pt %l0, 5f
619 ldx [%i0 + 8], %o1
620 
621 ldx [%i0 + 16], %o2
622 sllx %o0, %l0, %o0
623 srlx %o1, %l1, %g1
624 sllx %o1, %l0, %o1
625 or %g1, %o0, %o0
626 srlx %o2, %l1, %o2
627 or %o2, %o1, %o1
628 5:
629 xor %g4, %o0, %o0 ! ^= rk[0]
630 xor %g5, %o1, %o1
631 .word 0x99b02308 !movxtod %o0,%f12
632 .word 0x9db02309 !movxtod %o1,%f14
633 
634 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
635 .word 0x85b38d82 !fxor %f14,%f2,%f2
636 prefetch [%i0 + 16+63], 20
637 call _aes128_encrypt_1x
638 add %i0, 16, %i0
639 sub %i2, 1, %i2
640 
641 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
642 add %i1, 8, %i1
643 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
644 brnz,pt %i2, .L128_cbc_enc_blk_loop
645 add %i1, 8, %i1
646 
! order block-init stores before the tail; finish tail in the 1x loop
647 membar #StoreLoad|#StoreStore
648 brnz,pt %l5, .L128_cbc_enc_loop
649 mov %l5, %i2
650 st %f0, [%i4 + 0]
651 st %f1, [%i4 + 4]
652 st %f2, [%i4 + 8]
653 st %f3, [%i4 + 12]
654 ret
655 restore
656 .type aes128_t4_cbc_encrypt,#function
657 .size aes128_t4_cbc_encrypt,.-aes128_t4_cbc_encrypt
! void aes128_t4_ctr32_encrypt(const void *inp, void *out, size_t blocks,
!                              const void *key, void *ivec)
!   %i0=inp %i1=out %i2=block count (scaled to bytes on entry)
!   %i3=key schedule %i4=16-byte big-endian counter block
! AES-128 CTR mode, 32-bit counter: only the last 32-bit word of the
! counter (%l7) is incremented per block. The high 96 bits are
! counter-invariant, so they are XORed with rk[0] once up front and
! the result cached in %f14; each loop then issues the first round
! inline and enters _aes128_encrypt_1x/2x past their first round pair.
! Paths: odd-block 1x loop, 2x-interleaved main loop, misaligned-output
! partial-store tails, and an ASI_BLK_INIT block-store path.
658 .globl aes128_t4_ctr32_encrypt
659 .align 32
660 aes128_t4_ctr32_encrypt:
661 save %sp, -192, %sp
662 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
663 
664 prefetch [%i0], 20
665 prefetch [%i0 + 63], 20
666 call _aes128_load_enckey
667 sllx %i2, 4, %i2
668 
669 ld [%i4 + 0], %l4 ! counter
670 ld [%i4 + 4], %l5
671 ld [%i4 + 8], %l6
672 ld [%i4 + 12], %l7
673 
! fold counter words 0-2 into rk[0]; per-block only %l7 varies
674 sllx %l4, 32, %o5
675 or %l5, %o5, %o5
676 sllx %l6, 32, %g1
677 xor %o5, %g4, %g4 ! ^= rk[0]
678 xor %g1, %g5, %g5
679 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
680 
! alignment setup, same scheme as the CBC code
681 sub %i0, %i1, %l5 ! %i0!=%i1
682 and %i0, 7, %l0
683 andn %i0, 7, %i0
684 sll %l0, 3, %l0
685 mov 64, %l1
686 mov 0xff, %l3
687 sub %l1, %l0, %l1
688 and %i1, 7, %l2
689 cmp %i2, 255
690 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
691 movleu %xcc, 0, %l5 ! %i2<256 ||
692 brnz,pn %l5, .L128_ctr32_blk ! %i0==%i1)
693 srl %l3, %l2, %l3
694 
695 andcc %i2, 16, %g0 ! is number of blocks even?
696 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
697 bz %icc, .L128_ctr32_loop2x
698 srlx %i2, 4, %i2
! 1x loop: handles the odd leading block, then falls into the 2x loop
699 .L128_ctr32_loop:
700 ldx [%i0 + 0], %o0
701 brz,pt %l0, 4f
702 ldx [%i0 + 8], %o1
703 
704 ldx [%i0 + 16], %o2
705 sllx %o0, %l0, %o0
706 srlx %o1, %l1, %g1
707 sllx %o1, %l0, %o1
708 or %g1, %o0, %o0
709 srlx %o2, %l1, %o2
710 or %o2, %o1, %o1
711 4:
! low 64 bits of counter ^ rk[0]; bump the 32-bit counter word
712 xor %g5, %l7, %g1 ! ^= rk[0]
713 add %l7, 1, %l7
714 .word 0x85b02301 !movxtod %g1,%f2
715 srl %l7, 0, %l7 ! clruw
716 prefetch [%i1 + 63], 22
717 prefetch [%i0 + 16+63], 20
! first round issued here; enter _aes128_encrypt_1x past its first pair
718 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
719 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
720 call _aes128_encrypt_1x+8
721 add %i0, 16, %i0
722 
! keystream ^= plaintext
723 .word 0x95b02308 !movxtod %o0,%f10
724 .word 0x99b02309 !movxtod %o1,%f12
725 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
726 .word 0x85b30d82 !fxor %f12,%f2,%f2
727 
728 brnz,pn %l2, 2f
729 sub %i2, 1, %i2
730 
731 std %f0, [%i1 + 0]
732 std %f2, [%i1 + 8]
733 brnz,pt %i2, .L128_ctr32_loop2x
734 add %i1, 16, %i1
735 
736 ret
737 restore
738 
739 .align 16
740 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
741 ! and ~3x deterioration
742 ! in inp==out case
743 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
744 .word 0x8db00902 !faligndata %f0,%f2,%f6
745 .word 0x91b08902 !faligndata %f2,%f2,%f8
746 stda %f4, [%i1 + %l3]0xc0 ! partial store
747 std %f6, [%i1 + 8]
748 add %i1, 16, %i1
749 orn %g0, %l3, %l3
750 stda %f8, [%i1 + %l3]0xc0 ! partial store
751 
! re-enter the 2x loop past its first ldx (already done above)
752 brnz,pt %i2, .L128_ctr32_loop2x+4
753 orn %g0, %l3, %l3
754 
755 ret
756 restore
757 
758 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
759 .align 32
! main loop: two blocks per iteration, interleaved in _aes128_encrypt_2x
760 .L128_ctr32_loop2x:
761 ldx [%i0 + 0], %o0
762 ldx [%i0 + 8], %o1
763 ldx [%i0 + 16], %o2
764 brz,pt %l0, 4f
765 ldx [%i0 + 24], %o3
766 
! misaligned input: merge five doublewords into %o0-%o3
767 ldx [%i0 + 32], %o4
768 sllx %o0, %l0, %o0
769 srlx %o1, %l1, %g1
770 or %g1, %o0, %o0
771 sllx %o1, %l0, %o1
772 srlx %o2, %l1, %g1
773 or %g1, %o1, %o1
774 sllx %o2, %l0, %o2
775 srlx %o3, %l1, %g1
776 or %g1, %o2, %o2
777 sllx %o3, %l0, %o3
778 srlx %o4, %l1, %o4
779 or %o4, %o3, %o3
780 4:
! two consecutive counter values -> %f2 and %f6
781 xor %g5, %l7, %g1 ! ^= rk[0]
782 add %l7, 1, %l7
783 .word 0x85b02301 !movxtod %g1,%f2
784 srl %l7, 0, %l7 ! clruw
785 xor %g5, %l7, %g1
786 add %l7, 1, %l7
787 .word 0x8db02301 !movxtod %g1,%f6
788 srl %l7, 0, %l7 ! clruw
789 prefetch [%i1 + 63], 22
790 prefetch [%i0 + 32+63], 20
! first round for both blocks here; enter _aes128_encrypt_2x past it
791 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
792 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
793 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
794 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
795 call _aes128_encrypt_2x+16
796 add %i0, 32, %i0
797 
! XOR keystream with the two input blocks
798 .word 0x91b02308 !movxtod %o0,%f8
799 .word 0x95b02309 !movxtod %o1,%f10
800 .word 0x99b0230a !movxtod %o2,%f12
801 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
802 .word 0x91b0230b !movxtod %o3,%f8
803 .word 0x85b28d82 !fxor %f10,%f2,%f2
804 .word 0x89b30d84 !fxor %f12,%f4,%f4
805 .word 0x8db20d86 !fxor %f8,%f6,%f6
806 
807 brnz,pn %l2, 2f
808 sub %i2, 2, %i2
809 
810 std %f0, [%i1 + 0]
811 std %f2, [%i1 + 8]
812 std %f4, [%i1 + 16]
813 std %f6, [%i1 + 24]
814 brnz,pt %i2, .L128_ctr32_loop2x
815 add %i1, 32, %i1
816 
817 ret
818 restore
819 
820 .align 16
821 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
822 ! and ~3x deterioration
823 ! in inp==out case
824 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
825 .word 0x81b00902 !faligndata %f0,%f2,%f0
826 .word 0x85b08904 !faligndata %f2,%f4,%f2
827 .word 0x89b10906 !faligndata %f4,%f6,%f4
828 .word 0x8db18906 !faligndata %f6,%f6,%f6
829 
830 stda %f8, [%i1 + %l3]0xc0 ! partial store
831 std %f0, [%i1 + 8]
832 std %f2, [%i1 + 16]
833 std %f4, [%i1 + 24]
834 add %i1, 32, %i1
835 orn %g0, %l3, %l3
836 stda %f6, [%i1 + %l3]0xc0 ! partial store
837 
838 brnz,pt %i2, .L128_ctr32_loop2x+4
839 orn %g0, %l3, %l3
840 
841 ret
842 restore
843 
844 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
845 .align 32
! block-store path: carve off a sub-64-byte tail, stream output with
! ASI_BLK_INIT stores, then finish the tail in the loops above
846 .L128_ctr32_blk:
847 add %i1, %i2, %l5
848 and %l5, 63, %l5 ! tail
849 sub %i2, %l5, %i2
850 add %l5, 15, %l5 ! round up to 16n
851 srlx %i2, 4, %i2
852 srl %l5, 4, %l5
853 sub %i2, 1, %i2
854 add %l5, 1, %l5
855 
856 .L128_ctr32_blk_loop2x:
857 ldx [%i0 + 0], %o0
858 ldx [%i0 + 8], %o1
859 ldx [%i0 + 16], %o2
860 brz,pt %l0, 5f
861 ldx [%i0 + 24], %o3
862 
863 ldx [%i0 + 32], %o4
864 sllx %o0, %l0, %o0
865 srlx %o1, %l1, %g1
866 or %g1, %o0, %o0
867 sllx %o1, %l0, %o1
868 srlx %o2, %l1, %g1
869 or %g1, %o1, %o1
870 sllx %o2, %l0, %o2
871 srlx %o3, %l1, %g1
872 or %g1, %o2, %o2
873 sllx %o3, %l0, %o3
874 srlx %o4, %l1, %o4
875 or %o4, %o3, %o3
876 5:
877 xor %g5, %l7, %g1 ! ^= rk[0]
878 add %l7, 1, %l7
879 .word 0x85b02301 !movxtod %g1,%f2
880 srl %l7, 0, %l7 ! clruw
881 xor %g5, %l7, %g1
882 add %l7, 1, %l7
883 .word 0x8db02301 !movxtod %g1,%f6
884 srl %l7, 0, %l7 ! clruw
885 prefetch [%i0 + 32+63], 20
886 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
887 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
888 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
889 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
890 call _aes128_encrypt_2x+16
891 add %i0, 32, %i0
892 subcc %i2, 2, %i2
893 
894 .word 0x91b02308 !movxtod %o0,%f8
895 .word 0x95b02309 !movxtod %o1,%f10
896 .word 0x99b0230a !movxtod %o2,%f12
897 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
898 .word 0x91b0230b !movxtod %o3,%f8
899 .word 0x85b28d82 !fxor %f10,%f2,%f2
900 .word 0x89b30d84 !fxor %f12,%f4,%f4
901 .word 0x8db20d86 !fxor %f8,%f6,%f6
902 
903 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
904 add %i1, 8, %i1
905 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
906 add %i1, 8, %i1
907 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
908 add %i1, 8, %i1
909 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
910 bgu,pt %xcc, .L128_ctr32_blk_loop2x
911 add %i1, 8, %i1
912 
! tail: pick the 1x loop for an odd remainder, 2x loop otherwise
913 add %l5, %i2, %i2
914 andcc %i2, 1, %g0 ! is number of blocks even?
915 membar #StoreLoad|#StoreStore
916 bnz,pt %icc, .L128_ctr32_loop
917 srl %i2, 0, %i2
918 brnz,pn %i2, .L128_ctr32_loop2x
919 nop
920 
921 ret
922 restore
923 .type aes128_t4_ctr32_encrypt,#function
924 .size aes128_t4_ctr32_encrypt,.-aes128_t4_ctr32_encrypt
925 .globl aes128_t4_xts_encrypt
926 .align 32
927 aes128_t4_xts_encrypt:
928 save %sp, -192-16, %sp
929 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
930
931 mov %i5, %o0
932 add %fp, 2047-16, %o1
933 call aes_t4_encrypt
934 mov %i4, %o2
935
936 add %fp, 2047-16, %l7
937 ldxa [%l7]0x88, %g2
938 add %fp, 2047-8, %l7
939 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
940
941 sethi %hi(0x76543210), %l7
942 or %l7, %lo(0x76543210), %l7
943 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
944
945 prefetch [%i0], 20
946 prefetch [%i0 + 63], 20
947 call _aes128_load_enckey
948 and %i2, 15, %i5
949 and %i2, -16, %i2
950
951 sub %i0, %i1, %l5 ! %i0!=%i1
952 and %i0, 7, %l0
953 andn %i0, 7, %i0
954 sll %l0, 3, %l0
955 mov 64, %l1
956 mov 0xff, %l3
957 sub %l1, %l0, %l1
958 and %i1, 7, %l2
959 cmp %i2, 255
960 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
961 movleu %xcc, 0, %l5 ! %i2<256 ||
962 brnz,pn %l5, .L128_xts_enblk ! %i0==%i1)
963 srl %l3, %l2, %l3
964
965 andcc %i2, 16, %g0 ! is number of blocks even?
966 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
967 bz %icc, .L128_xts_enloop2x
968 srlx %i2, 4, %i2
969 .L128_xts_enloop:
970 ldx [%i0 + 0], %o0
971 brz,pt %l0, 4f
972 ldx [%i0 + 8], %o1
973
974 ldx [%i0 + 16], %o2
975 sllx %o0, %l0, %o0
976 srlx %o1, %l1, %g1
977 sllx %o1, %l0, %o1
978 or %g1, %o0, %o0
979 srlx %o2, %l1, %o2
980 or %o2, %o1, %o1
981 4:
982 .word 0x99b02302 !movxtod %g2,%f12
983 .word 0x9db02303 !movxtod %g3,%f14
984 .word 0x99b3098c !bshuffle %f12,%f12,%f12
985 .word 0x9db3898e !bshuffle %f14,%f14,%f14
986
987 xor %g4, %o0, %o0 ! ^= rk[0]
988 xor %g5, %o1, %o1
989 .word 0x81b02308 !movxtod %o0,%f0
990 .word 0x85b02309 !movxtod %o1,%f2
991
992 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
993 .word 0x85b38d82 !fxor %f14,%f2,%f2
994
995 prefetch [%i1 + 63], 22
996 prefetch [%i0 + 16+63], 20
997 call _aes128_encrypt_1x
998 add %i0, 16, %i0
999
1000 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1001 .word 0x85b38d82 !fxor %f14,%f2,%f2
1002
1003 srax %g3, 63, %l7 ! next tweak value
1004 addcc %g2, %g2, %g2
1005 and %l7, 0x87, %l7
1006 .word 0x87b0c223 !addxc %g3,%g3,%g3
1007 xor %l7, %g2, %g2
1008
1009 brnz,pn %l2, 2f
1010 sub %i2, 1, %i2
1011
1012 std %f0, [%i1 + 0]
1013 std %f2, [%i1 + 8]
1014 brnz,pt %i2, .L128_xts_enloop2x
1015 add %i1, 16, %i1
1016
1017 brnz,pn %i5, .L128_xts_ensteal
1018 nop
1019
1020 ret
1021 restore
1022
1023 .align 16
1024 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1025 ! and ~3x deterioration
1026 ! in inp==out case
1027 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1028 .word 0x8db00902 !faligndata %f0,%f2,%f6
1029 .word 0x91b08902 !faligndata %f2,%f2,%f8
1030 stda %f4, [%i1 + %l3]0xc0 ! partial store
1031 std %f6, [%i1 + 8]
1032 add %i1, 16, %i1
1033 orn %g0, %l3, %l3
1034 stda %f8, [%i1 + %l3]0xc0 ! partial store
1035
1036 brnz,pt %i2, .L128_xts_enloop2x+4
1037 orn %g0, %l3, %l3
1038
1039 brnz,pn %i5, .L128_xts_ensteal
1040 nop
1041
1042 ret
1043 restore
1044
1045 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1046 .align 32
1047 .L128_xts_enloop2x:
1048 ldx [%i0 + 0], %o0
1049 ldx [%i0 + 8], %o1
1050 ldx [%i0 + 16], %o2
1051 brz,pt %l0, 4f
1052 ldx [%i0 + 24], %o3
1053
1054 ldx [%i0 + 32], %o4
1055 sllx %o0, %l0, %o0
1056 srlx %o1, %l1, %g1
1057 or %g1, %o0, %o0
1058 sllx %o1, %l0, %o1
1059 srlx %o2, %l1, %g1
1060 or %g1, %o1, %o1
1061 sllx %o2, %l0, %o2
1062 srlx %o3, %l1, %g1
1063 or %g1, %o2, %o2
1064 sllx %o3, %l0, %o3
1065 srlx %o4, %l1, %o4
1066 or %o4, %o3, %o3
1067 4:
1068 .word 0x99b02302 !movxtod %g2,%f12
1069 .word 0x9db02303 !movxtod %g3,%f14
1070 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1071 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1072
1073 srax %g3, 63, %l7 ! next tweak value
1074 addcc %g2, %g2, %g2
1075 and %l7, 0x87, %l7
1076 .word 0x87b0c223 !addxc %g3,%g3,%g3
1077 xor %l7, %g2, %g2
1078
1079 .word 0x91b02302 !movxtod %g2,%f8
1080 .word 0x95b02303 !movxtod %g3,%f10
1081 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1082 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1083
1084 xor %g4, %o0, %o0 ! ^= rk[0]
1085 xor %g5, %o1, %o1
1086 xor %g4, %o2, %o2 ! ^= rk[0]
1087 xor %g5, %o3, %o3
1088 .word 0x81b02308 !movxtod %o0,%f0
1089 .word 0x85b02309 !movxtod %o1,%f2
1090 .word 0x89b0230a !movxtod %o2,%f4
1091 .word 0x8db0230b !movxtod %o3,%f6
1092
1093 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1094 .word 0x85b38d82 !fxor %f14,%f2,%f2
1095 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1096 .word 0x8db28d86 !fxor %f10,%f6,%f6
1097
1098 prefetch [%i1 + 63], 22
1099 prefetch [%i0 + 32+63], 20
1100 call _aes128_encrypt_2x
1101 add %i0, 32, %i0
1102
1103 .word 0x91b02302 !movxtod %g2,%f8
1104 .word 0x95b02303 !movxtod %g3,%f10
1105
1106 srax %g3, 63, %l7 ! next tweak value
1107 addcc %g2, %g2, %g2
1108 and %l7, 0x87, %l7
1109 .word 0x87b0c223 !addxc %g3,%g3,%g3
1110 xor %l7, %g2, %g2
1111
1112 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1113 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1114
1115 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1116 .word 0x85b38d82 !fxor %f14,%f2,%f2
1117 .word 0x89b20d84 !fxor %f8,%f4,%f4
1118 .word 0x8db28d86 !fxor %f10,%f6,%f6
1119
1120 brnz,pn %l2, 2f
1121 sub %i2, 2, %i2
1122
1123 std %f0, [%i1 + 0]
1124 std %f2, [%i1 + 8]
1125 std %f4, [%i1 + 16]
1126 std %f6, [%i1 + 24]
1127 brnz,pt %i2, .L128_xts_enloop2x
1128 add %i1, 32, %i1
1129
1130 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1131 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1132 brnz,pn %i5, .L128_xts_ensteal
1133 nop
1134
1135 ret
1136 restore
1137
1138 .align 16
1139 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1140 ! and ~3x deterioration
1141 ! in inp==out case
1142 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1143 .word 0x95b00902 !faligndata %f0,%f2,%f10
1144 .word 0x99b08904 !faligndata %f2,%f4,%f12
1145 .word 0x9db10906 !faligndata %f4,%f6,%f14
1146 .word 0x81b18906 !faligndata %f6,%f6,%f0
1147
1148 stda %f8, [%i1 + %l3]0xc0 ! partial store
1149 std %f10, [%i1 + 8]
1150 std %f12, [%i1 + 16]
1151 std %f14, [%i1 + 24]
1152 add %i1, 32, %i1
1153 orn %g0, %l3, %l3
1154 stda %f0, [%i1 + %l3]0xc0 ! partial store
1155
1156 brnz,pt %i2, .L128_xts_enloop2x+4
1157 orn %g0, %l3, %l3
1158
1159 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1160 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1161 brnz,pn %i5, .L128_xts_ensteal
1162 nop
1163
1164 ret
1165 restore
1166
1167 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1168 .align 32
1169 .L128_xts_enblk:
1170 add %i1, %i2, %l5
1171 and %l5, 63, %l5 ! tail
1172 sub %i2, %l5, %i2
1173 add %l5, 15, %l5 ! round up to 16n
1174 srlx %i2, 4, %i2
1175 srl %l5, 4, %l5
1176 sub %i2, 1, %i2
1177 add %l5, 1, %l5
1178
1179 .L128_xts_enblk2x:
1180 ldx [%i0 + 0], %o0
1181 ldx [%i0 + 8], %o1
1182 ldx [%i0 + 16], %o2
1183 brz,pt %l0, 5f
1184 ldx [%i0 + 24], %o3
1185
1186 ldx [%i0 + 32], %o4
1187 sllx %o0, %l0, %o0
1188 srlx %o1, %l1, %g1
1189 or %g1, %o0, %o0
1190 sllx %o1, %l0, %o1
1191 srlx %o2, %l1, %g1
1192 or %g1, %o1, %o1
1193 sllx %o2, %l0, %o2
1194 srlx %o3, %l1, %g1
1195 or %g1, %o2, %o2
1196 sllx %o3, %l0, %o3
1197 srlx %o4, %l1, %o4
1198 or %o4, %o3, %o3
1199 5:
1200 .word 0x99b02302 !movxtod %g2,%f12
1201 .word 0x9db02303 !movxtod %g3,%f14
1202 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1203 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1204
1205 srax %g3, 63, %l7 ! next tweak value
1206 addcc %g2, %g2, %g2
1207 and %l7, 0x87, %l7
1208 .word 0x87b0c223 !addxc %g3,%g3,%g3
1209 xor %l7, %g2, %g2
1210
1211 .word 0x91b02302 !movxtod %g2,%f8
1212 .word 0x95b02303 !movxtod %g3,%f10
1213 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1214 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1215
1216 xor %g4, %o0, %o0 ! ^= rk[0]
1217 xor %g5, %o1, %o1
1218 xor %g4, %o2, %o2 ! ^= rk[0]
1219 xor %g5, %o3, %o3
1220 .word 0x81b02308 !movxtod %o0,%f0
1221 .word 0x85b02309 !movxtod %o1,%f2
1222 .word 0x89b0230a !movxtod %o2,%f4
1223 .word 0x8db0230b !movxtod %o3,%f6
1224
1225 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1226 .word 0x85b38d82 !fxor %f14,%f2,%f2
1227 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1228 .word 0x8db28d86 !fxor %f10,%f6,%f6
1229
1230 prefetch [%i0 + 32+63], 20
1231 call _aes128_encrypt_2x
1232 add %i0, 32, %i0
1233
1234 .word 0x91b02302 !movxtod %g2,%f8
1235 .word 0x95b02303 !movxtod %g3,%f10
1236
1237 srax %g3, 63, %l7 ! next tweak value
1238 addcc %g2, %g2, %g2
1239 and %l7, 0x87, %l7
1240 .word 0x87b0c223 !addxc %g3,%g3,%g3
1241 xor %l7, %g2, %g2
1242
1243 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1244 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1245
1246 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1247 .word 0x85b38d82 !fxor %f14,%f2,%f2
1248 .word 0x89b20d84 !fxor %f8,%f4,%f4
1249 .word 0x8db28d86 !fxor %f10,%f6,%f6
1250
1251 subcc %i2, 2, %i2
1252 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1253 add %i1, 8, %i1
1254 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1255 add %i1, 8, %i1
1256 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1257 add %i1, 8, %i1
1258 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1259 bgu,pt %xcc, .L128_xts_enblk2x
1260 add %i1, 8, %i1
1261
1262 add %l5, %i2, %i2
1263 andcc %i2, 1, %g0 ! is number of blocks even?
1264 membar #StoreLoad|#StoreStore
1265 bnz,pt %icc, .L128_xts_enloop
1266 srl %i2, 0, %i2
1267 brnz,pn %i2, .L128_xts_enloop2x
1268 nop
1269
1270 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1271 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1272 brnz,pn %i5, .L128_xts_ensteal
1273 nop
1274
1275 ret
1276 restore
1277 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1278 .align 32
1279 .L128_xts_ensteal:
1280 std %f0, [%fp + 2047-16] ! copy of output
1281 std %f2, [%fp + 2047-8]
1282
1283 srl %l0, 3, %l0
1284 add %fp, 2047-16, %l7
1285 add %i0, %l0, %i0 ! original %i0+%i2&-15
1286 add %i1, %l2, %i1 ! original %i1+%i2&-15
1287 mov 0, %l0
1288 nop ! align
1289
1290 .L128_xts_enstealing:
1291 ldub [%i0 + %l0], %o0
1292 ldub [%l7 + %l0], %o1
1293 dec %i5
1294 stb %o0, [%l7 + %l0]
1295 stb %o1, [%i1 + %l0]
1296 brnz %i5, .L128_xts_enstealing
1297 inc %l0
1298
1299 mov %l7, %i0
1300 sub %i1, 16, %i1
1301 mov 0, %l0
1302 sub %i1, %l2, %i1
1303 ba .L128_xts_enloop ! one more time
1304 mov 1, %i2 ! %i5 is 0
1305 ret
1306 restore
1307 .type aes128_t4_xts_encrypt,#function
1308 .size aes128_t4_xts_encrypt,.-aes128_t4_xts_encrypt
! ----------------------------------------------------------------------
! aes128_t4_xts_decrypt -- AES-128-XTS decryption using SPARC T4 AES
! opcodes (hand-assembled as .word, mnemonic in the trailing comment).
! Register usage visible below: %i0 = input, %i1 = output, %i2 = length,
! %i4 = secondary ("tweak") key, %i5 = ivec pointer, later reused as the
! count of trailing tail bytes for ciphertext stealing.
! NOTE(review): argument roles inferred from register usage in this
! listing -- confirm against the perlasm source it was generated from.
! The running tweak lives in %g3:%g2; rk[0] of the data key is kept in
! %g4:%g5 and the remaining round keys in %f16-%f54 (loaded by
! _aes128_load_deckey, defined elsewhere in this file).
! ----------------------------------------------------------------------
1309 .globl aes128_t4_xts_decrypt
1310 .align 32
1311 aes128_t4_xts_decrypt:
1312 save %sp, -192-16, %sp
1313 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
1314
! Encrypt ivec (%i5) under the secondary key (%i4) into the 16-byte
! stack scratch slot at %fp+2047-16; the result is the initial tweak.
1315 mov %i5, %o0
1316 add %fp, 2047-16, %o1
1317 call aes_t4_encrypt
1318 mov %i4, %o2
1319
! Reload the tweak into %g3:%g2 (ASI 0x88 = little-endian load).
1320 add %fp, 2047-16, %l7
1321 ldxa [%l7]0x88, %g2
1322 add %fp, 2047-8, %l7
1323 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
1324
1325 sethi %hi(0x76543210), %l7
1326 or %l7, %lo(0x76543210), %l7
1327 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
1328
! Load decryption round keys; split len into whole 16-byte blocks (%i2)
! and a tail byte count (%i5) for ciphertext stealing.  When a tail
! exists, hold back one extra full block for the stealing step.
1329 prefetch [%i0], 20
1330 prefetch [%i0 + 63], 20
1331 call _aes128_load_deckey
1332 and %i2, 15, %i5
1333 and %i2, -16, %i2
1334 mov 0, %l7
1335 movrnz %i5, 16, %l7
1336 sub %i2, %l7, %i2
1337
! Alignment bookkeeping: %l0/%l1 = input left/right shift counts (bits),
! %l2 = output misalignment (bytes), %l3 = edge mask for partial stores.
! Divert to the contiguous block-store path under the conditions noted
! in the trailing comments below.
1338 sub %i0, %i1, %l5 ! %i0!=%i1
1339 and %i0, 7, %l0
1340 andn %i0, 7, %i0
1341 sll %l0, 3, %l0
1342 mov 64, %l1
1343 mov 0xff, %l3
1344 sub %l1, %l0, %l1
1345 and %i1, 7, %l2
1346 cmp %i2, 255
1347 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
1348 movleu %xcc, 0, %l5 ! %i2<256 ||
1349 brnz,pn %l5, .L128_xts_deblk ! %i0==%i1)
1350 srl %l3, %l2, %l3
1351
1352 andcc %i2, 16, %g0 ! is number of blocks even?
1353 brz,pn %i2, .L128_xts_desteal
1354 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
1355 bz %icc, .L128_xts_deloop2x
1356 srlx %i2, 4, %i2
! Single-block loop: entered once when the block count is odd, then the
! 2x loop below takes over.
1357 .L128_xts_deloop:
1358 ldx [%i0 + 0], %o0
1359 brz,pt %l0, 4f
1360 ldx [%i0 + 8], %o1
1361
! Input is not 8-byte aligned: merge three doublewords into an aligned
! 16-byte block using the shift counts computed above.
1362 ldx [%i0 + 16], %o2
1363 sllx %o0, %l0, %o0
1364 srlx %o1, %l1, %g1
1365 sllx %o1, %l0, %o1
1366 or %g1, %o0, %o0
1367 srlx %o2, %l1, %o2
1368 or %o2, %o1, %o1
1369 4:
! Byte-swapped copy of the current tweak into %f12:%f14.
1370 .word 0x99b02302 !movxtod %g2,%f12
1371 .word 0x9db02303 !movxtod %g3,%f14
1372 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1373 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1374
1375 xor %g4, %o0, %o0 ! ^= rk[0]
1376 xor %g5, %o1, %o1
1377 .word 0x81b02308 !movxtod %o0,%f0
1378 .word 0x85b02309 !movxtod %o1,%f2
1379
1380 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1381 .word 0x85b38d82 !fxor %f14,%f2,%f2
1382
1383 prefetch [%i1 + 63], 22
1384 prefetch [%i0 + 16+63], 20
1385 call _aes128_decrypt_1x
1386 add %i0, 16, %i0
1387
1388 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1389 .word 0x85b38d82 !fxor %f14,%f2,%f2
1390
! Advance the tweak: multiply by x in GF(2^128), i.e. 128-bit left
! shift of %g3:%g2 with conditional reduction by polynomial 0x87.
1391 srax %g3, 63, %l7 ! next tweak value
1392 addcc %g2, %g2, %g2
1393 and %l7, 0x87, %l7
1394 .word 0x87b0c223 !addxc %g3,%g3,%g3
1395 xor %l7, %g2, %g2
1396
1397 brnz,pn %l2, 2f
1398 sub %i2, 1, %i2
1399
1400 std %f0, [%i1 + 0]
1401 std %f2, [%i1 + 8]
1402 brnz,pt %i2, .L128_xts_deloop2x
1403 add %i1, 16, %i1
1404
1405 brnz,pn %i5, .L128_xts_desteal
1406 nop
1407
1408 ret
1409 restore
1410
1411 .align 16
1412 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1413 ! and ~3x deterioration
1414 ! in inp==out case
1415 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1416 .word 0x8db00902 !faligndata %f0,%f2,%f6
1417 .word 0x91b08902 !faligndata %f2,%f2,%f8
1418 stda %f4, [%i1 + %l3]0xc0 ! partial store
1419 std %f6, [%i1 + 8]
1420 add %i1, 16, %i1
1421 orn %g0, %l3, %l3
1422 stda %f8, [%i1 + %l3]0xc0 ! partial store
1423
1424 brnz,pt %i2, .L128_xts_deloop2x+4
1425 orn %g0, %l3, %l3
1426
1427 brnz,pn %i5, .L128_xts_desteal
1428 nop
1429
1430 ret
1431 restore
1432
1433 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1434 .align 32
! Main loop: two blocks per iteration (%f0:%f2 and %f4:%f6), two
! tweaks per iteration (%f12:%f14 current, %f8:%f10 next).
1435 .L128_xts_deloop2x:
1436 ldx [%i0 + 0], %o0
1437 ldx [%i0 + 8], %o1
1438 ldx [%i0 + 16], %o2
1439 brz,pt %l0, 4f
1440 ldx [%i0 + 24], %o3
1441
! Unaligned input: realign five doublewords into two 16-byte blocks.
1442 ldx [%i0 + 32], %o4
1443 sllx %o0, %l0, %o0
1444 srlx %o1, %l1, %g1
1445 or %g1, %o0, %o0
1446 sllx %o1, %l0, %o1
1447 srlx %o2, %l1, %g1
1448 or %g1, %o1, %o1
1449 sllx %o2, %l0, %o2
1450 srlx %o3, %l1, %g1
1451 or %g1, %o2, %o2
1452 sllx %o3, %l0, %o3
1453 srlx %o4, %l1, %o4
1454 or %o4, %o3, %o3
1455 4:
1456 .word 0x99b02302 !movxtod %g2,%f12
1457 .word 0x9db02303 !movxtod %g3,%f14
1458 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1459 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1460
! First tweak advance (GF(2^128) doubling, reduction poly 0x87).
1461 srax %g3, 63, %l7 ! next tweak value
1462 addcc %g2, %g2, %g2
1463 and %l7, 0x87, %l7
1464 .word 0x87b0c223 !addxc %g3,%g3,%g3
1465 xor %l7, %g2, %g2
1466
1467 .word 0x91b02302 !movxtod %g2,%f8
1468 .word 0x95b02303 !movxtod %g3,%f10
1469 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1470 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1471
1472 xor %g4, %o0, %o0 ! ^= rk[0]
1473 xor %g5, %o1, %o1
1474 xor %g4, %o2, %o2 ! ^= rk[0]
1475 xor %g5, %o3, %o3
1476 .word 0x81b02308 !movxtod %o0,%f0
1477 .word 0x85b02309 !movxtod %o1,%f2
1478 .word 0x89b0230a !movxtod %o2,%f4
1479 .word 0x8db0230b !movxtod %o3,%f6
1480
1481 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1482 .word 0x85b38d82 !fxor %f14,%f2,%f2
1483 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1484 .word 0x8db28d86 !fxor %f10,%f6,%f6
1485
1486 prefetch [%i1 + 63], 22
1487 prefetch [%i0 + 32+63], 20
1488 call _aes128_decrypt_2x
1489 add %i0, 32, %i0
1490
1491 .word 0x91b02302 !movxtod %g2,%f8
1492 .word 0x95b02303 !movxtod %g3,%f10
1493
! Second tweak advance for the next loop iteration.
1494 srax %g3, 63, %l7 ! next tweak value
1495 addcc %g2, %g2, %g2
1496 and %l7, 0x87, %l7
1497 .word 0x87b0c223 !addxc %g3,%g3,%g3
1498 xor %l7, %g2, %g2
1499
1500 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1501 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1502
1503 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1504 .word 0x85b38d82 !fxor %f14,%f2,%f2
1505 .word 0x89b20d84 !fxor %f8,%f4,%f4
1506 .word 0x8db28d86 !fxor %f10,%f6,%f6
1507
1508 brnz,pn %l2, 2f
1509 sub %i2, 2, %i2
1510
1511 std %f0, [%i1 + 0]
1512 std %f2, [%i1 + 8]
1513 std %f4, [%i1 + 16]
1514 std %f6, [%i1 + 24]
1515 brnz,pt %i2, .L128_xts_deloop2x
1516 add %i1, 32, %i1
1517
1518 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1519 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1520 brnz,pn %i5, .L128_xts_desteal
1521 nop
1522
1523 ret
1524 restore
1525
1526 .align 16
1527 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1528 ! and ~3x deterioration
1529 ! in inp==out case
1530 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1531 .word 0x95b00902 !faligndata %f0,%f2,%f10
1532 .word 0x99b08904 !faligndata %f2,%f4,%f12
1533 .word 0x9db10906 !faligndata %f4,%f6,%f14
1534 .word 0x81b18906 !faligndata %f6,%f6,%f0
1535
1536 stda %f8, [%i1 + %l3]0xc0 ! partial store
1537 std %f10, [%i1 + 8]
1538 std %f12, [%i1 + 16]
1539 std %f14, [%i1 + 24]
1540 add %i1, 32, %i1
1541 orn %g0, %l3, %l3
1542 stda %f0, [%i1 + %l3]0xc0 ! partial store
1543
1544 brnz,pt %i2, .L128_xts_deloop2x+4
1545 orn %g0, %l3, %l3
1546
1547 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1548 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1549 brnz,pn %i5, .L128_xts_desteal
1550 nop
1551
1552 ret
1553 restore
1554
1555 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1556 .align 32
! Contiguous path using ASI_BLK_INIT stores: first carve the output
! into a 64-byte-aligned body (%i2, counted in 16-byte blocks) and a
! tail block count (%l5) finished by the ordinary loops afterwards.
1557 .L128_xts_deblk:
1558 add %i1, %i2, %l5
1559 and %l5, 63, %l5 ! tail
1560 sub %i2, %l5, %i2
1561 add %l5, 15, %l5 ! round up to 16n
1562 srlx %i2, 4, %i2
1563 srl %l5, 4, %l5
1564 sub %i2, 1, %i2
1565 add %l5, 1, %l5
1566
1567 .L128_xts_deblk2x:
1568 ldx [%i0 + 0], %o0
1569 ldx [%i0 + 8], %o1
1570 ldx [%i0 + 16], %o2
1571 brz,pt %l0, 5f
1572 ldx [%i0 + 24], %o3
1573
! Unaligned input: realign five doublewords into two 16-byte blocks.
1574 ldx [%i0 + 32], %o4
1575 sllx %o0, %l0, %o0
1576 srlx %o1, %l1, %g1
1577 or %g1, %o0, %o0
1578 sllx %o1, %l0, %o1
1579 srlx %o2, %l1, %g1
1580 or %g1, %o1, %o1
1581 sllx %o2, %l0, %o2
1582 srlx %o3, %l1, %g1
1583 or %g1, %o2, %o2
1584 sllx %o3, %l0, %o3
1585 srlx %o4, %l1, %o4
1586 or %o4, %o3, %o3
1587 5:
1588 .word 0x99b02302 !movxtod %g2,%f12
1589 .word 0x9db02303 !movxtod %g3,%f14
1590 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1591 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1592
1593 srax %g3, 63, %l7 ! next tweak value
1594 addcc %g2, %g2, %g2
1595 and %l7, 0x87, %l7
1596 .word 0x87b0c223 !addxc %g3,%g3,%g3
1597 xor %l7, %g2, %g2
1598
1599 .word 0x91b02302 !movxtod %g2,%f8
1600 .word 0x95b02303 !movxtod %g3,%f10
1601 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1602 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1603
1604 xor %g4, %o0, %o0 ! ^= rk[0]
1605 xor %g5, %o1, %o1
1606 xor %g4, %o2, %o2 ! ^= rk[0]
1607 xor %g5, %o3, %o3
1608 .word 0x81b02308 !movxtod %o0,%f0
1609 .word 0x85b02309 !movxtod %o1,%f2
1610 .word 0x89b0230a !movxtod %o2,%f4
1611 .word 0x8db0230b !movxtod %o3,%f6
1612
1613 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1614 .word 0x85b38d82 !fxor %f14,%f2,%f2
1615 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
1616 .word 0x8db28d86 !fxor %f10,%f6,%f6
1617
1618 prefetch [%i0 + 32+63], 20
1619 call _aes128_decrypt_2x
1620 add %i0, 32, %i0
1621
1622 .word 0x91b02302 !movxtod %g2,%f8
1623 .word 0x95b02303 !movxtod %g3,%f10
1624
1625 srax %g3, 63, %l7 ! next tweak value
1626 addcc %g2, %g2, %g2
1627 and %l7, 0x87, %l7
1628 .word 0x87b0c223 !addxc %g3,%g3,%g3
1629 xor %l7, %g2, %g2
1630
1631 .word 0x91b20988 !bshuffle %f8,%f8,%f8
1632 .word 0x95b2898a !bshuffle %f10,%f10,%f10
1633
1634 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1635 .word 0x85b38d82 !fxor %f14,%f2,%f2
1636 .word 0x89b20d84 !fxor %f8,%f4,%f4
1637 .word 0x8db28d86 !fxor %f10,%f6,%f6
1638
1639 subcc %i2, 2, %i2
1640 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1641 add %i1, 8, %i1
1642 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1643 add %i1, 8, %i1
1644 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1645 add %i1, 8, %i1
1646 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1647 bgu,pt %xcc, .L128_xts_deblk2x
1648 add %i1, 8, %i1
1649
! Body done; fence the ASI_BLK_INIT stores, then finish the tail
! (%l5 blocks) through the regular 1x/2x loops.
1650 add %l5, %i2, %i2
1651 andcc %i2, 1, %g0 ! is number of blocks even?
1652 membar #StoreLoad|#StoreStore
1653 bnz,pt %icc, .L128_xts_deloop
1654 srl %i2, 0, %i2
1655 brnz,pn %i2, .L128_xts_deloop2x
1656 nop
1657
1658 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
1659 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
1660 brnz,pn %i5, .L128_xts_desteal
1661 nop
1662
1663 ret
1664 restore
1665 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1666 .align 32
! Ciphertext stealing: decrypt the held-back full block using the
! tweak computed one step ahead (%o3:%o2, without disturbing %g3:%g2),
! stash the plaintext on the stack, swap %i5 tail bytes between input
! and the stash, then run the single-block loop once more.
1667 .L128_xts_desteal:
1668 ldx [%i0 + 0], %o0
1669 brz,pt %l0, 8f
1670 ldx [%i0 + 8], %o1
1671
1672 ldx [%i0 + 16], %o2
1673 sllx %o0, %l0, %o0
1674 srlx %o1, %l1, %g1
1675 sllx %o1, %l0, %o1
1676 or %g1, %o0, %o0
1677 srlx %o2, %l1, %o2
1678 or %o2, %o1, %o1
1679 8:
1680 srax %g3, 63, %l7 ! next tweak value
1681 addcc %g2, %g2, %o2
1682 and %l7, 0x87, %l7
1683 .word 0x97b0c223 !addxc %g3,%g3,%o3
1684 xor %l7, %o2, %o2
1685
1686 .word 0x99b0230a !movxtod %o2,%f12
1687 .word 0x9db0230b !movxtod %o3,%f14
1688 .word 0x99b3098c !bshuffle %f12,%f12,%f12
1689 .word 0x9db3898e !bshuffle %f14,%f14,%f14
1690
1691 xor %g4, %o0, %o0 ! ^= rk[0]
1692 xor %g5, %o1, %o1
1693 .word 0x81b02308 !movxtod %o0,%f0
1694 .word 0x85b02309 !movxtod %o1,%f2
1695
1696 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1697 .word 0x85b38d82 !fxor %f14,%f2,%f2
1698
1699 call _aes128_decrypt_1x
1700 add %i0, 16, %i0
1701
1702 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
1703 .word 0x85b38d82 !fxor %f14,%f2,%f2
1704
1705 std %f0, [%fp + 2047-16]
1706 std %f2, [%fp + 2047-8]
1707
1708 srl %l0, 3, %l0
1709 add %fp, 2047-16, %l7
1710 add %i0, %l0, %i0 ! original %i0+%i2&-15
1711 add %i1, %l2, %i1 ! original %i1+%i2&-15
1712 mov 0, %l0
1713 add %i1, 16, %i1
1714 nop ! align
1715
! Byte-exchange loop: copy input tail bytes into the stash while
! emitting the stashed plaintext bytes as the final partial block.
1716 .L128_xts_destealing:
1717 ldub [%i0 + %l0], %o0
1718 ldub [%l7 + %l0], %o1
1719 dec %i5
1720 stb %o0, [%l7 + %l0]
1721 stb %o1, [%i1 + %l0]
1722 brnz %i5, .L128_xts_destealing
1723 inc %l0
1724
! Re-enter the single-block loop with the stash as input to produce
! the stolen (second-to-last) output block.
1725 mov %l7, %i0
1726 sub %i1, 16, %i1
1727 mov 0, %l0
1728 sub %i1, %l2, %i1
1729 ba .L128_xts_deloop ! one more time
1730 mov 1, %i2 ! %i5 is 0
1731 ret
1732 restore
1733 .type aes128_t4_xts_decrypt,#function
1734 .size aes128_t4_xts_decrypt,.-aes128_t4_xts_decrypt
! ----------------------------------------------------------------------
! aes128_t4_cbc_decrypt -- AES-128-CBC decryption using SPARC T4 AES
! opcodes.  Register usage visible below: %i0 = input, %i1 = output,
! %i2 = length (bytes), %i4 = ivec pointer (read at entry, written back
! at exit).  rk[0] is kept in %g4:%g5 and the remaining round keys in
! %f16-%f54 (via _aes128_load_deckey); the running IV / previous
! ciphertext block is kept in %f12:%f14.
! NOTE(review): argument roles inferred from this listing -- confirm
! against the perlasm source.
! ----------------------------------------------------------------------
1735 .globl aes128_t4_cbc_decrypt
1736 .align 32
1737 aes128_t4_cbc_decrypt:
1738 save %sp, -192, %sp
1739 cmp %i2, 0
1740 be,pn %xcc, .L128_cbc_dec_abort
1741 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
1742 sub %i0, %i1, %l5 ! %i0!=%i1
1743 ld [%i4 + 0], %f12 ! load ivec
1744 ld [%i4 + 4], %f13
1745 ld [%i4 + 8], %f14
1746 ld [%i4 + 12], %f15
1747 prefetch [%i0], 20
1748 prefetch [%i0 + 63], 20
1749 call _aes128_load_deckey
! Alignment bookkeeping (same scheme as the XTS routines): %l0/%l1 =
! input shift counts, %l2 = output misalignment, %l3 = partial-store
! mask; divert to the block-store path per the comments below.
1750 and %i0, 7, %l0
1751 andn %i0, 7, %i0
1752 sll %l0, 3, %l0
1753 mov 64, %l1
1754 mov 0xff, %l3
1755 sub %l1, %l0, %l1
1756 and %i1, 7, %l2
1757 cmp %i2, 255
1758 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
1759 movleu %xcc, 0, %l5 ! %i2<256 ||
1760 brnz,pn %l5, .L128cbc_dec_blk ! %i0==%i1)
1761 srl %l3, %l2, %l3
1762
1763 andcc %i2, 16, %g0 ! is number of blocks even?
1764 srlx %i2, 4, %i2
1765 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
1766 bz %icc, .L128_cbc_dec_loop2x
1767 prefetch [%i1], 22
! Single-block loop: entered once when the block count is odd.
1768 .L128_cbc_dec_loop:
1769 ldx [%i0 + 0], %o0
1770 brz,pt %l0, 4f
1771 ldx [%i0 + 8], %o1
1772
! Unaligned input: merge three doublewords into one aligned block.
1773 ldx [%i0 + 16], %o2
1774 sllx %o0, %l0, %o0
1775 srlx %o1, %l1, %g1
1776 sllx %o1, %l0, %o1
1777 or %g1, %o0, %o0
1778 srlx %o2, %l1, %o2
1779 or %o2, %o1, %o1
1780 4:
! %o0:%o1 (the raw ciphertext) is preserved: it becomes the next IV.
1781 xor %g4, %o0, %o2 ! ^= rk[0]
1782 xor %g5, %o1, %o3
1783 .word 0x81b0230a !movxtod %o2,%f0
1784 .word 0x85b0230b !movxtod %o3,%f2
1785
1786 prefetch [%i1 + 63], 22
1787 prefetch [%i0 + 16+63], 20
1788 call _aes128_decrypt_1x
1789 add %i0, 16, %i0
1790
1791 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1792 .word 0x85b38d82 !fxor %f14,%f2,%f2
1793 .word 0x99b02308 !movxtod %o0,%f12
1794 .word 0x9db02309 !movxtod %o1,%f14
1795
1796 brnz,pn %l2, 2f
1797 sub %i2, 1, %i2
1798
1799 std %f0, [%i1 + 0]
1800 std %f2, [%i1 + 8]
1801 brnz,pt %i2, .L128_cbc_dec_loop2x
1802 add %i1, 16, %i1
! All blocks done: write the final IV back to the caller's ivec.
1803 st %f12, [%i4 + 0]
1804 st %f13, [%i4 + 4]
1805 st %f14, [%i4 + 8]
1806 st %f15, [%i4 + 12]
1807 .L128_cbc_dec_abort:
1808 ret
1809 restore
1810
1811 .align 16
1812 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1813 ! and ~3x deterioration
1814 ! in inp==out case
1815 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
1816 .word 0x8db00902 !faligndata %f0,%f2,%f6
1817 .word 0x91b08902 !faligndata %f2,%f2,%f8
1818
1819 stda %f4, [%i1 + %l3]0xc0 ! partial store
1820 std %f6, [%i1 + 8]
1821 add %i1, 16, %i1
1822 orn %g0, %l3, %l3
1823 stda %f8, [%i1 + %l3]0xc0 ! partial store
1824
1825 brnz,pt %i2, .L128_cbc_dec_loop2x+4
1826 orn %g0, %l3, %l3
1827 st %f12, [%i4 + 0]
1828 st %f13, [%i4 + 4]
1829 st %f14, [%i4 + 8]
1830 st %f15, [%i4 + 12]
1831 ret
1832 restore
1833
1834 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1835 .align 32
! Main loop: two blocks per iteration in %f0:%f2 and %f4:%f6.
1836 .L128_cbc_dec_loop2x:
1837 ldx [%i0 + 0], %o0
1838 ldx [%i0 + 8], %o1
1839 ldx [%i0 + 16], %o2
1840 brz,pt %l0, 4f
1841 ldx [%i0 + 24], %o3
1842
! Unaligned input: realign five doublewords into two 16-byte blocks.
1843 ldx [%i0 + 32], %o4
1844 sllx %o0, %l0, %o0
1845 srlx %o1, %l1, %g1
1846 or %g1, %o0, %o0
1847 sllx %o1, %l0, %o1
1848 srlx %o2, %l1, %g1
1849 or %g1, %o1, %o1
1850 sllx %o2, %l0, %o2
1851 srlx %o3, %l1, %g1
1852 or %g1, %o2, %o2
1853 sllx %o3, %l0, %o3
1854 srlx %o4, %l1, %o4
1855 or %o4, %o3, %o3
1856 4:
! Raw ciphertext stays in %o0-%o3: %o0:%o1 XORs the second plaintext
! block, %o2:%o3 becomes the next IV after the call.
1857 xor %g4, %o0, %o4 ! ^= rk[0]
1858 xor %g5, %o1, %o5
1859 .word 0x81b0230c !movxtod %o4,%f0
1860 .word 0x85b0230d !movxtod %o5,%f2
1861 xor %g4, %o2, %o4
1862 xor %g5, %o3, %o5
1863 .word 0x89b0230c !movxtod %o4,%f4
1864 .word 0x8db0230d !movxtod %o5,%f6
1865
1866 prefetch [%i1 + 63], 22
1867 prefetch [%i0 + 32+63], 20
1868 call _aes128_decrypt_2x
1869 add %i0, 32, %i0
1870
1871 .word 0x91b02308 !movxtod %o0,%f8
1872 .word 0x95b02309 !movxtod %o1,%f10
1873 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1874 .word 0x85b38d82 !fxor %f14,%f2,%f2
1875 .word 0x99b0230a !movxtod %o2,%f12
1876 .word 0x9db0230b !movxtod %o3,%f14
1877 .word 0x89b20d84 !fxor %f8,%f4,%f4
1878 .word 0x8db28d86 !fxor %f10,%f6,%f6
1879
1880 brnz,pn %l2, 2f
1881 sub %i2, 2, %i2
1882
1883 std %f0, [%i1 + 0]
1884 std %f2, [%i1 + 8]
1885 std %f4, [%i1 + 16]
1886 std %f6, [%i1 + 24]
1887 brnz,pt %i2, .L128_cbc_dec_loop2x
1888 add %i1, 32, %i1
1889 st %f12, [%i4 + 0]
1890 st %f13, [%i4 + 4]
1891 st %f14, [%i4 + 8]
1892 st %f15, [%i4 + 12]
1893 ret
1894 restore
1895
1896 .align 16
1897 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
1898 ! and ~3x deterioration
1899 ! in inp==out case
1900 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
1901 .word 0x81b00902 !faligndata %f0,%f2,%f0
1902 .word 0x85b08904 !faligndata %f2,%f4,%f2
1903 .word 0x89b10906 !faligndata %f4,%f6,%f4
1904 .word 0x8db18906 !faligndata %f6,%f6,%f6
1905 stda %f8, [%i1 + %l3]0xc0 ! partial store
1906 std %f0, [%i1 + 8]
1907 std %f2, [%i1 + 16]
1908 std %f4, [%i1 + 24]
1909 add %i1, 32, %i1
1910 orn %g0, %l3, %l3
1911 stda %f6, [%i1 + %l3]0xc0 ! partial store
1912
1913 brnz,pt %i2, .L128_cbc_dec_loop2x+4
1914 orn %g0, %l3, %l3
1915 st %f12, [%i4 + 0]
1916 st %f13, [%i4 + 4]
1917 st %f14, [%i4 + 8]
1918 st %f15, [%i4 + 12]
1919 ret
1920 restore
1921
1922 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1923 .align 32
! Contiguous path using ASI_BLK_INIT stores: split the output into a
! 64-byte-aligned body (%i2 blocks) and a tail (%l5 blocks) finished
! by the regular loops after a memory barrier.
1924 .L128cbc_dec_blk:
1925 add %i1, %i2, %l5
1926 and %l5, 63, %l5 ! tail
1927 sub %i2, %l5, %i2
1928 add %l5, 15, %l5 ! round up to 16n
1929 srlx %i2, 4, %i2
1930 srl %l5, 4, %l5
1931 sub %i2, 1, %i2
1932 add %l5, 1, %l5
1933
1934 .L128_cbc_dec_blk_loop2x:
1935 ldx [%i0 + 0], %o0
1936 ldx [%i0 + 8], %o1
1937 ldx [%i0 + 16], %o2
1938 brz,pt %l0, 5f
1939 ldx [%i0 + 24], %o3
1940
1941 ldx [%i0 + 32], %o4
1942 sllx %o0, %l0, %o0
1943 srlx %o1, %l1, %g1
1944 or %g1, %o0, %o0
1945 sllx %o1, %l0, %o1
1946 srlx %o2, %l1, %g1
1947 or %g1, %o1, %o1
1948 sllx %o2, %l0, %o2
1949 srlx %o3, %l1, %g1
1950 or %g1, %o2, %o2
1951 sllx %o3, %l0, %o3
1952 srlx %o4, %l1, %o4
1953 or %o4, %o3, %o3
1954 5:
1955 xor %g4, %o0, %o4 ! ^= rk[0]
1956 xor %g5, %o1, %o5
1957 .word 0x81b0230c !movxtod %o4,%f0
1958 .word 0x85b0230d !movxtod %o5,%f2
1959 xor %g4, %o2, %o4
1960 xor %g5, %o3, %o5
1961 .word 0x89b0230c !movxtod %o4,%f4
1962 .word 0x8db0230d !movxtod %o5,%f6
1963
1964 prefetch [%i0 + 32+63], 20
1965 call _aes128_decrypt_2x
1966 add %i0, 32, %i0
1967 subcc %i2, 2, %i2
1968
1969 .word 0x91b02308 !movxtod %o0,%f8
1970 .word 0x95b02309 !movxtod %o1,%f10
1971 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
1972 .word 0x85b38d82 !fxor %f14,%f2,%f2
1973 .word 0x99b0230a !movxtod %o2,%f12
1974 .word 0x9db0230b !movxtod %o3,%f14
1975 .word 0x89b20d84 !fxor %f8,%f4,%f4
1976 .word 0x8db28d86 !fxor %f10,%f6,%f6
1977
1978 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1979 add %i1, 8, %i1
1980 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1981 add %i1, 8, %i1
1982 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1983 add %i1, 8, %i1
1984 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
1985 bgu,pt %xcc, .L128_cbc_dec_blk_loop2x
1986 add %i1, 8, %i1
1987
! Body done; fence the ASI_BLK_INIT stores, finish the tail blocks
! through the regular 1x/2x loops, then write back the IV.
1988 add %l5, %i2, %i2
1989 andcc %i2, 1, %g0 ! is number of blocks even?
1990 membar #StoreLoad|#StoreStore
1991 bnz,pt %icc, .L128_cbc_dec_loop
1992 srl %i2, 0, %i2
1993 brnz,pn %i2, .L128_cbc_dec_loop2x
1994 nop
1995 st %f12, [%i4 + 0] ! write out ivec
1996 st %f13, [%i4 + 4]
1997 st %f14, [%i4 + 8]
1998 st %f15, [%i4 + 12]
1999 ret
2000 restore
2001 .type aes128_t4_cbc_decrypt,#function
2002 .size aes128_t4_cbc_decrypt,.-aes128_t4_cbc_decrypt
! ----------------------------------------------------------------------
! _aes128_decrypt_1x -- decrypt one AES-128 block held in %f0:%f2.
! Round keys are expected preloaded in %f16-%f54 (rk[0] is applied by
! the caller before entry); ten rounds as five dround01/dround23 pairs
! per two key pairs, with the final pair using the "_l" (last-round)
! forms.  Result is left in %f0:%f2; %f4 is used as scratch.
! Leaf routine: returns via retl with the last round in the delay slot.
! ----------------------------------------------------------------------
2003 .align 32
2004 _aes128_decrypt_1x:
2005 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
2006 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
2007 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
2008 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
2009 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
2010 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
2011 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
2012 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
2013 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
2014 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
2015 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
2016 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
2017 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
2018 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
2019 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
2020 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
2021 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
2022 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
2023 .word 0x80cd44c4 !aes_dround01_l %f52,%f4,%f2,%f0
2024 retl
2025 .word 0x84cdc4e4 !aes_dround23_l %f54,%f4,%f2,%f2
2026 .type _aes128_decrypt_1x,#function
2027 .size _aes128_decrypt_1x,.-_aes128_decrypt_1x
2028
! ----------------------------------------------------------------------
! _aes128_decrypt_2x -- decrypt two AES-128 blocks in parallel: block A
! in %f0:%f2, block B in %f4:%f6.  Round keys preloaded in %f16-%f54
! (rk[0] applied by the caller); the two streams are interleaved to
! hide the AES-instruction latency.  Results in %f0:%f2 and %f4:%f6;
! %f8/%f10 are scratch.  Leaf routine (retl, last round in delay slot).
! ----------------------------------------------------------------------
2029 .align 32
2030 _aes128_decrypt_2x:
2031 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
2032 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
2033 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
2034 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
2035 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
2036 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
2037 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
2038 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
2039 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
2040 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
2041 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
2042 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
2043 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
2044 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
2045 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
2046 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
2047 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
2048 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
2049 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
2050 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
2051 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
2052 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
2053 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
2054 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
2055 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
2056 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
2057 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
2058 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
2059 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
2060 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
2061 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
2062 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
2063 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
2064 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
2065 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
2066 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
2067 .word 0x80cd44c8 !aes_dround01_l %f52,%f8,%f2,%f0
2068 .word 0x84cdc4e8 !aes_dround23_l %f54,%f8,%f2,%f2
2069 .word 0x88cd4cca !aes_dround01_l %f52,%f10,%f6,%f4
2070 retl
2071 .word 0x8ccdccea !aes_dround23_l %f54,%f10,%f6,%f6
2072 .type _aes128_decrypt_2x,#function
2073 .size _aes128_decrypt_2x,.-_aes128_decrypt_2x
! ----------------------------------------------------------------------
! _aes192_encrypt_1x -- encrypt one AES-192 block held in %f0:%f2.
! Twelve rounds using round keys preloaded in %f16-%f62 (rk[0] applied
! by the caller); the final pair uses the "_l" (last-round) forms.
! Result in %f0:%f2; %f4 is scratch.  Leaf routine (retl, last round
! in the delay slot).
! ----------------------------------------------------------------------
2074 .align 32
2075 _aes192_encrypt_1x:
2076 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2077 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2078 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
2079 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
2080 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
2081 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2082 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
2083 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
2084 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
2085 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2086 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
2087 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
2088 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
2089 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2090 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
2091 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
2092 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
2093 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2094 .word 0x80cd4404 !aes_eround01 %f52,%f4,%f2,%f0
2095 .word 0x84cdc424 !aes_eround23 %f54,%f4,%f2,%f2
2096 .word 0x88ce4400 !aes_eround01 %f56,%f0,%f2,%f4
2097 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2098 .word 0x80cf4484 !aes_eround01_l %f60,%f4,%f2,%f0
2099 retl
2100 .word 0x84cfc4a4 !aes_eround23_l %f62,%f4,%f2,%f2
2101 .type _aes192_encrypt_1x,#function
2102 .size _aes192_encrypt_1x,.-_aes192_encrypt_1x
2103
! ----------------------------------------------------------------------
! _aes192_encrypt_2x -- encrypt two AES-192 blocks in parallel: block A
! in %f0:%f2, block B in %f4:%f6.  Twelve rounds with keys preloaded in
! %f16-%f62 (rk[0] applied by the caller); the two streams are
! interleaved to hide instruction latency.  Results in %f0:%f2 and
! %f4:%f6; %f8/%f10 are scratch.  Leaf routine (retl).
! ----------------------------------------------------------------------
2104 .align 32
2105 _aes192_encrypt_2x:
2106 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2107 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2108 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2109 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
2110 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
2111 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
2112 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
2113 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
2114 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
2115 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2116 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
2117 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
2118 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
2119 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
2120 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
2121 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
2122 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
2123 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2124 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
2125 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
2126 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
2127 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
2128 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
2129 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
2130 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
2131 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2132 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
2133 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
2134 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
2135 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
2136 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
2137 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
2138 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
2139 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2140 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
2141 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
2142 .word 0x80cd4408 !aes_eround01 %f52,%f8,%f2,%f0
2143 .word 0x84cdc428 !aes_eround23 %f54,%f8,%f2,%f2
2144 .word 0x88cd4c0a !aes_eround01 %f52,%f10,%f6,%f4
2145 .word 0x8ccdcc2a !aes_eround23 %f54,%f10,%f6,%f6
2146 .word 0x90ce4400 !aes_eround01 %f56,%f0,%f2,%f8
2147 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2148 .word 0x94ce4c04 !aes_eround01 %f56,%f4,%f6,%f10
2149 .word 0x8ccecc24 !aes_eround23 %f58,%f4,%f6,%f6
2150 .word 0x80cf4488 !aes_eround01_l %f60,%f8,%f2,%f0
2151 .word 0x84cfc4a8 !aes_eround23_l %f62,%f8,%f2,%f2
2152 .word 0x88cf4c8a !aes_eround01_l %f60,%f10,%f6,%f4
2153 retl
2154 .word 0x8ccfccaa !aes_eround23_l %f62,%f10,%f6,%f6
2155 .type _aes192_encrypt_2x,#function
2156 .size _aes192_encrypt_2x,.-_aes192_encrypt_2x
2157
! ----------------------------------------------------------------------
! _aes256_encrypt_1x -- encrypt one AES-256 block held in %f0:%f2.
! Fourteen rounds: the schedule does not fit in the FP register file,
! so after consuming %f16-%f22 those registers are reloaded with the
! upper round keys from the schedule at [%i3 + 208..232], and restored
! to the lower keys from [%i3 + 16..40] on the way out so the register
! state matches what the caller's loop expects.  rk[0] is applied by
! the caller.  Result in %f0:%f2; %f4 is scratch.  Leaf routine (retl).
! ----------------------------------------------------------------------
2158 .align 32
2159 _aes256_encrypt_1x:
2160 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2161 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
! %f16-%f22 are free now: pull in round keys 13/14 for the tail.
2162 ldd [%i3 + 208], %f16
2163 ldd [%i3 + 216], %f18
2164 .word 0x80cd0404 !aes_eround01 %f20,%f4,%f2,%f0
2165 .word 0x84cd8424 !aes_eround23 %f22,%f4,%f2,%f2
2166 ldd [%i3 + 224], %f20
2167 ldd [%i3 + 232], %f22
2168 .word 0x88ce0400 !aes_eround01 %f24,%f0,%f2,%f4
2169 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2170 .word 0x80cf0404 !aes_eround01 %f28,%f4,%f2,%f0
2171 .word 0x84cf8424 !aes_eround23 %f30,%f4,%f2,%f2
2172 .word 0x88c84400 !aes_eround01 %f32,%f0,%f2,%f4
2173 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2174 .word 0x80c94404 !aes_eround01 %f36,%f4,%f2,%f0
2175 .word 0x84c9c424 !aes_eround23 %f38,%f4,%f2,%f2
2176 .word 0x88ca4400 !aes_eround01 %f40,%f0,%f2,%f4
2177 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2178 .word 0x80cb4404 !aes_eround01 %f44,%f4,%f2,%f0
2179 .word 0x84cbc424 !aes_eround23 %f46,%f4,%f2,%f2
2180 .word 0x88cc4400 !aes_eround01 %f48,%f0,%f2,%f4
2181 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2182 .word 0x80cd4404 !aes_eround01 %f52,%f4,%f2,%f0
2183 .word 0x84cdc424 !aes_eround23 %f54,%f4,%f2,%f2
2184 .word 0x88ce4400 !aes_eround01 %f56,%f0,%f2,%f4
2185 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2186 .word 0x80cf4404 !aes_eround01 %f60,%f4,%f2,%f0
2187 .word 0x84cfc424 !aes_eround23 %f62,%f4,%f2,%f2
! Final two rounds with the reloaded keys; then restore %f16-%f22 to
! the lower round keys before returning to the caller's loop.
2188 .word 0x88cc0400 !aes_eround01 %f16,%f0,%f2,%f4
2189 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2190 ldd [%i3 + 16], %f16
2191 ldd [%i3 + 24], %f18
2192 .word 0x80cd0484 !aes_eround01_l %f20,%f4,%f2,%f0
2193 .word 0x84cd84a4 !aes_eround23_l %f22,%f4,%f2,%f2
2194 ldd [%i3 + 32], %f20
2195 retl
2196 ldd [%i3 + 40], %f22
2197 .type _aes256_encrypt_1x,#function
2198 .size _aes256_encrypt_1x,.-_aes256_encrypt_1x
2199
!----------------------------------------------------------------------
! _aes256_encrypt_2x: encrypt TWO independent 16-byte blocks with
! AES-256 in an interleaved schedule (hides instruction latency).
! In:    %f0:%f2  = block 0, pre-XORed with round key 0 by the caller
!        %f4:%f6  = block 1, pre-XORed with round key 0 by the caller
!        %f16-%f62 = round keys rk[1..12] preloaded by _aes256_loadkey
!        %i3      = key schedule pointer (rk[13..14] fetched here)
! Out:   %f0:%f2 and %f4:%f6 = the two ciphertext blocks
! Clobbers %f8,%f10 as round scratch.  Leaf routine (retl); reloads
! %f16-%f22 from the schedule before return so the register image is
! restored for the next call.  Some CTR/XTS callers enter at +16 after
! issuing the first round pair themselves.
!----------------------------------------------------------------------
2200 .align 32
2201 _aes256_encrypt_2x:
2202 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2203 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2204 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2205 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
! %f16-%f22 just consumed; recycle them for rk[13..14]
2206 ldd [%i3 + 208], %f16
2207 ldd [%i3 + 216], %f18
2208 .word 0x80cd0408 !aes_eround01 %f20,%f8,%f2,%f0
2209 .word 0x84cd8428 !aes_eround23 %f22,%f8,%f2,%f2
2210 .word 0x88cd0c0a !aes_eround01 %f20,%f10,%f6,%f4
2211 .word 0x8ccd8c2a !aes_eround23 %f22,%f10,%f6,%f6
2212 ldd [%i3 + 224], %f20
2213 ldd [%i3 + 232], %f22
2214 .word 0x90ce0400 !aes_eround01 %f24,%f0,%f2,%f8
2215 .word 0x84ce8420 !aes_eround23 %f26,%f0,%f2,%f2
2216 .word 0x94ce0c04 !aes_eround01 %f24,%f4,%f6,%f10
2217 .word 0x8cce8c24 !aes_eround23 %f26,%f4,%f6,%f6
2218 .word 0x80cf0408 !aes_eround01 %f28,%f8,%f2,%f0
2219 .word 0x84cf8428 !aes_eround23 %f30,%f8,%f2,%f2
2220 .word 0x88cf0c0a !aes_eround01 %f28,%f10,%f6,%f4
2221 .word 0x8ccf8c2a !aes_eround23 %f30,%f10,%f6,%f6
2222 .word 0x90c84400 !aes_eround01 %f32,%f0,%f2,%f8
2223 .word 0x84c8c420 !aes_eround23 %f34,%f0,%f2,%f2
2224 .word 0x94c84c04 !aes_eround01 %f32,%f4,%f6,%f10
2225 .word 0x8cc8cc24 !aes_eround23 %f34,%f4,%f6,%f6
2226 .word 0x80c94408 !aes_eround01 %f36,%f8,%f2,%f0
2227 .word 0x84c9c428 !aes_eround23 %f38,%f8,%f2,%f2
2228 .word 0x88c94c0a !aes_eround01 %f36,%f10,%f6,%f4
2229 .word 0x8cc9cc2a !aes_eround23 %f38,%f10,%f6,%f6
2230 .word 0x90ca4400 !aes_eround01 %f40,%f0,%f2,%f8
2231 .word 0x84cac420 !aes_eround23 %f42,%f0,%f2,%f2
2232 .word 0x94ca4c04 !aes_eround01 %f40,%f4,%f6,%f10
2233 .word 0x8ccacc24 !aes_eround23 %f42,%f4,%f6,%f6
2234 .word 0x80cb4408 !aes_eround01 %f44,%f8,%f2,%f0
2235 .word 0x84cbc428 !aes_eround23 %f46,%f8,%f2,%f2
2236 .word 0x88cb4c0a !aes_eround01 %f44,%f10,%f6,%f4
2237 .word 0x8ccbcc2a !aes_eround23 %f46,%f10,%f6,%f6
2238 .word 0x90cc4400 !aes_eround01 %f48,%f0,%f2,%f8
2239 .word 0x84ccc420 !aes_eround23 %f50,%f0,%f2,%f2
2240 .word 0x94cc4c04 !aes_eround01 %f48,%f4,%f6,%f10
2241 .word 0x8ccccc24 !aes_eround23 %f50,%f4,%f6,%f6
2242 .word 0x80cd4408 !aes_eround01 %f52,%f8,%f2,%f0
2243 .word 0x84cdc428 !aes_eround23 %f54,%f8,%f2,%f2
2244 .word 0x88cd4c0a !aes_eround01 %f52,%f10,%f6,%f4
2245 .word 0x8ccdcc2a !aes_eround23 %f54,%f10,%f6,%f6
2246 .word 0x90ce4400 !aes_eround01 %f56,%f0,%f2,%f8
2247 .word 0x84cec420 !aes_eround23 %f58,%f0,%f2,%f2
2248 .word 0x94ce4c04 !aes_eround01 %f56,%f4,%f6,%f10
2249 .word 0x8ccecc24 !aes_eround23 %f58,%f4,%f6,%f6
2250 .word 0x80cf4408 !aes_eround01 %f60,%f8,%f2,%f0
2251 .word 0x84cfc428 !aes_eround23 %f62,%f8,%f2,%f2
2252 .word 0x88cf4c0a !aes_eround01 %f60,%f10,%f6,%f4
2253 .word 0x8ccfcc2a !aes_eround23 %f62,%f10,%f6,%f6
! rounds 13-14: %f16-%f22 hold rk[13..14]; _l flavour = final round
2254 .word 0x90cc0400 !aes_eround01 %f16,%f0,%f2,%f8
2255 .word 0x84cc8420 !aes_eround23 %f18,%f0,%f2,%f2
2256 .word 0x94cc0c04 !aes_eround01 %f16,%f4,%f6,%f10
2257 .word 0x8ccc8c24 !aes_eround23 %f18,%f4,%f6,%f6
! restore %f16-%f22 to rk[1..2] for the next invocation
2258 ldd [%i3 + 16], %f16
2259 ldd [%i3 + 24], %f18
2260 .word 0x80cd0488 !aes_eround01_l %f20,%f8,%f2,%f0
2261 .word 0x84cd84a8 !aes_eround23_l %f22,%f8,%f2,%f2
2262 .word 0x88cd0c8a !aes_eround01_l %f20,%f10,%f6,%f4
2263 .word 0x8ccd8caa !aes_eround23_l %f22,%f10,%f6,%f6
2264 ldd [%i3 + 32], %f20
2265 retl
2266 ldd [%i3 + 40], %f22
2267 .type _aes256_encrypt_2x,#function
2268 .size _aes256_encrypt_2x,.-_aes256_encrypt_2x
2269
!----------------------------------------------------------------------
! _aes192_loadkey: preload an expanded AES key schedule into registers.
! In:    %i3 = pointer to the expanded key schedule
! Out:   %g4:%g5   = round key 0 (as two 64-bit halves)
!        %f16-%f62 = subsequent round keys, 16 bytes per register pair
! Loads 192 bytes of schedule beyond rk[0]; the AES-256 round routines
! fetch their two extra round keys (offsets 208..232) themselves, which
! is why one loader serves both key sizes (see aliases below).
! Leaf routine; clobbers nothing else.
!----------------------------------------------------------------------
2270 .align 32
2271 _aes192_loadkey:
2272 ldx [%i3 + 0], %g4
2273 ldx [%i3 + 8], %g5
2274 ldd [%i3 + 16], %f16
2275 ldd [%i3 + 24], %f18
2276 ldd [%i3 + 32], %f20
2277 ldd [%i3 + 40], %f22
2278 ldd [%i3 + 48], %f24
2279 ldd [%i3 + 56], %f26
2280 ldd [%i3 + 64], %f28
2281 ldd [%i3 + 72], %f30
2282 ldd [%i3 + 80], %f32
2283 ldd [%i3 + 88], %f34
2284 ldd [%i3 + 96], %f36
2285 ldd [%i3 + 104], %f38
2286 ldd [%i3 + 112], %f40
2287 ldd [%i3 + 120], %f42
2288 ldd [%i3 + 128], %f44
2289 ldd [%i3 + 136], %f46
2290 ldd [%i3 + 144], %f48
2291 ldd [%i3 + 152], %f50
2292 ldd [%i3 + 160], %f52
2293 ldd [%i3 + 168], %f54
2294 ldd [%i3 + 176], %f56
2295 ldd [%i3 + 184], %f58
2296 ldd [%i3 + 192], %f60
2297 ldd [%i3 + 200], %f62
2298 retl
2299 nop
2300 .type _aes192_loadkey,#function
2301 .size _aes192_loadkey,.-_aes192_loadkey
! All load-key entry points share one implementation: enc/dec and
! 192/256 differ only in how the round routines consume the registers.
2302 _aes256_loadkey=_aes192_loadkey
2303 _aes192_load_enckey=_aes192_loadkey
2304 _aes192_load_deckey=_aes192_loadkey
2305 _aes256_load_enckey=_aes192_loadkey
2306 _aes256_load_deckey=_aes192_loadkey
!----------------------------------------------------------------------
! aes256_t4_cbc_encrypt(const u8 *inp %i0, u8 *out %i1, size_t len %i2,
!                       const AES_KEY *key %i3, u8 ivec[16] %i4)
! AES-256-CBC encryption using the T4 AES opcodes.
! - IV is read from and the final chaining value written back to [%i4].
! - Unaligned input is handled by 8-byte loads plus shifting (%l0 =
!   input misalignment in bits, %l1 = 64-%l0).
! - Unaligned output is handled with faligndata + partial stores
!   (ASI 0xc0, byte-mask in %l3).
! - A fast path (.L256cbc_enc_blk) uses ASI_BLK_INIT stores when the
!   output is 8-byte aligned, len >= 128 and inp != out.
! NOTE(review): %i2 appears to be a byte count here (compared against
! 127, then srlx 4 to blocks) — confirm against caller.
!----------------------------------------------------------------------
2307 .globl aes256_t4_cbc_encrypt
2308 .align 32
2309 aes256_t4_cbc_encrypt:
2310 save %sp, -192, %sp
2311 cmp %i2, 0
2312 be,pn %xcc, .L256_cbc_enc_abort
2313 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2314 sub %i0, %i1, %l5 ! %i0!=%i1
! load IV into %f0:%f2 one word at a time (no alignment assumption)
2315 ld [%i4 + 0], %f0
2316 ld [%i4 + 4], %f1
2317 ld [%i4 + 8], %f2
2318 ld [%i4 + 12], %f3
2319 prefetch [%i0], 20
2320 prefetch [%i0 + 63], 20
2321 call _aes256_load_enckey
2322 and %i0, 7, %l0
! compute alignment parameters: %l0 = inp shift (bits), %l1 = 64-%l0,
! %l2 = out misalignment (bytes), %l3 = partial-store byte mask
2323 andn %i0, 7, %i0
2324 sll %l0, 3, %l0
2325 mov 64, %l1
2326 mov 0xff, %l3
2327 sub %l1, %l0, %l1
2328 and %i1, 7, %l2
2329 cmp %i2, 127
2330 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2331 movleu %xcc, 0, %l5 ! %i2<128 ||
2332 brnz,pn %l5, .L256cbc_enc_blk ! %i0==%i1)
2333 srl %l3, %l2, %l3
2334
2335 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2336 srlx %i2, 4, %i2
2337 prefetch [%i1], 22
2338
! main loop: one block per iteration, CBC chaining kept in %f0:%f2
2339 .L256_cbc_enc_loop:
2340 ldx [%i0 + 0], %o0
2341 brz,pt %l0, 4f
2342 ldx [%i0 + 8], %o1
2343
! input unaligned: reassemble a 16-byte block from three 8-byte words
2344 ldx [%i0 + 16], %o2
2345 sllx %o0, %l0, %o0
2346 srlx %o1, %l1, %g1
2347 sllx %o1, %l0, %o1
2348 or %g1, %o0, %o0
2349 srlx %o2, %l1, %o2
2350 or %o2, %o1, %o1
2351 4:
2352 xor %g4, %o0, %o0 ! ^= rk[0]
2353 xor %g5, %o1, %o1
2354 .word 0x99b02308 !movxtod %o0,%f12
2355 .word 0x9db02309 !movxtod %o1,%f14
2356
2357 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2358 .word 0x85b38d82 !fxor %f14,%f2,%f2
2359 prefetch [%i1 + 63], 22
2360 prefetch [%i0 + 16+63], 20
2361 call _aes256_encrypt_1x
2362 add %i0, 16, %i0
2363
2364 brnz,pn %l2, 2f
2365 sub %i2, 1, %i2
2366
2367 std %f0, [%i1 + 0]
2368 std %f2, [%i1 + 8]
2369 brnz,pt %i2, .L256_cbc_enc_loop
2370 add %i1, 16, %i1
! done: write final chaining value back as next IV
2371 st %f0, [%i4 + 0]
2372 st %f1, [%i4 + 4]
2373 st %f2, [%i4 + 8]
2374 st %f3, [%i4 + 12]
2375 .L256_cbc_enc_abort:
2376 ret
2377 restore
2378
! unaligned-output store path: shift ciphertext with faligndata and
! write the ragged head/tail bytes via masked partial stores
2379 .align 16
2380 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2381 ! and ~3x deterioration
2382 ! in inp==out case
2383 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2384 .word 0x8db00902 !faligndata %f0,%f2,%f6
2385 .word 0x91b08902 !faligndata %f2,%f2,%f8
2386
2387 stda %f4, [%i1 + %l3]0xc0 ! partial store
2388 std %f6, [%i1 + 8]
2389 add %i1, 16, %i1
2390 orn %g0, %l3, %l3
2391 stda %f8, [%i1 + %l3]0xc0 ! partial store
2392
! re-enter the loop past its first ldx (already done by the ldxa above)
2393 brnz,pt %i2, .L256_cbc_enc_loop+4
2394 orn %g0, %l3, %l3
2395 st %f0, [%i4 + 0]
2396 st %f1, [%i4 + 4]
2397 st %f2, [%i4 + 8]
2398 st %f3, [%i4 + 12]
2399 ret
2400 restore
2401
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned output, uses cache-line-initializing block stores;
! the last sub-64-byte tail is redone through the ordinary loop
2402 .align 32
2403 .L256cbc_enc_blk:
2404 add %i1, %i2, %l5
2405 and %l5, 63, %l5 ! tail
2406 sub %i2, %l5, %i2
2407 add %l5, 15, %l5 ! round up to 16n
2408 srlx %i2, 4, %i2
2409 srl %l5, 4, %l5
2410
2411 .L256_cbc_enc_blk_loop:
2412 ldx [%i0 + 0], %o0
2413 brz,pt %l0, 5f
2414 ldx [%i0 + 8], %o1
2415
2416 ldx [%i0 + 16], %o2
2417 sllx %o0, %l0, %o0
2418 srlx %o1, %l1, %g1
2419 sllx %o1, %l0, %o1
2420 or %g1, %o0, %o0
2421 srlx %o2, %l1, %o2
2422 or %o2, %o1, %o1
2423 5:
2424 xor %g4, %o0, %o0 ! ^= rk[0]
2425 xor %g5, %o1, %o1
2426 .word 0x99b02308 !movxtod %o0,%f12
2427 .word 0x9db02309 !movxtod %o1,%f14
2428
2429 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2430 .word 0x85b38d82 !fxor %f14,%f2,%f2
2431 prefetch [%i0 + 16+63], 20
2432 call _aes256_encrypt_1x
2433 add %i0, 16, %i0
2434 sub %i2, 1, %i2
2435
2436 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2437 add %i1, 8, %i1
2438 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2439 brnz,pt %i2, .L256_cbc_enc_blk_loop
2440 add %i1, 8, %i1
2441
! order block-init stores before any further access, then process tail
2442 membar #StoreLoad|#StoreStore
2443 brnz,pt %l5, .L256_cbc_enc_loop
2444 mov %l5, %i2
2445 st %f0, [%i4 + 0]
2446 st %f1, [%i4 + 4]
2447 st %f2, [%i4 + 8]
2448 st %f3, [%i4 + 12]
2449 ret
2450 restore
2451 .type aes256_t4_cbc_encrypt,#function
2452 .size aes256_t4_cbc_encrypt,.-aes256_t4_cbc_encrypt
!----------------------------------------------------------------------
! aes192_t4_cbc_encrypt(const u8 *inp %i0, u8 *out %i1, size_t len %i2,
!                       const AES_KEY *key %i3, u8 ivec[16] %i4)
! AES-192-CBC encryption; structurally identical to
! aes256_t4_cbc_encrypt above, differing only in the key loader and the
! 12-round _aes192_encrypt_1x core.  See that routine for the alignment
! and fast-path strategy (%l0/%l1 = input shift, %l2/%l3 = output
! misalignment and partial-store mask).
!----------------------------------------------------------------------
2454 .globl aes192_t4_cbc_encrypt
2455 .align 32
2456 aes192_t4_cbc_encrypt:
2457 save %sp, -192, %sp
2458 cmp %i2, 0
2459 be,pn %xcc, .L192_cbc_enc_abort
2460 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2461 sub %i0, %i1, %l5 ! %i0!=%i1
! load IV into %f0:%f2 one word at a time (no alignment assumption)
2462 ld [%i4 + 0], %f0
2463 ld [%i4 + 4], %f1
2464 ld [%i4 + 8], %f2
2465 ld [%i4 + 12], %f3
2466 prefetch [%i0], 20
2467 prefetch [%i0 + 63], 20
2468 call _aes192_load_enckey
2469 and %i0, 7, %l0
2470 andn %i0, 7, %i0
2471 sll %l0, 3, %l0
2472 mov 64, %l1
2473 mov 0xff, %l3
2474 sub %l1, %l0, %l1
2475 and %i1, 7, %l2
2476 cmp %i2, 127
2477 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2478 movleu %xcc, 0, %l5 ! %i2<128 ||
2479 brnz,pn %l5, .L192cbc_enc_blk ! %i0==%i1)
2480 srl %l3, %l2, %l3
2481
2482 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2483 srlx %i2, 4, %i2
2484 prefetch [%i1], 22
2485
! main loop: one block per iteration, CBC chaining kept in %f0:%f2
2486 .L192_cbc_enc_loop:
2487 ldx [%i0 + 0], %o0
2488 brz,pt %l0, 4f
2489 ldx [%i0 + 8], %o1
2490
! input unaligned: reassemble a 16-byte block from three 8-byte words
2491 ldx [%i0 + 16], %o2
2492 sllx %o0, %l0, %o0
2493 srlx %o1, %l1, %g1
2494 sllx %o1, %l0, %o1
2495 or %g1, %o0, %o0
2496 srlx %o2, %l1, %o2
2497 or %o2, %o1, %o1
2498 4:
2499 xor %g4, %o0, %o0 ! ^= rk[0]
2500 xor %g5, %o1, %o1
2501 .word 0x99b02308 !movxtod %o0,%f12
2502 .word 0x9db02309 !movxtod %o1,%f14
2503
2504 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2505 .word 0x85b38d82 !fxor %f14,%f2,%f2
2506 prefetch [%i1 + 63], 22
2507 prefetch [%i0 + 16+63], 20
2508 call _aes192_encrypt_1x
2509 add %i0, 16, %i0
2510
2511 brnz,pn %l2, 2f
2512 sub %i2, 1, %i2
2513
2514 std %f0, [%i1 + 0]
2515 std %f2, [%i1 + 8]
2516 brnz,pt %i2, .L192_cbc_enc_loop
2517 add %i1, 16, %i1
! done: write final chaining value back as next IV
2518 st %f0, [%i4 + 0]
2519 st %f1, [%i4 + 4]
2520 st %f2, [%i4 + 8]
2521 st %f3, [%i4 + 12]
2522 .L192_cbc_enc_abort:
2523 ret
2524 restore
2525
! unaligned-output store path (faligndata + masked partial stores)
2526 .align 16
2527 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2528 ! and ~3x deterioration
2529 ! in inp==out case
2530 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2531 .word 0x8db00902 !faligndata %f0,%f2,%f6
2532 .word 0x91b08902 !faligndata %f2,%f2,%f8
2533
2534 stda %f4, [%i1 + %l3]0xc0 ! partial store
2535 std %f6, [%i1 + 8]
2536 add %i1, 16, %i1
2537 orn %g0, %l3, %l3
2538 stda %f8, [%i1 + %l3]0xc0 ! partial store
2539
2540 brnz,pt %i2, .L192_cbc_enc_loop+4
2541 orn %g0, %l3, %l3
2542 st %f0, [%i4 + 0]
2543 st %f1, [%i4 + 4]
2544 st %f2, [%i4 + 8]
2545 st %f3, [%i4 + 12]
2546 ret
2547 restore
2548
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned output via ASI_BLK_INIT stores; tail redone above
2549 .align 32
2550 .L192cbc_enc_blk:
2551 add %i1, %i2, %l5
2552 and %l5, 63, %l5 ! tail
2553 sub %i2, %l5, %i2
2554 add %l5, 15, %l5 ! round up to 16n
2555 srlx %i2, 4, %i2
2556 srl %l5, 4, %l5
2557
2558 .L192_cbc_enc_blk_loop:
2559 ldx [%i0 + 0], %o0
2560 brz,pt %l0, 5f
2561 ldx [%i0 + 8], %o1
2562
2563 ldx [%i0 + 16], %o2
2564 sllx %o0, %l0, %o0
2565 srlx %o1, %l1, %g1
2566 sllx %o1, %l0, %o1
2567 or %g1, %o0, %o0
2568 srlx %o2, %l1, %o2
2569 or %o2, %o1, %o1
2570 5:
2571 xor %g4, %o0, %o0 ! ^= rk[0]
2572 xor %g5, %o1, %o1
2573 .word 0x99b02308 !movxtod %o0,%f12
2574 .word 0x9db02309 !movxtod %o1,%f14
2575
2576 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
2577 .word 0x85b38d82 !fxor %f14,%f2,%f2
2578 prefetch [%i0 + 16+63], 20
2579 call _aes192_encrypt_1x
2580 add %i0, 16, %i0
2581 sub %i2, 1, %i2
2582
2583 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2584 add %i1, 8, %i1
2585 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2586 brnz,pt %i2, .L192_cbc_enc_blk_loop
2587 add %i1, 8, %i1
2588
2589 membar #StoreLoad|#StoreStore
2590 brnz,pt %l5, .L192_cbc_enc_loop
2591 mov %l5, %i2
2592 st %f0, [%i4 + 0]
2593 st %f1, [%i4 + 4]
2594 st %f2, [%i4 + 8]
2595 st %f3, [%i4 + 12]
2596 ret
2597 restore
2598 .type aes192_t4_cbc_encrypt,#function
2599 .size aes192_t4_cbc_encrypt,.-aes192_t4_cbc_encrypt
!----------------------------------------------------------------------
! aes256_t4_ctr32_encrypt(const u8 *inp %i0, u8 *out %i1,
!                         size_t blocks %i2, const AES_KEY *key %i3,
!                         const u8 ivec[16] %i4)
! AES-256 counter mode with a 32-bit big-endian counter in the last
! word of ivec.  %i2 arrives as a BLOCK count (sllx 4 converts to
! bytes for the path-selection heuristics).
! Counter handling: the upper 96 bits of the counter block are folded
! into rk[0] once (%g4 -> %f14, %g5 = middle word ^ rk[0] high), so the
! per-block work is only xor/increment of the low word %l7 (clruw keeps
! the 32-bit wrap).  The first AES round for the constant high half is
! issued inline, then the shared 1x/2x cores are entered at +8/+16.
! Processes two blocks per iteration when possible; same unaligned
! input/output machinery as the CBC routines.  ivec is NOT updated.
!----------------------------------------------------------------------
2601 .globl aes256_t4_ctr32_encrypt
2602 .align 32
2603 aes256_t4_ctr32_encrypt:
2604 save %sp, -192, %sp
2605 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2606
2607 prefetch [%i0], 20
2608 prefetch [%i0 + 63], 20
2609 call _aes256_load_enckey
2610 sllx %i2, 4, %i2
2611
2612 ld [%i4 + 0], %l4 ! counter
2613 ld [%i4 + 4], %l5
2614 ld [%i4 + 8], %l6
2615 ld [%i4 + 12], %l7
2616
! pre-mix the constant 96 counter bits with round key 0
2617 sllx %l4, 32, %o5
2618 or %l5, %o5, %o5
2619 sllx %l6, 32, %g1
2620 xor %o5, %g4, %g4 ! ^= rk[0]
2621 xor %g1, %g5, %g5
2622 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
2623
2624 sub %i0, %i1, %l5 ! %i0!=%i1
2625 and %i0, 7, %l0
2626 andn %i0, 7, %i0
2627 sll %l0, 3, %l0
2628 mov 64, %l1
2629 mov 0xff, %l3
2630 sub %l1, %l0, %l1
2631 and %i1, 7, %l2
2632 cmp %i2, 255
2633 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2634 movleu %xcc, 0, %l5 ! %i2<256 ||
2635 brnz,pn %l5, .L256_ctr32_blk ! %i0==%i1)
2636 srl %l3, %l2, %l3
2637
2638 andcc %i2, 16, %g0 ! is number of blocks even?
2639 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2640 bz %icc, .L256_ctr32_loop2x
2641 srlx %i2, 4, %i2
! single-block loop (used for an odd leading block and the blk tail)
2642 .L256_ctr32_loop:
2643 ldx [%i0 + 0], %o0
2644 brz,pt %l0, 4f
2645 ldx [%i0 + 8], %o1
2646
2647 ldx [%i0 + 16], %o2
2648 sllx %o0, %l0, %o0
2649 srlx %o1, %l1, %g1
2650 sllx %o1, %l0, %o1
2651 or %g1, %o0, %o0
2652 srlx %o2, %l1, %o2
2653 or %o2, %o1, %o1
2654 4:
2655 xor %g5, %l7, %g1 ! ^= rk[0]
2656 add %l7, 1, %l7
2657 .word 0x85b02301 !movxtod %g1,%f2
2658 srl %l7, 0, %l7 ! clruw
2659 prefetch [%i1 + 63], 22
2660 prefetch [%i0 + 16+63], 20
! round 1 issued here for the counter block; enter core past it (+8)
2661 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
2662 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2663 call _aes256_encrypt_1x+8
2664 add %i0, 16, %i0
2665
! XOR keystream with input
2666 .word 0x95b02308 !movxtod %o0,%f10
2667 .word 0x99b02309 !movxtod %o1,%f12
2668 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
2669 .word 0x85b30d82 !fxor %f12,%f2,%f2
2670
2671 brnz,pn %l2, 2f
2672 sub %i2, 1, %i2
2673
2674 std %f0, [%i1 + 0]
2675 std %f2, [%i1 + 8]
2676 brnz,pt %i2, .L256_ctr32_loop2x
2677 add %i1, 16, %i1
2678
2679 ret
2680 restore
2681
! unaligned-output path for the single-block loop
2682 .align 16
2683 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2684 ! and ~3x deterioration
2685 ! in inp==out case
2686 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2687 .word 0x8db00902 !faligndata %f0,%f2,%f6
2688 .word 0x91b08902 !faligndata %f2,%f2,%f8
2689 stda %f4, [%i1 + %l3]0xc0 ! partial store
2690 std %f6, [%i1 + 8]
2691 add %i1, 16, %i1
2692 orn %g0, %l3, %l3
2693 stda %f8, [%i1 + %l3]0xc0 ! partial store
2694
2695 brnz,pt %i2, .L256_ctr32_loop2x+4
2696 orn %g0, %l3, %l3
2697
2698 ret
2699 restore
2700
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! two-blocks-per-iteration loop (main path)
2701 .align 32
2702 .L256_ctr32_loop2x:
2703 ldx [%i0 + 0], %o0
2704 ldx [%i0 + 8], %o1
2705 ldx [%i0 + 16], %o2
2706 brz,pt %l0, 4f
2707 ldx [%i0 + 24], %o3
2708
! input unaligned: reassemble 32 bytes from five 8-byte words
2709 ldx [%i0 + 32], %o4
2710 sllx %o0, %l0, %o0
2711 srlx %o1, %l1, %g1
2712 or %g1, %o0, %o0
2713 sllx %o1, %l0, %o1
2714 srlx %o2, %l1, %g1
2715 or %g1, %o1, %o1
2716 sllx %o2, %l0, %o2
2717 srlx %o3, %l1, %g1
2718 or %g1, %o2, %o2
2719 sllx %o3, %l0, %o3
2720 srlx %o4, %l1, %o4
2721 or %o4, %o3, %o3
2722 4:
! build two consecutive counter blocks (%f2 and %f6 low halves)
2723 xor %g5, %l7, %g1 ! ^= rk[0]
2724 add %l7, 1, %l7
2725 .word 0x85b02301 !movxtod %g1,%f2
2726 srl %l7, 0, %l7 ! clruw
2727 xor %g5, %l7, %g1
2728 add %l7, 1, %l7
2729 .word 0x8db02301 !movxtod %g1,%f6
2730 srl %l7, 0, %l7 ! clruw
2731 prefetch [%i1 + 63], 22
2732 prefetch [%i0 + 32+63], 20
! round 1 for both blocks issued here; enter 2x core past it (+16)
2733 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
2734 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2735 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
2736 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
2737 call _aes256_encrypt_2x+16
2738 add %i0, 32, %i0
2739
! XOR the two keystream blocks with input
2740 .word 0x91b02308 !movxtod %o0,%f8
2741 .word 0x95b02309 !movxtod %o1,%f10
2742 .word 0x99b0230a !movxtod %o2,%f12
2743 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
2744 .word 0x91b0230b !movxtod %o3,%f8
2745 .word 0x85b28d82 !fxor %f10,%f2,%f2
2746 .word 0x89b30d84 !fxor %f12,%f4,%f4
2747 .word 0x8db20d86 !fxor %f8,%f6,%f6
2748
2749 brnz,pn %l2, 2f
2750 sub %i2, 2, %i2
2751
2752 std %f0, [%i1 + 0]
2753 std %f2, [%i1 + 8]
2754 std %f4, [%i1 + 16]
2755 std %f6, [%i1 + 24]
2756 brnz,pt %i2, .L256_ctr32_loop2x
2757 add %i1, 32, %i1
2758
2759 ret
2760 restore
2761
! unaligned-output path for the 2x loop
2762 .align 16
2763 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2764 ! and ~3x deterioration
2765 ! in inp==out case
2766 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
2767 .word 0x81b00902 !faligndata %f0,%f2,%f0
2768 .word 0x85b08904 !faligndata %f2,%f4,%f2
2769 .word 0x89b10906 !faligndata %f4,%f6,%f4
2770 .word 0x8db18906 !faligndata %f6,%f6,%f6
2771
2772 stda %f8, [%i1 + %l3]0xc0 ! partial store
2773 std %f0, [%i1 + 8]
2774 std %f2, [%i1 + 16]
2775 std %f4, [%i1 + 24]
2776 add %i1, 32, %i1
2777 orn %g0, %l3, %l3
2778 stda %f6, [%i1 + %l3]0xc0 ! partial store
2779
2780 brnz,pt %i2, .L256_ctr32_loop2x+4
2781 orn %g0, %l3, %l3
2782
2783 ret
2784 restore
2785
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned output via ASI_BLK_INIT stores, 2 blocks per
! iteration; the sub-64-byte tail falls back to the loops above
2786 .align 32
2787 .L256_ctr32_blk:
2788 add %i1, %i2, %l5
2789 and %l5, 63, %l5 ! tail
2790 sub %i2, %l5, %i2
2791 add %l5, 15, %l5 ! round up to 16n
2792 srlx %i2, 4, %i2
2793 srl %l5, 4, %l5
2794 sub %i2, 1, %i2
2795 add %l5, 1, %l5
2796
2797 .L256_ctr32_blk_loop2x:
2798 ldx [%i0 + 0], %o0
2799 ldx [%i0 + 8], %o1
2800 ldx [%i0 + 16], %o2
2801 brz,pt %l0, 5f
2802 ldx [%i0 + 24], %o3
2803
2804 ldx [%i0 + 32], %o4
2805 sllx %o0, %l0, %o0
2806 srlx %o1, %l1, %g1
2807 or %g1, %o0, %o0
2808 sllx %o1, %l0, %o1
2809 srlx %o2, %l1, %g1
2810 or %g1, %o1, %o1
2811 sllx %o2, %l0, %o2
2812 srlx %o3, %l1, %g1
2813 or %g1, %o2, %o2
2814 sllx %o3, %l0, %o3
2815 srlx %o4, %l1, %o4
2816 or %o4, %o3, %o3
2817 5:
2818 xor %g5, %l7, %g1 ! ^= rk[0]
2819 add %l7, 1, %l7
2820 .word 0x85b02301 !movxtod %g1,%f2
2821 srl %l7, 0, %l7 ! clruw
2822 xor %g5, %l7, %g1
2823 add %l7, 1, %l7
2824 .word 0x8db02301 !movxtod %g1,%f6
2825 srl %l7, 0, %l7 ! clruw
2826 prefetch [%i0 + 32+63], 20
2827 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
2828 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
2829 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
2830 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
2831 call _aes256_encrypt_2x+16
2832 add %i0, 32, %i0
2833 subcc %i2, 2, %i2
2834
2835 .word 0x91b02308 !movxtod %o0,%f8
2836 .word 0x95b02309 !movxtod %o1,%f10
2837 .word 0x99b0230a !movxtod %o2,%f12
2838 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
2839 .word 0x91b0230b !movxtod %o3,%f8
2840 .word 0x85b28d82 !fxor %f10,%f2,%f2
2841 .word 0x89b30d84 !fxor %f12,%f4,%f4
2842 .word 0x8db20d86 !fxor %f8,%f6,%f6
2843
2844 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2845 add %i1, 8, %i1
2846 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2847 add %i1, 8, %i1
2848 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2849 add %i1, 8, %i1
2850 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
2851 bgu,pt %xcc, .L256_ctr32_blk_loop2x
2852 add %i1, 8, %i1
2853
! tail: order the block-init stores, then redo remaining blocks via
! the ordinary single/double loops (odd count handled by 1x loop)
2854 add %l5, %i2, %i2
2855 andcc %i2, 1, %g0 ! is number of blocks even?
2856 membar #StoreLoad|#StoreStore
2857 bnz,pt %icc, .L256_ctr32_loop
2858 srl %i2, 0, %i2
2859 brnz,pn %i2, .L256_ctr32_loop2x
2860 nop
2861
2862 ret
2863 restore
2864 .type aes256_t4_ctr32_encrypt,#function
2865 .size aes256_t4_ctr32_encrypt,.-aes256_t4_ctr32_encrypt
!----------------------------------------------------------------------
! aes256_t4_xts_encrypt(const u8 *inp %i0, u8 *out %i1, size_t len %i2,
!                       const AES_KEY *key1 %i3, const AES_KEY *key2 %i4,
!                       const u8 ivec[16] %i5)
! AES-256-XTS encryption (IEEE P1619) using the T4 AES opcodes.
! NOTE(review): argument roles inferred from this code — %i5 is fed to
! aes_t4_encrypt under key %i4 to produce the initial tweak, and %i3 is
! the data key loaded by _aes256_load_enckey; confirm against caller.
! The tweak lives byte-swapped in %g3:%g2 and is advanced per block by
! the GF(2^128) doubling x*alpha (srax/addcc/addxc with poly 0x87);
! bmask/bshuffle convert it to memory byte order before use.
! After tweak derivation %i5 is reused as the residual byte count
! (len & 15) driving ciphertext stealing (.L256_xts_ensteal).
! Same unaligned input/output machinery and ASI_BLK_INIT bulk path as
! the CBC/CTR routines above.
!----------------------------------------------------------------------
2868 .globl aes256_t4_xts_encrypt
2869 .align 32
2870 aes256_t4_xts_encrypt:
2871 save %sp, -192-16, %sp
2872 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
2873
! tweak = AES-enc(key2, ivec); result goes to a 16-byte stack slot
2874 mov %i5, %o0
2875 add %fp, 2047-16, %o1
2876 call aes_t4_encrypt
2877 mov %i4, %o2
2878
2879 add %fp, 2047-16, %l7
2880 ldxa [%l7]0x88, %g2
2881 add %fp, 2047-8, %l7
2882 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
2883
2884 sethi %hi(0x76543210), %l7
2885 or %l7, %lo(0x76543210), %l7
2886 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
2887
2888 prefetch [%i0], 20
2889 prefetch [%i0 + 63], 20
2890 call _aes256_load_enckey
2891 and %i2, 15, %i5
2892 and %i2, -16, %i2
2893
2894 sub %i0, %i1, %l5 ! %i0!=%i1
2895 and %i0, 7, %l0
2896 andn %i0, 7, %i0
2897 sll %l0, 3, %l0
2898 mov 64, %l1
2899 mov 0xff, %l3
2900 sub %l1, %l0, %l1
2901 and %i1, 7, %l2
2902 cmp %i2, 255
2903 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
2904 movleu %xcc, 0, %l5 ! %i2<256 ||
2905 brnz,pn %l5, .L256_xts_enblk ! %i0==%i1)
2906 srl %l3, %l2, %l3
2907
2908 andcc %i2, 16, %g0 ! is number of blocks even?
2909 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
2910 bz %icc, .L256_xts_enloop2x
2911 srlx %i2, 4, %i2
! single-block loop (odd leading block / bulk tail)
2912 .L256_xts_enloop:
2913 ldx [%i0 + 0], %o0
2914 brz,pt %l0, 4f
2915 ldx [%i0 + 8], %o1
2916
2917 ldx [%i0 + 16], %o2
2918 sllx %o0, %l0, %o0
2919 srlx %o1, %l1, %g1
2920 sllx %o1, %l0, %o1
2921 or %g1, %o0, %o0
2922 srlx %o2, %l1, %o2
2923 or %o2, %o1, %o1
2924 4:
2925 .word 0x99b02302 !movxtod %g2,%f12
2926 .word 0x9db02303 !movxtod %g3,%f14
2927 .word 0x99b3098c !bshuffle %f12,%f12,%f12
2928 .word 0x9db3898e !bshuffle %f14,%f14,%f14
2929
2930 xor %g4, %o0, %o0 ! ^= rk[0]
2931 xor %g5, %o1, %o1
2932 .word 0x81b02308 !movxtod %o0,%f0
2933 .word 0x85b02309 !movxtod %o1,%f2
2934
2935 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
2936 .word 0x85b38d82 !fxor %f14,%f2,%f2
2937
2938 prefetch [%i1 + 63], 22
2939 prefetch [%i0 + 16+63], 20
2940 call _aes256_encrypt_1x
2941 add %i0, 16, %i0
2942
2943 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
2944 .word 0x85b38d82 !fxor %f14,%f2,%f2
2945
! GF(2^128) doubling: tweak *= alpha (feedback polynomial 0x87)
2946 srax %g3, 63, %l7 ! next tweak value
2947 addcc %g2, %g2, %g2
2948 and %l7, 0x87, %l7
2949 .word 0x87b0c223 !addxc %g3,%g3,%g3
2950 xor %l7, %g2, %g2
2951
2952 brnz,pn %l2, 2f
2953 sub %i2, 1, %i2
2954
2955 std %f0, [%i1 + 0]
2956 std %f2, [%i1 + 8]
2957 brnz,pt %i2, .L256_xts_enloop2x
2958 add %i1, 16, %i1
2959
2960 brnz,pn %i5, .L256_xts_ensteal
2961 nop
2962
2963 ret
2964 restore
2965
! unaligned-output path for the single-block loop
2966 .align 16
2967 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
2968 ! and ~3x deterioration
2969 ! in inp==out case
2970 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
2971 .word 0x8db00902 !faligndata %f0,%f2,%f6
2972 .word 0x91b08902 !faligndata %f2,%f2,%f8
2973 stda %f4, [%i1 + %l3]0xc0 ! partial store
2974 std %f6, [%i1 + 8]
2975 add %i1, 16, %i1
2976 orn %g0, %l3, %l3
2977 stda %f8, [%i1 + %l3]0xc0 ! partial store
2978
2979 brnz,pt %i2, .L256_xts_enloop2x+4
2980 orn %g0, %l3, %l3
2981
2982 brnz,pn %i5, .L256_xts_ensteal
2983 nop
2984
2985 ret
2986 restore
2987
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! two-blocks-per-iteration loop (main path)
2988 .align 32
2989 .L256_xts_enloop2x:
2990 ldx [%i0 + 0], %o0
2991 ldx [%i0 + 8], %o1
2992 ldx [%i0 + 16], %o2
2993 brz,pt %l0, 4f
2994 ldx [%i0 + 24], %o3
2995
2996 ldx [%i0 + 32], %o4
2997 sllx %o0, %l0, %o0
2998 srlx %o1, %l1, %g1
2999 or %g1, %o0, %o0
3000 sllx %o1, %l0, %o1
3001 srlx %o2, %l1, %g1
3002 or %g1, %o1, %o1
3003 sllx %o2, %l0, %o2
3004 srlx %o3, %l1, %g1
3005 or %g1, %o2, %o2
3006 sllx %o3, %l0, %o3
3007 srlx %o4, %l1, %o4
3008 or %o4, %o3, %o3
3009 4:
! tweak[0] -> %f12:%f14, then double to get tweak[1] -> %f8:%f10
3010 .word 0x99b02302 !movxtod %g2,%f12
3011 .word 0x9db02303 !movxtod %g3,%f14
3012 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3013 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3014
3015 srax %g3, 63, %l7 ! next tweak value
3016 addcc %g2, %g2, %g2
3017 and %l7, 0x87, %l7
3018 .word 0x87b0c223 !addxc %g3,%g3,%g3
3019 xor %l7, %g2, %g2
3020
3021 .word 0x91b02302 !movxtod %g2,%f8
3022 .word 0x95b02303 !movxtod %g3,%f10
3023 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3024 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3025
3026 xor %g4, %o0, %o0 ! ^= rk[0]
3027 xor %g5, %o1, %o1
3028 xor %g4, %o2, %o2 ! ^= rk[0]
3029 xor %g5, %o3, %o3
3030 .word 0x81b02308 !movxtod %o0,%f0
3031 .word 0x85b02309 !movxtod %o1,%f2
3032 .word 0x89b0230a !movxtod %o2,%f4
3033 .word 0x8db0230b !movxtod %o3,%f6
3034
3035 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3036 .word 0x85b38d82 !fxor %f14,%f2,%f2
3037 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3038 .word 0x8db28d86 !fxor %f10,%f6,%f6
3039
3040 prefetch [%i1 + 63], 22
3041 prefetch [%i0 + 32+63], 20
3042 call _aes256_encrypt_2x
3043 add %i0, 32, %i0
3044
! post-whiten with both tweaks; meanwhile advance to tweak[2]
3045 .word 0x91b02302 !movxtod %g2,%f8
3046 .word 0x95b02303 !movxtod %g3,%f10
3047
3048 srax %g3, 63, %l7 ! next tweak value
3049 addcc %g2, %g2, %g2
3050 and %l7, 0x87, %l7
3051 .word 0x87b0c223 !addxc %g3,%g3,%g3
3052 xor %l7, %g2, %g2
3053
3054 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3055 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3056
3057 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3058 .word 0x85b38d82 !fxor %f14,%f2,%f2
3059 .word 0x89b20d84 !fxor %f8,%f4,%f4
3060 .word 0x8db28d86 !fxor %f10,%f6,%f6
3061
3062 brnz,pn %l2, 2f
3063 sub %i2, 2, %i2
3064
3065 std %f0, [%i1 + 0]
3066 std %f2, [%i1 + 8]
3067 std %f4, [%i1 + 16]
3068 std %f6, [%i1 + 24]
3069 brnz,pt %i2, .L256_xts_enloop2x
3070 add %i1, 32, %i1
3071
! keep last ciphertext block in %f0:%f2 for possible stealing
3072 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3073 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3074 brnz,pn %i5, .L256_xts_ensteal
3075 nop
3076
3077 ret
3078 restore
3079
! unaligned-output path for the 2x loop
3080 .align 16
3081 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3082 ! and ~3x deterioration
3083 ! in inp==out case
3084 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3085 .word 0x95b00902 !faligndata %f0,%f2,%f10
3086 .word 0x99b08904 !faligndata %f2,%f4,%f12
3087 .word 0x9db10906 !faligndata %f4,%f6,%f14
3088 .word 0x81b18906 !faligndata %f6,%f6,%f0
3089
3090 stda %f8, [%i1 + %l3]0xc0 ! partial store
3091 std %f10, [%i1 + 8]
3092 std %f12, [%i1 + 16]
3093 std %f14, [%i1 + 24]
3094 add %i1, 32, %i1
3095 orn %g0, %l3, %l3
3096 stda %f0, [%i1 + %l3]0xc0 ! partial store
3097
3098 brnz,pt %i2, .L256_xts_enloop2x+4
3099 orn %g0, %l3, %l3
3100
3101 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3102 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3103 brnz,pn %i5, .L256_xts_ensteal
3104 nop
3105
3106 ret
3107 restore
3108
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! bulk path: aligned output via ASI_BLK_INIT stores, 2 blocks per
! iteration; tail falls back to the loops above
3109 .align 32
3110 .L256_xts_enblk:
3111 add %i1, %i2, %l5
3112 and %l5, 63, %l5 ! tail
3113 sub %i2, %l5, %i2
3114 add %l5, 15, %l5 ! round up to 16n
3115 srlx %i2, 4, %i2
3116 srl %l5, 4, %l5
3117 sub %i2, 1, %i2
3118 add %l5, 1, %l5
3119
3120 .L256_xts_enblk2x:
3121 ldx [%i0 + 0], %o0
3122 ldx [%i0 + 8], %o1
3123 ldx [%i0 + 16], %o2
3124 brz,pt %l0, 5f
3125 ldx [%i0 + 24], %o3
3126
3127 ldx [%i0 + 32], %o4
3128 sllx %o0, %l0, %o0
3129 srlx %o1, %l1, %g1
3130 or %g1, %o0, %o0
3131 sllx %o1, %l0, %o1
3132 srlx %o2, %l1, %g1
3133 or %g1, %o1, %o1
3134 sllx %o2, %l0, %o2
3135 srlx %o3, %l1, %g1
3136 or %g1, %o2, %o2
3137 sllx %o3, %l0, %o3
3138 srlx %o4, %l1, %o4
3139 or %o4, %o3, %o3
3140 5:
3141 .word 0x99b02302 !movxtod %g2,%f12
3142 .word 0x9db02303 !movxtod %g3,%f14
3143 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3144 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3145
3146 srax %g3, 63, %l7 ! next tweak value
3147 addcc %g2, %g2, %g2
3148 and %l7, 0x87, %l7
3149 .word 0x87b0c223 !addxc %g3,%g3,%g3
3150 xor %l7, %g2, %g2
3151
3152 .word 0x91b02302 !movxtod %g2,%f8
3153 .word 0x95b02303 !movxtod %g3,%f10
3154 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3155 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3156
3157 xor %g4, %o0, %o0 ! ^= rk[0]
3158 xor %g5, %o1, %o1
3159 xor %g4, %o2, %o2 ! ^= rk[0]
3160 xor %g5, %o3, %o3
3161 .word 0x81b02308 !movxtod %o0,%f0
3162 .word 0x85b02309 !movxtod %o1,%f2
3163 .word 0x89b0230a !movxtod %o2,%f4
3164 .word 0x8db0230b !movxtod %o3,%f6
3165
3166 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3167 .word 0x85b38d82 !fxor %f14,%f2,%f2
3168 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3169 .word 0x8db28d86 !fxor %f10,%f6,%f6
3170
3171 prefetch [%i0 + 32+63], 20
3172 call _aes256_encrypt_2x
3173 add %i0, 32, %i0
3174
3175 .word 0x91b02302 !movxtod %g2,%f8
3176 .word 0x95b02303 !movxtod %g3,%f10
3177
3178 srax %g3, 63, %l7 ! next tweak value
3179 addcc %g2, %g2, %g2
3180 and %l7, 0x87, %l7
3181 .word 0x87b0c223 !addxc %g3,%g3,%g3
3182 xor %l7, %g2, %g2
3183
3184 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3185 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3186
3187 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3188 .word 0x85b38d82 !fxor %f14,%f2,%f2
3189 .word 0x89b20d84 !fxor %f8,%f4,%f4
3190 .word 0x8db28d86 !fxor %f10,%f6,%f6
3191
3192 subcc %i2, 2, %i2
3193 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3194 add %i1, 8, %i1
3195 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3196 add %i1, 8, %i1
3197 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3198 add %i1, 8, %i1
3199 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3200 bgu,pt %xcc, .L256_xts_enblk2x
3201 add %i1, 8, %i1
3202
3203 add %l5, %i2, %i2
3204 andcc %i2, 1, %g0 ! is number of blocks even?
3205 membar #StoreLoad|#StoreStore
3206 bnz,pt %icc, .L256_xts_enloop
3207 srl %i2, 0, %i2
3208 brnz,pn %i2, .L256_xts_enloop2x
3209 nop
3210
3211 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3212 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3213 brnz,pn %i5, .L256_xts_ensteal
3214 nop
3215
3216 ret
3217 restore
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! ciphertext stealing: the last full ciphertext block (in %f0:%f2) is
! parked on the stack; its leading %i5 bytes are emitted as the final
! partial block while the corresponding plaintext bytes are swapped in,
! then the patched block is run through the loop one more time
3218 .align 32
3219 .L256_xts_ensteal:
3220 std %f0, [%fp + 2047-16] ! copy of output
3221 std %f2, [%fp + 2047-8]
3222
3223 srl %l0, 3, %l0
3224 add %fp, 2047-16, %l7
3225 add %i0, %l0, %i0 ! original %i0+%i2&-15
3226 add %i1, %l2, %i1 ! original %i1+%i2&-15
3227 mov 0, %l0
3228 nop ! align
3229
! byte-swap loop: out[i] = stolen ciphertext, buf[i] = plaintext tail
3230 .L256_xts_enstealing:
3231 ldub [%i0 + %l0], %o0
3232 ldub [%l7 + %l0], %o1
3233 dec %i5
3234 stb %o0, [%l7 + %l0]
3235 stb %o1, [%i1 + %l0]
3236 brnz %i5, .L256_xts_enstealing
3237 inc %l0
3238
! re-encrypt the patched block in place of the last output block
3239 mov %l7, %i0
3240 sub %i1, 16, %i1
3241 mov 0, %l0
3242 sub %i1, %l2, %i1
3243 ba .L256_xts_enloop ! one more time
3244 mov 1, %i2 ! %i5 is 0
3245 ret
3246 restore
3247 .type aes256_t4_xts_encrypt,#function
3248 .size aes256_t4_xts_encrypt,.-aes256_t4_xts_encrypt
!----------------------------------------------------------------------
! AES-256 XTS decrypt using SPARC T4 AES opcodes (emitted as .word,
! decoded mnemonic in the trailing comment of each such line).
! In (SPARC V9 ABI, register window):
!   %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!   %i3 = AES-256 decryption key schedule (read by _aes256_decrypt_1x/2x),
!   %i4 = tweak-encryption key (key2), %i5 = 16-byte tweak/sector block.
! The initial tweak is formed by encrypting [%i5] with key2 into a
! 16-byte stack slot; it then lives in %g3:%g2 and is advanced per block
! by a GF(2^128) multiply-by-x with reduction polynomial 0x87.
! Three code paths: generic 1x/2x loops (unaligned in/out handled with
! sllx/srlx merging resp. faligndata + partial stores), a block-store
! (ASI_BLK_INIT) path for long aligned buffers, and a ciphertext-
! stealing tail when the length is not a multiple of 16.
!----------------------------------------------------------------------
3252 .globl aes256_t4_xts_decrypt
3253 .align 32
3254 aes256_t4_xts_decrypt:
3255 save %sp, -192-16, %sp
3256 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3257
! Encrypt the sector/tweak block with key2 into %fp+2047-16 (stack).
3258 mov %i5, %o0
3259 add %fp, 2047-16, %o1
3260 call aes_t4_encrypt
3261 mov %i4, %o2
3262
3263 add %fp, 2047-16, %l7
3264 ldxa [%l7]0x88, %g2
3265 add %fp, 2047-8, %l7
3266 ldxa [%l7]0x88, %g3 ! %g3:%g2 is tweak
3267
! Program the GSR mask used by the bshuffle instructions below.
3268 sethi %hi(0x76543210), %l7
3269 or %l7, %lo(0x76543210), %l7
3270 .word 0x81b5c320 !bmask %l7,%g0,%g0 ! byte swap mask
3271
3272 prefetch [%i0], 20
3273 prefetch [%i0 + 63], 20
3274 call _aes256_load_deckey
3275 and %i2, 15, %i5
! %i5 = residual bytes (0 => no stealing); reserve one extra block
! for the ciphertext-stealing tail when %i5 != 0.
3276 and %i2, -16, %i2
3277 mov 0, %l7
3278 movrnz %i5, 16, %l7
3279 sub %i2, %l7, %i2
3280
! %l0/%l1 = input misalignment shift amounts (bits), %l2 = output
! misalignment, %l3 = partial-store byte mask, %l5 = path selector.
3281 sub %i0, %i1, %l5 ! %i0!=%i1
3282 and %i0, 7, %l0
3283 andn %i0, 7, %i0
3284 sll %l0, 3, %l0
3285 mov 64, %l1
3286 mov 0xff, %l3
3287 sub %l1, %l0, %l1
3288 and %i1, 7, %l2
3289 cmp %i2, 255
3290 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3291 movleu %xcc, 0, %l5 ! %i2<256 ||
3292 brnz,pn %l5, .L256_xts_deblk ! %i0==%i1)
3293 srl %l3, %l2, %l3
3294
3295 andcc %i2, 16, %g0 ! is number of blocks even?
3296 brz,pn %i2, .L256_xts_desteal
3297 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3298 bz %icc, .L256_xts_deloop2x
3299 srlx %i2, 4, %i2
! Single-block loop: used for an odd leading block, then falls
! through to the 2x loop.
3300 .L256_xts_deloop:
3301 ldx [%i0 + 0], %o0
3302 brz,pt %l0, 4f
3303 ldx [%i0 + 8], %o1
3304
! Merge 3 doublewords into 2 when input is not 8-byte aligned.
3305 ldx [%i0 + 16], %o2
3306 sllx %o0, %l0, %o0
3307 srlx %o1, %l1, %g1
3308 sllx %o1, %l0, %o1
3309 or %g1, %o0, %o0
3310 srlx %o2, %l1, %o2
3311 or %o2, %o1, %o1
3312 4:
3313 .word 0x99b02302 !movxtod %g2,%f12
3314 .word 0x9db02303 !movxtod %g3,%f14
3315 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3316 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3317
3318 xor %g4, %o0, %o0 ! ^= rk[0]
3319 xor %g5, %o1, %o1
3320 .word 0x81b02308 !movxtod %o0,%f0
3321 .word 0x85b02309 !movxtod %o1,%f2
3322
3323 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3324 .word 0x85b38d82 !fxor %f14,%f2,%f2
3325
3326 prefetch [%i1 + 63], 22
3327 prefetch [%i0 + 16+63], 20
3328 call _aes256_decrypt_1x
3329 add %i0, 16, %i0
3330
3331 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3332 .word 0x85b38d82 !fxor %f14,%f2,%f2
3333
! tweak *= x in GF(2^128), reduction polynomial 0x87.
3334 srax %g3, 63, %l7 ! next tweak value
3335 addcc %g2, %g2, %g2
3336 and %l7, 0x87, %l7
3337 .word 0x87b0c223 !addxc %g3,%g3,%g3
3338 xor %l7, %g2, %g2
3339
3340 brnz,pn %l2, 2f
3341 sub %i2, 1, %i2
3342
3343 std %f0, [%i1 + 0]
3344 std %f2, [%i1 + 8]
3345 brnz,pt %i2, .L256_xts_deloop2x
3346 add %i1, 16, %i1
3347
3348 brnz,pn %i5, .L256_xts_desteal
3349 nop
3350
3351 ret
3352 restore
3353
3354 .align 16
3355 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3356 ! and ~3x deterioration
3357 ! in inp==out case
3358 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
3359 .word 0x8db00902 !faligndata %f0,%f2,%f6
3360 .word 0x91b08902 !faligndata %f2,%f2,%f8
3361 stda %f4, [%i1 + %l3]0xc0 ! partial store
3362 std %f6, [%i1 + 8]
3363 add %i1, 16, %i1
3364 orn %g0, %l3, %l3
3365 stda %f8, [%i1 + %l3]0xc0 ! partial store
3366
3367 brnz,pt %i2, .L256_xts_deloop2x+4
3368 orn %g0, %l3, %l3
3369
3370 brnz,pn %i5, .L256_xts_desteal
3371 nop
3372
3373 ret
3374 restore
3375
3376 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration main loop.
3377 .align 32
3378 .L256_xts_deloop2x:
3379 ldx [%i0 + 0], %o0
3380 ldx [%i0 + 8], %o1
3381 ldx [%i0 + 16], %o2
3382 brz,pt %l0, 4f
3383 ldx [%i0 + 24], %o3
3384
3385 ldx [%i0 + 32], %o4
3386 sllx %o0, %l0, %o0
3387 srlx %o1, %l1, %g1
3388 or %g1, %o0, %o0
3389 sllx %o1, %l0, %o1
3390 srlx %o2, %l1, %g1
3391 or %g1, %o1, %o1
3392 sllx %o2, %l0, %o2
3393 srlx %o3, %l1, %g1
3394 or %g1, %o2, %o2
3395 sllx %o3, %l0, %o3
3396 srlx %o4, %l1, %o4
3397 or %o4, %o3, %o3
3398 4:
! tweak[0] -> %f12:%f14, advance, tweak[1] -> %f8:%f10.
3399 .word 0x99b02302 !movxtod %g2,%f12
3400 .word 0x9db02303 !movxtod %g3,%f14
3401 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3402 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3403
3404 srax %g3, 63, %l7 ! next tweak value
3405 addcc %g2, %g2, %g2
3406 and %l7, 0x87, %l7
3407 .word 0x87b0c223 !addxc %g3,%g3,%g3
3408 xor %l7, %g2, %g2
3409
3410 .word 0x91b02302 !movxtod %g2,%f8
3411 .word 0x95b02303 !movxtod %g3,%f10
3412 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3413 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3414
3415 xor %g4, %o0, %o0 ! ^= rk[0]
3416 xor %g5, %o1, %o1
3417 xor %g4, %o2, %o2 ! ^= rk[0]
3418 xor %g5, %o3, %o3
3419 .word 0x81b02308 !movxtod %o0,%f0
3420 .word 0x85b02309 !movxtod %o1,%f2
3421 .word 0x89b0230a !movxtod %o2,%f4
3422 .word 0x8db0230b !movxtod %o3,%f6
3423
3424 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3425 .word 0x85b38d82 !fxor %f14,%f2,%f2
3426 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3427 .word 0x8db28d86 !fxor %f10,%f6,%f6
3428
3429 prefetch [%i1 + 63], 22
3430 prefetch [%i0 + 32+63], 20
3431 call _aes256_decrypt_2x
3432 add %i0, 32, %i0
3433
! Re-derive tweak[1] (%f8:%f10) and advance the running tweak again.
3434 .word 0x91b02302 !movxtod %g2,%f8
3435 .word 0x95b02303 !movxtod %g3,%f10
3436
3437 srax %g3, 63, %l7 ! next tweak value
3438 addcc %g2, %g2, %g2
3439 and %l7, 0x87, %l7
3440 .word 0x87b0c223 !addxc %g3,%g3,%g3
3441 xor %l7, %g2, %g2
3442
3443 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3444 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3445
3446 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3447 .word 0x85b38d82 !fxor %f14,%f2,%f2
3448 .word 0x89b20d84 !fxor %f8,%f4,%f4
3449 .word 0x8db28d86 !fxor %f10,%f6,%f6
3450
3451 brnz,pn %l2, 2f
3452 sub %i2, 2, %i2
3453
3454 std %f0, [%i1 + 0]
3455 std %f2, [%i1 + 8]
3456 std %f4, [%i1 + 16]
3457 std %f6, [%i1 + 24]
3458 brnz,pt %i2, .L256_xts_deloop2x
3459 add %i1, 32, %i1
3460
! Keep last plaintext block in %f0:%f2 for the stealing tail.
3461 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3462 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3463 brnz,pn %i5, .L256_xts_desteal
3464 nop
3465
3466 ret
3467 restore
3468
3469 .align 16
3470 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3471 ! and ~3x deterioration
3472 ! in inp==out case
3473 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3474 .word 0x95b00902 !faligndata %f0,%f2,%f10
3475 .word 0x99b08904 !faligndata %f2,%f4,%f12
3476 .word 0x9db10906 !faligndata %f4,%f6,%f14
3477 .word 0x81b18906 !faligndata %f6,%f6,%f0
3478
3479 stda %f8, [%i1 + %l3]0xc0 ! partial store
3480 std %f10, [%i1 + 8]
3481 std %f12, [%i1 + 16]
3482 std %f14, [%i1 + 24]
3483 add %i1, 32, %i1
3484 orn %g0, %l3, %l3
3485 stda %f0, [%i1 + %l3]0xc0 ! partial store
3486
3487 brnz,pt %i2, .L256_xts_deloop2x+4
3488 orn %g0, %l3, %l3
3489
3490 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3491 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3492 brnz,pn %i5, .L256_xts_desteal
3493 nop
3494
3495 ret
3496 restore
3497
3498 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: 8-byte-aligned output, >=256 bytes, in!=out; stores use
! ASI_BLK_INIT (0xe2) and need the membar below before normal loads.
3499 .align 32
3500 .L256_xts_deblk:
3501 add %i1, %i2, %l5
3502 and %l5, 63, %l5 ! tail
3503 sub %i2, %l5, %i2
3504 add %l5, 15, %l5 ! round up to 16n
3505 srlx %i2, 4, %i2
3506 srl %l5, 4, %l5
3507 sub %i2, 1, %i2
3508 add %l5, 1, %l5
3509
3510 .L256_xts_deblk2x:
3511 ldx [%i0 + 0], %o0
3512 ldx [%i0 + 8], %o1
3513 ldx [%i0 + 16], %o2
3514 brz,pt %l0, 5f
3515 ldx [%i0 + 24], %o3
3516
3517 ldx [%i0 + 32], %o4
3518 sllx %o0, %l0, %o0
3519 srlx %o1, %l1, %g1
3520 or %g1, %o0, %o0
3521 sllx %o1, %l0, %o1
3522 srlx %o2, %l1, %g1
3523 or %g1, %o1, %o1
3524 sllx %o2, %l0, %o2
3525 srlx %o3, %l1, %g1
3526 or %g1, %o2, %o2
3527 sllx %o3, %l0, %o3
3528 srlx %o4, %l1, %o4
3529 or %o4, %o3, %o3
3530 5:
3531 .word 0x99b02302 !movxtod %g2,%f12
3532 .word 0x9db02303 !movxtod %g3,%f14
3533 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3534 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3535
3536 srax %g3, 63, %l7 ! next tweak value
3537 addcc %g2, %g2, %g2
3538 and %l7, 0x87, %l7
3539 .word 0x87b0c223 !addxc %g3,%g3,%g3
3540 xor %l7, %g2, %g2
3541
3542 .word 0x91b02302 !movxtod %g2,%f8
3543 .word 0x95b02303 !movxtod %g3,%f10
3544 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3545 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3546
3547 xor %g4, %o0, %o0 ! ^= rk[0]
3548 xor %g5, %o1, %o1
3549 xor %g4, %o2, %o2 ! ^= rk[0]
3550 xor %g5, %o3, %o3
3551 .word 0x81b02308 !movxtod %o0,%f0
3552 .word 0x85b02309 !movxtod %o1,%f2
3553 .word 0x89b0230a !movxtod %o2,%f4
3554 .word 0x8db0230b !movxtod %o3,%f6
3555
3556 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3557 .word 0x85b38d82 !fxor %f14,%f2,%f2
3558 .word 0x89b20d84 !fxor %f8,%f4,%f4 ! ^= tweak[0]
3559 .word 0x8db28d86 !fxor %f10,%f6,%f6
3560
3561 prefetch [%i0 + 32+63], 20
3562 call _aes256_decrypt_2x
3563 add %i0, 32, %i0
3564
3565 .word 0x91b02302 !movxtod %g2,%f8
3566 .word 0x95b02303 !movxtod %g3,%f10
3567
3568 srax %g3, 63, %l7 ! next tweak value
3569 addcc %g2, %g2, %g2
3570 and %l7, 0x87, %l7
3571 .word 0x87b0c223 !addxc %g3,%g3,%g3
3572 xor %l7, %g2, %g2
3573
3574 .word 0x91b20988 !bshuffle %f8,%f8,%f8
3575 .word 0x95b2898a !bshuffle %f10,%f10,%f10
3576
3577 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3578 .word 0x85b38d82 !fxor %f14,%f2,%f2
3579 .word 0x89b20d84 !fxor %f8,%f4,%f4
3580 .word 0x8db28d86 !fxor %f10,%f6,%f6
3581
3582 subcc %i2, 2, %i2
3583 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3584 add %i1, 8, %i1
3585 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3586 add %i1, 8, %i1
3587 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3588 add %i1, 8, %i1
3589 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3590 bgu,pt %xcc, .L256_xts_deblk2x
3591 add %i1, 8, %i1
3592
! Finish the tail (%l5 blocks) through the generic loops.
3593 add %l5, %i2, %i2
3594 andcc %i2, 1, %g0 ! is number of blocks even?
3595 membar #StoreLoad|#StoreStore
3596 bnz,pt %icc, .L256_xts_deloop
3597 srl %i2, 0, %i2
3598 brnz,pn %i2, .L256_xts_deloop2x
3599 nop
3600
3601 .word 0x81b00f04 !fsrc2 %f0,%f4,%f0
3602 .word 0x85b00f06 !fsrc2 %f0,%f6,%f2
3603 brnz,pn %i5, .L256_xts_desteal
3604 nop
3605
3606 ret
3607 restore
3608 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Ciphertext stealing: decrypt the last full block with the *second to
! last* tweak (computed into %o2:%o3 below, without disturbing the
! running tweak in %g3:%g2), stash it on the stack, splice the %i5
! residual bytes, then loop back once more for the final block.
3609 .align 32
3610 .L256_xts_desteal:
3611 ldx [%i0 + 0], %o0
3612 brz,pt %l0, 8f
3613 ldx [%i0 + 8], %o1
3614
3615 ldx [%i0 + 16], %o2
3616 sllx %o0, %l0, %o0
3617 srlx %o1, %l1, %g1
3618 sllx %o1, %l0, %o1
3619 or %g1, %o0, %o0
3620 srlx %o2, %l1, %o2
3621 or %o2, %o1, %o1
3622 8:
3623 srax %g3, 63, %l7 ! next tweak value
3624 addcc %g2, %g2, %o2
3625 and %l7, 0x87, %l7
3626 .word 0x97b0c223 !addxc %g3,%g3,%o3
3627 xor %l7, %o2, %o2
3628
3629 .word 0x99b0230a !movxtod %o2,%f12
3630 .word 0x9db0230b !movxtod %o3,%f14
3631 .word 0x99b3098c !bshuffle %f12,%f12,%f12
3632 .word 0x9db3898e !bshuffle %f14,%f14,%f14
3633
3634 xor %g4, %o0, %o0 ! ^= rk[0]
3635 xor %g5, %o1, %o1
3636 .word 0x81b02308 !movxtod %o0,%f0
3637 .word 0x85b02309 !movxtod %o1,%f2
3638
3639 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3640 .word 0x85b38d82 !fxor %f14,%f2,%f2
3641
3642 call _aes256_decrypt_1x
3643 add %i0, 16, %i0
3644
3645 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= tweak[0]
3646 .word 0x85b38d82 !fxor %f14,%f2,%f2
3647
3648 std %f0, [%fp + 2047-16]
3649 std %f2, [%fp + 2047-8]
3650
3651 srl %l0, 3, %l0
3652 add %fp, 2047-16, %l7
3653 add %i0, %l0, %i0 ! original %i0+%i2&-15
3654 add %i1, %l2, %i1 ! original %i1+%i2&-15
3655 mov 0, %l0
3656 add %i1, 16, %i1
3657 nop ! align
3658
! Byte-swap loop: copy %i5 input bytes into the stacked block while
! emitting the displaced plaintext bytes as the short final output.
3659 .L256_xts_destealing:
3660 ldub [%i0 + %l0], %o0
3661 ldub [%l7 + %l0], %o1
3662 dec %i5
3663 stb %o0, [%l7 + %l0]
3664 stb %o1, [%i1 + %l0]
3665 brnz %i5, .L256_xts_destealing
3666 inc %l0
3667
3668 mov %l7, %i0
3669 sub %i1, 16, %i1
3670 mov 0, %l0
3671 sub %i1, %l2, %i1
3672 ba .L256_xts_deloop ! one more time
3673 mov 1, %i2 ! %i5 is 0
3674 ret
3675 restore
3676 .type aes256_t4_xts_decrypt,#function
3677 .size aes256_t4_xts_decrypt,.-aes256_t4_xts_decrypt
!----------------------------------------------------------------------
! AES-192 CTR mode with 32-bit counter, SPARC T4 AES opcodes.
! In: %i0 = input ptr, %i1 = output ptr,
!     %i2 = length in 16-byte blocks (converted to bytes below),
!     %i4 = 16-byte counter block (big-endian words).
! Key schedule is loaded into %f16..%f50 by _aes192_load_enckey, with
! rk[0] left in %g4:%g5. The first AES round for each block is issued
! inline here, so the shared helpers are entered past their first
! round: _aes192_encrypt_1x+8 (2 insns) / _aes192_encrypt_2x+16.
! Only the low 32-bit counter limb (%l7) is incremented ("clruw"
! keeps it 32-bit); the upper 96 bits are pre-xored with rk[0] and
! kept constant in %f14 / %g5.
!----------------------------------------------------------------------
3678 .globl aes192_t4_ctr32_encrypt
3679 .align 32
3680 aes192_t4_ctr32_encrypt:
3681 save %sp, -192, %sp
3682 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3683
3684 prefetch [%i0], 20
3685 prefetch [%i0 + 63], 20
3686 call _aes192_load_enckey
3687 sllx %i2, 4, %i2 ! blocks -> bytes
3688
3689 ld [%i4 + 0], %l4 ! counter
3690 ld [%i4 + 4], %l5
3691 ld [%i4 + 8], %l6
3692 ld [%i4 + 12], %l7
3693
! Fold rk[0] into the constant counter words once, up front.
3694 sllx %l4, 32, %o5
3695 or %l5, %o5, %o5
3696 sllx %l6, 32, %g1
3697 xor %o5, %g4, %g4 ! ^= rk[0]
3698 xor %g1, %g5, %g5
3699 .word 0x9db02304 !movxtod %g4,%f14 ! most significant 64 bits
3700
! Alignment bookkeeping, same scheme as the other entry points:
! %l0/%l1 input shift amounts, %l2 output misalignment, %l3 store mask.
3701 sub %i0, %i1, %l5 ! %i0!=%i1
3702 and %i0, 7, %l0
3703 andn %i0, 7, %i0
3704 sll %l0, 3, %l0
3705 mov 64, %l1
3706 mov 0xff, %l3
3707 sub %l1, %l0, %l1
3708 and %i1, 7, %l2
3709 cmp %i2, 255
3710 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3711 movleu %xcc, 0, %l5 ! %i2<256 ||
3712 brnz,pn %l5, .L192_ctr32_blk ! %i0==%i1)
3713 srl %l3, %l2, %l3
3714
3715 andcc %i2, 16, %g0 ! is number of blocks even?
3716 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3717 bz %icc, .L192_ctr32_loop2x
3718 srlx %i2, 4, %i2
! Single-block loop (odd leading block).
3719 .L192_ctr32_loop:
3720 ldx [%i0 + 0], %o0
3721 brz,pt %l0, 4f
3722 ldx [%i0 + 8], %o1
3723
3724 ldx [%i0 + 16], %o2
3725 sllx %o0, %l0, %o0
3726 srlx %o1, %l1, %g1
3727 sllx %o1, %l0, %o1
3728 or %g1, %o0, %o0
3729 srlx %o2, %l1, %o2
3730 or %o2, %o1, %o1
3731 4:
3732 xor %g5, %l7, %g1 ! ^= rk[0]
3733 add %l7, 1, %l7
3734 .word 0x85b02301 !movxtod %g1,%f2
3735 srl %l7, 0, %l7 ! clruw
3736 prefetch [%i1 + 63], 22
3737 prefetch [%i0 + 16+63], 20
! Round 1 issued here; enter helper past its first round pair.
3738 .word 0x88cc040e !aes_eround01 %f16,%f14,%f2,%f4
3739 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3740 call _aes192_encrypt_1x+8
3741 add %i0, 16, %i0
3742
3743 .word 0x95b02308 !movxtod %o0,%f10
3744 .word 0x99b02309 !movxtod %o1,%f12
3745 .word 0x81b28d80 !fxor %f10,%f0,%f0 ! ^= inp
3746 .word 0x85b30d82 !fxor %f12,%f2,%f2
3747
3748 brnz,pn %l2, 2f
3749 sub %i2, 1, %i2
3750
3751 std %f0, [%i1 + 0]
3752 std %f2, [%i1 + 8]
3753 brnz,pt %i2, .L192_ctr32_loop2x
3754 add %i1, 16, %i1
3755
3756 ret
3757 restore
3758
3759 .align 16
3760 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3761 ! and ~3x deterioration
3762 ! in inp==out case
3763 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
3764 .word 0x8db00902 !faligndata %f0,%f2,%f6
3765 .word 0x91b08902 !faligndata %f2,%f2,%f8
3766 stda %f4, [%i1 + %l3]0xc0 ! partial store
3767 std %f6, [%i1 + 8]
3768 add %i1, 16, %i1
3769 orn %g0, %l3, %l3
3770 stda %f8, [%i1 + %l3]0xc0 ! partial store
3771
3772 brnz,pt %i2, .L192_ctr32_loop2x+4
3773 orn %g0, %l3, %l3
3774
3775 ret
3776 restore
3777
3778 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration main loop.
3779 .align 32
3780 .L192_ctr32_loop2x:
3781 ldx [%i0 + 0], %o0
3782 ldx [%i0 + 8], %o1
3783 ldx [%i0 + 16], %o2
3784 brz,pt %l0, 4f
3785 ldx [%i0 + 24], %o3
3786
3787 ldx [%i0 + 32], %o4
3788 sllx %o0, %l0, %o0
3789 srlx %o1, %l1, %g1
3790 or %g1, %o0, %o0
3791 sllx %o1, %l0, %o1
3792 srlx %o2, %l1, %g1
3793 or %g1, %o1, %o1
3794 sllx %o2, %l0, %o2
3795 srlx %o3, %l1, %g1
3796 or %g1, %o2, %o2
3797 sllx %o3, %l0, %o3
3798 srlx %o4, %l1, %o4
3799 or %o4, %o3, %o3
3800 4:
3801 xor %g5, %l7, %g1 ! ^= rk[0]
3802 add %l7, 1, %l7
3803 .word 0x85b02301 !movxtod %g1,%f2
3804 srl %l7, 0, %l7 ! clruw
3805 xor %g5, %l7, %g1
3806 add %l7, 1, %l7
3807 .word 0x8db02301 !movxtod %g1,%f6
3808 srl %l7, 0, %l7 ! clruw
3809 prefetch [%i1 + 63], 22
3810 prefetch [%i0 + 32+63], 20
! Round 1 for both blocks issued here; helper entered past it.
3811 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
3812 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3813 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
3814 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
3815 call _aes192_encrypt_2x+16
3816 add %i0, 32, %i0
3817
3818 .word 0x91b02308 !movxtod %o0,%f8
3819 .word 0x95b02309 !movxtod %o1,%f10
3820 .word 0x99b0230a !movxtod %o2,%f12
3821 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
3822 .word 0x91b0230b !movxtod %o3,%f8
3823 .word 0x85b28d82 !fxor %f10,%f2,%f2
3824 .word 0x89b30d84 !fxor %f12,%f4,%f4
3825 .word 0x8db20d86 !fxor %f8,%f6,%f6
3826
3827 brnz,pn %l2, 2f
3828 sub %i2, 2, %i2
3829
3830 std %f0, [%i1 + 0]
3831 std %f2, [%i1 + 8]
3832 std %f4, [%i1 + 16]
3833 std %f6, [%i1 + 24]
3834 brnz,pt %i2, .L192_ctr32_loop2x
3835 add %i1, 32, %i1
3836
3837 ret
3838 restore
3839
3840 .align 16
3841 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
3842 ! and ~3x deterioration
3843 ! in inp==out case
3844 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
3845 .word 0x81b00902 !faligndata %f0,%f2,%f0
3846 .word 0x85b08904 !faligndata %f2,%f4,%f2
3847 .word 0x89b10906 !faligndata %f4,%f6,%f4
3848 .word 0x8db18906 !faligndata %f6,%f6,%f6
3849
3850 stda %f8, [%i1 + %l3]0xc0 ! partial store
3851 std %f0, [%i1 + 8]
3852 std %f2, [%i1 + 16]
3853 std %f4, [%i1 + 24]
3854 add %i1, 32, %i1
3855 orn %g0, %l3, %l3
3856 stda %f6, [%i1 + %l3]0xc0 ! partial store
3857
3858 brnz,pt %i2, .L192_ctr32_loop2x+4
3859 orn %g0, %l3, %l3
3860
3861 ret
3862 restore
3863
3864 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: aligned output, long input, in!=out; uses ASI_BLK_INIT
! stores, hence the membar before re-entering the generic loops.
3865 .align 32
3866 .L192_ctr32_blk:
3867 add %i1, %i2, %l5
3868 and %l5, 63, %l5 ! tail
3869 sub %i2, %l5, %i2
3870 add %l5, 15, %l5 ! round up to 16n
3871 srlx %i2, 4, %i2
3872 srl %l5, 4, %l5
3873 sub %i2, 1, %i2
3874 add %l5, 1, %l5
3875
3876 .L192_ctr32_blk_loop2x:
3877 ldx [%i0 + 0], %o0
3878 ldx [%i0 + 8], %o1
3879 ldx [%i0 + 16], %o2
3880 brz,pt %l0, 5f
3881 ldx [%i0 + 24], %o3
3882
3883 ldx [%i0 + 32], %o4
3884 sllx %o0, %l0, %o0
3885 srlx %o1, %l1, %g1
3886 or %g1, %o0, %o0
3887 sllx %o1, %l0, %o1
3888 srlx %o2, %l1, %g1
3889 or %g1, %o1, %o1
3890 sllx %o2, %l0, %o2
3891 srlx %o3, %l1, %g1
3892 or %g1, %o2, %o2
3893 sllx %o3, %l0, %o3
3894 srlx %o4, %l1, %o4
3895 or %o4, %o3, %o3
3896 5:
3897 xor %g5, %l7, %g1 ! ^= rk[0]
3898 add %l7, 1, %l7
3899 .word 0x85b02301 !movxtod %g1,%f2
3900 srl %l7, 0, %l7 ! clruw
3901 xor %g5, %l7, %g1
3902 add %l7, 1, %l7
3903 .word 0x8db02301 !movxtod %g1,%f6
3904 srl %l7, 0, %l7 ! clruw
3905 prefetch [%i0 + 32+63], 20
3906 .word 0x90cc040e !aes_eround01 %f16,%f14,%f2,%f8
3907 .word 0x84cc842e !aes_eround23 %f18,%f14,%f2,%f2
3908 .word 0x94cc0c0e !aes_eround01 %f16,%f14,%f6,%f10
3909 .word 0x8ccc8c2e !aes_eround23 %f18,%f14,%f6,%f6
3910 call _aes192_encrypt_2x+16
3911 add %i0, 32, %i0
3912 subcc %i2, 2, %i2
3913
3914 .word 0x91b02308 !movxtod %o0,%f8
3915 .word 0x95b02309 !movxtod %o1,%f10
3916 .word 0x99b0230a !movxtod %o2,%f12
3917 .word 0x81b20d80 !fxor %f8,%f0,%f0 ! ^= inp
3918 .word 0x91b0230b !movxtod %o3,%f8
3919 .word 0x85b28d82 !fxor %f10,%f2,%f2
3920 .word 0x89b30d84 !fxor %f12,%f4,%f4
3921 .word 0x8db20d86 !fxor %f8,%f6,%f6
3922
3923 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3924 add %i1, 8, %i1
3925 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3926 add %i1, 8, %i1
3927 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3928 add %i1, 8, %i1
3929 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
3930 bgu,pt %xcc, .L192_ctr32_blk_loop2x
3931 add %i1, 8, %i1
3932
! Remaining %l5 blocks go through the generic loops.
3933 add %l5, %i2, %i2
3934 andcc %i2, 1, %g0 ! is number of blocks even?
3935 membar #StoreLoad|#StoreStore
3936 bnz,pt %icc, .L192_ctr32_loop
3937 srl %i2, 0, %i2
3938 brnz,pn %i2, .L192_ctr32_loop2x
3939 nop
3940
3941 ret
3942 restore
3943 .type aes192_t4_ctr32_encrypt,#function
3944 .size aes192_t4_ctr32_encrypt,.-aes192_t4_ctr32_encrypt
!----------------------------------------------------------------------
! AES-192 CBC decrypt, SPARC T4 AES opcodes.
! In: %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!     %i4 = 16-byte ivec (read at entry, updated on every exit path).
! Key schedule is loaded by _aes192_load_deckey, rk[0] in %g4:%g5.
! The running ivec lives in %f12:%f14; after each block it is
! replaced by that block's ciphertext (movxtod %o0/%o1), per CBC.
! Three paths, as elsewhere in this file: generic 1x/2x loops,
! unaligned-output tail via faligndata + partial stores, and an
! ASI_BLK_INIT bulk path for long aligned buffers.
!----------------------------------------------------------------------
3945 .globl aes192_t4_cbc_decrypt
3946 .align 32
3947 aes192_t4_cbc_decrypt:
3948 save %sp, -192, %sp
3949 cmp %i2, 0
3950 be,pn %xcc, .L192_cbc_dec_abort
3951 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
3952 sub %i0, %i1, %l5 ! %i0!=%i1
3953 ld [%i4 + 0], %f12 ! load ivec
3954 ld [%i4 + 4], %f13
3955 ld [%i4 + 8], %f14
3956 ld [%i4 + 12], %f15
3957 prefetch [%i0], 20
3958 prefetch [%i0 + 63], 20
3959 call _aes192_load_deckey
3960 and %i0, 7, %l0
3961 andn %i0, 7, %i0
3962 sll %l0, 3, %l0
3963 mov 64, %l1
3964 mov 0xff, %l3
3965 sub %l1, %l0, %l1
3966 and %i1, 7, %l2
3967 cmp %i2, 255
3968 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
3969 movleu %xcc, 0, %l5 ! %i2<256 ||
3970 brnz,pn %l5, .L192cbc_dec_blk ! %i0==%i1)
3971 srl %l3, %l2, %l3
3972
3973 andcc %i2, 16, %g0 ! is number of blocks even?
3974 srlx %i2, 4, %i2
3975 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
3976 bz %icc, .L192_cbc_dec_loop2x
3977 prefetch [%i1], 22
! Single-block loop (odd leading block).
3978 .L192_cbc_dec_loop:
3979 ldx [%i0 + 0], %o0
3980 brz,pt %l0, 4f
3981 ldx [%i0 + 8], %o1
3982
3983 ldx [%i0 + 16], %o2
3984 sllx %o0, %l0, %o0
3985 srlx %o1, %l1, %g1
3986 sllx %o1, %l0, %o1
3987 or %g1, %o0, %o0
3988 srlx %o2, %l1, %o2
3989 or %o2, %o1, %o1
3990 4:
3991 xor %g4, %o0, %o2 ! ^= rk[0]
3992 xor %g5, %o1, %o3
3993 .word 0x81b0230a !movxtod %o2,%f0
3994 .word 0x85b0230b !movxtod %o3,%f2
3995
3996 prefetch [%i1 + 63], 22
3997 prefetch [%i0 + 16+63], 20
3998 call _aes192_decrypt_1x
3999 add %i0, 16, %i0
4000
! Plaintext = D(C) ^ ivec; next ivec = this block's ciphertext.
4001 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4002 .word 0x85b38d82 !fxor %f14,%f2,%f2
4003 .word 0x99b02308 !movxtod %o0,%f12
4004 .word 0x9db02309 !movxtod %o1,%f14
4005
4006 brnz,pn %l2, 2f
4007 sub %i2, 1, %i2
4008
4009 std %f0, [%i1 + 0]
4010 std %f2, [%i1 + 8]
4011 brnz,pt %i2, .L192_cbc_dec_loop2x
4012 add %i1, 16, %i1
! Fallthrough when done: write out the final ivec.
4013 st %f12, [%i4 + 0]
4014 st %f13, [%i4 + 4]
4015 st %f14, [%i4 + 8]
4016 st %f15, [%i4 + 12]
4017 .L192_cbc_dec_abort:
4018 ret
4019 restore
4020
4021 .align 16
4022 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4023 ! and ~3x deterioration
4024 ! in inp==out case
4025 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
4026 .word 0x8db00902 !faligndata %f0,%f2,%f6
4027 .word 0x91b08902 !faligndata %f2,%f2,%f8
4028
4029 stda %f4, [%i1 + %l3]0xc0 ! partial store
4030 std %f6, [%i1 + 8]
4031 add %i1, 16, %i1
4032 orn %g0, %l3, %l3
4033 stda %f8, [%i1 + %l3]0xc0 ! partial store
4034
4035 brnz,pt %i2, .L192_cbc_dec_loop2x+4
4036 orn %g0, %l3, %l3
4037 st %f12, [%i4 + 0]
4038 st %f13, [%i4 + 4]
4039 st %f14, [%i4 + 8]
4040 st %f15, [%i4 + 12]
4041 ret
4042 restore
4043
4044 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration main loop.
4045 .align 32
4046 .L192_cbc_dec_loop2x:
4047 ldx [%i0 + 0], %o0
4048 ldx [%i0 + 8], %o1
4049 ldx [%i0 + 16], %o2
4050 brz,pt %l0, 4f
4051 ldx [%i0 + 24], %o3
4052
4053 ldx [%i0 + 32], %o4
4054 sllx %o0, %l0, %o0
4055 srlx %o1, %l1, %g1
4056 or %g1, %o0, %o0
4057 sllx %o1, %l0, %o1
4058 srlx %o2, %l1, %g1
4059 or %g1, %o1, %o1
4060 sllx %o2, %l0, %o2
4061 srlx %o3, %l1, %g1
4062 or %g1, %o2, %o2
4063 sllx %o3, %l0, %o3
4064 srlx %o4, %l1, %o4
4065 or %o4, %o3, %o3
4066 4:
4067 xor %g4, %o0, %o4 ! ^= rk[0]
4068 xor %g5, %o1, %o5
4069 .word 0x81b0230c !movxtod %o4,%f0
4070 .word 0x85b0230d !movxtod %o5,%f2
4071 xor %g4, %o2, %o4
4072 xor %g5, %o3, %o5
4073 .word 0x89b0230c !movxtod %o4,%f4
4074 .word 0x8db0230d !movxtod %o5,%f6
4075
4076 prefetch [%i1 + 63], 22
4077 prefetch [%i0 + 32+63], 20
4078 call _aes192_decrypt_2x
4079 add %i0, 32, %i0
4080
! Block 0 xors the carried ivec, block 1 xors block 0's ciphertext;
! the new ivec is block 1's ciphertext (%o2:%o3).
4081 .word 0x91b02308 !movxtod %o0,%f8
4082 .word 0x95b02309 !movxtod %o1,%f10
4083 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4084 .word 0x85b38d82 !fxor %f14,%f2,%f2
4085 .word 0x99b0230a !movxtod %o2,%f12
4086 .word 0x9db0230b !movxtod %o3,%f14
4087 .word 0x89b20d84 !fxor %f8,%f4,%f4
4088 .word 0x8db28d86 !fxor %f10,%f6,%f6
4089
4090 brnz,pn %l2, 2f
4091 sub %i2, 2, %i2
4092
4093 std %f0, [%i1 + 0]
4094 std %f2, [%i1 + 8]
4095 std %f4, [%i1 + 16]
4096 std %f6, [%i1 + 24]
4097 brnz,pt %i2, .L192_cbc_dec_loop2x
4098 add %i1, 32, %i1
4099 st %f12, [%i4 + 0]
4100 st %f13, [%i4 + 4]
4101 st %f14, [%i4 + 8]
4102 st %f15, [%i4 + 12]
4103 ret
4104 restore
4105
4106 .align 16
4107 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4108 ! and ~3x deterioration
4109 ! in inp==out case
4110 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
4111 .word 0x81b00902 !faligndata %f0,%f2,%f0
4112 .word 0x85b08904 !faligndata %f2,%f4,%f2
4113 .word 0x89b10906 !faligndata %f4,%f6,%f4
4114 .word 0x8db18906 !faligndata %f6,%f6,%f6
4115 stda %f8, [%i1 + %l3]0xc0 ! partial store
4116 std %f0, [%i1 + 8]
4117 std %f2, [%i1 + 16]
4118 std %f4, [%i1 + 24]
4119 add %i1, 32, %i1
4120 orn %g0, %l3, %l3
4121 stda %f6, [%i1 + %l3]0xc0 ! partial store
4122
4123 brnz,pt %i2, .L192_cbc_dec_loop2x+4
4124 orn %g0, %l3, %l3
4125 st %f12, [%i4 + 0]
4126 st %f13, [%i4 + 4]
4127 st %f14, [%i4 + 8]
4128 st %f15, [%i4 + 12]
4129 ret
4130 restore
4131
4132 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: aligned output, >=256 bytes, in!=out; ASI_BLK_INIT
! stores, membar before handing the tail to the generic loops.
4133 .align 32
4134 .L192cbc_dec_blk:
4135 add %i1, %i2, %l5
4136 and %l5, 63, %l5 ! tail
4137 sub %i2, %l5, %i2
4138 add %l5, 15, %l5 ! round up to 16n
4139 srlx %i2, 4, %i2
4140 srl %l5, 4, %l5
4141 sub %i2, 1, %i2
4142 add %l5, 1, %l5
4143
4144 .L192_cbc_dec_blk_loop2x:
4145 ldx [%i0 + 0], %o0
4146 ldx [%i0 + 8], %o1
4147 ldx [%i0 + 16], %o2
4148 brz,pt %l0, 5f
4149 ldx [%i0 + 24], %o3
4150
4151 ldx [%i0 + 32], %o4
4152 sllx %o0, %l0, %o0
4153 srlx %o1, %l1, %g1
4154 or %g1, %o0, %o0
4155 sllx %o1, %l0, %o1
4156 srlx %o2, %l1, %g1
4157 or %g1, %o1, %o1
4158 sllx %o2, %l0, %o2
4159 srlx %o3, %l1, %g1
4160 or %g1, %o2, %o2
4161 sllx %o3, %l0, %o3
4162 srlx %o4, %l1, %o4
4163 or %o4, %o3, %o3
4164 5:
4165 xor %g4, %o0, %o4 ! ^= rk[0]
4166 xor %g5, %o1, %o5
4167 .word 0x81b0230c !movxtod %o4,%f0
4168 .word 0x85b0230d !movxtod %o5,%f2
4169 xor %g4, %o2, %o4
4170 xor %g5, %o3, %o5
4171 .word 0x89b0230c !movxtod %o4,%f4
4172 .word 0x8db0230d !movxtod %o5,%f6
4173
4174 prefetch [%i0 + 32+63], 20
4175 call _aes192_decrypt_2x
4176 add %i0, 32, %i0
4177 subcc %i2, 2, %i2
4178
4179 .word 0x91b02308 !movxtod %o0,%f8
4180 .word 0x95b02309 !movxtod %o1,%f10
4181 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4182 .word 0x85b38d82 !fxor %f14,%f2,%f2
4183 .word 0x99b0230a !movxtod %o2,%f12
4184 .word 0x9db0230b !movxtod %o3,%f14
4185 .word 0x89b20d84 !fxor %f8,%f4,%f4
4186 .word 0x8db28d86 !fxor %f10,%f6,%f6
4187
4188 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4189 add %i1, 8, %i1
4190 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4191 add %i1, 8, %i1
4192 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4193 add %i1, 8, %i1
4194 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4195 bgu,pt %xcc, .L192_cbc_dec_blk_loop2x
4196 add %i1, 8, %i1
4197
4198 add %l5, %i2, %i2
4199 andcc %i2, 1, %g0 ! is number of blocks even?
4200 membar #StoreLoad|#StoreStore
4201 bnz,pt %icc, .L192_cbc_dec_loop
4202 srl %i2, 0, %i2
4203 brnz,pn %i2, .L192_cbc_dec_loop2x
4204 nop
4205 st %f12, [%i4 + 0] ! write out ivec
4206 st %f13, [%i4 + 4]
4207 st %f14, [%i4 + 8]
4208 st %f15, [%i4 + 12]
4209 ret
4210 restore
4211 .type aes192_t4_cbc_decrypt,#function
4212 .size aes192_t4_cbc_decrypt,.-aes192_t4_cbc_decrypt
!----------------------------------------------------------------------
! AES-256 CBC decrypt, SPARC T4 AES opcodes. Structurally identical
! to aes192_t4_cbc_decrypt above, but loads the 256-bit decryption
! key schedule (_aes256_load_deckey) and calls the 14-round helpers
! _aes256_decrypt_1x / _aes256_decrypt_2x.
! In: %i0 = input ptr, %i1 = output ptr, %i2 = length in bytes,
!     %i4 = 16-byte ivec (read at entry, updated on every exit path).
! Running ivec in %f12:%f14, replaced by each block's ciphertext.
!----------------------------------------------------------------------
4213 .globl aes256_t4_cbc_decrypt
4214 .align 32
4215 aes256_t4_cbc_decrypt:
4216 save %sp, -192, %sp
4217 cmp %i2, 0
4218 be,pn %xcc, .L256_cbc_dec_abort
4219 srln %i2, 0, %i2 ! needed on v8+, "nop" on v9
4220 sub %i0, %i1, %l5 ! %i0!=%i1
4221 ld [%i4 + 0], %f12 ! load ivec
4222 ld [%i4 + 4], %f13
4223 ld [%i4 + 8], %f14
4224 ld [%i4 + 12], %f15
4225 prefetch [%i0], 20
4226 prefetch [%i0 + 63], 20
4227 call _aes256_load_deckey
4228 and %i0, 7, %l0
4229 andn %i0, 7, %i0
4230 sll %l0, 3, %l0
4231 mov 64, %l1
4232 mov 0xff, %l3
4233 sub %l1, %l0, %l1
4234 and %i1, 7, %l2
4235 cmp %i2, 255
4236 movrnz %l2, 0, %l5 ! if ( %i1&7 ||
4237 movleu %xcc, 0, %l5 ! %i2<256 ||
4238 brnz,pn %l5, .L256cbc_dec_blk ! %i0==%i1)
4239 srl %l3, %l2, %l3
4240
4241 andcc %i2, 16, %g0 ! is number of blocks even?
4242 srlx %i2, 4, %i2
4243 .word 0xb3b64340 !alignaddrl %i1,%g0,%i1
4244 bz %icc, .L256_cbc_dec_loop2x
4245 prefetch [%i1], 22
! Single-block loop (odd leading block).
4246 .L256_cbc_dec_loop:
4247 ldx [%i0 + 0], %o0
4248 brz,pt %l0, 4f
4249 ldx [%i0 + 8], %o1
4250
4251 ldx [%i0 + 16], %o2
4252 sllx %o0, %l0, %o0
4253 srlx %o1, %l1, %g1
4254 sllx %o1, %l0, %o1
4255 or %g1, %o0, %o0
4256 srlx %o2, %l1, %o2
4257 or %o2, %o1, %o1
4258 4:
4259 xor %g4, %o0, %o2 ! ^= rk[0]
4260 xor %g5, %o1, %o3
4261 .word 0x81b0230a !movxtod %o2,%f0
4262 .word 0x85b0230b !movxtod %o3,%f2
4263
4264 prefetch [%i1 + 63], 22
4265 prefetch [%i0 + 16+63], 20
4266 call _aes256_decrypt_1x
4267 add %i0, 16, %i0
4268
! Plaintext = D(C) ^ ivec; next ivec = this block's ciphertext.
4269 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4270 .word 0x85b38d82 !fxor %f14,%f2,%f2
4271 .word 0x99b02308 !movxtod %o0,%f12
4272 .word 0x9db02309 !movxtod %o1,%f14
4273
4274 brnz,pn %l2, 2f
4275 sub %i2, 1, %i2
4276
4277 std %f0, [%i1 + 0]
4278 std %f2, [%i1 + 8]
4279 brnz,pt %i2, .L256_cbc_dec_loop2x
4280 add %i1, 16, %i1
! Fallthrough when done: write out the final ivec.
4281 st %f12, [%i4 + 0]
4282 st %f13, [%i4 + 4]
4283 st %f14, [%i4 + 8]
4284 st %f15, [%i4 + 12]
4285 .L256_cbc_dec_abort:
4286 ret
4287 restore
4288
4289 .align 16
4290 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4291 ! and ~3x deterioration
4292 ! in inp==out case
4293 .word 0x89b00900 !faligndata %f0,%f0,%f4 ! handle unaligned output
4294 .word 0x8db00902 !faligndata %f0,%f2,%f6
4295 .word 0x91b08902 !faligndata %f2,%f2,%f8
4296
4297 stda %f4, [%i1 + %l3]0xc0 ! partial store
4298 std %f6, [%i1 + 8]
4299 add %i1, 16, %i1
4300 orn %g0, %l3, %l3
4301 stda %f8, [%i1 + %l3]0xc0 ! partial store
4302
4303 brnz,pt %i2, .L256_cbc_dec_loop2x+4
4304 orn %g0, %l3, %l3
4305 st %f12, [%i4 + 0]
4306 st %f13, [%i4 + 4]
4307 st %f14, [%i4 + 8]
4308 st %f15, [%i4 + 12]
4309 ret
4310 restore
4311
4312 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Two-blocks-per-iteration main loop.
4313 .align 32
4314 .L256_cbc_dec_loop2x:
4315 ldx [%i0 + 0], %o0
4316 ldx [%i0 + 8], %o1
4317 ldx [%i0 + 16], %o2
4318 brz,pt %l0, 4f
4319 ldx [%i0 + 24], %o3
4320
4321 ldx [%i0 + 32], %o4
4322 sllx %o0, %l0, %o0
4323 srlx %o1, %l1, %g1
4324 or %g1, %o0, %o0
4325 sllx %o1, %l0, %o1
4326 srlx %o2, %l1, %g1
4327 or %g1, %o1, %o1
4328 sllx %o2, %l0, %o2
4329 srlx %o3, %l1, %g1
4330 or %g1, %o2, %o2
4331 sllx %o3, %l0, %o3
4332 srlx %o4, %l1, %o4
4333 or %o4, %o3, %o3
4334 4:
4335 xor %g4, %o0, %o4 ! ^= rk[0]
4336 xor %g5, %o1, %o5
4337 .word 0x81b0230c !movxtod %o4,%f0
4338 .word 0x85b0230d !movxtod %o5,%f2
4339 xor %g4, %o2, %o4
4340 xor %g5, %o3, %o5
4341 .word 0x89b0230c !movxtod %o4,%f4
4342 .word 0x8db0230d !movxtod %o5,%f6
4343
4344 prefetch [%i1 + 63], 22
4345 prefetch [%i0 + 32+63], 20
4346 call _aes256_decrypt_2x
4347 add %i0, 32, %i0
4348
! Block 0 xors the carried ivec, block 1 xors block 0's ciphertext;
! the new ivec is block 1's ciphertext (%o2:%o3).
4349 .word 0x91b02308 !movxtod %o0,%f8
4350 .word 0x95b02309 !movxtod %o1,%f10
4351 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4352 .word 0x85b38d82 !fxor %f14,%f2,%f2
4353 .word 0x99b0230a !movxtod %o2,%f12
4354 .word 0x9db0230b !movxtod %o3,%f14
4355 .word 0x89b20d84 !fxor %f8,%f4,%f4
4356 .word 0x8db28d86 !fxor %f10,%f6,%f6
4357
4358 brnz,pn %l2, 2f
4359 sub %i2, 2, %i2
4360
4361 std %f0, [%i1 + 0]
4362 std %f2, [%i1 + 8]
4363 std %f4, [%i1 + 16]
4364 std %f6, [%i1 + 24]
4365 brnz,pt %i2, .L256_cbc_dec_loop2x
4366 add %i1, 32, %i1
4367 st %f12, [%i4 + 0]
4368 st %f13, [%i4 + 4]
4369 st %f14, [%i4 + 8]
4370 st %f15, [%i4 + 12]
4371 ret
4372 restore
4373
4374 .align 16
4375 2: ldxa [%i0]0x82, %o0 ! avoid read-after-write hazard
4376 ! and ~3x deterioration
4377 ! in inp==out case
4378 .word 0x91b00900 !faligndata %f0,%f0,%f8 ! handle unaligned output
4379 .word 0x81b00902 !faligndata %f0,%f2,%f0
4380 .word 0x85b08904 !faligndata %f2,%f4,%f2
4381 .word 0x89b10906 !faligndata %f4,%f6,%f4
4382 .word 0x8db18906 !faligndata %f6,%f6,%f6
4383 stda %f8, [%i1 + %l3]0xc0 ! partial store
4384 std %f0, [%i1 + 8]
4385 std %f2, [%i1 + 16]
4386 std %f4, [%i1 + 24]
4387 add %i1, 32, %i1
4388 orn %g0, %l3, %l3
4389 stda %f6, [%i1 + %l3]0xc0 ! partial store
4390
4391 brnz,pt %i2, .L256_cbc_dec_loop2x+4
4392 orn %g0, %l3, %l3
4393 st %f12, [%i4 + 0]
4394 st %f13, [%i4 + 4]
4395 st %f14, [%i4 + 8]
4396 st %f15, [%i4 + 12]
4397 ret
4398 restore
4399
4400 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Bulk path: aligned output, >=256 bytes, in!=out; ASI_BLK_INIT
! stores, membar before handing the tail to the generic loops.
4401 .align 32
4402 .L256cbc_dec_blk:
4403 add %i1, %i2, %l5
4404 and %l5, 63, %l5 ! tail
4405 sub %i2, %l5, %i2
4406 add %l5, 15, %l5 ! round up to 16n
4407 srlx %i2, 4, %i2
4408 srl %l5, 4, %l5
4409 sub %i2, 1, %i2
4410 add %l5, 1, %l5
4411
4412 .L256_cbc_dec_blk_loop2x:
4413 ldx [%i0 + 0], %o0
4414 ldx [%i0 + 8], %o1
4415 ldx [%i0 + 16], %o2
4416 brz,pt %l0, 5f
4417 ldx [%i0 + 24], %o3
4418
4419 ldx [%i0 + 32], %o4
4420 sllx %o0, %l0, %o0
4421 srlx %o1, %l1, %g1
4422 or %g1, %o0, %o0
4423 sllx %o1, %l0, %o1
4424 srlx %o2, %l1, %g1
4425 or %g1, %o1, %o1
4426 sllx %o2, %l0, %o2
4427 srlx %o3, %l1, %g1
4428 or %g1, %o2, %o2
4429 sllx %o3, %l0, %o3
4430 srlx %o4, %l1, %o4
4431 or %o4, %o3, %o3
4432 5:
4433 xor %g4, %o0, %o4 ! ^= rk[0]
4434 xor %g5, %o1, %o5
4435 .word 0x81b0230c !movxtod %o4,%f0
4436 .word 0x85b0230d !movxtod %o5,%f2
4437 xor %g4, %o2, %o4
4438 xor %g5, %o3, %o5
4439 .word 0x89b0230c !movxtod %o4,%f4
4440 .word 0x8db0230d !movxtod %o5,%f6
4441
4442 prefetch [%i0 + 32+63], 20
4443 call _aes256_decrypt_2x
4444 add %i0, 32, %i0
4445 subcc %i2, 2, %i2
4446
4447 .word 0x91b02308 !movxtod %o0,%f8
4448 .word 0x95b02309 !movxtod %o1,%f10
4449 .word 0x81b30d80 !fxor %f12,%f0,%f0 ! ^= ivec
4450 .word 0x85b38d82 !fxor %f14,%f2,%f2
4451 .word 0x99b0230a !movxtod %o2,%f12
4452 .word 0x9db0230b !movxtod %o3,%f14
4453 .word 0x89b20d84 !fxor %f8,%f4,%f4
4454 .word 0x8db28d86 !fxor %f10,%f6,%f6
4455
4456 stda %f0, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4457 add %i1, 8, %i1
4458 stda %f2, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4459 add %i1, 8, %i1
4460 stda %f4, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4461 add %i1, 8, %i1
4462 stda %f6, [%i1]0xe2 ! ASI_BLK_INIT, T4-specific
4463 bgu,pt %xcc, .L256_cbc_dec_blk_loop2x
4464 add %i1, 8, %i1
4465
4466 add %l5, %i2, %i2
4467 andcc %i2, 1, %g0 ! is number of blocks even?
4468 membar #StoreLoad|#StoreStore
4469 bnz,pt %icc, .L256_cbc_dec_loop
4470 srl %i2, 0, %i2
4471 brnz,pn %i2, .L256_cbc_dec_loop2x
4472 nop
4473 st %f12, [%i4 + 0] ! write out ivec
4474 st %f13, [%i4 + 4]
4475 st %f14, [%i4 + 8]
4476 st %f15, [%i4 + 12]
4477 ret
4478 restore
4479 .type aes256_t4_cbc_decrypt,#function
4480 .size aes256_t4_cbc_decrypt,.-aes256_t4_cbc_decrypt
!----------------------------------------------------------------------
! Internal helper: 14-round AES-256 decryption of one block.
! In:  %f0:%f2 = state (already xored with rk[0] by the caller),
!      %f16..%f62 = round-key schedule, %i3 = key-schedule pointer.
! Out: %f0:%f2 = decrypted block.
! Leaf routine: entered via call, returns with retl (through %o7).
! The schedule holds more round keys than fit in %f16..%f62, so the
! last two key pairs are reloaded from [%i3+208..232] mid-flight into
! %f16-%f22 (safe: their original values are consumed by the first
! four dround pairs), and the original %f16-%f22 contents are
! restored from [%i3+16..40] before returning so repeated calls see
! a consistent register image. Final rounds use the _l ("last")
! dround forms. Clobbers %f4.
!----------------------------------------------------------------------
4481 .align 32
4482 _aes256_decrypt_1x:
4483 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4484 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4485 ldd [%i3 + 208], %f16
4486 ldd [%i3 + 216], %f18
4487 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
4488 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
4489 ldd [%i3 + 224], %f20
4490 ldd [%i3 + 232], %f22
4491 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
4492 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4493 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
4494 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
4495 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
4496 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4497 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
4498 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
4499 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
4500 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4501 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
4502 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
4503 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
4504 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4505 .word 0x80cd4444 !aes_dround01 %f52,%f4,%f2,%f0
4506 .word 0x84cdc464 !aes_dround23 %f54,%f4,%f2,%f2
4507 .word 0x88ce4440 !aes_dround01 %f56,%f0,%f2,%f4
4508 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4509 .word 0x80cf4444 !aes_dround01 %f60,%f4,%f2,%f0
4510 .word 0x84cfc464 !aes_dround23 %f62,%f4,%f2,%f2
! Rounds 13/14 use the key pairs reloaded into %f16-%f22 above;
! the 14th uses the "last round" forms (no MixColumns).
4511 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4512 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4513 ldd [%i3 + 16], %f16
4514 ldd [%i3 + 24], %f18
4515 .word 0x80cd04c4 !aes_dround01_l %f20,%f4,%f2,%f0
4516 .word 0x84cd84e4 !aes_dround23_l %f22,%f4,%f2,%f2
4517 ldd [%i3 + 32], %f20
4518 retl
4519 ldd [%i3 + 40], %f22
4520 .type _aes256_decrypt_1x,#function
4521 .size _aes256_decrypt_1x,.-_aes256_decrypt_1x
4522
4523 .align 32
!----------------------------------------------------------------------
! _aes256_decrypt_2x: AES-256-decrypt two independent 128-bit blocks in
! parallel: block A in %f0:%f2, block B in %f4:%f6, with %f8/%f10 as
! the per-block intermediates.  Interleaving the two dependency chains
! hides the aes_dround latency.  %i3 = key-schedule base pointer; key
! handling is identical to _aes256_decrypt_1x: rounds use the resident
! %f16-%f62 keys, rounds 13/14 keys are pulled from [%i3 + 208..232]
! into %f16-%f22 mid-stream, and %f16-%f22 are restored from
! [%i3 + 16..40] before return (last load in the retl delay slot).
! Invoked via "call" from within a register window (see the CBC loop
! above, which also uses the delay slot and condition codes around it).
!----------------------------------------------------------------------
4524 _aes256_decrypt_2x:
4525 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4526 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4527 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4528 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
! %f16/%f18 free after round 1 of both blocks: refill with round-13 keys
4529 ldd [%i3 + 208], %f16
4530 ldd [%i3 + 216], %f18
4531 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
4532 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
4533 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
4534 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
! %f20/%f22 now take the final (round-14) keys
4535 ldd [%i3 + 224], %f20
4536 ldd [%i3 + 232], %f22
! rounds 3-12, two blocks interleaved; keys resident in %f24-%f62
4537 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
4538 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4539 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
4540 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
4541 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
4542 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
4543 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
4544 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
4545 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
4546 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4547 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
4548 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
4549 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
4550 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
4551 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
4552 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
4553 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
4554 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4555 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
4556 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
4557 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
4558 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
4559 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
4560 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
4561 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
4562 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4563 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
4564 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
4565 .word 0x80cd4448 !aes_dround01 %f52,%f8,%f2,%f0
4566 .word 0x84cdc468 !aes_dround23 %f54,%f8,%f2,%f2
4567 .word 0x88cd4c4a !aes_dround01 %f52,%f10,%f6,%f4
4568 .word 0x8ccdcc6a !aes_dround23 %f54,%f10,%f6,%f6
4569 .word 0x90ce4440 !aes_dround01 %f56,%f0,%f2,%f8
4570 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4571 .word 0x94ce4c44 !aes_dround01 %f56,%f4,%f6,%f10
4572 .word 0x8ccecc64 !aes_dround23 %f58,%f4,%f6,%f6
4573 .word 0x80cf4448 !aes_dround01 %f60,%f8,%f2,%f0
4574 .word 0x84cf8468 !aes_dround23 %f62,%f8,%f2,%f2
4575 .word 0x88cf4c4a !aes_dround01 %f60,%f10,%f6,%f4
4576 .word 0x8ccfcc6a !aes_dround23 %f62,%f10,%f6,%f6
! round 13 with the keys loaded from [%i3 + 208..216]
4577 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4578 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4579 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4580 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
! restore %f16/%f18 for the next call
4581 ldd [%i3 + 16], %f16
4582 ldd [%i3 + 24], %f18
! round 14: final "_l" rounds for both blocks
4583 .word 0x80cd04c8 !aes_dround01_l %f20,%f8,%f2,%f0
4584 .word 0x84cd84e8 !aes_dround23_l %f22,%f8,%f2,%f2
4585 .word 0x88cd0cca !aes_dround01_l %f20,%f10,%f6,%f4
4586 .word 0x8ccd8cea !aes_dround23_l %f22,%f10,%f6,%f6
! restore %f20/%f22; second ldd executes in the retl delay slot
4587 ldd [%i3 + 32], %f20
4588 retl
4589 ldd [%i3 + 40], %f22
4590 .type _aes256_decrypt_2x,#function
4591 .size _aes256_decrypt_2x,.-_aes256_decrypt_2x
4592
4593 .align 32
!----------------------------------------------------------------------
! _aes192_decrypt_1x: run one full AES-192 decryption (12 dround pairs)
! on the 128-bit block held in %f0:%f2, using %f4 as the intermediate.
! The entire 192-bit key schedule fits in %f16-%f62 (presumably
! preloaded by the caller -- TODO confirm against callers outside this
! chunk), so unlike the 256-bit variant no key loads from memory are
! needed and no restore is required before return.  The final "_l"
! dround23 executes in the retl delay slot.
!----------------------------------------------------------------------
4594 _aes192_decrypt_1x:
4595 .word 0x88cc0440 !aes_dround01 %f16,%f0,%f2,%f4
4596 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4597 .word 0x80cd0444 !aes_dround01 %f20,%f4,%f2,%f0
4598 .word 0x84cd8464 !aes_dround23 %f22,%f4,%f2,%f2
4599 .word 0x88ce0440 !aes_dround01 %f24,%f0,%f2,%f4
4600 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4601 .word 0x80cf0444 !aes_dround01 %f28,%f4,%f2,%f0
4602 .word 0x84cf8464 !aes_dround23 %f30,%f4,%f2,%f2
4603 .word 0x88c84440 !aes_dround01 %f32,%f0,%f2,%f4
4604 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4605 .word 0x80c94444 !aes_dround01 %f36,%f4,%f2,%f0
4606 .word 0x84c9c464 !aes_dround23 %f38,%f4,%f2,%f2
4607 .word 0x88ca4440 !aes_dround01 %f40,%f0,%f2,%f4
4608 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4609 .word 0x80cb4444 !aes_dround01 %f44,%f4,%f2,%f0
4610 .word 0x84cbc464 !aes_dround23 %f46,%f4,%f2,%f2
4611 .word 0x88cc4440 !aes_dround01 %f48,%f0,%f2,%f4
4612 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4613 .word 0x80cd4444 !aes_dround01 %f52,%f4,%f2,%f0
4614 .word 0x84cdc464 !aes_dround23 %f54,%f4,%f2,%f2
4615 .word 0x88ce4440 !aes_dround01 %f56,%f0,%f2,%f4
4616 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
! round 12: final "_l" forms; dround23_l sits in the retl delay slot
4617 .word 0x80cf44c4 !aes_dround01_l %f60,%f4,%f2,%f0
4618 retl
4619 .word 0x84cfc4e4 !aes_dround23_l %f62,%f4,%f2,%f2
4620 .type _aes192_decrypt_1x,#function
4621 .size _aes192_decrypt_1x,.-_aes192_decrypt_1x
4622
4623 .align 32
!----------------------------------------------------------------------
! _aes192_decrypt_2x: AES-192-decrypt two independent 128-bit blocks in
! parallel: block A in %f0:%f2, block B in %f4:%f6, with %f8/%f10 as
! the per-block intermediates (interleaved to hide aes_dround latency).
! The whole key schedule is register-resident in %f16-%f62, so no
! memory accesses occur at all.  The final "_l" dround23 for block B
! executes in the retl delay slot.
!----------------------------------------------------------------------
4624 _aes192_decrypt_2x:
4625 .word 0x90cc0440 !aes_dround01 %f16,%f0,%f2,%f8
4626 .word 0x84cc8460 !aes_dround23 %f18,%f0,%f2,%f2
4627 .word 0x94cc0c44 !aes_dround01 %f16,%f4,%f6,%f10
4628 .word 0x8ccc8c64 !aes_dround23 %f18,%f4,%f6,%f6
4629 .word 0x80cd0448 !aes_dround01 %f20,%f8,%f2,%f0
4630 .word 0x84cd8468 !aes_dround23 %f22,%f8,%f2,%f2
4631 .word 0x88cd0c4a !aes_dround01 %f20,%f10,%f6,%f4
4632 .word 0x8ccd8c6a !aes_dround23 %f22,%f10,%f6,%f6
4633 .word 0x90ce0440 !aes_dround01 %f24,%f0,%f2,%f8
4634 .word 0x84ce8460 !aes_dround23 %f26,%f0,%f2,%f2
4635 .word 0x94ce0c44 !aes_dround01 %f24,%f4,%f6,%f10
4636 .word 0x8cce8c64 !aes_dround23 %f26,%f4,%f6,%f6
4637 .word 0x80cf0448 !aes_dround01 %f28,%f8,%f2,%f0
4638 .word 0x84cf8468 !aes_dround23 %f30,%f8,%f2,%f2
4639 .word 0x88cf0c4a !aes_dround01 %f28,%f10,%f6,%f4
4640 .word 0x8ccf8c6a !aes_dround23 %f30,%f10,%f6,%f6
4641 .word 0x90c84440 !aes_dround01 %f32,%f0,%f2,%f8
4642 .word 0x84c8c460 !aes_dround23 %f34,%f0,%f2,%f2
4643 .word 0x94c84c44 !aes_dround01 %f32,%f4,%f6,%f10
4644 .word 0x8cc8cc64 !aes_dround23 %f34,%f4,%f6,%f6
4645 .word 0x80c94448 !aes_dround01 %f36,%f8,%f2,%f0
4646 .word 0x84c9c468 !aes_dround23 %f38,%f8,%f2,%f2
4647 .word 0x88c94c4a !aes_dround01 %f36,%f10,%f6,%f4
4648 .word 0x8cc9cc6a !aes_dround23 %f38,%f10,%f6,%f6
4649 .word 0x90ca4440 !aes_dround01 %f40,%f0,%f2,%f8
4650 .word 0x84cac460 !aes_dround23 %f42,%f0,%f2,%f2
4651 .word 0x94ca4c44 !aes_dround01 %f40,%f4,%f6,%f10
4652 .word 0x8ccacc64 !aes_dround23 %f42,%f4,%f6,%f6
4653 .word 0x80cb4448 !aes_dround01 %f44,%f8,%f2,%f0
4654 .word 0x84cbc468 !aes_dround23 %f46,%f8,%f2,%f2
4655 .word 0x88cb4c4a !aes_dround01 %f44,%f10,%f6,%f4
4656 .word 0x8ccbcc6a !aes_dround23 %f46,%f10,%f6,%f6
4657 .word 0x90cc4440 !aes_dround01 %f48,%f0,%f2,%f8
4658 .word 0x84ccc460 !aes_dround23 %f50,%f0,%f2,%f2
4659 .word 0x94cc4c44 !aes_dround01 %f48,%f4,%f6,%f10
4660 .word 0x8ccccc64 !aes_dround23 %f50,%f4,%f6,%f6
4661 .word 0x80cd4448 !aes_dround01 %f52,%f8,%f2,%f0
4662 .word 0x84cdc468 !aes_dround23 %f54,%f8,%f2,%f2
4663 .word 0x88cd4c4a !aes_dround01 %f52,%f10,%f6,%f4
4664 .word 0x8ccdcc6a !aes_dround23 %f54,%f10,%f6,%f6
4665 .word 0x90ce4440 !aes_dround01 %f56,%f0,%f2,%f8
4666 .word 0x84cec460 !aes_dround23 %f58,%f0,%f2,%f2
4667 .word 0x94ce4c44 !aes_dround01 %f56,%f4,%f6,%f10
4668 .word 0x8ccecc64 !aes_dround23 %f58,%f4,%f6,%f6
! round 12: final "_l" rounds; block B's dround23_l is in the delay slot
4669 .word 0x80cf44c8 !aes_dround01_l %f60,%f8,%f2,%f0
4670 .word 0x84cfc4e8 !aes_dround23_l %f62,%f8,%f2,%f2
4671 .word 0x88cf4cca !aes_dround01_l %f60,%f10,%f6,%f4
4672 retl
4673 .word 0x8ccfccea !aes_dround23_l %f62,%f10,%f6,%f6
4674 .type _aes192_decrypt_2x,#function
4675 .size _aes192_decrypt_2x,.-_aes192_decrypt_2x
4676 .asciz "AES for SPARC T4, David S. Miller, Andy Polyakov"
4677 .align 4
4678